Add postponed send loop to QEMU driver

2023-07-03 02:29:01 +03:00
133 changed files with 1452 additions and 5936 deletions
--- a/.gitea/workflows/test.yml
+++ b/.gitea/workflows/test.yml
@@ -622,114 +622,6 @@ jobs:
          echo ""
        done

-  test_heal_csum_32k_dmj:
-    runs-on: ubuntu-latest
-    needs: build
-    container: ${{env.TEST_IMAGE}}:${{github.sha}}
-    steps:
-    - name: Run test
-      id: test
-      timeout-minutes: 10
-      run: TEST_NAME=csum_32k_dmj OSD_ARGS="--data_csum_type crc32c --csum_block_size 32k --inmemory_metadata false --inmemory_journal false" OFFSET_ARGS=$OSD_ARGS /root/vitastor/tests/test_heal.sh
-    - name: Print logs
-      if: always() && steps.test.outcome == 'failure'
-      run: |
-        for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
-          echo "-------- $i --------"
-          cat $i
-          echo ""
-        done
-
-  test_heal_csum_32k_dj:
-    runs-on: ubuntu-latest
-    needs: build
-    container: ${{env.TEST_IMAGE}}:${{github.sha}}
-    steps:
-    - name: Run test
-      id: test
-      timeout-minutes: 10
-      run: TEST_NAME=csum_32k_dj  OSD_ARGS="--data_csum_type crc32c --csum_block_size 32k --inmemory_journal false" OFFSET_ARGS=$OSD_ARGS /root/vitastor/tests/test_heal.sh
-    - name: Print logs
-      if: always() && steps.test.outcome == 'failure'
-      run: |
-        for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
-          echo "-------- $i --------"
-          cat $i
-          echo ""
-        done
-
-  test_heal_csum_32k:
-    runs-on: ubuntu-latest
-    needs: build
-    container: ${{env.TEST_IMAGE}}:${{github.sha}}
-    steps:
-    - name: Run test
-      id: test
-      timeout-minutes: 10
-      run: TEST_NAME=csum_32k     OSD_ARGS="--data_csum_type crc32c --csum_block_size 32k" OFFSET_ARGS=$OSD_ARGS /root/vitastor/tests/test_heal.sh
-    - name: Print logs
-      if: always() && steps.test.outcome == 'failure'
-      run: |
-        for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
-          echo "-------- $i --------"
-          cat $i
-          echo ""
-        done
-
-  test_heal_csum_4k_dmj:
-    runs-on: ubuntu-latest
-    needs: build
-    container: ${{env.TEST_IMAGE}}:${{github.sha}}
-    steps:
-    - name: Run test
-      id: test
-      timeout-minutes: 10
-      run: TEST_NAME=csum_4k_dmj  OSD_ARGS="--data_csum_type crc32c --inmemory_metadata false --inmemory_journal false" OFFSET_ARGS=$OSD_ARGS /root/vitastor/tests/test_heal.sh
-    - name: Print logs
-      if: always() && steps.test.outcome == 'failure'
-      run: |
-        for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
-          echo "-------- $i --------"
-          cat $i
-          echo ""
-        done
-
-  test_heal_csum_4k_dj:
-    runs-on: ubuntu-latest
-    needs: build
-    container: ${{env.TEST_IMAGE}}:${{github.sha}}
-    steps:
-    - name: Run test
-      id: test
-      timeout-minutes: 10
-      run: TEST_NAME=csum_4k_dj   OSD_ARGS="--data_csum_type crc32c --inmemory_journal false" OFFSET_ARGS=$OSD_ARGS /root/vitastor/tests/test_heal.sh
-    - name: Print logs
-      if: always() && steps.test.outcome == 'failure'
-      run: |
-        for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
-          echo "-------- $i --------"
-          cat $i
-          echo ""
-        done
-
-  test_heal_csum_4k:
-    runs-on: ubuntu-latest
-    needs: build
-    container: ${{env.TEST_IMAGE}}:${{github.sha}}
-    steps:
-    - name: Run test
-      id: test
-      timeout-minutes: 10
-      run: TEST_NAME=csum_4k      OSD_ARGS="--data_csum_type crc32c" OFFSET_ARGS=$OSD_ARGS /root/vitastor/tests/test_heal.sh
-    - name: Print logs
-      if: always() && steps.test.outcome == 'failure'
-      run: |
-        for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
-          echo "-------- $i --------"
-          cat $i
-          echo ""
-        done
-
  test_scrub:
    runs-on: ubuntu-latest
    needs: build
--- a/.gitea/workflows/tests-to-yaml.pl
+++ b/.gitea/workflows/tests-to-yaml.pl
@@ -7,8 +7,7 @@ for my $line (<>)
    if ($line =~ /\.\/(test_[^\.]+)/s)
    {
        chomp $line;
-        my $base_name = $1;
-        my $test_name = $base_name;
+        my $test_name = $1;
        my $timeout = 3;
        if ($test_name eq 'test_etcd_fail' || $test_name eq 'test_heal' || $test_name eq 'test_add_osd' ||
            $test_name eq 'test_interrupted_rebalance' || $test_name eq 'test_rebalance_verify')
@@ -17,12 +16,7 @@ for my $line (<>)
        }
        while ($line =~ /([^\s=]+)=(\S+)/gs)
        {
-            if ($1 eq 'TEST_NAME')
-            {
-                $test_name = $base_name.'_'.$2;
-                last;
-            }
-            elsif ($1 eq 'SCHEME' && $2 eq 'ec')
+            if ($1 eq 'SCHEME' && $2 eq 'ec')
            {
                $test_name .= '_ec';
            }
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -2,6 +2,6 @@ cmake_minimum_required(VERSION 2.8.12)

 project(vitastor)

-set(VERSION "1.0.0")
+set(VERSION "0.9.2")

 add_subdirectory(src)
--- a/README-ru.md
+++ b/README-ru.md
@@ -15,7 +15,7 @@ Vitastor архитектурно похож на Ceph, что означает
 и автоматическое распределение данных по любому числу дисков любого размера с настраиваемыми схемами
 избыточности - репликацией или с произвольными кодами коррекции ошибок.

-Vitastor нацелен в первую очередь на SSD и SSD+HDD кластеры с как минимум 10 Гбит/с сетью, поддерживает
+Vitastor нацелен на SSD и SSD+HDD кластеры с как минимум 10 Гбит/с сетью, поддерживает
 TCP и RDMA и на хорошем железе может достигать задержки 4 КБ чтения и записи на уровне ~0.1 мс,
 что примерно в 10 раз быстрее, чем Ceph и другие популярные программные СХД.

--- a/README.md
+++ b/README.md
@@ -14,8 +14,8 @@ Vitastor is architecturally similar to Ceph which means strong consistency,
 primary-replication, symmetric clustering and automatic data distribution over any
 number of drives of any size with configurable redundancy (replication or erasure codes/XOR).

-Vitastor targets primarily SSD and SSD+HDD clusters with at least 10 Gbit/s network,
-supports TCP and RDMA and may achieve 4 KB read and write latency as low as ~0.1 ms
+Vitastor targets SSD and SSD+HDD clusters with at least 10 Gbit/s network, supports
+TCP and RDMA and may achieve 4 KB read and write latency as low as ~0.1 ms
 with proper hardware which is ~10 times faster than other popular SDS's like Ceph
 or internal systems of public clouds.

--- a/csi/Makefile
+++ b/csi/Makefile
@@ -1,4 +1,4 @@
-VERSION ?= v1.0.0
+VERSION ?= v0.9.2

 all: build push

--- a/csi/deploy/004-csi-nodeplugin.yaml
+++ b/csi/deploy/004-csi-nodeplugin.yaml
@@ -49,7 +49,7 @@ spec:
            capabilities:
              add: ["SYS_ADMIN"]
            allowPrivilegeEscalation: true
-          image: vitalif/vitastor-csi:v1.0.0
+          image: vitalif/vitastor-csi:v0.9.2
          args:
            - "--node=$(NODE_ID)"
            - "--endpoint=$(CSI_ENDPOINT)"
--- a/csi/deploy/007-csi-provisioner.yaml
+++ b/csi/deploy/007-csi-provisioner.yaml
@@ -116,7 +116,7 @@ spec:
            privileged: true
            capabilities:
              add: ["SYS_ADMIN"]
-          image: vitalif/vitastor-csi:v1.0.0
+          image: vitalif/vitastor-csi:v0.9.2
          args:
            - "--node=$(NODE_ID)"
            - "--endpoint=$(CSI_ENDPOINT)"
--- a/csi/src/config.go
+++ b/csi/src/config.go
@@ -5,7 +5,7 @@ package vitastor

 const (
    vitastorCSIDriverName    = "csi.vitastor.io"
-    vitastorCSIDriverVersion = "1.0.0"
+    vitastorCSIDriverVersion = "0.9.2"
 )

 // Config struct fills the parameters of request or user input
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,10 +1,10 @@
-vitastor (1.0.0-1) unstable; urgency=medium
+vitastor (0.9.2-1) unstable; urgency=medium

  * Bugfixes

 -- Vitaliy Filippov <vitalif@yourcmc.ru>  Fri, 03 Jun 2022 02:09:44 +0300

-vitastor (1.0.0-1) unstable; urgency=medium
+vitastor (0.9.2-1) unstable; urgency=medium

  * Implement NFS proxy
  * Add documentation
--- a/debian/patched-qemu.Dockerfile
+++ b/debian/patched-qemu.Dockerfile
@@ -28,19 +28,13 @@ RUN apt-get --download-only source qemu

 ADD patches /root/vitastor/patches
 ADD src/qemu_driver.c /root/vitastor/src/qemu_driver.c
-
-#RUN set -e; \
-#    apt-get install -y wget; \
-#    wget -q -O /etc/apt/trusted.gpg.d/vitastor.gpg https://vitastor.io/debian/pubkey.gpg; \
-#    (echo deb http://vitastor.io/debian $REL main > /etc/apt/sources.list.d/vitastor.list); \
-#    (echo "APT::Install-Recommends false;" > /etc/apt/apt.conf) && \
-#    apt-get update; \
-#    apt-get install -y vitastor-client vitastor-client-dev quilt
-
 RUN set -e; \
-    dpkg -i /root/packages/vitastor-$REL/vitastor-client_*.deb /root/packages/vitastor-$REL/vitastor-client-dev_*.deb; \
+    apt-get install -y wget; \
+    wget -q -O /etc/apt/trusted.gpg.d/vitastor.gpg https://vitastor.io/debian/pubkey.gpg; \
+    (echo deb http://vitastor.io/debian $REL main > /etc/apt/sources.list.d/vitastor.list); \
+    (echo "APT::Install-Recommends false;" > /etc/apt/apt.conf) && \
    apt-get update; \
-    apt-get install -y quilt; \
+    apt-get install -y vitastor-client vitastor-client-dev quilt; \
    mkdir -p /root/packages/qemu-$REL; \
    rm -rf /root/packages/qemu-$REL/*; \
    cd /root/packages/qemu-$REL; \
@@ -54,7 +48,7 @@ RUN set -e; \
    quilt add block/vitastor.c; \
    cp /root/vitastor/src/qemu_driver.c block/vitastor.c; \
    quilt refresh; \
-    V=$(head -n1 debian/changelog | perl -pe 's/^.*\((.*?)(~bpo[\d\+]*)?\).*$/$1/')+vitastor3; \
+    V=$(head -n1 debian/changelog | perl -pe 's/^.*\((.*?)(~bpo[\d\+]*)?\).*$/$1/')+vitastor1; \
    DEBEMAIL="Vitaliy Filippov <vitalif@yourcmc.ru>" dch -D $REL -v $V 'Plug Vitastor block driver'; \
    DEB_BUILD_OPTIONS=nocheck dpkg-buildpackage --jobs=auto -sa; \
    rm -rf /root/packages/qemu-$REL/qemu-*/
--- a/debian/vitastor.Dockerfile
+++ b/debian/vitastor.Dockerfile
@@ -35,8 +35,8 @@ RUN set -e -x; \
    mkdir -p /root/packages/vitastor-$REL; \
    rm -rf /root/packages/vitastor-$REL/*; \
    cd /root/packages/vitastor-$REL; \
-    cp -r /root/vitastor vitastor-1.0.0; \
-    cd vitastor-1.0.0; \
+    cp -r /root/vitastor vitastor-0.9.2; \
+    cd vitastor-0.9.2; \
    ln -s /root/fio-build/fio-*/ ./fio; \
    FIO=$(head -n1 fio/debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
    ls /usr/include/linux/raw.h || cp ./debian/raw.h /usr/include/linux/raw.h; \
@@ -49,8 +49,8 @@ RUN set -e -x; \
    rm -rf a b; \
    echo "dep:fio=$FIO" > debian/fio_version; \
    cd /root/packages/vitastor-$REL; \
-    tar --sort=name --mtime='2020-01-01' --owner=0 --group=0 --exclude=debian -cJf vitastor_1.0.0.orig.tar.xz vitastor-1.0.0; \
-    cd vitastor-1.0.0; \
+    tar --sort=name --mtime='2020-01-01' --owner=0 --group=0 --exclude=debian -cJf vitastor_0.9.2.orig.tar.xz vitastor-0.9.2; \
+    cd vitastor-0.9.2; \
    V=$(head -n1 debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
    DEBFULLNAME="Vitaliy Filippov <vitalif@yourcmc.ru>" dch -D $REL -v "$V""$REL" "Rebuild for $REL"; \
    DEB_BUILD_OPTIONS=nocheck dpkg-buildpackage --jobs=auto -sa; \
--- a/docs/config/layout-cluster.en.md
+++ b/docs/config/layout-cluster.en.md
@@ -33,13 +33,12 @@ Size of objects (data blocks) into which all physical and virtual drives
 in Vitastor, affects memory usage, write amplification and I/O load
 distribution effectiveness.

-Recommended default block size is 128 KB for SSD and 1 MB for HDD. In fact,
-it's possible to use 1 MB for SSD too - it will lower memory usage, but
+Recommended default block size is 128 KB for SSD and 4 MB for HDD. In fact,
+it's possible to use 4 MB for SSD too - it will lower memory usage, but
 may increase average WA and reduce linear performance.

 OSD memory usage is roughly (SIZE / BLOCK * 68 bytes) which is roughly
 544 MB per 1 TB of used disk space with the default 128 KB block size.
-With 1 MB it's 8 times lower.

 ## bitmap_granularity

@@ -96,9 +95,8 @@ SSD cache or "media-cache" - for example, a lot of Seagate EXOS drives have
 it (they have internal SSD cache even though it's not stated in datasheets).

 Setting this parameter to "all" or "small" in OSD parameters requires enabling
-[disable_journal_fsync](layout-osd.en.yml#disable_journal_fsync) and
-[disable_meta_fsync](layout-osd.en.yml#disable_meta_fsync), setting it to
-"all" also requires enabling [disable_data_fsync](layout-osd.en.yml#disable_data_fsync).
+disable_journal_fsync and disable_meta_fsync, setting it to "all" also requires
+enabling disable_data_fsync.

 TLDR: For optimal performance, set immediate_commit to "all" if you only use
 SSDs with supercapacitor-based power loss protection (nonvolatile
--- a/docs/config/layout-cluster.ru.md
+++ b/docs/config/layout-cluster.ru.md
@@ -33,14 +33,14 @@ OSD) могут сосуществовать в одном кластере Vita
 настроек, влияет на потребление памяти, объём избыточной записи (write
 amplification) и эффективность распределения нагрузки по OSD.

-Рекомендуемые по умолчанию размеры блока - 128 килобайт для SSD и 1 мегабайт
-для HDD. В принципе, для SSD можно тоже использовать блок размером 1 мегабайт,
+Рекомендуемые по умолчанию размеры блока - 128 килобайт для SSD и 4
+мегабайта для HDD. В принципе, для SSD можно тоже использовать 4 мегабайта,
 это понизит использование памяти, но ухудшит распределение нагрузки и в
 среднем увеличит WA.

 Потребление памяти OSD составляет примерно (РАЗМЕР / БЛОК * 68 байт),
 т.е. примерно 544 МБ памяти на 1 ТБ занятого места на диске при
-стандартном 128 КБ блоке. При 1 МБ блоке памяти нужно в 8 раз меньше.
+стандартном 128 КБ блоке.

 ## bitmap_granularity

@@ -103,9 +103,8 @@ HDD-дисках с внутренним SSD или "медиа" кэшем - н
 указано в спецификациях).

 Указание "all" или "small" в настройках / командной строке OSD требует
-включения [disable_journal_fsync](layout-osd.ru.yml#disable_journal_fsync) и
-[disable_meta_fsync](layout-osd.ru.yml#disable_meta_fsync), значение "all"
-также требует включения [disable_data_fsync](layout-osd.ru.yml#disable_data_fsync).
+включения disable_journal_fsync и disable_meta_fsync, значение "all" также
+требует включения disable_data_fsync.

 Итого, вкратце: для оптимальной производительности установите
 immediate_commit в значение "all", если вы используете в кластере только SSD
--- a/docs/config/layout-osd.en.md
+++ b/docs/config/layout-osd.en.md
@@ -24,8 +24,6 @@ initialization and can't be changed after it without losing data.
 - [disable_journal_fsync](#disable_journal_fsync)
 - [disable_device_lock](#disable_device_lock)
 - [disk_alignment](#disk_alignment)
- [data_csum_type](#data_csum_type)
- [csum_block_size](#csum_block_size)

 ## data_device

@@ -176,43 +174,3 @@ Intel Optane (probably, not tested yet).

 Clients don't need to be aware of disk_alignment, so it's not required to
 put a modified value into etcd key /vitastor/config/global.
-
-## data_csum_type
-
- Type: string
- Default: none
-
-Data checksum type to use. May be "crc32c" or "none". Set to "crc32c" to
-enable data checksums.
-
-## csum_block_size
-
- Type: integer
- Default: 4096
-
-Checksum calculation block size.
-
-Must be equal or a multiple of [bitmap_granularity](layout-cluster.en.md#bitmap_granularity)
-(which is usually 4 KB).
-
-Checksums increase metadata size by 4 bytes per each csum_block_size of data.
-
-Checksums are always a tradeoff:
-1. You either sacrifice +1 GB RAM per 1 TB of data
-2. Or you raise csum_block_size, for example, to 32k and sacrifice
-   50% random write iops due to checksum read-modify-write
-3. Or you turn off [inmemory_metadata](osd.en.md#inmemory_metadata) and
-   sacrifice 50% random read iops due to checksum reads
-
-All-flash clusters usually have enough RAM to use default csum_block_size,
-which uses 1 GB RAM per 1 TB of data. HDD clusters usually don't.
-
-Thus, recommended setups are:
-1. All-flash, 1 GB RAM per 1 TB data: default (csum_block_size=4k)
-2. All-flash, less RAM: csum_block_size=4k + inmemory_metadata=false
-3. Hybrid HDD+SSD: csum_block_size=4k + inmemory_metadata=false
-4. HDD-only, faster random read: csum_block_size=32k
-5. HDD-only, faster random write: csum_block_size=4k +
-   inmemory_metadata=false + meta_io=cached
-
-See also [meta_io](osd.en.md#meta_io).
--- a/docs/config/layout-osd.ru.md
+++ b/docs/config/layout-osd.ru.md
@@ -25,8 +25,6 @@
 - [disable_journal_fsync](#disable_journal_fsync)
 - [disable_device_lock](#disable_device_lock)
 - [disk_alignment](#disk_alignment)
- [data_csum_type](#data_csum_type)
- [csum_block_size](#csum_block_size)

 ## data_device

@@ -185,47 +183,3 @@ journal_block_size и meta_block_size. Однако единственные SSD

 Клиентам не обязательно знать про disk_alignment, так что помещать значение
 этого параметра в etcd в /vitastor/config/global не нужно.
-
-## data_csum_type
-
- Тип: строка
- Значение по умолчанию: none
-
-Тип используемых OSD контрольных сумм данных. Может быть "crc32c" или "none".
-Установите в "crc32c", чтобы включить расчёт и проверку контрольных сумм данных.
-
-Следует понимать, что контрольные суммы в зависимости от размера блока их
-расчёта либо увеличивают потребление памяти, либо снижают производительность.
-Подробнее смотрите в описании параметра [csum_block_size](#csum_block_size).
-
-## csum_block_size
-
- Тип: целое число
- Значение по умолчанию: 4096
-
-Размер блока расчёта контрольных сумм.
-
-Должен быть равен или кратен [bitmap_granularity](layout-cluster.ru.md#bitmap_granularity)
-(который обычно равен 4 КБ).
-
-Контрольные суммы увеличивают размер метаданных на 4 байта на каждые
-csum_block_size данных.
-
-Контрольные суммы - это всегда компромисс:
-1. Вы либо жертвуете потреблением +1 ГБ памяти на 1 ТБ дискового пространства
-2. Либо вы повышаете csum_block_size до, скажем, 32k и жертвуете 50%
-   скорости случайной записи из-за цикла чтения-изменения-записи для расчёта
-   новых контрольных сумм
-3. Либо вы отключаете [inmemory_metadata](osd.ru.md#inmemory_metadata) и
-   жертвуете 50% скорости случайного чтения из-за чтения контрольных сумм
-   с диска
-
-Таким образом, рекомендуются следующие варианты настроек:
-1. All-flash, 1 ГБ памяти на 1 ТБ данных: по умолчанию (csum_block_size=4k)
-2. All-flash, меньше памяти: csum_block_size=4k + inmemory_metadata=false
-3. Гибридные HDD+SSD: csum_block_size=4k + inmemory_metadata=false
-4. Только HDD, быстрее случайное чтение: csum_block_size=32k
-5. Только HDD, быстрее случайная запись: csum_block_size=4k +
-   inmemory_metadata=false + meta_io=cached
-
-Смотрите также [meta_io](osd.ru.md#meta_io).
--- a/docs/config/osd.en.md
+++ b/docs/config/osd.en.md
@@ -31,9 +31,6 @@ them, even without restarting by updating configuration in etcd.
 - [max_flusher_count](#max_flusher_count)
 - [inmemory_metadata](#inmemory_metadata)
 - [inmemory_journal](#inmemory_journal)
- [data_io](#data_io)
- [meta_io](#meta_io)
- [journal_io](#journal_io)
 - [journal_sector_buffer_count](#journal_sector_buffer_count)
 - [journal_no_same_sector_overwrites](#journal_no_same_sector_overwrites)
 - [throttle_small_writes](#throttle_small_writes)
@@ -258,60 +255,6 @@ is typically very small because it's sufficient to have 16-32 MB journal
 for SSD OSDs. However, in theory it's possible that you'll want to turn it
 off for hybrid (HDD+SSD) OSDs with large journals on quick devices.

-## data_io
-
- Type: string
- Default: direct
-
-I/O mode for *data*. One of "direct", "cached" or "directsync". Corresponds
-to O_DIRECT, O_SYNC and O_DIRECT|O_SYNC, respectively.
-
-Choose "cached" to use Linux page cache. This may improve read performance
-for hot data and slower disks - HDDs and maybe SATA SSDs - but will slightly
-decrease write performance for fast disks because page cache is an overhead
-itself.
-
-Choose "directsync" to use [immediate_commit](layout-cluster.ru.md#immediate_commit)
-(which requires disable_data_fsync) with drives having write-back cache
-which can't be turned off, for example, Intel Optane. Also note that *some*
-desktop SSDs (for example, HP EX950) may ignore O_SYNC thus making
-disable_data_fsync unsafe even with "directsync".
-
-## meta_io
-
- Type: string
- Default: direct
-
-I/O mode for *metadata*. One of "direct", "cached" or "directsync".
-
-"cached" may improve read performance, but only under the following conditions:
-1. your drives are relatively slow (HDD, SATA SSD), and
-2. checksums are enabled, and
-3. [inmemory_metadata](#inmemory_metadata) is disabled.
-Under all these conditions, metadata blocks are read from disk on every
-read request to verify checksums and caching them may reduce this extra
-read load. Without (3) metadata is never read from the disk after starting,
-and without (2) metadata blocks are read from disk only during journal
-flushing.
-
-"directsync" is the same as above.
-
-If the same device is used for data and metadata, meta_io by default is set
-to the same value as [data_io](#data_io).
-
-## journal_io
-
- Type: string
- Default: direct
-
-I/O mode for *journal*. One of "direct", "cached" or "directsync".
-
-Here, "cached" may only improve read performance for recent writes and
-only if [inmemory_journal](#inmemory_journal) is turned off.
-
-If the same device is used for metadata and journal, journal_io by default
-is set to the same value as [meta_io](#meta_io).
-
 ## journal_sector_buffer_count

 - Type: integer
--- a/docs/config/osd.ru.md
+++ b/docs/config/osd.ru.md
@@ -32,9 +32,6 @@
 - [max_flusher_count](#max_flusher_count)
 - [inmemory_metadata](#inmemory_metadata)
 - [inmemory_journal](#inmemory_journal)
- [data_io](#data_io)
- [meta_io](#meta_io)
- [journal_io](#journal_io)
 - [journal_sector_buffer_count](#journal_sector_buffer_count)
 - [journal_no_same_sector_overwrites](#journal_no_same_sector_overwrites)
 - [throttle_small_writes](#throttle_small_writes)
@@ -266,63 +263,6 @@ Flusher - это микро-поток (корутина), которая коп
 параметра может оказаться полезным для гибридных OSD (HDD+SSD) с большими
 журналами, расположенными на быстром по сравнению с HDD устройстве.

-## data_io
-
- Тип: строка
- Значение по умолчанию: direct
-
-Режим ввода-вывода для *данных*. Одно из значений "direct", "cached" или
-"directsync", означающих O_DIRECT, O_SYNC и O_DIRECT|O_SYNC, соответственно.
-
-Выберите "cached", чтобы использовать системный кэш Linux (page cache) при
-чтении и записи. Это может улучшить скорость чтения горячих данных с
-относительно медленных дисков - HDD и, возможно, SATA SSD - но немного
-снижает производительность записи для быстрых дисков, так как кэш сам по
-себе тоже добавляет накладные расходы.
-
-Выберите "directsync", если хотите задействовать
-[immediate_commit](layout-cluster.ru.md#immediate_commit) (требующий
-включенияd disable_data_fsync) на дисках с неотключаемым кэшем. Пример таких
-дисков - Intel Optane. При этом также стоит иметь в виду, что *некоторые*
-настольные SSD (например, HP EX950) игнорируют флаг O_SYNC, делая отключение
-fsync небезопасным даже с режимом "directsync".
-
-## meta_io
-
- Тип: строка
- Значение по умолчанию: direct
-
-Режим ввода-вывода для *метаданных*. Одно из значений "direct", "cached" или
-"directsync".
-
-"cached" может улучшить скорость чтения, если:
-1. у вас медленные диски (HDD, SATA SSD)
-2. контрольные суммы включены
-3. параметр [inmemory_metadata](#inmemory_metadata) отключён.
-При этих условиях блоки метаданных читаются с диска при каждом запросе чтения
-для проверки контрольных сумм и их кэширование может снизить дополнительную
-нагрузку на диск. Без (3) метаданные никогда не читаются с диска после
-запуска OSD, а без (2) блоки метаданных читаются только при сбросе журнала.
-
-Если одно и то же устройство используется для данных и метаданных, режим
-ввода-вывода метаданных по умолчанию устанавливается равным [data_io](#data_io).
-
-## journal_io
-
- Тип: строка
- Значение по умолчанию: direct
-
-Режим ввода-вывода для *журнала*. Одно из значений "direct", "cached" или
-"directsync".
-
-Здесь "cached" может улучшить скорость чтения только недавно записанных
-данных и только если параметр [inmemory_journal](#inmemory_journal)
-отключён.
-
-Если одно и то же устройство используется для метаданных и журнала,
-режим ввода-вывода журнала по умолчанию устанавливается равным
-[meta_io](#meta_io).
-
 ## journal_sector_buffer_count

 - Тип: целое число
--- a/docs/config/pool.en.md
+++ b/docs/config/pool.en.md
@@ -205,8 +205,9 @@ This parameter usually doesn't require to be changed.
 - Default: 131072

 Block size for this pool. The value from /vitastor/config/global is used when
-unspecified. Only OSDs with matching block_size are used for each pool. If you
-want to further restrict OSDs for the pool, use [osd_tags](#osd_tags).
+unspecified. If your cluster has OSDs with different block sizes, the pool must
+be restricted by [osd_tags](#osd_tags) to only include OSDs with matching block
+size.

 Read more about this parameter in [Cluster-Wide Disk Layout Parameters](layout-cluster.en.md#block_size).

@@ -215,9 +216,10 @@ Read more about this parameter in [Cluster-Wide Disk Layout Parameters](layout-c
 - Type: integer
 - Default: 4096

-"Sector" size of virtual disks in this pool. The value from /vitastor/config/global
-is used when unspecified. Similarly to block_size, only OSDs with matching
-bitmap_granularity are used for each pool.
+"Sector" size of virtual disks in this pool. The value from
+/vitastor/config/global is used when unspecified. Similar to block_size, the
+pool must be restricted by [osd_tags](#osd_tags) to only include OSDs with
+matching bitmap_granularity.

 Read more about this parameter in [Cluster-Wide Disk Layout Parameters](layout-cluster.en.md#bitmap_granularity).

@@ -227,11 +229,10 @@ Read more about this parameter in [Cluster-Wide Disk Layout Parameters](layout-c
 - Default: none

 Immediate commit setting for this pool. The value from /vitastor/config/global
-is used when unspecified. Similarly to block_size, only OSDs with compatible
-bitmap_granularity are used for each pool. "Compatible" means that a pool with
-non-immediate commit will use OSDs with immediate commit enabled, but not vice
-versa. I.e., pools with "none" use all OSDs, pools with "small" only use OSDs
-with "all" or "small", and pools with "all" only use OSDs with "all".
+is used when unspecified. Similar to block_size, the pool must be restricted by
+[osd_tags](#osd_tags) to only include OSDs with compatible immediate_commit.
+Compatible means that a pool with non-immediate commit will work with OSDs with
+immediate commit enabled, but not vice versa.

 Read more about this parameter in [Cluster-Wide Disk Layout Parameters](layout-cluster.en.md#immediate_commit).

--- a/docs/config/pool.ru.md
+++ b/docs/config/pool.ru.md
@@ -208,9 +208,8 @@ PG в Vitastor эферемерны, то есть вы можете менят

 Размер блока для данного пула. Если не задан, используется значение из
 /vitastor/config/global. Если в вашем кластере есть OSD с разными размерами
-блока, пул будет использовать только OSD с размером блока, равным размеру блока
-пула. Если вы хотите сильнее ограничить набор используемых для пула OSD -
-используйте [osd_tags](#osd_tags).
+блока, пул должен быть ограничен только OSD, блок которых равен блоку пула,
+с помощью [osd_tags](#osd_tags).

 О самом параметре читайте в разделе [Дисковые параметры уровня кластера](layout-cluster.ru.md#block_size).

@@ -220,8 +219,9 @@ PG в Vitastor эферемерны, то есть вы можете менят
 - По умолчанию: 4096

 Размер "сектора" виртуальных дисков в данном пуле. Если не задан, используется
-значение из /vitastor/config/global. Аналогично block_size, каждый пул будет
-использовать только OSD с совпадающей с пулом настройкой bitmap_granularity.
+значение из /vitastor/config/global. Аналогично block_size, пул должен быть
+ограничен OSD со значением bitmap_granularity, равным значению пула, с помощью
+[osd_tags](#osd_tags).

 О самом параметре читайте в разделе [Дисковые параметры уровня кластера](layout-cluster.ru.md#bitmap_granularity).

@@ -231,13 +231,11 @@ PG в Vitastor эферемерны, то есть вы можете менят
 - По умолчанию: none

 Настройка мгновенного коммита для данного пула. Если не задана, используется
-значение из /vitastor/config/global. Аналогично block_size, каждый пул будет
-использовать только OSD с *совместимыми* настройками immediate_commit.
-"Совместимыми" означает, что пул с отключенным мгновенным коммитом будет
-использовать OSD с включённым мгновенным коммитом, но не наоборот. То есть,
-пул со значением "none" будет использовать все OSD, пул со "small" будет
-использовать OSD с "all" или "small", а пул с "all" будет использовать только
-OSD с "all".
+значение из /vitastor/config/global. Аналогично block_size, пул должен быть
+ограничен OSD со значением immediate_commit, совместимым со значением пула, с
+помощью [osd_tags](#osd_tags). Совместимость означает, что пул с отключенным
+мгновенным коммитом может работать на OSD с включённым мгновенным коммитом, но
+не наоборот.

 О самом параметре читайте в разделе [Дисковые параметры уровня кластера](layout-cluster.ru.md#immediate_commit).

--- a/docs/config/src/layout-cluster.yml
+++ b/docs/config/src/layout-cluster.yml
@@ -7,27 +7,26 @@
    in Vitastor, affects memory usage, write amplification and I/O load
    distribution effectiveness.

-    Recommended default block size is 128 KB for SSD and 1 MB for HDD. In fact,
-    it's possible to use 1 MB for SSD too - it will lower memory usage, but
+    Recommended default block size is 128 KB for SSD and 4 MB for HDD. In fact,
+    it's possible to use 4 MB for SSD too - it will lower memory usage, but
    may increase average WA and reduce linear performance.

    OSD memory usage is roughly (SIZE / BLOCK * 68 bytes) which is roughly
    544 MB per 1 TB of used disk space with the default 128 KB block size.
-    With 1 MB it's 8 times lower.
  info_ru: |
    Размер объектов (блоков данных), на которые делятся физические и виртуальные
    диски в Vitastor (в рамках каждого пула). Одна из ключевых на данный момент
    настроек, влияет на потребление памяти, объём избыточной записи (write
    amplification) и эффективность распределения нагрузки по OSD.

-    Рекомендуемые по умолчанию размеры блока - 128 килобайт для SSD и 1 мегабайт
-    для HDD. В принципе, для SSD можно тоже использовать блок размером 1 мегабайт,
+    Рекомендуемые по умолчанию размеры блока - 128 килобайт для SSD и 4
+    мегабайта для HDD. В принципе, для SSD можно тоже использовать 4 мегабайта,
    это понизит использование памяти, но ухудшит распределение нагрузки и в
    среднем увеличит WA.

    Потребление памяти OSD составляет примерно (РАЗМЕР / БЛОК * 68 байт),
    т.е. примерно 544 МБ памяти на 1 ТБ занятого места на диске при
-    стандартном 128 КБ блоке. При 1 МБ блоке памяти нужно в 8 раз меньше.
+    стандартном 128 КБ блоке.
 - name: bitmap_granularity
  type: int
  default: 4096
@@ -87,9 +86,8 @@
    it (they have internal SSD cache even though it's not stated in datasheets).

    Setting this parameter to "all" or "small" in OSD parameters requires enabling
-    [disable_journal_fsync](layout-osd.en.yml#disable_journal_fsync) and
-    [disable_meta_fsync](layout-osd.en.yml#disable_meta_fsync), setting it to
-    "all" also requires enabling [disable_data_fsync](layout-osd.en.yml#disable_data_fsync).
+    disable_journal_fsync and disable_meta_fsync, setting it to "all" also requires
+    enabling disable_data_fsync.

    TLDR: For optimal performance, set immediate_commit to "all" if you only use
    SSDs with supercapacitor-based power loss protection (nonvolatile
@@ -141,9 +139,8 @@
    указано в спецификациях).

    Указание "all" или "small" в настройках / командной строке OSD требует
-    включения [disable_journal_fsync](layout-osd.ru.yml#disable_journal_fsync) и
-    [disable_meta_fsync](layout-osd.ru.yml#disable_meta_fsync), значение "all"
-    также требует включения [disable_data_fsync](layout-osd.ru.yml#disable_data_fsync).
+    включения disable_journal_fsync и disable_meta_fsync, значение "all" также
+    требует включения disable_data_fsync.

    Итого, вкратце: для оптимальной производительности установите
    immediate_commit в значение "all", если вы используете в кластере только SSD
--- a/docs/config/src/layout-osd.yml
+++ b/docs/config/src/layout-osd.yml
@@ -204,73 +204,3 @@

    Клиентам не обязательно знать про disk_alignment, так что помещать значение
    этого параметра в etcd в /vitastor/config/global не нужно.
- name: data_csum_type
-  type: string
-  default: none
-  info: |
-    Data checksum type to use. May be "crc32c" or "none". Set to "crc32c" to
-    enable data checksums.
-  info_ru: |
-    Тип используемых OSD контрольных сумм данных. Может быть "crc32c" или "none".
-    Установите в "crc32c", чтобы включить расчёт и проверку контрольных сумм данных.
-
-    Следует понимать, что контрольные суммы в зависимости от размера блока их
-    расчёта либо увеличивают потребление памяти, либо снижают производительность.
-    Подробнее смотрите в описании параметра [csum_block_size](#csum_block_size).
- name: csum_block_size
-  type: int
-  default: 4096
-  info: |
-    Checksum calculation block size.
-
-    Must be equal or a multiple of [bitmap_granularity](layout-cluster.en.md#bitmap_granularity)
-    (which is usually 4 KB).
-
-    Checksums increase metadata size by 4 bytes per each csum_block_size of data.
-
-    Checksums are always a tradeoff:
-    1. You either sacrifice +1 GB RAM per 1 TB of data
-    2. Or you raise csum_block_size, for example, to 32k and sacrifice
-       50% random write iops due to checksum read-modify-write
-    3. Or you turn off [inmemory_metadata](osd.en.md#inmemory_metadata) and
-       sacrifice 50% random read iops due to checksum reads
-
-    All-flash clusters usually have enough RAM to use default csum_block_size,
-    which uses 1 GB RAM per 1 TB of data. HDD clusters usually don't.
-
-    Thus, recommended setups are:
-    1. All-flash, 1 GB RAM per 1 TB data: default (csum_block_size=4k)
-    2. All-flash, less RAM: csum_block_size=4k + inmemory_metadata=false
-    3. Hybrid HDD+SSD: csum_block_size=4k + inmemory_metadata=false
-    4. HDD-only, faster random read: csum_block_size=32k
-    5. HDD-only, faster random write: csum_block_size=4k +
-       inmemory_metadata=false + meta_io=cached
-
-    See also [meta_io](osd.en.md#meta_io).
-  info_ru: |
-    Размер блока расчёта контрольных сумм.
-
-    Должен быть равен или кратен [bitmap_granularity](layout-cluster.ru.md#bitmap_granularity)
-    (который обычно равен 4 КБ).
-
-    Контрольные суммы увеличивают размер метаданных на 4 байта на каждые
-    csum_block_size данных.
-
-    Контрольные суммы - это всегда компромисс:
-    1. Вы либо жертвуете потреблением +1 ГБ памяти на 1 ТБ дискового пространства
-    2. Либо вы повышаете csum_block_size до, скажем, 32k и жертвуете 50%
-       скорости случайной записи из-за цикла чтения-изменения-записи для расчёта
-       новых контрольных сумм
-    3. Либо вы отключаете [inmemory_metadata](osd.ru.md#inmemory_metadata) и
-       жертвуете 50% скорости случайного чтения из-за чтения контрольных сумм
-       с диска
-
-    Таким образом, рекомендуются следующие варианты настроек:
-    1. All-flash, 1 ГБ памяти на 1 ТБ данных: по умолчанию (csum_block_size=4k)
-    2. All-flash, меньше памяти: csum_block_size=4k + inmemory_metadata=false
-    3. Гибридные HDD+SSD: csum_block_size=4k + inmemory_metadata=false
-    4. Только HDD, быстрее случайное чтение: csum_block_size=32k
-    5. Только HDD, быстрее случайная запись: csum_block_size=4k +
-       inmemory_metadata=false + meta_io=cached
-
-    Смотрите также [meta_io](osd.ru.md#meta_io).
--- a/docs/config/src/osd.yml
+++ b/docs/config/src/osd.yml
@@ -260,96 +260,6 @@
    достаточно 16- или 32-мегабайтного журнала. Однако в теории отключение
    параметра может оказаться полезным для гибридных OSD (HDD+SSD) с большими
    журналами, расположенными на быстром по сравнению с HDD устройстве.
- name: data_io
-  type: string
-  default: direct
-  info: |
-    I/O mode for *data*. One of "direct", "cached" or "directsync". Corresponds
-    to O_DIRECT, O_SYNC and O_DIRECT|O_SYNC, respectively.
-
-    Choose "cached" to use Linux page cache. This may improve read performance
-    for hot data and slower disks - HDDs and maybe SATA SSDs - but will slightly
-    decrease write performance for fast disks because page cache is an overhead
-    itself.
-
-    Choose "directsync" to use [immediate_commit](layout-cluster.ru.md#immediate_commit)
-    (which requires disable_data_fsync) with drives having write-back cache
-    which can't be turned off, for example, Intel Optane. Also note that *some*
-    desktop SSDs (for example, HP EX950) may ignore O_SYNC thus making
-    disable_data_fsync unsafe even with "directsync".
-  info_ru: |
-    Режим ввода-вывода для *данных*. Одно из значений "direct", "cached" или
-    "directsync", означающих O_DIRECT, O_SYNC и O_DIRECT|O_SYNC, соответственно.
-
-    Выберите "cached", чтобы использовать системный кэш Linux (page cache) при
-    чтении и записи. Это может улучшить скорость чтения горячих данных с
-    относительно медленных дисков - HDD и, возможно, SATA SSD - но немного
-    снижает производительность записи для быстрых дисков, так как кэш сам по
-    себе тоже добавляет накладные расходы.
-
-    Выберите "directsync", если хотите задействовать
-    [immediate_commit](layout-cluster.ru.md#immediate_commit) (требующий
-    включенияd disable_data_fsync) на дисках с неотключаемым кэшем. Пример таких
-    дисков - Intel Optane. При этом также стоит иметь в виду, что *некоторые*
-    настольные SSD (например, HP EX950) игнорируют флаг O_SYNC, делая отключение
-    fsync небезопасным даже с режимом "directsync".
- name: meta_io
-  type: string
-  default: direct
-  info: |
-    I/O mode for *metadata*. One of "direct", "cached" or "directsync".
-
-    "cached" may improve read performance, but only under the following conditions:
-    1. your drives are relatively slow (HDD, SATA SSD), and
-    2. checksums are enabled, and
-    3. [inmemory_metadata](#inmemory_metadata) is disabled.
-    Under all these conditions, metadata blocks are read from disk on every
-    read request to verify checksums and caching them may reduce this extra
-    read load. Without (3) metadata is never read from the disk after starting,
-    and without (2) metadata blocks are read from disk only during journal
-    flushing.
-
-    "directsync" is the same as above.
-
-    If the same device is used for data and metadata, meta_io by default is set
-    to the same value as [data_io](#data_io).
-  info_ru: |
-    Режим ввода-вывода для *метаданных*. Одно из значений "direct", "cached" или
-    "directsync".
-
-    "cached" может улучшить скорость чтения, если:
-    1. у вас медленные диски (HDD, SATA SSD)
-    2. контрольные суммы включены
-    3. параметр [inmemory_metadata](#inmemory_metadata) отключён.
-    При этих условиях блоки метаданных читаются с диска при каждом запросе чтения
-    для проверки контрольных сумм и их кэширование может снизить дополнительную
-    нагрузку на диск. Без (3) метаданные никогда не читаются с диска после
-    запуска OSD, а без (2) блоки метаданных читаются только при сбросе журнала.
-
-    Если одно и то же устройство используется для данных и метаданных, режим
-    ввода-вывода метаданных по умолчанию устанавливается равным [data_io](#data_io).
- name: journal_io
-  type: string
-  default: direct
-  info: |
-    I/O mode for *journal*. One of "direct", "cached" or "directsync".
-
-    Here, "cached" may only improve read performance for recent writes and
-    only if [inmemory_journal](#inmemory_journal) is turned off.
-
-    If the same device is used for metadata and journal, journal_io by default
-    is set to the same value as [meta_io](#meta_io).
-  info_ru: |
-    Режим ввода-вывода для *журнала*. Одно из значений "direct", "cached" или
-    "directsync".
-
-    Здесь "cached" может улучшить скорость чтения только недавно записанных
-    данных и только если параметр [inmemory_journal](#inmemory_journal)
-    отключён.
-
-    Если одно и то же устройство используется для метаданных и журнала,
-    режим ввода-вывода журнала по умолчанию устанавливается равным
-    [meta_io](#meta_io).
 - name: journal_sector_buffer_count
  type: int
  default: 32
--- a/docs/installation/packages.en.md
+++ b/docs/installation/packages.en.md
@@ -14,8 +14,6 @@
  - Debian 12 (Bookworm/Sid): `deb https://vitastor.io/debian bookworm main`
  - Debian 11 (Bullseye): `deb https://vitastor.io/debian bullseye main`
  - Debian 10 (Buster): `deb https://vitastor.io/debian buster main`
-  - Add `-oldstable` to bookworm/bullseye/buster in this line to install the last
-    stable version from 0.9.x branch instead of 1.x
 - For Debian 10 (Buster) also enable backports repository:
  `deb http://deb.debian.org/debian buster-backports main`
 - Install packages: `apt update; apt install vitastor lp-solve etcd linux-image-amd64 qemu`
--- a/docs/installation/packages.ru.md
+++ b/docs/installation/packages.ru.md
@@ -14,8 +14,6 @@
  - Debian 12 (Bookworm/Sid): `deb https://vitastor.io/debian bookworm main`
  - Debian 11 (Bullseye): `deb https://vitastor.io/debian bullseye main`
  - Debian 10 (Buster): `deb https://vitastor.io/debian buster main`
-  - Добавьте `-oldstable` к слову bookworm/bullseye/buster в этой строке, чтобы
-    установить последнюю стабильную версию из ветки 0.9.x вместо 1.x
 - Для Debian 10 (Buster) также включите репозиторий backports:
  `deb http://deb.debian.org/debian buster-backports main`
 - Установите пакеты: `apt update; apt install vitastor lp-solve etcd linux-image-amd64 qemu`
--- a/docs/installation/proxmox.ru.md
+++ b/docs/installation/proxmox.ru.md
@@ -1,4 +1,4 @@
-[Документация](../../README-ru.md#документация) → Установка → Proxmox VE
+[Документация](../../README-ru.md#документация) → Установка → Proxmox

 -----

--- a/docs/installation/source.en.md
+++ b/docs/installation/source.en.md
@@ -21,7 +21,7 @@

 ## Basic instructions

-Download source, for example using git: `git clone --recurse-submodules https://git.yourcmc.ru/vitalif/vitastor/`
+Download source, for example using git: `git clone --recurse-submodules https://yourcmc.ru/git/vitalif/vitastor/`

 Get `fio` source and symlink it into `<vitastor>/fio`. If you don't want to build fio engine,
 you can disable it by passing `-DWITH_FIO=no` to cmake.
@@ -41,7 +41,7 @@ It's recommended to build the QEMU driver (qemu_driver.c) in-tree, as a part of
 QEMU build process. To do that:
 - Install vitastor client library headers (from source or from vitastor-client-dev package)
 - Take a corresponding patch from `patches/qemu-*-vitastor.patch` and apply it to QEMU source
- Copy `src/qemu_driver.c` to QEMU source directory as `block/vitastor.c`
+- Copy `src/qemu_driver.c` to QEMU source directory as `block/block-vitastor.c`
 - Build QEMU as usual

 But it is also possible to build it out-of-tree. To do that:
--- a/docs/installation/source.ru.md
+++ b/docs/installation/source.ru.md
@@ -21,7 +21,7 @@

 ## Базовая инструкция

-Скачайте исходные коды, например, из git: `git clone --recurse-submodules https://git.yourcmc.ru/vitalif/vitastor/`
+Скачайте исходные коды, например, из git: `git clone --recurse-submodules https://yourcmc.ru/git/vitalif/vitastor/`

 Скачайте исходные коды пакета `fio`, распакуйте их и создайте символическую ссылку на них
 в директории исходников Vitastor: `<vitastor>/fio`. Либо, если вы не хотите собирать плагин fio,
@@ -41,7 +41,7 @@ cmake .. && make -j8 install
 Драйвер QEMU (qemu_driver.c) рекомендуется собирать вместе с самим QEMU. Для этого:
 - Установите заголовки клиентской библиотеки Vitastor (из исходников или из пакета vitastor-client-dev)
 - Возьмите соответствующий патч из `patches/qemu-*-vitastor.patch` и примените его к исходникам QEMU
- Скопируйте [src/qemu_driver.c](../../src/qemu_driver.c) в директорию исходников QEMU как `block/vitastor.c`
+- Скопируйте [src/qemu_driver.c](../../src/qemu_driver.c) в директорию исходников QEMU как `block/block-vitastor.c`
 - Соберите QEMU как обычно

 Однако в целях отладки драйвер также можно собирать отдельно от QEMU. Для этого:
@@ -60,7 +60,7 @@ cmake .. && make -j8 install
      * Для QEMU 2.0+: `<qemu>/qapi-types.h` &rarr; `<vitastor>/qemu/b/qemu/qapi-types.h`
   - `config-host.h` и `qapi` нужны, т.к. в них содержатся автогенерируемые заголовки
 - Сконфигурируйте cmake Vitastor с `WITH_QEMU=yes` (`cmake .. -DWITH_QEMU=yes`) и, если вы
-  используете RHEL-подобный дистрибутив, также с `QEMU_PLUGINDIR=qemu-kvm`.
+  используете RHEL-подобный дистрибутив, также с `QEMU_PLUGINDIR=qemu-kvm`.
 - После этого в процессе сборки Vitastor также будет собираться подходящий для вашей
  версии QEMU `block-vitastor.so`.
 - Таким образом можно использовать драйвер даже с немодифицированным QEMU, но в этом случае
--- a/docs/intro/features.en.md
+++ b/docs/intro/features.en.md
@@ -29,8 +29,7 @@
 - Snapshots and copy-on-write image clones
 - [Write throttling to smooth random write workloads in SSD+HDD configurations](../config/osd.en.md#throttle_small_writes)
 - [RDMA/RoCEv2 support via libibverbs](../config/network.en.md#rdma_device)
- [Scrubbing](../config/osd.en.md#auto_scrub) (verification of copies)
- [Checksums](../config/layout-osd.en.md#data_csum_type)
+- [Scrubbing without checksums](../config/osd.en.md#auto_scrub) (verification of copies)

 ## Plugins and tools

@@ -56,6 +55,7 @@ The following features are planned for the future:
 - iSCSI proxy
 - Multi-threaded client
 - Faster failover
+- Checksums
 - Tiered storage (SSD caching)
 - NVDIMM support
 - Compression (possibly)
--- a/docs/intro/features.ru.md
+++ b/docs/intro/features.ru.md
@@ -31,8 +31,7 @@
 - Снапшоты и copy-on-write клоны
 - [Сглаживание производительности случайной записи в SSD+HDD конфигурациях](../config/osd.ru.md#throttle_small_writes)
 - [Поддержка RDMA/RoCEv2 через libibverbs](../config/network.ru.md#rdma_device)
- [Фоновая проверка целостности](../config/osd.ru.md#auto_scrub) (сверка копий)
- [Контрольные суммы](../config/layout-osd.ru.md#data_csum_type)
+- [Фоновая проверка целостности без контрольных сумм](../config/osd.ru.md#auto_scrub) (сверка копий)

 ## Драйверы и инструменты

@@ -56,6 +55,7 @@
 - iSCSI-прокси
 - Многопоточный клиент
 - Более быстрое переключение при отказах
+- Контрольные суммы
 - Поддержка SSD-кэширования (tiered storage)
 - Поддержка NVDIMM
 - Возможно, сжатие
--- a/docs/intro/quickstart.en.md
+++ b/docs/intro/quickstart.en.md
@@ -7,7 +7,6 @@
 # Quick Start

 - [Preparation](#preparation)
- [Recommended drives](#recommended-drives)
 - [Configure monitors](#configure-monitors)
 - [Configure OSDs](#configure-osds)
 - [Create a pool](#create-a-pool)
@@ -20,20 +19,10 @@
 - Get some SATA or NVMe SSDs with capacitors (server-grade drives). You can use desktop SSDs
  with lazy fsync, but prepare for inferior single-thread latency. Read more about capacitors
  [here](../config/layout-cluster.en.md#immediate_commit).
- If you want to use HDDs, get modern HDDs with Media Cache or SSD Cache: HGST Ultrastar,
-  Toshiba MG08, Seagate EXOS or something similar. If your drives don't have such cache then
-  you also need small SSDs for journal and metadata (even 2 GB per 1 TB of HDD space is enough).
 - Get a fast network (at least 10 Gbit/s). Something like Mellanox ConnectX-4 with RoCEv2 is ideal.
 - Disable CPU powersaving: `cpupower idle-set -D 0 && cpupower frequency-set -g performance`.
 - [Install Vitastor packages](../installation/packages.en.md).

-## Recommended drives
-
- SATA SSD: Micron 5100/5200/5300/5400, Samsung PM863/PM883/PM893, Intel D3-S4510/4520/4610/4620, Kingston DC500M
- NVMe: Micron 9100/9200/9300/9400, Micron 7300/7450, Samsung PM983/PM9A3, Samsung PM1723/1735/1743,
-  Intel DC-P3700/P4500/P4600, Intel D7-P5500/P5600, Intel Optane, Kingston DC1000B/DC1500M
- HDD: HGST Ultrastar, Toshiba MG06/MG07/MG08, Seagate EXOS
-
 ## Configure monitors

 On the monitor hosts:
@@ -56,10 +45,9 @@ On the monitor hosts:
  }
  ```
 - Initialize OSDs:
-  - SSD-only or HDD-only: `vitastor-disk prepare /dev/sdXXX [/dev/sdYYY ...]`.
-    Add `--disable_data_fsync off` to leave disk write cache enabled if you use
-    desktop SSDs without capacitors. Do NOT add `--disable_data_fsync off` if you
-    use HDDs or SSD+HDD.
+  - SSD-only: `vitastor-disk prepare /dev/sdXXX [/dev/sdYYY ...]`. You can add
+    `--disable_data_fsync off` to leave disk cache enabled if you use desktop
+    SSDs without capacitors.
  - Hybrid, SSD+HDD: `vitastor-disk prepare --hybrid /dev/sdXXX [/dev/sdYYY ...]`.
    Pass all your devices (HDD and SSD) to this script &mdash; it will partition disks and initialize journals on its own.
    This script skips HDDs which are already partitioned so if you want to use non-empty disks for
--- a/docs/intro/quickstart.ru.md
+++ b/docs/intro/quickstart.ru.md
@@ -7,7 +7,6 @@
 # Быстрый старт

 - [Подготовка](#подготовка)
- [Рекомендуемые диски](#рекомендуемые-диски)
 - [Настройте мониторы](#настройте-мониторы)
 - [Настройте OSD](#настройте-osd)
 - [Создайте пул](#создайте-пул)
@@ -20,20 +19,10 @@
 - Возьмите серверы с SSD (SATA или NVMe), желательно с конденсаторами (серверные SSD). Можно
  использовать и десктопные SSD, включив режим отложенного fsync, но производительность будет хуже.
  О конденсаторах читайте [здесь](../config/layout-cluster.ru.md#immediate_commit).
- Если хотите использовать HDD, берите современные модели с Media или SSD кэшем - HGST Ultrastar,
-  Toshiba MG08, Seagate EXOS или что-то похожее. Если такого кэша у ваших дисков нет,
-  обязательно возьмите SSD под метаданные и журнал (маленькие, буквально 2 ГБ на 1 ТБ HDD-места).
 - Возьмите быструю сеть, минимум 10 гбит/с. Идеал - что-то вроде Mellanox ConnectX-4 с RoCEv2.
 - Для лучшей производительности отключите энергосбережение CPU: `cpupower idle-set -D 0 && cpupower frequency-set -g performance`.
 - [Установите пакеты Vitastor](../installation/packages.ru.md).

-## Рекомендуемые диски
-
- SATA SSD: Micron 5100/5200/5300/5400, Samsung PM863/PM883/PM893, Intel D3-S4510/4520/4610/4620, Kingston DC500M
- NVMe: Micron 9100/9200/9300/9400, Micron 7300/7450, Samsung PM983/PM9A3, Samsung PM1723/1735/1743,
-  Intel DC-P3700/P4500/P4600, Intel D7-P5500/P5600, Intel Optane, Kingston DC1000B/DC1500M
- HDD: HGST Ultrastar, Toshiba MG06/MG07/MG08, Seagate EXOS
-
 ## Настройте мониторы

 На хостах, выделенных под мониторы:
@@ -56,10 +45,9 @@
  }
  ```
 - Инициализуйте OSD:
-  - Только SSD или только HDD: `vitastor-disk prepare /dev/sdXXX [/dev/sdYYY ...]`.
-    Если вы используете десктопные SSD без конденсаторов, добавьте опцию `--disable_data_fsync off`,
-    чтобы оставить кэш записи диска включённым. НЕ добавляйте эту опцию, если используете
-    жёсткие диски (HDD).
+  - SSD: `vitastor-disk prepare /dev/sdXXX [/dev/sdYYY ...]`. Если вы используете
+    десктопные SSD без конденсаторов, можете оставить кэш включённым, добавив
+    опцию `--disable_data_fsync off`.
  - Гибридные, SSD+HDD: `vitastor-disk prepare --hybrid /dev/sdXXX [/dev/sdYYY ...]`.
    Передайте все ваши SSD и HDD скрипту в командной строке подряд, скрипт автоматически выделит
    разделы под журналы на SSD и данные на HDD. Скрипт пропускает HDD, на которых уже есть разделы
--- a/docs/usage/disk.en.md
+++ b/docs/usage/disk.en.md
@@ -86,8 +86,6 @@ Options (both modes):
 --journal_size 1G/32M      Set journal size (area or partition size)
 --block_size 1M/128k       Set blockstore object size
 --bitmap_granularity 4k    Set bitmap granularity
--data_csum_type none      Set data checksum type (crc32c or none)
--csum_block_size 4k       Set data checksum block size
 --data_device_block 4k     Override data device block size
 --meta_device_block 4k     Override metadata device block size
 --journal_device_block 4k  Override journal device block size
@@ -102,9 +100,8 @@ checks the device cache status on start and tries to disable cache for SATA/SAS
 If it doesn't succeed it issues a warning in the system log.

 You can also pass other OSD options here as arguments and they'll be persisted
-in the superblock: cached_io_data, cached_io_meta, cached_io_journal,
-inmemory_metadata, inmemory_journal, max_write_iodepth,
-min_flusher_count, max_flusher_count, journal_sector_buffer_count,
+to the superblock: max_write_iodepth, min_flusher_count,
+max_flusher_count, inmemory_metadata, inmemory_journal, journal_sector_buffer_count,
 journal_no_same_sector_overwrites, throttle_small_writes, throttle_target_iops,
 throttle_target_mbs, throttle_target_parallelism, throttle_threshold_us.
 See [Runtime OSD Parameters](../config/osd.en.md) for details.
@@ -252,9 +249,7 @@ Options (see also [Cluster-Wide Disk Layout Parameters](../config/layout-cluster
 ```
 --object_size 128k       Set blockstore block size
 --bitmap_granularity 4k  Set bitmap granularity
--journal_size 16M       Set journal size
--data_csum_type none    Set data checksum type (crc32c or none)
--csum_block_size 4k     Set data checksum block size
+--journal_size 32M       Set journal size
 --device_block_size 4k   Set device block size
 --journal_offset 0       Set journal offset
 --device_size 0          Set device size
--- a/docs/usage/disk.ru.md
+++ b/docs/usage/disk.ru.md
@@ -87,8 +87,6 @@ vitastor-disk - инструмент командной строки для уп
 --journal_size 1G/32M      Задать размер журнала (области или раздела журнала)
 --block_size 1M/128k       Задать размер объекта хранилища
 --bitmap_granularity 4k    Задать гранулярность битовых карт
--data_csum_type none      Задать тип контрольных сумм (crc32c или none)
--csum_block_size 4k       Задать размер блока расчёта контрольных сумм
 --data_device_block 4k     Задать размер блока устройства данных
 --meta_device_block 4k     Задать размер блока метаданных
 --journal_device_block 4k  Задать размер блока журнала
@@ -103,9 +101,8 @@ vitastor-disk - инструмент командной строки для уп
 это не удаётся, в системный журнал выводится предупреждение.

 Вы можете передать данной команде и некоторые другие опции OSD в качестве аргументов
-и они тоже будут сохранены в суперблок: cached_io_data, cached_io_meta,
-cached_io_journal, inmemory_metadata, inmemory_journal, max_write_iodepth,
-min_flusher_count, max_flusher_count, journal_sector_buffer_count,
+и они тоже будут сохранены в суперблок: max_write_iodepth, min_flusher_count,
+max_flusher_count, inmemory_metadata, inmemory_journal, journal_sector_buffer_count,
 journal_no_same_sector_overwrites, throttle_small_writes, throttle_target_iops,
 throttle_target_mbs, throttle_target_parallelism, throttle_threshold_us.
 Читайте об этих параметрах подробнее в разделе [Изменяемые параметры OSD](../config/osd.ru.md).
@@ -257,9 +254,7 @@ OSD отключены fsync-и.
 ```
 --object_size 128k       Размер блока хранилища
 --bitmap_granularity 4k  Гранулярность битовых карт
--journal_size 16M       Размер журнала
--data_csum_type none    Задать тип контрольных сумм (crc32c или none)
--csum_block_size 4k     Задать размер блока расчёта контрольных сумм
+--journal_size 32M       Размер журнала
 --device_block_size 4k   Размер блока устройства
 --journal_offset 0       Смещение журнала
 --device_size 0          Размер устройства
--- a/docs/usage/nbd.en.md
+++ b/docs/usage/nbd.en.md
@@ -13,8 +13,6 @@ remains decent (see an example [here](../performance/comparison1.en.md#vitastor-

 Vitastor Kubernetes CSI driver is based on NBD.

-See also [VDUSE](qemu.en.md#vduse).
-
 ## Map image

 To create a local block device for a Vitastor image run:
--- a/docs/usage/nbd.ru.md
+++ b/docs/usage/nbd.ru.md
@@ -16,8 +16,6 @@ NBD немного снижает производительность из-за

 CSI-драйвер Kubernetes Vitastor основан на NBD.

-Смотрите также [VDUSE](qemu.ru.md#vduse).
-
 ## Подключить устройство

 Чтобы создать локальное блочное устройство для образа, выполните команду:
--- a/docs/usage/qemu.en.md
+++ b/docs/usage/qemu.en.md
@@ -83,44 +83,3 @@ qemu-img rebase -u -b '' testimg.qcow2
 This can be used for backups. Just note that exporting an image that is currently being written to
 is of course unsafe and doesn't produce a consistent result, so only export snapshots if you do this
 on a live VM.
-
-## VDUSE
-
-Linux kernel, starting with version 5.15, supports a new interface for attaching virtual disks
-to the host - VDUSE (vDPA Device in Userspace). QEMU, starting with 7.2, has support for
-exporting QEMU block devices over this protocol using qemu-storage-daemon.
-
-VDUSE has the same problem as other FUSE-like interfaces in Linux: if a userspace process hangs,
-for example, if it loses connectivity with Vitastor cluster - active processes doing I/O may
-hang in the D state (uninterruptible sleep) and you won't be able to kill them even with kill -9.
-In this case reboot will be the only way to remove VDUSE devices from system.
-
-On the other hand, VDUSE is faster than [NBD](nbd.en.md), so you may prefer to use it if
-performance is important for you. Approximate performance numbers:
-direct fio benchmark - 115000 iops, NBD - 60000 iops, VDUSE - 90000 iops.
-
-To try VDUSE you need at least Linux 5.15, built with VDUSE support
-(CONFIG_VIRTIO_VDPA=m and CONFIG_VDPA_USER=m). Debian Linux kernels have these options
-disabled by now, so if you want to try it on Debian, use a kernel from Ubuntu
-[kernel-ppa/mainline](https://kernel.ubuntu.com/~kernel-ppa/mainline/) or Proxmox.
-
-Commands to attach Vitastor image as a VDUSE device:
-
-```
-modprobe vduse
-modprobe virtio-vdpa
-qemu-storage-daemon --daemonize --blockdev '{"node-name":"test1","driver":"vitastor",\
-  "etcd-host":"192.168.7.2:2379/v3","image":"testosd1","cache":{"direct":true,"no-flush":false},"discard":"unmap"}' \
-  --export vduse-blk,id=test1,node-name=test1,name=test1,num-queues=16,queue-size=128,writable=true
-vdpa dev add name test1 mgmtdev vduse
-```
-
-After running these commands /dev/vda device will appear in the system and you'll be able to
-use it as a normal disk.
-
-To remove the device:
-
-```
-vdpa dev del test1
-kill <qemu-storage-daemon_process_PID>
-```
--- a/docs/usage/qemu.ru.md
+++ b/docs/usage/qemu.ru.md
@@ -87,44 +87,3 @@ qemu-img rebase -u -b '' testimg.qcow2
 Это можно использовать для резервного копирования. Только помните, что экспортировать образ, в который
 в то же время идёт запись, небезопасно - результат чтения не будет целостным. Так что если вы работаете
 с активными виртуальными машинами, экспортируйте только их снимки, но не сам образ.
-
-## VDUSE
-
-В Linux, начиная с версии ядра 5.15, доступен новый интерфейс для подключения виртуальных дисков
-к системе - VDUSE (vDPA Device in Userspace), а в QEMU, начиная с версии 7.2, есть поддержка
-экспорта блочных устройств QEMU по этому протоколу через qemu-storage-daemon.
-
-VDUSE страдает общей проблемой FUSE-подобных интерфейсов в Linux: если пользовательский процесс
-подвиснет, например, если будет потеряна связь с кластером Vitastor - читающие/пишущие в кластер
-процессы могут "залипнуть" в состоянии D (непрерываемый сон) и их будет невозможно убить даже
-через kill -9. В этом случае удалить из системы устройство можно только перезагрузившись.
-
-С другой стороны, VDUSE быстрее по сравнению с [NBD](nbd.ru.md), поэтому его может
-быть предпочтительно использовать там, где производительность важнее. Порядок показателей:
-прямое тестирование через fio - 115000 iops, NBD - 60000 iops, VDUSE - 90000 iops.
-
-Чтобы использовать VDUSE, вам нужно ядро Linux версии хотя бы 5.15, собранное с поддержкой
-VDUSE (CONFIG_VIRTIO_VDPA=m и CONFIG_VDPA_USER=m). В ядрах в Debian Linux поддержка пока
-отключена - если хотите попробовать эту функцию на Debian, поставьте ядро из Ubuntu
-[kernel-ppa/mainline](https://kernel.ubuntu.com/~kernel-ppa/mainline/) или из Proxmox.
-
-Команды для подключения виртуального диска через VDUSE:
-
-```
-modprobe vduse
-modprobe virtio-vdpa
-qemu-storage-daemon --daemonize --blockdev '{"node-name":"test1","driver":"vitastor",\
-  "etcd-host":"192.168.7.2:2379/v3","image":"testosd1","cache":{"direct":true,"no-flush":false},"discard":"unmap"}' \
-  --export vduse-blk,id=test1,node-name=test1,name=test1,num-queues=16,queue-size=128,writable=true
-vdpa dev add name test1 mgmtdev vduse
-```
-
-После этого в системе появится устройство /dev/vda, которое можно будет использовать как
-обычный диск.
-
-Для удаления устройства из системы:
-
-```
-vdpa dev del test1
-kill <PID_процесса_qemu-storage-daemon>
-```
--- a/mon/make-etcd
+++ b/mon/make-etcd
@@ -63,9 +63,8 @@ Wants=network-online.target local-fs.target time-sync.target

 [Service]
 Restart=always
-Environment=GOGC=50
 ExecStart=etcd -name etcd${num} --data-dir /var/lib/etcd${num}.etcd \\
-    --snapshot-count 10000 --advertise-client-urls http://${etcds[num]}:2379 --listen-client-urls http://${etcds[num]}:2379 \\
+    --advertise-client-urls http://${etcds[num]}:2379 --listen-client-urls http://${etcds[num]}:2379 \\
    --initial-advertise-peer-urls http://${etcds[num]}:2380 --listen-peer-urls http://${etcds[num]}:2380 \\
    --initial-cluster-token vitastor-etcd-1 --initial-cluster ${etcd_cluster} \\
    --initial-cluster-state new --max-txn-ops=100000 --max-request-bytes=104857600 \\
--- a/mon/mon.js
+++ b/mon/mon.js
@@ -78,15 +78,9 @@ const etcd_tree = {
            disk_alignment: 4096,
            bitmap_granularity: 4096,
            immediate_commit: false, // 'all' or 'small'
-            // client - configurable online
-            client_max_dirty_bytes: 33554432,
-            client_max_dirty_ops: 1024,
-            client_enable_writeback: false,
-            client_max_buffered_bytes: 33554432,
-            client_max_buffered_ops: 1024,
-            client_max_writeback_iodepth: 256,
            // client and osd - configurable online
            log_level: 0,
+            client_dirty_limit: 33554432,
            peer_connect_interval: 5, // seconds. min: 1
            peer_connect_timeout: 5, // seconds. min: 1
            osd_idle_timeout: 5, // seconds. min: 1
@@ -545,18 +539,10 @@ class Mon
        {
            retries = 1;
        }
-        const tried = {};
        while (retries < 0 || retry < retries)
        {
            const cur_addr = this.pick_next_etcd();
            const base = 'ws'+cur_addr.substr(4);
-            let now = Date.now();
-            if (tried[base] && now-tried[base] < timeout)
-            {
-                await new Promise(ok => setTimeout(ok, timeout-(now-tried[base])));
-                now = Date.now();
-            }
-            tried[base] = now;
            const ok = await new Promise((ok, no) =>
            {
                const timer_id = setTimeout(() =>
@@ -1162,33 +1148,6 @@ class Mon
        }
    }

-    filter_osds_by_block_layout(flat_tree, block_size, bitmap_granularity, immediate_commit)
-    {
-        for (const host in flat_tree)
-        {
-            let found = 0;
-            for (const osd in flat_tree[host])
-            {
-                const osd_stat = this.state.osd.stats[osd];
-                if (osd_stat && (osd_stat.bs_block_size && osd_stat.bs_block_size != block_size ||
-                    osd_stat.bitmap_granularity && osd_stat.bitmap_granularity != bitmap_granularity ||
-                    osd_stat.immediate_commit == 'small' && immediate_commit == 'all' ||
-                    osd_stat.immediate_commit == 'none' && immediate_commit != 'none'))
-                {
-                    delete flat_tree[host][osd];
-                }
-                else
-                {
-                    found++;
-                }
-            }
-            if (!found)
-            {
-                delete flat_tree[host];
-            }
-        }
-    }
-
    get_affinity_osds(pool_cfg, up_osds, osd_tree)
    {
        let aff_osds = up_osds;
@@ -1249,12 +1208,6 @@ class Mon
                pool_tree = pool_tree ? pool_tree.children : [];
                pool_tree = LPOptimizer.flatten_tree(pool_tree, levels, pool_cfg.failure_domain, 'osd');
                this.filter_osds_by_tags(osd_tree, pool_tree, pool_cfg.osd_tags);
-                this.filter_osds_by_block_layout(
-                    pool_tree,
-                    pool_cfg.block_size || this.config.block_size || 131072,
-                    pool_cfg.bitmap_granularity || this.config.bitmap_granularity || 4096,
-                    pool_cfg.immediate_commit || this.config.immediate_commit || 'none'
-                );
                // These are for the purpose of building history.osd_sets
                const real_prev_pgs = [];
                let pg_history = [];
@@ -1544,14 +1497,10 @@ class Mon
                    break;
                }
            }
-            const pool_cfg = (this.state.config.pools[pool_id]||{});
            if (!object_size)
            {
-                object_size = pool_cfg.block_size || this.config.block_size || 131072;
-            }
-            if (pool_cfg.scheme !== 'replicated')
-            {
-                object_size *= ((pool_cfg.pg_size||0) - (pool_cfg.parity_chunks||0));
+                object_size = (this.state.config.pools[pool_id]||{}).block_size ||
+                    this.config.block_size || 131072;
            }
            object_size = BigInt(object_size);
            for (const pg_num in this.state.pg.stats[pool_id])
@@ -1659,7 +1608,7 @@ class Mon
                }
            }
        }
-        return { inode_stats, seen_pools };
+        return inode_stats;
    }

    serialize_bigints(obj)
@@ -1685,7 +1634,7 @@ class Mon
        const timestamp = Date.now();
        const { object_counts, object_bytes } = this.sum_object_counts();
        let stats = this.sum_op_stats(timestamp, this.prev_stats);
-        let { inode_stats, seen_pools } = this.sum_inode_stats(
+        let inode_stats = this.sum_inode_stats(
            this.prev_stats ? this.prev_stats.inode_stats : null,
            timestamp, this.prev_stats ? this.prev_stats.timestamp : null
        );
@@ -1720,22 +1669,12 @@ class Mon
        }
        for (const pool_id in this.state.pool.stats)
        {
-            if (!seen_pools[pool_id])
-            {
-                txn.push({ requestDeleteRange: {
-                    key: b64(this.etcd_prefix+'/pool/stats/'+pool_id),
-                } });
-                delete this.state.pool.stats[pool_id];
-            }
-            else
-            {
-                const pool_stats = { ...this.state.pool.stats[pool_id] };
-                this.serialize_bigints(pool_stats);
-                txn.push({ requestPut: {
-                    key: b64(this.etcd_prefix+'/pool/stats/'+pool_id),
-                    value: b64(JSON.stringify(pool_stats)),
-                } });
-            }
+            const pool_stats = { ...this.state.pool.stats[pool_id] };
+            this.serialize_bigints(pool_stats);
+            txn.push({ requestPut: {
+                key: b64(this.etcd_prefix+'/pool/stats/'+pool_id),
+                value: b64(JSON.stringify(pool_stats)),
+            } });
        }
        if (txn.length)
        {
@@ -1835,18 +1774,10 @@ class Mon
        {
            retries = 1;
        }
-        const tried = {};
        while (retries < 0 || retry < retries)
        {
            retry++;
            const base = this.pick_next_etcd();
-            let now = Date.now();
-            if (tried[base] && now-tried[base] < timeout)
-            {
-                await new Promise(ok => setTimeout(ok, timeout-(now-tried[base])));
-                now = Date.now();
-            }
-            tried[base] = now;
            const res = await POST(base+path, body, timeout);
            if (res.error)
            {
--- a/patches/cinder-vitastor.py
+++ b/patches/cinder-vitastor.py
@@ -50,7 +50,7 @@ from cinder.volume import configuration
 from cinder.volume import driver
 from cinder.volume import volume_utils

-VERSION = '1.0.0'
+VERSION = '0.9.2'

 LOG = logging.getLogger(__name__)

--- a/patches/qemu-2.12-vitastor.patch
+++ b/patches/qemu-2.12-vitastor.patch
@@ -1,176 +0,0 @@
-diff --git a/block/Makefile.objs b/block/Makefile.objs
-index d644bac60a..e404236291 100644
--- a/block/Makefile.objs
-+++ b/block/Makefile.objs
-@@ -19,6 +19,7 @@ block-obj-$(if $(CONFIG_LIBISCSI),y,n) += iscsi-opts.o
- block-obj-$(CONFIG_LIBNFS) += nfs.o
- block-obj-$(CONFIG_CURL) += curl.o
- block-obj-$(CONFIG_RBD) += rbd.o
-+block-obj-$(CONFIG_VITASTOR) += vitastor.o
- block-obj-$(CONFIG_GLUSTERFS) += gluster.o
- block-obj-$(CONFIG_VXHS) += vxhs.o
- block-obj-$(CONFIG_LIBSSH2) += ssh.o
-@@ -39,6 +40,8 @@ curl.o-cflags      := $(CURL_CFLAGS)
- curl.o-libs        := $(CURL_LIBS)
- rbd.o-cflags       := $(RBD_CFLAGS)
- rbd.o-libs         := $(RBD_LIBS)
-+vitastor.o-cflags  := $(VITASTOR_CFLAGS)
-+vitastor.o-libs    := $(VITASTOR_LIBS)
- gluster.o-cflags   := $(GLUSTERFS_CFLAGS)
- gluster.o-libs     := $(GLUSTERFS_LIBS)
- vxhs.o-libs        := $(VXHS_LIBS)
-diff --git a/configure b/configure
-index 0a19b033bc..58b7fbf24c 100755
--- a/configure
-+++ b/configure
-@@ -398,6 +398,7 @@ trace_backends="log"
- trace_file="trace"
- spice=""
- rbd=""
-+vitastor=""
- smartcard=""
- libusb=""
- usb_redir=""
-@@ -1213,6 +1214,10 @@ for opt do
-   ;;
-   --enable-rbd) rbd="yes"
-   ;;
-+  --disable-vitastor) vitastor="no"
-+  ;;
-+  --enable-vitastor) vitastor="yes"
-+  ;;
-   --disable-xfsctl) xfs="no"
-   ;;
-   --enable-xfsctl) xfs="yes"
-@@ -1601,6 +1606,7 @@ disabled with --disable-FEATURE, default is enabled if available:
-   vhost-crypto    vhost-crypto acceleration support
-   spice           spice
-   rbd             rados block device (rbd)
-+  vitastor        vitastor block device
-   libiscsi        iscsi support
-   libnfs          nfs support
-   smartcard       smartcard support (libcacard)
-@@ -3594,6 +3600,27 @@ EOF
-   fi
- fi
- 
-+##########################################
-+# vitastor probe
-+if test "$vitastor" != "no" ; then
-+  cat > $TMPC <<EOF
-+#include <vitastor_c.h>
-+int main(void) {
-+  vitastor_c_create_qemu(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
-+  return 0;
-+}
-+EOF
-+  vitastor_libs="-lvitastor_client"
-+  if compile_prog "" "$vitastor_libs" ; then
-+    vitastor=yes
-+  else
-+    if test "$vitastor" = "yes" ; then
-+      feature_not_found "vitastor block device" "Install vitastor-client-dev"
-+    fi
-+    vitastor=no
-+  fi
-+fi
-+
- ##########################################
- # libssh2 probe
- min_libssh2_version=1.2.8
-@@ -5837,6 +5864,7 @@ echo "Trace output file $trace_file-<pid>"
- fi
- echo "spice support     $spice $(echo_version $spice $spice_protocol_version/$spice_server_version)"
- echo "rbd support       $rbd"
-+echo "vitastor support  $vitastor"
- echo "xfsctl support    $xfs"
- echo "smartcard support $smartcard"
- echo "libusb            $libusb"
-@@ -6416,6 +6444,11 @@ if test "$rbd" = "yes" ; then
-   echo "RBD_CFLAGS=$rbd_cflags" >> $config_host_mak
-   echo "RBD_LIBS=$rbd_libs" >> $config_host_mak
- fi
-+if test "$vitastor" = "yes" ; then
-+  echo "CONFIG_VITASTOR=m" >> $config_host_mak
-+  echo "VITASTOR_CFLAGS=$vitastor_cflags" >> $config_host_mak
-+  echo "VITASTOR_LIBS=$vitastor_libs" >> $config_host_mak
-+fi
- 
- echo "CONFIG_COROUTINE_BACKEND=$coroutine" >> $config_host_mak
- if test "$coroutine_pool" = "yes" ; then
-diff --git a/qapi/block-core.json b/qapi/block-core.json
-index c50517bff3..c780bb2c1c 100644
--- a/qapi/block-core.json
-+++ b/qapi/block-core.json
-@@ -2514,7 +2514,7 @@
-             'dmg', 'file', 'ftp', 'ftps', 'gluster', 'host_cdrom',
-             'host_device', 'http', 'https', 'iscsi', 'luks', 'nbd', 'nfs',
-             'null-aio', 'null-co', 'nvme', 'parallels', 'qcow', 'qcow2', 'qed',
-            'quorum', 'raw', 'rbd', 'replication', 'sheepdog', 'ssh',
-+            'quorum', 'raw', 'rbd', 'vitastor', 'replication', 'sheepdog', 'ssh',
-             'throttle', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat', 'vxhs' ] }
- 
- ##
-@@ -3217,6 +3217,28 @@
-             '*snap-id': 'uint32',
-             '*tag': 'str' } }
- 
-+##
-+# @BlockdevOptionsVitastor:
-+#
-+# Driver specific block device options for vitastor
-+#
-+# @image:       Image name
-+# @inode:       Inode number
-+# @pool:        Pool ID
-+# @size:        Desired image size in bytes
-+# @config-path: Path to Vitastor configuration
-+# @etcd-host:   etcd connection address(es)
-+# @etcd-prefix: etcd key/value prefix
-+##
-+{ 'struct': 'BlockdevOptionsVitastor',
-+  'data': { '*inode': 'uint64',
-+            '*pool': 'uint64',
-+            '*size': 'uint64',
-+            '*image': 'str',
-+            '*config-path': 'str',
-+            '*etcd-host': 'str',
-+            '*etcd-prefix': 'str' } }
-+
- ##
- # @ReplicationMode:
- #
-@@ -3547,6 +3569,7 @@
-       'rbd':        'BlockdevOptionsRbd',
-       'replication':'BlockdevOptionsReplication',
-       'sheepdog':   'BlockdevOptionsSheepdog',
-+      'vitastor':   'BlockdevOptionsVitastor',
-       'ssh':        'BlockdevOptionsSsh',
-       'throttle':   'BlockdevOptionsThrottle',
-       'vdi':        'BlockdevOptionsGenericFormat',
-@@ -3991,6 +4014,17 @@
-             '*subformat':           'BlockdevVhdxSubformat',
-             '*block-state-zero':    'bool' } }
- 
-+##
-+# @BlockdevCreateOptionsVitastor:
-+#
-+# Driver specific image creation options for Vitastor.
-+#
-+# @size: Size of the virtual disk in bytes
-+##
-+{ 'struct': 'BlockdevCreateOptionsVitastor',
-+  'data': { 'location':         'BlockdevOptionsVitastor',
-+            'size':             'size' } }
-+
- ##
- # @BlockdevVpcSubformat:
- #
-@@ -4074,6 +4108,7 @@
-       'rbd':            'BlockdevCreateOptionsRbd',
-       'replication':    'BlockdevCreateNotSupported',
-       'sheepdog':       'BlockdevCreateOptionsSheepdog',
-+      'vitastor':       'BlockdevCreateOptionsVitastor',
-       'ssh':            'BlockdevCreateOptionsSsh',
-       'throttle':       'BlockdevCreateNotSupported',
-       'vdi':            'BlockdevCreateOptionsVdi',
--- a/patches/qemu-5.2-vitastor.patch
+++ b/patches/qemu-5.2-vitastor.patch
@@ -1,181 +0,0 @@
-Index: qemu-5.2+dfsg/qapi/block-core.json
-===================================================================
--- qemu-5.2+dfsg.orig/qapi/block-core.json
-+++ qemu-5.2+dfsg/qapi/block-core.json
-@@ -2831,7 +2831,7 @@
-             'luks', 'nbd', 'nfs', 'null-aio', 'null-co', 'nvme', 'parallels',
-             'qcow', 'qcow2', 'qed', 'quorum', 'raw', 'rbd',
-             { 'name': 'replication', 'if': 'defined(CONFIG_REPLICATION)' },
-            'sheepdog',
-+            'sheepdog', 'vitastor',
-             'ssh', 'throttle', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat' ] }
- 
- ##
-@@ -3668,6 +3668,28 @@
-             '*tag': 'str' } }
- 
- ##
-+# @BlockdevOptionsVitastor:
-+#
-+# Driver specific block device options for vitastor
-+#
-+# @image:       Image name
-+# @inode:       Inode number
-+# @pool:        Pool ID
-+# @size:        Desired image size in bytes
-+# @config-path: Path to Vitastor configuration
-+# @etcd-host:   etcd connection address(es)
-+# @etcd-prefix: etcd key/value prefix
-+##
-+{ 'struct': 'BlockdevOptionsVitastor',
-+  'data': { '*inode': 'uint64',
-+            '*pool': 'uint64',
-+            '*size': 'uint64',
-+            '*image': 'str',
-+            '*config-path': 'str',
-+            '*etcd-host': 'str',
-+            '*etcd-prefix': 'str' } }
-+
-+##
- # @ReplicationMode:
- #
- # An enumeration of replication modes.
-@@ -4015,6 +4037,7 @@
-       'replication': { 'type': 'BlockdevOptionsReplication',
-                        'if': 'defined(CONFIG_REPLICATION)' },
-       'sheepdog':   'BlockdevOptionsSheepdog',
-+      'vitastor':   'BlockdevOptionsVitastor',
-       'ssh':        'BlockdevOptionsSsh',
-       'throttle':   'BlockdevOptionsThrottle',
-       'vdi':        'BlockdevOptionsGenericFormat',
-@@ -4404,6 +4427,17 @@
-             '*cluster-size' :   'size' } }
- 
- ##
-+# @BlockdevCreateOptionsVitastor:
-+#
-+# Driver specific image creation options for Vitastor.
-+#
-+# @size: Size of the virtual disk in bytes
-+##
-+{ 'struct': 'BlockdevCreateOptionsVitastor',
-+  'data': { 'location':         'BlockdevOptionsVitastor',
-+            'size':             'size' } }
-+
-+##
- # @BlockdevVmdkSubformat:
- #
- # Subformat options for VMDK images
-@@ -4665,6 +4699,7 @@
-       'qed':            'BlockdevCreateOptionsQed',
-       'rbd':            'BlockdevCreateOptionsRbd',
-       'sheepdog':       'BlockdevCreateOptionsSheepdog',
-+      'vitastor':       'BlockdevCreateOptionsVitastor',
-       'ssh':            'BlockdevCreateOptionsSsh',
-       'vdi':            'BlockdevCreateOptionsVdi',
-       'vhdx':           'BlockdevCreateOptionsVhdx',
-Index: qemu-5.2+dfsg/block/meson.build
-===================================================================
--- qemu-5.2+dfsg.orig/block/meson.build
-+++ qemu-5.2+dfsg/block/meson.build
-@@ -76,6 +76,7 @@ foreach m : [
-   ['CONFIG_LIBNFS', 'nfs', libnfs, 'nfs.c'],
-   ['CONFIG_LIBSSH', 'ssh', libssh, 'ssh.c'],
-   ['CONFIG_RBD', 'rbd', rbd, 'rbd.c'],
-+  ['CONFIG_VITASTOR', 'vitastor', vitastor, 'vitastor.c'],
- ]
-   if config_host.has_key(m[0])
-     if enable_modules
-Index: qemu-5.2+dfsg/configure
-===================================================================
--- qemu-5.2+dfsg.orig/configure
-+++ qemu-5.2+dfsg/configure
-@@ -372,6 +372,7 @@ trace_backends="log"
- trace_file="trace"
- spice=""
- rbd=""
-+vitastor=""
- smartcard=""
- u2f="auto"
- libusb=""
-@@ -1263,6 +1264,10 @@ for opt do
-   ;;
-   --enable-rbd) rbd="yes"
-   ;;
-+  --disable-vitastor) vitastor="no"
-+  ;;
-+  --enable-vitastor) vitastor="yes"
-+  ;;
-   --disable-xfsctl) xfs="no"
-   ;;
-   --enable-xfsctl) xfs="yes"
-@@ -1827,6 +1832,7 @@ disabled with --disable-FEATURE, default
-   vhost-vdpa      vhost-vdpa kernel backend support
-   spice           spice
-   rbd             rados block device (rbd)
-+  vitastor        vitastor block device
-   libiscsi        iscsi support
-   libnfs          nfs support
-   smartcard       smartcard support (libcacard)
-@@ -3719,6 +3725,27 @@ EOF
- fi
- 
- ##########################################
-+# vitastor probe
-+if test "$vitastor" != "no" ; then
-+  cat > $TMPC <<EOF
-+#include <vitastor_c.h>
-+int main(void) {
-+  vitastor_c_create_qemu(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
-+  return 0;
-+}
-+EOF
-+  vitastor_libs="-lvitastor_client"
-+  if compile_prog "" "$vitastor_libs" ; then
-+    vitastor=yes
-+  else
-+    if test "$vitastor" = "yes" ; then
-+      feature_not_found "vitastor block device" "Install vitastor-client-dev"
-+    fi
-+    vitastor=no
-+  fi
-+fi
-+
-+##########################################
- # libssh probe
- if test "$libssh" != "no" ; then
-   if $pkg_config --exists libssh; then
-@@ -6456,6 +6483,10 @@ if test "$rbd" = "yes" ; then
-   echo "CONFIG_RBD=y" >> $config_host_mak
-   echo "RBD_LIBS=$rbd_libs" >> $config_host_mak
- fi
-+if test "$vitastor" = "yes" ; then
-+  echo "CONFIG_VITASTOR=y" >> $config_host_mak
-+  echo "VITASTOR_LIBS=$vitastor_libs" >> $config_host_mak
-+fi
- 
- echo "CONFIG_COROUTINE_BACKEND=$coroutine" >> $config_host_mak
- if test "$coroutine_pool" = "yes" ; then
-Index: qemu-5.2+dfsg/meson.build
-===================================================================
--- qemu-5.2+dfsg.orig/meson.build
-+++ qemu-5.2+dfsg/meson.build
-@@ -596,6 +596,10 @@ rbd = not_found
- if 'CONFIG_RBD' in config_host
-   rbd = declare_dependency(link_args: config_host['RBD_LIBS'].split())
- endif
-+vitastor = not_found
-+if 'CONFIG_VITASTOR' in config_host
-+  vitastor = declare_dependency(link_args: config_host['VITASTOR_LIBS'].split())
-+endif
- glusterfs = not_found
- if 'CONFIG_GLUSTERFS' in config_host
-   glusterfs = declare_dependency(compile_args: config_host['GLUSTERFS_CFLAGS'].split(),
-@@ -2145,6 +2149,7 @@ endif
- # TODO: add back protocol and server version
- summary_info += {'spice support':     config_host.has_key('CONFIG_SPICE')}
- summary_info += {'rbd support':       config_host.has_key('CONFIG_RBD')}
-+summary_info += {'vitastor support':  config_host.has_key('CONFIG_VITASTOR')}
- summary_info += {'xfsctl support':    config_host.has_key('CONFIG_XFS')}
- summary_info += {'smartcard support': config_host.has_key('CONFIG_SMARTCARD')}
- summary_info += {'U2F support':       u2f.found()}
--- a/rpm/build-tarball.sh
+++ b/rpm/build-tarball.sh
@@ -24,4 +24,4 @@ rm fio
 mv fio-copy fio
 FIO=`rpm -qi fio | perl -e 'while(<>) { /^Epoch[\s:]+(\S+)/ && print "$1:"; /^Version[\s:]+(\S+)/ && print $1; /^Release[\s:]+(\S+)/ && print "-$1"; }'`
 perl -i -pe 's/(Requires:\s*fio)([^\n]+)?/$1 = '$FIO'/' $VITASTOR/rpm/vitastor-el$EL.spec
-tar --transform 's#^#vitastor-1.0.0/#' --exclude 'rpm/*.rpm' -czf $VITASTOR/../vitastor-1.0.0$(rpm --eval '%dist').tar.gz *
+tar --transform 's#^#vitastor-0.9.2/#' --exclude 'rpm/*.rpm' -czf $VITASTOR/../vitastor-0.9.2$(rpm --eval '%dist').tar.gz *
--- a/rpm/qemu-kvm-4.2-el7.spec.patch
+++ b/rpm/qemu-kvm-4.2-el7.spec.patch
@@ -22,7 +22,7 @@
 Name: qemu-kvm
 Version: 4.2.0
 -Release: 29.vitastor%{?dist}.6
-+Release: 34.vitastor%{?dist}.6
+Release: 32.vitastor%{?dist}.6
 # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped
 Epoch: 15
 License: GPLv2 and GPLv2+ and CC-BY
--- a/rpm/qemu-kvm-4.2-el8.spec.patch
+++ b/rpm/qemu-kvm-4.2-el8.spec.patch
@@ -13,7 +13,7 @@
 Name: qemu-kvm
 Version: 4.2.0
 -Release: 29%{?dist}.6
-+Release: 33.vitastor%{?dist}.6
+Release: 32.vitastor%{?dist}.6
 # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped
 Epoch: 15
 License: GPLv2 and GPLv2+ and CC-BY
--- a/rpm/qemu-kvm-6.2-el8.spec.patch
+++ b/rpm/qemu-kvm-6.2-el8.spec.patch
@@ -1,103 +0,0 @@
--- qemu-kvm-6.2.spec.orig	2023-07-18 13:52:57.636625440 +0000
-+++ qemu-kvm-6.2.spec	2023-07-18 13:52:19.011683886 +0000
-@@ -73,6 +73,7 @@ Requires: %{name}-hw-usbredir = %{epoch}
- %endif                                                           \
- Requires: %{name}-block-iscsi = %{epoch}:%{version}-%{release}   \
- Requires: %{name}-block-rbd = %{epoch}:%{version}-%{release}     \
-+Requires: %{name}-block-vitastor = %{epoch}:%{version}-%{release}\
- Requires: %{name}-block-ssh = %{epoch}:%{version}-%{release}
- 
- # Macro to properly setup RHEL/RHEV conflict handling
-@@ -83,7 +84,7 @@ Obsoletes: %1-rhev <= %{epoch}:%{version
- Summary: QEMU is a machine emulator and virtualizer
- Name: qemu-kvm
- Version: 6.2.0
-Release: 32%{?rcrel}%{?dist}
-+Release: 32.vitastor%{?rcrel}%{?dist}
- # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped
- Epoch: 15
- License: GPLv2 and GPLv2+ and CC-BY
-@@ -122,6 +123,7 @@ Source37: tests_data_acpi_pc_SSDT.dimmpx
- Source38: tests_data_acpi_q35_FACP.slic
- Source39: tests_data_acpi_q35_SSDT.dimmpxm
- Source40: tests_data_acpi_virt_SSDT.memhp
-+Source41: qemu-vitastor.c
- 
- Patch0001: 0001-redhat-Adding-slirp-to-the-exploded-tree.patch
- Patch0005: 0005-Initial-redhat-build.patch
-@@ -652,6 +654,7 @@ Patch255: kvm-scsi-protect-req-aiocb-wit
- Patch256: kvm-dma-helpers-prevent-dma_blk_cb-vs-dma_aio_cancel-rac.patch
- # For bz#2090990 - qemu crash with error scsi_req_unref(SCSIRequest *): Assertion `req->refcount > 0' failed or scsi_dma_complete(void *, int): Assertion `r->req.aiocb != NULL' failed [8.7.0]
- Patch257: kvm-virtio-scsi-reset-SCSI-devices-from-main-loop-thread.patch
-+Patch258: qemu-6.2-vitastor.patch
- 
- BuildRequires: wget
- BuildRequires: rpm-build
-@@ -689,6 +692,7 @@ BuildRequires: libcurl-devel
- BuildRequires: libssh-devel
- BuildRequires: librados-devel
- BuildRequires: librbd-devel
-+BuildRequires: vitastor-client-devel
- %if %{have_gluster}
- # For gluster block driver
- BuildRequires: glusterfs-api-devel
-@@ -926,6 +930,14 @@ Install this package if you want to acce
- using the rbd protocol.
- 
- 
-+%package  block-vitastor
-+Summary: QEMU Vitastor block driver
-+Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release}
-+
-+%description block-vitastor
-+This package provides the additional Vitastor block driver for QEMU.
-+
-+
- %package  block-ssh
- Summary: QEMU SSH block driver
- Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release}
-@@ -979,6 +991,7 @@ This package provides usbredir support.
- rm -fr slirp
- mkdir slirp
- %autopatch -p1
-+cp %{SOURCE41} ./block/vitastor.c
- 
- %global qemu_kvm_build qemu_kvm_build
- mkdir -p %{qemu_kvm_build}
-@@ -994,7 +1007,7 @@ cp -f %{SOURCE40} tests/data/acpi/virt/S
- # --build-id option is used for giving info to the debug packages.
- buildldflags="VL_LDFLAGS=-Wl,--build-id"
- 
-%global block_drivers_list qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle
-+%global block_drivers_list qcow2,raw,file,host_device,nbd,iscsi,rbd,vitastor,blkdebug,luks,null-co,nvme,copy-on-read,throttle
- 
- %if 0%{have_gluster}
-     %global block_drivers_list %{block_drivers_list},gluster
-@@ -1149,9 +1162,7 @@ pushd %{qemu_kvm_build}
-   --firmwarepath=%{_prefix}/share/qemu-firmware \
-   --meson="git" \
-   --target-list="%{buildarch}" \
-  --block-drv-rw-whitelist=%{block_drivers_list} \
-   --audio-drv-list= \
-  --block-drv-ro-whitelist=vmdk,vhdx,vpc,https,ssh \
-   --with-coroutine=ucontext \
-   --with-git=git \
-   --tls-priority=@QEMU,SYSTEM \
-@@ -1197,6 +1208,7 @@ pushd %{qemu_kvm_build}
- %endif
-   --enable-pie \
-   --enable-rbd \
-+  --enable-vitastor \
- %if 0%{have_librdma}
-   --enable-rdma \
- %endif
-@@ -1794,6 +1806,9 @@ sh %{_sysconfdir}/sysconfig/modules/kvm.
- %files block-rbd
- %{_libdir}/qemu-kvm/block-rbd.so
- 
-+%files block-vitastor
-+%{_libdir}/qemu-kvm/block-vitastor.so
-+
- %files block-ssh
- %{_libdir}/qemu-kvm/block-ssh.so
- 
--- a/rpm/qemu-kvm-7.2-el9.spec.patch
+++ b/rpm/qemu-kvm-7.2-el9.spec.patch
@@ -1,93 +0,0 @@
--- qemu-kvm-7.2.spec.orig	2023-06-22 13:56:19.000000000 +0000
-+++ qemu-kvm-7.2.spec	2023-07-18 07:55:22.347090196 +0000
-@@ -100,8 +100,6 @@
- %endif
- 
- %global target_list %{kvm_target}-softmmu
-%global block_drivers_rw_list qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle,compress
-%global block_drivers_ro_list vdi,vmdk,vhdx,vpc,https
- %define qemudocdir %{_docdir}/%{name}
- %global firmwaredirs "%{_datadir}/qemu-firmware:%{_datadir}/ipxe/qemu:%{_datadir}/seavgabios:%{_datadir}/seabios"
- 
-@@ -126,6 +124,7 @@ Requires: %{name}-device-usb-host = %{ep
- Requires: %{name}-device-usb-redirect = %{epoch}:%{version}-%{release}   \
- %endif                                                           \
- Requires: %{name}-block-rbd = %{epoch}:%{version}-%{release}     \
-+Requires: %{name}-block-vitastor = %{epoch}:%{version}-%{release}\
- Requires: %{name}-audio-pa = %{epoch}:%{version}-%{release}
- 
- # Since SPICE is removed from RHEL-9, the following Obsoletes:
-@@ -148,7 +147,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}
- Summary: QEMU is a machine emulator and virtualizer
- Name: qemu-kvm
- Version: 7.2.0
-Release: 14%{?rcrel}%{?dist}%{?cc_suffix}.1
-+Release: 14.vitastor%{?rcrel}%{?dist}%{?cc_suffix}.1
- # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped
- # Epoch 15 used for RHEL 8
- # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5)
-@@ -171,6 +170,7 @@ Source28: 95-kvm-memlock.conf
- Source30: kvm-s390x.conf
- Source31: kvm-x86.conf
- Source36: README.tests
-+Source37: qemu-vitastor.c
- 
- 
- Patch0004: 0004-Initial-redhat-build.patch
-@@ -418,6 +418,7 @@ Patch134: kvm-target-i386-Fix-BZHI-instr
- Patch135: kvm-intel-iommu-fail-DEVIOTLB_UNMAP-without-dt-mode.patch
- # For bz#2203745 - Disk detach is unsuccessful while the guest is still booting [rhel-9.2.0.z]
- Patch136: kvm-acpi-pcihp-allow-repeating-hot-unplug-requests.patch
-+Patch137: qemu-7.2-vitastor.patch
- 
- %if %{have_clang}
- BuildRequires: clang
-@@ -449,6 +450,7 @@ BuildRequires: libcurl-devel
- %if %{have_block_rbd}
- BuildRequires: librbd-devel
- %endif
-+BuildRequires: vitastor-client-devel
- # We need both because the 'stap' binary is probed for by configure
- BuildRequires: systemtap
- BuildRequires: systemtap-sdt-devel
-@@ -642,6 +644,14 @@ using the rbd protocol.
- %endif
- 
- 
-+%package  block-vitastor
-+Summary: QEMU Vitastor block driver
-+Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release}
-+
-+%description block-vitastor
-+This package provides the additional Vitastor block driver for QEMU.
-+
-+
- %package  audio-pa
- Summary: QEMU PulseAudio audio driver
- Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release}
-@@ -719,6 +729,7 @@ This package provides usbredir support.
- %prep
- %setup -q -n qemu-%{version}%{?rcstr}
- %autopatch -p1
-+cp %{SOURCE37} ./block/vitastor.c
- 
- %global qemu_kvm_build qemu_kvm_build
- mkdir -p %{qemu_kvm_build}
-@@ -946,6 +957,7 @@ run_configure \
- %if %{have_block_rbd}
-   --enable-rbd \
- %endif
-+  --enable-vitastor \
- %if %{have_librdma}
-   --enable-rdma \
- %endif
-@@ -1426,6 +1438,9 @@ useradd -r -u 107 -g qemu -G kvm -d / -s
- %files block-rbd
- %{_libdir}/%{name}/block-rbd.so
- %endif
-+%files block-vitastor
-+%{_libdir}/%{name}/block-vitastor.so
-+
- %files audio-pa
- %{_libdir}/%{name}/audio-pa.so
- 
--- a/rpm/vitastor-el7.Dockerfile
+++ b/rpm/vitastor-el7.Dockerfile
@@ -35,7 +35,7 @@ ADD . /root/vitastor
 RUN set -e; \
    cd /root/vitastor/rpm; \
    sh build-tarball.sh; \
-    cp /root/vitastor-1.0.0.el7.tar.gz ~/rpmbuild/SOURCES; \
+    cp /root/vitastor-0.9.2.el7.tar.gz ~/rpmbuild/SOURCES; \
    cp vitastor-el7.spec ~/rpmbuild/SPECS/vitastor.spec; \
    cd ~/rpmbuild/SPECS/; \
    rpmbuild -ba vitastor.spec; \
--- a/rpm/vitastor-el7.spec
+++ b/rpm/vitastor-el7.spec
@@ -1,11 +1,11 @@
 Name:           vitastor
-Version:        1.0.0
+Version:        0.9.2
 Release:        1%{?dist}
 Summary:        Vitastor, a fast software-defined clustered block storage

 License:        Vitastor Network Public License 1.1
 URL:            https://vitastor.io/
-Source0:        vitastor-1.0.0.el7.tar.gz
+Source0:        vitastor-0.9.2.el7.tar.gz

 BuildRequires:  liburing-devel >= 0.6
 BuildRequires:  gperftools-devel
--- a/rpm/vitastor-el8.Dockerfile
+++ b/rpm/vitastor-el8.Dockerfile
@@ -35,7 +35,7 @@ ADD . /root/vitastor
 RUN set -e; \
    cd /root/vitastor/rpm; \
    sh build-tarball.sh; \
-    cp /root/vitastor-1.0.0.el8.tar.gz ~/rpmbuild/SOURCES; \
+    cp /root/vitastor-0.9.2.el8.tar.gz ~/rpmbuild/SOURCES; \
    cp vitastor-el8.spec ~/rpmbuild/SPECS/vitastor.spec; \
    cd ~/rpmbuild/SPECS/; \
    rpmbuild -ba vitastor.spec; \
--- a/rpm/vitastor-el8.spec
+++ b/rpm/vitastor-el8.spec
@@ -1,11 +1,11 @@
 Name:           vitastor
-Version:        1.0.0
+Version:        0.9.2
 Release:        1%{?dist}
 Summary:        Vitastor, a fast software-defined clustered block storage

 License:        Vitastor Network Public License 1.1
 URL:            https://vitastor.io/
-Source0:        vitastor-1.0.0.el8.tar.gz
+Source0:        vitastor-0.9.2.el8.tar.gz

 BuildRequires:  liburing-devel >= 0.6
 BuildRequires:  gperftools-devel
--- a/rpm/vitastor-el9.Dockerfile
+++ b/rpm/vitastor-el9.Dockerfile
@@ -18,7 +18,7 @@ ADD . /root/vitastor
 RUN set -e; \
    cd /root/vitastor/rpm; \
    sh build-tarball.sh; \
-    cp /root/vitastor-1.0.0.el9.tar.gz ~/rpmbuild/SOURCES; \
+    cp /root/vitastor-0.9.2.el9.tar.gz ~/rpmbuild/SOURCES; \
    cp vitastor-el9.spec ~/rpmbuild/SPECS/vitastor.spec; \
    cd ~/rpmbuild/SPECS/; \
    rpmbuild -ba vitastor.spec; \
--- a/rpm/vitastor-el9.spec
+++ b/rpm/vitastor-el9.spec
@@ -1,11 +1,11 @@
 Name:           vitastor
-Version:        1.0.0
+Version:        0.9.2
 Release:        1%{?dist}
 Summary:        Vitastor, a fast software-defined clustered block storage

 License:        Vitastor Network Public License 1.1
 URL:            https://vitastor.io/
-Source0:        vitastor-1.0.0.el9.tar.gz
+Source0:        vitastor-0.9.2.el9.tar.gz

 BuildRequires:  liburing-devel >= 0.6
 BuildRequires:  gperftools-devel
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -16,7 +16,7 @@ if("${CMAKE_INSTALL_PREFIX}" MATCHES "^/usr/local/?$")
 	set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}")
 endif()

-add_definitions(-DVERSION="1.0.0")
+add_definitions(-DVERSION="0.9.2")
 add_definitions(-Wall -Wno-sign-compare -Wno-comment -Wno-parentheses -Wno-pointer-arith -fdiagnostics-color=always -I ${CMAKE_SOURCE_DIR}/src)
 if (${WITH_ASAN})
 	add_definitions(-fsanitize=address -fno-omit-frame-pointer)
@@ -137,7 +137,6 @@ endif (${WITH_FIO})
 add_library(vitastor_client SHARED
 	cluster_client.cpp
 	cluster_client_list.cpp
-	cluster_client_wb.cpp
 	vitastor_c.cpp
 	cli_common.cpp
 	cli_alloc_osd.cpp
@@ -301,7 +300,7 @@ target_link_libraries(test_crc32
 add_executable(test_cluster_client
 	EXCLUDE_FROM_ALL
 	test_cluster_client.cpp
-	pg_states.cpp osd_ops.cpp cluster_client.cpp cluster_client_list.cpp cluster_client_wb.cpp msgr_op.cpp mock/messenger.cpp msgr_stop.cpp
+	pg_states.cpp osd_ops.cpp cluster_client.cpp cluster_client_list.cpp msgr_op.cpp mock/messenger.cpp msgr_stop.cpp
 	etcd_state_client.cpp timerfd_manager.cpp str_util.cpp ../json11/json11.cpp
 )
 target_compile_definitions(test_cluster_client PUBLIC -D__MOCK__)
--- a/src/addr_util.cpp
+++ b/src/addr_util.cpp
@@ -19,8 +19,8 @@ bool string_to_addr(std::string str, bool parse_port, int default_port, struct s
        if (p != std::string::npos && !(str.length() > 0 && str[p-1] == ']')) // "[ipv6]" which contains ':'
        {
            char null_byte = 0;
-            int scanned = sscanf(str.c_str()+p+1, "%d%c", &default_port, &null_byte);
-            if (scanned != 1 || default_port >= 0x10000)
+            int n = sscanf(str.c_str()+p+1, "%d%c", &default_port, &null_byte);
+            if (n != 1 || default_port >= 0x10000)
                return false;
            str = str.substr(0, p);
        }
--- a/src/allocator.cpp
+++ b/src/allocator.cpp
@@ -143,83 +143,34 @@ uint64_t allocator::get_free_count()
    return free;
 }

-// FIXME: Move to utils?
 void bitmap_set(void *bitmap, uint64_t start, uint64_t len, uint64_t bitmap_granularity)
 {
-    if (start == 0 && len == 32*bitmap_granularity)
-        *((uint32_t*)bitmap) = UINT32_MAX;
-    else if (start == 0 && len == 64*bitmap_granularity)
-        *((uint64_t*)bitmap) = UINT64_MAX;
-    else
+    if (start == 0)
    {
-        unsigned bit_start = start / bitmap_granularity;
-        unsigned bit_end = ((start + len) + bitmap_granularity - 1) / bitmap_granularity;
-        while (bit_start < bit_end)
+        if (len == 32*bitmap_granularity)
        {
-            if (!(bit_start & 7) && bit_end >= bit_start+8)
-            {
-                ((uint8_t*)bitmap)[bit_start / 8] = UINT8_MAX;
-                bit_start += 8;
-            }
-            else
-            {
-                ((uint8_t*)bitmap)[bit_start / 8] |= 1 << (bit_start % 8);
-                bit_start++;
-            }
+            *((uint32_t*)bitmap) = UINT32_MAX;
+            return;
+        }
+        else if (len == 64*bitmap_granularity)
+        {
+            *((uint64_t*)bitmap) = UINT64_MAX;
+            return;
+        }
+    }
+    unsigned bit_start = start / bitmap_granularity;
+    unsigned bit_end = ((start + len) + bitmap_granularity - 1) / bitmap_granularity;
+    while (bit_start < bit_end)
+    {
+        if (!(bit_start & 7) && bit_end >= bit_start+8)
+        {
+            ((uint8_t*)bitmap)[bit_start / 8] = UINT8_MAX;
+            bit_start += 8;
+        }
+        else
+        {
+            ((uint8_t*)bitmap)[bit_start / 8] |= 1 << (bit_start % 8);
+            bit_start++;
        }
    }
 }
-
-void bitmap_clear(void *bitmap, uint64_t start, uint64_t len, uint64_t bitmap_granularity)
-{
-    if (start == 0 && len == 32*bitmap_granularity)
-        *((uint32_t*)bitmap) = 0;
-    else if (start == 0 && len == 64*bitmap_granularity)
-        *((uint64_t*)bitmap) = 0;
-    else
-    {
-        unsigned bit_start = start / bitmap_granularity;
-        unsigned bit_end = ((start + len) + bitmap_granularity - 1) / bitmap_granularity;
-        while (bit_start < bit_end)
-        {
-            if (!(bit_start & 7) && bit_end >= bit_start+8)
-            {
-                ((uint8_t*)bitmap)[bit_start / 8] = 0;
-                bit_start += 8;
-            }
-            else
-            {
-                ((uint8_t*)bitmap)[bit_start / 8] &= (0xFF ^ (1 << (bit_start % 8)));
-                bit_start++;
-            }
-        }
-    }
-}
-
-bool bitmap_check(void *bitmap, uint64_t start, uint64_t len, uint64_t bitmap_granularity)
-{
-    bool r = false;
-    if (start == 0 && len == 32*bitmap_granularity)
-        r = !!*((uint32_t*)bitmap);
-    else if (start == 0 && len == 64*bitmap_granularity)
-        r = !!*((uint64_t*)bitmap);
-    else
-    {
-        unsigned bit_start = start / bitmap_granularity;
-        unsigned bit_end = ((start + len) + bitmap_granularity - 1) / bitmap_granularity;
-        while (bit_start < bit_end)
-        {
-            if (!(bit_start & 7) && bit_end >= bit_start+8)
-            {
-                r = r || !!((uint8_t*)bitmap)[bit_start / 8];
-                bit_start += 8;
-            }
-            else
-            {
-                r = r || (((uint8_t*)bitmap)[bit_start / 8] & (1 << (bit_start % 8)));
-                bit_start++;
-            }
-        }
-    }
-    return r;
-}
--- a/src/allocator.h
+++ b/src/allocator.h
@@ -23,5 +23,3 @@ public:
 };

 void bitmap_set(void *bitmap, uint64_t start, uint64_t len, uint64_t bitmap_granularity);
-void bitmap_clear(void *bitmap, uint64_t start, uint64_t len, uint64_t bitmap_granularity);
-bool bitmap_check(void *bitmap, uint64_t start, uint64_t len, uint64_t bitmap_granularity);
--- a/src/blockstore.cpp
+++ b/src/blockstore.cpp
@@ -82,8 +82,3 @@ uint32_t blockstore_t::get_bitmap_granularity()
 {
    return impl->get_bitmap_granularity();
 }
-
-bool blockstore_t::wants_fsync()
-{
-    return impl->wants_fsync();
-}
--- a/src/blockstore.h
+++ b/src/blockstore.h
@@ -77,7 +77,6 @@ Output:
  -EINVAL = invalid input parameters
  -ENOENT = requested object/version does not exist for reads
  -ENOSPC = no space left in the store for writes
-  -EDOM = checksum error.
 - version = the version actually read or written

 ## BS_OP_DELETE
@@ -226,7 +225,4 @@ public:
    uint64_t get_journal_size();

    uint32_t get_bitmap_granularity();
-
-    // Returns true if writing can stall due to a lack of fsync
-    bool wants_fsync();
 };
--- a/src/blockstore_disk.cpp
+++ b/src/blockstore_disk.cpp
@@ -40,49 +40,10 @@ void blockstore_disk_t::parse_config(std::map<std::string, std::string> & config
    data_block_size = parse_size(config["block_size"]);
    journal_device = config["journal_device"];
    journal_offset = parse_size(config["journal_offset"]);
-    disk_alignment = parse_size(config["disk_alignment"]);
-    journal_block_size = parse_size(config["journal_block_size"]);
-    meta_block_size = parse_size(config["meta_block_size"]);
-    bitmap_granularity = parse_size(config["bitmap_granularity"]);
-    meta_format = stoull_full(config["meta_format"]);
-    if (config.find("data_io") == config.end() &&
-        config.find("meta_io") == config.end() &&
-        config.find("journal_io") == config.end())
-    {
-        bool cached_io_data = config["cached_io_data"] == "true" || config["cached_io_data"] == "yes" || config["cached_io_data"] == "1";
-        bool cached_io_meta = cached_io_data && (meta_device == data_device || meta_device == "") &&
-            config.find("cached_io_meta") == config.end() ||
-            config["cached_io_meta"] == "true" || config["cached_io_meta"] == "yes" || config["cached_io_meta"] == "1";
-        bool cached_io_journal = cached_io_meta && (journal_device == meta_device || journal_device == "") &&
-            config.find("cached_io_journal") == config.end() ||
-            config["cached_io_journal"] == "true" || config["cached_io_journal"] == "yes" || config["cached_io_journal"] == "1";
-        data_io = cached_io_data ? "cached" : "direct";
-        meta_io = cached_io_meta ? "cached" : "direct";
-        journal_io = cached_io_journal ? "cached" : "direct";
-    }
-    else
-    {
-        data_io = config.find("data_io") != config.end() ? config["data_io"] : "direct";
-        meta_io = config.find("meta_io") != config.end()
-            ? config["meta_io"]
-            : (meta_device == data_device || meta_device == "" ? data_io : "direct");
-        journal_io = config.find("journal_io") != config.end()
-            ? config["journal_io"]
-            : (journal_device == meta_device || journal_device == "" ? meta_io : "direct");
-    }
-    if (config["data_csum_type"] == "crc32c")
-    {
-        data_csum_type = BLOCKSTORE_CSUM_CRC32C;
-    }
-    else if (config["data_csum_type"] == "" || config["data_csum_type"] == "none")
-    {
-        data_csum_type = BLOCKSTORE_CSUM_NONE;
-    }
-    else
-    {
-        throw std::runtime_error("data_csum_type="+config["data_csum_type"]+" is unsupported, only \"crc32c\" and \"none\" are supported");
-    }
-    csum_block_size = parse_size(config["csum_block_size"]);
+    disk_alignment = strtoull(config["disk_alignment"].c_str(), NULL, 10);
+    journal_block_size = strtoull(config["journal_block_size"].c_str(), NULL, 10);
+    meta_block_size = strtoull(config["meta_block_size"].c_str(), NULL, 10);
+    bitmap_granularity = strtoull(config["bitmap_granularity"].c_str(), NULL, 10);
    // Validate
    if (!data_block_size)
    {
@@ -130,23 +91,7 @@ void blockstore_disk_t::parse_config(std::map<std::string, std::string> & config
    }
    if (data_block_size % bitmap_granularity)
    {
-        throw std::runtime_error("Data block size must be a multiple of sparse write tracking granularity");
-    }
-    if (!data_csum_type)
-    {
-        csum_block_size = 0;
-    }
-    else if (!csum_block_size)
-    {
-        csum_block_size = bitmap_granularity;
-    }
-    if (csum_block_size && (csum_block_size % bitmap_granularity))
-    {
-        throw std::runtime_error("Checksum block size must be a multiple of sparse write tracking granularity");
-    }
-    if (csum_block_size && (data_block_size % csum_block_size))
-    {
-        throw std::runtime_error("Checksum block size must be a divisor of data block size");
+        throw std::runtime_error("Block size must be a multiple of sparse write tracking granularity");
    }
    if (meta_device == "")
    {
@@ -165,9 +110,7 @@ void blockstore_disk_t::parse_config(std::map<std::string, std::string> & config
        throw std::runtime_error("journal_offset must be a multiple of journal_block_size = "+std::to_string(journal_block_size));
    }
    clean_entry_bitmap_size = data_block_size / bitmap_granularity / 8;
-    clean_dyn_size = clean_entry_bitmap_size*2 + (csum_block_size
-        ? data_block_size/csum_block_size*(data_csum_type & 0xFF) : 0);
-    clean_entry_size = sizeof(clean_disk_entry) + clean_dyn_size + 4 /*entry_csum*/;
+    clean_entry_size = sizeof(clean_disk_entry) + 2*clean_entry_bitmap_size;
 }

 void blockstore_disk_t::calc_lengths(bool skip_meta_check)
@@ -217,25 +160,6 @@ void blockstore_disk_t::calc_lengths(bool skip_meta_check)
    // required metadata size
    block_count = data_len / data_block_size;
    meta_len = (1 + (block_count - 1 + meta_block_size / clean_entry_size) / (meta_block_size / clean_entry_size)) * meta_block_size;
-    if (meta_format == BLOCKSTORE_META_FORMAT_V1 ||
-        !meta_format && !skip_meta_check && meta_area_size < meta_len && !data_csum_type)
-    {
-        uint64_t clean_entry_v0_size = sizeof(clean_disk_entry) + 2*clean_entry_bitmap_size;
-        uint64_t meta_v0_len = (1 + (block_count - 1 + meta_block_size / clean_entry_v0_size)
-            / (meta_block_size / clean_entry_v0_size)) * meta_block_size;
-        if (meta_format == BLOCKSTORE_META_FORMAT_V1 || meta_area_size >= meta_v0_len)
-        {
-            // Old metadata fits.
-            printf("Warning: Using old metadata format without checksums because the new format doesn't fit into provided area\n");
-            clean_entry_size = clean_entry_v0_size;
-            meta_len = meta_v0_len;
-            meta_format = BLOCKSTORE_META_FORMAT_V1;
-        }
-        else
-            meta_format = BLOCKSTORE_META_FORMAT_V2;
-    }
-    else
-        meta_format = BLOCKSTORE_META_FORMAT_V2;
    if (!skip_meta_check && meta_area_size < meta_len)
    {
        throw std::runtime_error("Metadata area is too small, need at least "+std::to_string(meta_len)+" bytes");
@@ -290,19 +214,9 @@ static void check_size(int fd, uint64_t *size, uint64_t *sectsize, std::string n
    }
 }

-static int bs_openmode(const std::string & mode)
-{
-    if (mode == "directsync")
-        return O_DIRECT|O_SYNC;
-    else if (mode == "cached")
-        return O_SYNC;
-    else
-        return O_DIRECT;
-}
-
 void blockstore_disk_t::open_data()
 {
-    data_fd = open(data_device.c_str(), bs_openmode(data_io) | O_RDWR);
+    data_fd = open(data_device.c_str(), O_DIRECT|O_RDWR);
    if (data_fd == -1)
    {
        throw std::runtime_error("Failed to open data device "+data_device+": "+std::string(strerror(errno)));
@@ -327,9 +241,9 @@ void blockstore_disk_t::open_data()

 void blockstore_disk_t::open_meta()
 {
-    if (meta_device != data_device || meta_io != data_io)
+    if (meta_device != data_device)
    {
-        meta_fd = open(meta_device.c_str(), bs_openmode(meta_io) | O_RDWR);
+        meta_fd = open(meta_device.c_str(), O_DIRECT|O_RDWR);
        if (meta_fd == -1)
        {
            throw std::runtime_error("Failed to open metadata device "+meta_device+": "+std::string(strerror(errno)));
@@ -339,7 +253,7 @@ void blockstore_disk_t::open_meta()
        {
            throw std::runtime_error("meta_offset exceeds device size = "+std::to_string(meta_device_size));
        }
-        if (!disable_flock && meta_device != data_device && flock(meta_fd, LOCK_EX|LOCK_NB) != 0)
+        if (!disable_flock && flock(meta_fd, LOCK_EX|LOCK_NB) != 0)
        {
            throw std::runtime_error(std::string("Failed to lock metadata device: ") + strerror(errno));
        }
@@ -365,15 +279,15 @@ void blockstore_disk_t::open_meta()

 void blockstore_disk_t::open_journal()
 {
-    if (journal_device != meta_device || journal_io != meta_io)
+    if (journal_device != meta_device)
    {
-        journal_fd = open(journal_device.c_str(), bs_openmode(journal_io) | O_RDWR);
+        journal_fd = open(journal_device.c_str(), O_DIRECT|O_RDWR);
        if (journal_fd == -1)
        {
            throw std::runtime_error("Failed to open journal device "+journal_device+": "+std::string(strerror(errno)));
        }
        check_size(journal_fd, &journal_device_size, &journal_device_sect, "journal device");
-        if (!disable_flock && journal_device != meta_device && flock(journal_fd, LOCK_EX|LOCK_NB) != 0)
+        if (!disable_flock && flock(journal_fd, LOCK_EX|LOCK_NB) != 0)
        {
            throw std::runtime_error(std::string("Failed to lock journal device: ") + strerror(errno));
        }
--- a/src/blockstore_disk.h
+++ b/src/blockstore_disk.h
@@ -8,10 +8,6 @@
 #include <string>
 #include <map>

-#define BLOCKSTORE_CSUM_NONE 0
-// Lower byte of checksum type is its length
-#define BLOCKSTORE_CSUM_CRC32C 0x104
-
 struct blockstore_disk_t
 {
    std::string data_device, meta_device, journal_device;
@@ -25,24 +21,17 @@ struct blockstore_disk_t
    uint64_t meta_block_size = 4096;
    // Sparse write tracking granularity. 4 KB is a good choice. Must be a multiple of disk_alignment
    uint64_t bitmap_granularity = 4096;
-    // Data checksum type, BLOCKSTORE_CSUM_NONE or BLOCKSTORE_CSUM_CRC32C
-    uint32_t data_csum_type = BLOCKSTORE_CSUM_NONE;
-    // Checksum block size, must be a multiple of bitmap_granularity
-    uint32_t csum_block_size = 4096;
    // By default, Blockstore locks all opened devices exclusively. This option can be used to disable locking
    bool disable_flock = false;
-    // I/O modes for data, metadata and journal: direct or "" = O_DIRECT, cached = O_SYNC, directsync = O_DIRECT|O_SYNC
-    // O_SYNC without O_DIRECT = use Linux page cache for reads and writes
-    std::string data_io, meta_io, journal_io;

    int meta_fd = -1, data_fd = -1, journal_fd = -1;
-    uint64_t meta_offset, meta_device_sect, meta_device_size, meta_len, meta_format = 0;
+    uint64_t meta_offset, meta_device_sect, meta_device_size, meta_len;
    uint64_t data_offset, data_device_sect, data_device_size, data_len;
    uint64_t journal_offset, journal_device_sect, journal_device_size, journal_len;

    uint32_t block_order;
    uint64_t block_count;
-    uint32_t clean_entry_bitmap_size = 0, clean_entry_size = 0, clean_dyn_size = 0;
+    uint32_t clean_entry_bitmap_size = 0, clean_entry_size = 0;

    void parse_config(std::map<std::string, std::string> & config);
    void open_data();
@@ -50,13 +39,4 @@ struct blockstore_disk_t
    void open_journal();
    void calc_lengths(bool skip_meta_check = false);
    void close_all();
-
-    inline uint64_t dirty_dyn_size(uint64_t offset, uint64_t len)
-    {
-        // Checksums may be partial if write is not aligned with csum_block_size
-        return clean_entry_bitmap_size + (csum_block_size && len > 0
-            ? ((offset+len+csum_block_size-1)/csum_block_size - offset/csum_block_size)
-                * (data_csum_type & 0xFF)
-            : 0);
-    }
 };
--- a/src/blockstore_flush.cpp
+++ b/src/blockstore_flush.cpp
--- a/src/blockstore_flush.h
+++ b/src/blockstore_flush.h
@@ -1,22 +1,10 @@
 // Copyright (c) Vitaliy Filippov, 2019+
 // License: VNPL-1.1 (see README.md for details)

-#define COPY_BUF_JOURNAL 1
-#define COPY_BUF_DATA 2
-#define COPY_BUF_ZERO 4
-#define COPY_BUF_CSUM_FILL 8
-#define COPY_BUF_COALESCED 16
-#define COPY_BUF_META_BLOCK 32
-#define COPY_BUF_JOURNALED_BIG 64
-
 struct copy_buffer_t
 {
-    int copy_flags;
-    uint64_t offset, len, disk_offset;
-    uint64_t journal_sector; // only for reads: sector+1 if used and !journal.inmemory, otherwise 0
+    uint64_t offset, len;
    void *buf;
-    uint8_t *csum_buf;
-    int *dyn_data;
 };

 struct meta_sector_t
@@ -49,7 +37,7 @@ class journal_flusher_co
 {
    blockstore_impl_t *bs;
    journal_flusher_t *flusher;
-    int wait_state, wait_count, wait_journal_count;
+    int wait_state, wait_count;
    struct io_uring_sqe *sqe;
    struct ring_data_t *data;

@@ -58,39 +46,28 @@ class journal_flusher_co
    obj_ver_id cur;
    std::map<obj_ver_id, dirty_entry>::iterator dirty_it, dirty_start, dirty_end;
    std::map<object_id, uint64_t>::iterator repeat_it;
-    std::function<void(ring_data_t*)> simple_callback_r, simple_callback_rj, simple_callback_w;
+    std::function<void(ring_data_t*)> simple_callback_r, simple_callback_w;

    bool skip_copy, has_delete, has_writes;
    std::vector<copy_buffer_t> v;
    std::vector<copy_buffer_t>::iterator it;
-    int i;
-    bool fill_incomplete, cleared_incomplete;
-    int read_to_fill_incomplete;
    int copy_count;
-    uint64_t clean_loc, clean_ver, old_clean_loc, old_clean_ver;
+    uint64_t clean_loc, old_clean_loc;
    flusher_meta_write_t meta_old, meta_new;
    bool clean_init_bitmap;
    uint64_t clean_bitmap_offset, clean_bitmap_len;
-    uint8_t *clean_init_dyn_ptr;
-    uint8_t *new_clean_bitmap;
+    void *new_clean_bitmap;

    uint64_t new_trim_pos;

+    // local: scan_dirty()
+    uint64_t offset, end_offset, submit_offset, submit_len;
+
    friend class journal_flusher_t;
-    void scan_dirty();
-    bool read_dirty(int wait_base);
-    bool modify_meta_do_reads(int wait_base);
-    bool wait_meta_reads(int wait_base);
+    bool scan_dirty(int wait_base);
    bool modify_meta_read(uint64_t meta_loc, flusher_meta_write_t &wr, int wait_base);
-    bool clear_incomplete_csum_block_bits(int wait_base);
-    void calc_block_checksums(uint32_t *new_data_csums, bool skip_overwrites);
-    void update_metadata_entry();
-    bool write_meta_block(flusher_meta_write_t & meta_block, int wait_base);
    void update_clean_db();
-    void free_data_blocks();
    bool fsync_batch(bool fsync_meta, int wait_base);
-    bool trim_journal(int wait_base);
-    void free_buffers();
 public:
    journal_flusher_co();
    bool loop();
@@ -118,16 +95,14 @@ class journal_flusher_t

    std::map<uint64_t, meta_sector_t> meta_sectors;
    std::deque<object_id> flush_queue;
-    std::map<object_id, uint64_t> flush_versions; // FIXME: consider unordered_map?
+    std::map<object_id, uint64_t> flush_versions;

    bool try_find_older(std::map<obj_ver_id, dirty_entry>::iterator & dirty_end, obj_ver_id & cur);
-    bool try_find_other(std::map<obj_ver_id, dirty_entry>::iterator & dirty_end, obj_ver_id & cur);

 public:
    journal_flusher_t(blockstore_impl_t *bs);
    ~journal_flusher_t();
    void loop();
-    bool is_trim_wanted() { return trim_wanted; }
    bool is_active();
    void mark_trim_possible();
    void request_trim();
@@ -136,5 +111,4 @@ public:
    void unshift_flush(obj_ver_id oid, bool force);
    void remove_flush(object_id oid);
    void dump_diagnostics();
-    bool is_mutated(uint64_t clean_loc);
 };
--- a/src/blockstore_impl.cpp
+++ b/src/blockstore_impl.cpp
@@ -13,7 +13,6 @@ blockstore_impl_t::blockstore_impl_t(blockstore_config_t & config, ring_loop_t *
    initialized = 0;
    parse_config(config, true);
    zero_object = (uint8_t*)memalign_or_die(MEM_ALIGNMENT, dsk.data_block_size);
-    alloc_dyn_data = dsk.clean_dyn_size > sizeof(void*) || dsk.csum_block_size > 0;
    try
    {
        dsk.open_data();
@@ -39,8 +38,8 @@ blockstore_impl_t::~blockstore_impl_t()
    dsk.close_all();
    if (metadata_buffer)
        free(metadata_buffer);
-    if (clean_bitmaps)
-        free(clean_bitmaps);
+    if (clean_bitmap)
+        free(clean_bitmap);
 }

 bool blockstore_impl_t::is_started()
@@ -167,7 +166,7 @@ void blockstore_impl_t::loop()
                // wait for all big writes to complete, submit data device fsync
                // wait for the data device fsync to complete, then submit journal writes for big writes
                // then submit an fsync operation
-                if (0 && has_writes)
+                if (has_writes)
                {
                    // Can't submit SYNC before previous writes
                    continue;
@@ -384,10 +383,6 @@ void blockstore_impl_t::enqueue_op(blockstore_op_t *op)
        ringloop->set_immediate([op]() { std::function<void (blockstore_op_t*)>(op->callback)(op); });
        return;
    }
-    if (op->opcode == BS_OP_SYNC)
-    {
-        unsynced_queued_ops = 0;
-    }
    init_op(op);
    submit_queue.push_back(op);
    ringloop->wakeup();
@@ -397,7 +392,6 @@ void blockstore_impl_t::init_op(blockstore_op_t *op)
 {
    // Call constructor without allocating memory. We'll call destructor before returning op back
    new ((void*)op->private_data) blockstore_op_private_t;
-    PRIV(op)->min_flushed_journal_sector = PRIV(op)->max_flushed_journal_sector = 0;
    PRIV(op)->wait_for = 0;
    PRIV(op)->op_state = 0;
    PRIV(op)->pending_ops = 0;
@@ -734,15 +728,3 @@ void blockstore_impl_t::disk_error_abort(const char *op, int retval, int expecte
    fprintf(stderr, "Disk %s failed: result is %d, expected %d. Can't continue, sorry :-(\n", op, retval, expected);
    exit(1);
 }
-
-bool blockstore_impl_t::wants_fsync()
-{
-    if (!unstable_writes.size())
-    {
-        return false;
-    }
-    uint64_t journal_free_space = journal.next_free < journal.used_start
-        ? (journal.used_start - journal.next_free)
-        : (journal.len - journal.next_free + journal.used_start - journal.block_size);
-    return journal_fsync_feedback_limit > 0 && journal.len-journal_free_space >= journal_fsync_feedback_limit;
-}
--- a/src/blockstore_impl.h
+++ b/src/blockstore_impl.h
@@ -93,10 +93,11 @@

 // "VITAstor"
 #define BLOCKSTORE_META_MAGIC_V1 0x726F747341544956l
-#define BLOCKSTORE_META_FORMAT_V1 1
-#define BLOCKSTORE_META_FORMAT_V2 2
+#define BLOCKSTORE_META_VERSION_V1 1

 // metadata header (superblock)
+// FIXME: After adding the OSD superblock, add a key to metadata
+// and journal headers to check if they belong to the same OSD
 struct __attribute__((__packed__)) blockstore_meta_header_v1_t
 {
    uint64_t zero;
@@ -107,29 +108,14 @@ struct __attribute__((__packed__)) blockstore_meta_header_v1_t
    uint32_t bitmap_granularity;
 };

-struct __attribute__((__packed__)) blockstore_meta_header_v2_t
-{
-    uint64_t zero;
-    uint64_t magic;
-    uint64_t version;
-    uint32_t meta_block_size;
-    uint32_t data_block_size;
-    uint32_t bitmap_granularity;
-    uint32_t data_csum_type;
-    uint32_t csum_block_size;
-    uint32_t header_csum;
-};
-
 // 32 bytes = 24 bytes + block bitmap (4 bytes by default) + external attributes (also bitmap, 4 bytes by default)
 // per "clean" entry on disk with fixed metadata tables
+// FIXME: maybe add crc32's to metadata
 struct __attribute__((__packed__)) clean_disk_entry
 {
    object_id oid;
    uint64_t version;
    uint8_t bitmap[];
-    // Two more fields come after bitmap in metadata version 2:
-    // uint32_t data_csum[];
-    // uint32_t entry_csum;
 };

 // 32 = 16 + 16 bytes per "clean" entry in memory (object_id => clean_entry)
@@ -139,7 +125,7 @@ struct __attribute__((__packed__)) clean_entry
    uint64_t location;
 };

-// 64 = 24 + 40 bytes per dirty entry in memory (obj_ver_id => dirty_entry). Plus checksums
+// 64 = 24 + 40 bytes per dirty entry in memory (obj_ver_id => dirty_entry)
 struct __attribute__((__packed__)) dirty_entry
 {
    uint32_t state;
@@ -148,7 +134,7 @@ struct __attribute__((__packed__)) dirty_entry
    uint32_t offset;   // data offset within object (stripe)
    uint32_t len;      // data length
    uint64_t journal_sector; // journal sector used for this entry
-    void* dyn_data;    // dynamic data: external bitmap and data block checksums. may be a pointer to the in-memory journal
+    void* bitmap;   // either external bitmap itself when it fits, or a pointer to it when it doesn't
 };

 // - Sync must be submitted after previous writes/deletes (not before!)
@@ -177,23 +163,12 @@ struct __attribute__((__packed__)) dirty_entry
 // Suspend operation until there is some free space on the data device
 #define WAIT_FREE 5

-struct used_clean_obj_t
+struct fulfill_read_t
 {
-    int refs;
-    bool was_freed; // was freed by a parallel flush?
-    bool was_changed; // was changed by a parallel flush?
+    uint64_t offset, len;
+    uint64_t journal_sector; // sector+1 if used and !journal.inmemory, otherwise 0
 };

-// https://github.com/algorithm-ninja/cpp-btree
-// https://github.com/greg7mdp/sparsepp/ was used previously, but it was TERRIBLY slow after resizing
-// with sparsepp, random reads dropped to ~700 iops very fast with just as much as ~32k objects in the DB
-typedef btree::btree_map<object_id, clean_entry> blockstore_clean_db_t;
-typedef std::map<obj_ver_id, dirty_entry> blockstore_dirty_db_t;
-
-#include "blockstore_init.h"
-
-#include "blockstore_flush.h"
-
 #define PRIV(op) ((blockstore_op_private_t*)(op)->private_data)
 #define FINISH_OP(op) PRIV(op)->~blockstore_op_private_t(); std::function<void (blockstore_op_t*)>(op->callback)(op)

@@ -206,11 +181,10 @@ struct blockstore_op_private_t
    int op_state;

    // Read
-    uint64_t clean_block_used;
-    std::vector<copy_buffer_t> read_vec;
+    std::vector<fulfill_read_t> read_vec;

    // Sync, write
-    uint64_t min_flushed_journal_sector, max_flushed_journal_sector;
+    int min_flushed_journal_sector, max_flushed_journal_sector;

    // Write
    struct iovec iov_zerofill[3];
@@ -220,8 +194,19 @@ struct blockstore_op_private_t

    // Sync
    std::vector<obj_ver_id> sync_big_writes, sync_small_writes;
+    int sync_small_checked, sync_big_checked;
 };

+// https://github.com/algorithm-ninja/cpp-btree
+// https://github.com/greg7mdp/sparsepp/ was used previously, but it was TERRIBLY slow after resizing
+// with sparsepp, random reads dropped to ~700 iops very fast with just as much as ~32k objects in the DB
+typedef btree::btree_map<object_id, clean_entry> blockstore_clean_db_t;
+typedef std::map<obj_ver_id, dirty_entry> blockstore_dirty_db_t;
+
+#include "blockstore_init.h"
+
+#include "blockstore_flush.h"
+
 typedef uint32_t pool_id_t;
 typedef uint64_t pool_pg_id_t;

@@ -262,22 +247,17 @@ class blockstore_impl_t
    int throttle_target_parallelism = 1;
    // Minimum difference in microseconds between target and real execution times to throttle the response
    int throttle_threshold_us = 50;
-    // Maximum writes between automatically added fsync operations
-    uint64_t autosync_writes = 128;
-    // Maximum free space in the journal in bytes to start sending fsync feedback to primary OSDs
-    uint64_t journal_fsync_feedback_limit = 0;
    /******* END OF OPTIONS *******/

    struct ring_consumer_t ring_consumer;

    std::map<pool_id_t, pool_shard_settings_t> clean_db_settings;
    std::map<pool_pg_id_t, blockstore_clean_db_t> clean_db_shards;
-    uint8_t *clean_bitmaps = NULL;
+    uint8_t *clean_bitmap = NULL;
    blockstore_dirty_db_t dirty_db;
    std::vector<blockstore_op_t*> submit_queue;
    std::vector<obj_ver_id> unsynced_big_writes, unsynced_small_writes;
    int unsynced_big_write_count = 0;
-    int unsynced_queued_ops = 0;
    allocator *data_alloc = NULL;
    uint8_t *zero_object;

@@ -287,10 +267,6 @@ class blockstore_impl_t
    journal_flusher_t *flusher;
    int big_to_flush = 0;
    int write_iodepth = 0;
-    bool alloc_dyn_data = false;
-
-    // clean data blocks referenced by read operations
-    std::map<uint64_t, used_clean_obj_t> used_clean_objects;

    bool live = false, queue_stall = false;
    ring_loop_t *ringloop;
@@ -334,30 +310,8 @@ class blockstore_impl_t

    // Read
    int dequeue_read(blockstore_op_t *read_op);
-    void find_holes(std::vector<copy_buffer_t> & read_vec, uint32_t item_start, uint32_t item_end,
-        std::function<int(int, bool, uint32_t, uint32_t)> callback);
-    int fulfill_read(blockstore_op_t *read_op,
-        uint64_t &fulfilled, uint32_t item_start, uint32_t item_end,
-        uint32_t item_state, uint64_t item_version, uint64_t item_location,
-        uint64_t journal_sector, uint8_t *csum, int *dyn_data);
-    bool fulfill_clean_read(blockstore_op_t *read_op, uint64_t & fulfilled,
-        uint8_t *clean_entry_bitmap, int *dyn_data,
-        uint32_t item_start, uint32_t item_end, uint64_t clean_loc, uint64_t clean_ver);
-    int fill_partial_checksum_blocks(std::vector<copy_buffer_t> & rv, uint64_t & fulfilled,
-        uint8_t *clean_entry_bitmap, int *dyn_data, bool from_journal, uint8_t *read_buf, uint64_t read_offset, uint64_t read_end);
-    int pad_journal_read(std::vector<copy_buffer_t> & rv, copy_buffer_t & cp,
-        uint64_t dirty_offset, uint64_t dirty_end, uint64_t dirty_loc, uint8_t *csum_ptr, int *dyn_data,
-        uint64_t offset, uint64_t submit_len, uint64_t & blk_begin, uint64_t & blk_end, uint8_t* & blk_buf);
-    bool read_range_fulfilled(std::vector<copy_buffer_t> & rv, uint64_t & fulfilled, uint8_t *read_buf,
-        uint8_t *clean_entry_bitmap, uint32_t item_start, uint32_t item_end);
-    bool read_checksum_block(blockstore_op_t *op, int rv_pos, uint64_t &fulfilled, uint64_t clean_loc);
-    uint8_t* read_clean_meta_block(blockstore_op_t *read_op, uint64_t clean_loc, int rv_pos);
-    bool verify_padded_checksums(uint8_t *clean_entry_bitmap, uint8_t *csum_buf, uint32_t offset,
-        iovec *iov, int n_iov, std::function<void(uint32_t, uint32_t, uint32_t)> bad_block_cb);
-    bool verify_journal_checksums(uint8_t *csums, uint32_t offset,
-        iovec *iov, int n_iov, std::function<void(uint32_t, uint32_t, uint32_t)> bad_block_cb);
-    bool verify_clean_padded_checksums(blockstore_op_t *op, uint64_t clean_loc, uint8_t *dyn_data, bool from_journal,
-        iovec *iov, int n_iov, std::function<void(uint32_t, uint32_t, uint32_t)> bad_block_cb);
+    int fulfill_read(blockstore_op_t *read_op, uint64_t &fulfilled, uint32_t item_start, uint32_t item_end,
+        uint32_t item_state, uint64_t item_version, uint64_t item_location, uint64_t journal_sector);
    int fulfill_read_push(blockstore_op_t *op, void *buf, uint64_t offset, uint64_t len,
        uint32_t item_state, uint64_t item_version);
    void handle_read_event(ring_data_t *data, blockstore_op_t *op);
@@ -388,7 +342,6 @@ class blockstore_impl_t
    int continue_rollback(blockstore_op_t *op);
    void mark_rolled_back(const obj_ver_id & ov);
    void erase_dirty(blockstore_dirty_db_t::iterator dirty_start, blockstore_dirty_db_t::iterator dirty_end, uint64_t clean_loc);
-    void free_dirty_dyn_data(dirty_entry & e);

    // List
    void process_list(blockstore_op_t *op);
@@ -435,6 +388,4 @@ public:
    inline uint64_t get_free_block_count() { return data_alloc->get_free_count(); }
    inline uint32_t get_bitmap_granularity() { return dsk.disk_alignment; }
    inline uint64_t get_journal_size() { return dsk.journal_len; }
-
-    bool wants_fsync();
 };
--- a/src/blockstore_init.cpp
+++ b/src/blockstore_init.cpp
@@ -77,20 +77,13 @@ resume_1:
    if (iszero((uint64_t*)metadata_buffer, bs->dsk.meta_block_size / sizeof(uint64_t)))
    {
        {
-            blockstore_meta_header_v2_t *hdr = (blockstore_meta_header_v2_t *)metadata_buffer;
+            blockstore_meta_header_v1_t *hdr = (blockstore_meta_header_v1_t *)metadata_buffer;
            hdr->zero = 0;
            hdr->magic = BLOCKSTORE_META_MAGIC_V1;
-            hdr->version = bs->dsk.meta_format;
+            hdr->version = BLOCKSTORE_META_VERSION_V1;
            hdr->meta_block_size = bs->dsk.meta_block_size;
            hdr->data_block_size = bs->dsk.data_block_size;
            hdr->bitmap_granularity = bs->dsk.bitmap_granularity;
-            if (bs->dsk.meta_format >= BLOCKSTORE_META_FORMAT_V2)
-            {
-                hdr->data_csum_type = bs->dsk.data_csum_type;
-                hdr->csum_block_size = bs->dsk.csum_block_size;
-                hdr->header_csum = 0;
-                hdr->header_csum = crc32c(0, hdr, sizeof(*hdr));
-            }
        }
        if (bs->readonly)
        {
@@ -116,62 +109,28 @@ resume_1:
    }
    else
    {
-        blockstore_meta_header_v2_t *hdr = (blockstore_meta_header_v2_t *)metadata_buffer;
-        if (hdr->zero != 0 || hdr->magic != BLOCKSTORE_META_MAGIC_V1 || hdr->version < BLOCKSTORE_META_FORMAT_V1)
+        blockstore_meta_header_v1_t *hdr = (blockstore_meta_header_v1_t *)metadata_buffer;
+        if (hdr->zero != 0 ||
+            hdr->magic != BLOCKSTORE_META_MAGIC_V1 ||
+            hdr->version != BLOCKSTORE_META_VERSION_V1)
        {
            printf(
-                "Metadata is corrupt or too old (pre-0.6.x).\n"
-                " If this is a new OSD, please zero out the metadata area before starting it.\n"
-                " If you need to upgrade from 0.5.x, convert metadata with vitastor-disk.\n"
-            );
-            exit(1);
-        }
-        if (hdr->version == BLOCKSTORE_META_FORMAT_V2)
-        {
-            uint32_t csum = hdr->header_csum;
-            hdr->header_csum = 0;
-            if (crc32c(0, hdr, sizeof(*hdr)) != csum)
-            {
-                printf("Metadata header is corrupt (checksum mismatch).\n");
-                exit(1);
-            }
-            hdr->header_csum = csum;
-            bs->dsk.meta_format = BLOCKSTORE_META_FORMAT_V2;
-        }
-        else if (hdr->version == BLOCKSTORE_META_FORMAT_V1)
-        {
-            hdr->data_csum_type = 0;
-            hdr->csum_block_size = 0;
-            hdr->header_csum = 0;
-            // Enable compatibility mode - entries without checksums
-            bs->dsk.clean_entry_size = sizeof(clean_disk_entry) + bs->dsk.clean_entry_bitmap_size*2;
-            bs->dsk.meta_len = (1 + (bs->dsk.block_count - 1 + bs->dsk.meta_block_size / bs->dsk.clean_entry_size)
-                / (bs->dsk.meta_block_size / bs->dsk.clean_entry_size)) * bs->dsk.meta_block_size;
-            bs->dsk.meta_format = BLOCKSTORE_META_FORMAT_V1;
-            printf("Warning: Starting with metadata in the old format without checksums, as stored on disk\n");
-        }
-        else if (hdr->version > BLOCKSTORE_META_FORMAT_V2)
-        {
-            printf(
-                "Metadata format is too new for me (stored version is %lu, max supported %u).\n",
-                hdr->version, BLOCKSTORE_META_FORMAT_V2
+                "Metadata is corrupt or old version.\n"
+                " If this is a new OSD please zero out the metadata area before starting it.\n"
+                " If you need to upgrade from 0.5.x please request it via the issue tracker.\n"
            );
            exit(1);
        }
        if (hdr->meta_block_size != bs->dsk.meta_block_size ||
            hdr->data_block_size != bs->dsk.data_block_size ||
-            hdr->bitmap_granularity != bs->dsk.bitmap_granularity ||
-            hdr->data_csum_type != bs->dsk.data_csum_type ||
-            hdr->csum_block_size != bs->dsk.csum_block_size)
+            hdr->bitmap_granularity != bs->dsk.bitmap_granularity)
        {
            printf(
                "Configuration stored in metadata superblock"
-                " (meta_block_size=%u, data_block_size=%u, bitmap_granularity=%u, data_csum_type=%u, csum_block_size=%u)"
-                " differs from OSD configuration (%lu/%u/%lu, %u/%u).\n",
+                " (meta_block_size=%u, data_block_size=%u, bitmap_granularity=%u)"
+                " differs from OSD configuration (%lu/%u/%lu).\n",
                hdr->meta_block_size, hdr->data_block_size, hdr->bitmap_granularity,
-                hdr->data_csum_type, hdr->csum_block_size,
-                bs->dsk.meta_block_size, bs->dsk.data_block_size, bs->dsk.bitmap_granularity,
-                bs->dsk.data_csum_type, bs->dsk.csum_block_size
+                bs->dsk.meta_block_size, bs->dsk.data_block_size, bs->dsk.bitmap_granularity
            );
            exit(1);
        }
@@ -320,22 +279,12 @@ bool blockstore_init_meta::handle_meta_block(uint8_t *buf, uint64_t entries_per_
    for (uint64_t i = 0; i < max_i; i++)
    {
        clean_disk_entry *entry = (clean_disk_entry*)(buf + i*bs->dsk.clean_entry_size);
+        if (!bs->inmemory_meta && bs->dsk.clean_entry_bitmap_size)
+        {
+            memcpy(bs->clean_bitmap + (done_cnt+i)*2*bs->dsk.clean_entry_bitmap_size, &entry->bitmap, 2*bs->dsk.clean_entry_bitmap_size);
+        }
        if (entry->oid.inode > 0)
        {
-            if (bs->dsk.meta_format >= BLOCKSTORE_META_FORMAT_V2)
-            {
-                // Check entry crc32
-                uint32_t *entry_csum = (uint32_t*)((uint8_t*)entry + bs->dsk.clean_entry_size - 4);
-                if (*entry_csum != crc32c(0, entry, bs->dsk.clean_entry_size - 4))
-                {
-                    printf("Metadata entry %lu is corrupt (checksum mismatch), skipping\n", done_cnt+i);
-                    continue;
-                }
-            }
-            if (!bs->inmemory_meta && bs->dsk.clean_entry_bitmap_size)
-            {
-                memcpy(bs->clean_bitmaps + (done_cnt+i) * 2 * bs->dsk.clean_entry_bitmap_size, &entry->bitmap, 2 * bs->dsk.clean_entry_bitmap_size);
-            }
            auto & clean_db = bs->clean_db_shard(entry->oid);
            auto clean_it = clean_db.find(entry->oid);
            if (clean_it == clean_db.end() || clean_it->second.version < entry->version)
@@ -491,9 +440,7 @@ resume_1:
            .size = sizeof(journal_entry_start),
            .reserved = 0,
            .journal_start = bs->journal.block_size,
-            .version = JOURNAL_VERSION_V2,
-            .data_csum_type = bs->dsk.data_csum_type,
-            .csum_block_size = bs->dsk.csum_block_size,
+            .version = JOURNAL_VERSION,
        };
        ((journal_entry_start*)submitted_buf)->crc32 = je_crc32((journal_entry*)submitted_buf);
        if (bs->readonly)
@@ -545,36 +492,18 @@ resume_1:
        if (je_start->magic != JOURNAL_MAGIC ||
            je_start->type != JE_START ||
            je_crc32((journal_entry*)je_start) != je_start->crc32 ||
-            je_start->size != JE_START_V0_SIZE && je_start->size != JE_START_V1_SIZE && je_start->size != JE_START_V2_SIZE)
+            je_start->size != sizeof(journal_entry_start) && je_start->size != JE_START_LEGACY_SIZE)
        {
            // Entry is corrupt
-            fprintf(stderr, "First entry of the journal is corrupt or unsupported\n");
+            fprintf(stderr, "First entry of the journal is corrupt\n");
            exit(1);
        }
-        if (je_start->size == JE_START_V0_SIZE ||
-            (je_start->version != JOURNAL_VERSION_V1 || je_start->size != JE_START_V1_SIZE) &&
-            (je_start->version != JOURNAL_VERSION_V2 || je_start->size != JE_START_V2_SIZE))
+        if (je_start->size == JE_START_LEGACY_SIZE || je_start->version != JOURNAL_VERSION)
        {
            fprintf(
-                stderr, "The code only supports journal versions 2 and 1, but it is %lu on disk."
-                    " Please use vitastor-disk to rewrite the journal\n",
-                je_start->size == JE_START_V0_SIZE ? 0 : je_start->version
-            );
-            exit(1);
-        }
-        if (je_start->version == JOURNAL_VERSION_V1)
-        {
-            je_start->data_csum_type = 0;
-            je_start->csum_block_size = 0;
-        }
-        if (je_start->data_csum_type != bs->dsk.data_csum_type ||
-            je_start->csum_block_size != bs->dsk.csum_block_size)
-        {
-            printf(
-                "Configuration stored in journal superblock (data_csum_type=%u, csum_block_size=%u)"
-                " differs from OSD configuration (%u/%u).\n",
-                je_start->data_csum_type, je_start->csum_block_size,
-                bs->dsk.data_csum_type, bs->dsk.csum_block_size
+                stderr, "The code only supports journal version %d, but it is %lu on disk."
+                    " Please use the previous version to flush the journal before upgrading OSD\n",
+                JOURNAL_VERSION, je_start->size == JE_START_LEGACY_SIZE ? 0 : je_start->version
            );
            exit(1);
        }
@@ -776,14 +705,11 @@ int blockstore_init_journal::handle_journal_part(void *buf, uint64_t done_pos, u
                    snprintf(err, 1024, "BUG: calculated journal data offset (%08lx) != stored journal data offset (%08lx)", location, je->small_write.data_offset);
                    throw std::runtime_error(err);
                }
-                small_write_data.clear();
+                uint32_t data_crc32 = 0;
                if (location >= done_pos && location+je->small_write.len <= done_pos+len)
                {
                    // data is within this buffer
-                    small_write_data.push_back((iovec){
-                        .iov_base = (uint8_t*)buf + location - done_pos,
-                        .iov_len = je->small_write.len,
-                    });
+                    data_crc32 = crc32c(0, (uint8_t*)buf + location - done_pos, je->small_write.len);
                }
                else
                {
@@ -798,10 +724,7 @@ int blockstore_init_journal::handle_journal_part(void *buf, uint64_t done_pos, u
                                ? location+je->small_write.len : done[i].pos+done[i].len);
                            uint64_t part_begin = (location < done[i].pos ? done[i].pos : location);
                            covered += part_end - part_begin;
-                            small_write_data.push_back((iovec){
-                                .iov_base = (uint8_t*)done[i].buf + part_begin - done[i].pos,
-                                .iov_len = part_end - part_begin,
-                            });
+                            data_crc32 = crc32c(data_crc32, (uint8_t*)done[i].buf + part_begin - done[i].pos, part_end - part_begin);
                        }
                    }
                    if (covered < je->small_write.len)
@@ -811,102 +734,12 @@ int blockstore_init_journal::handle_journal_part(void *buf, uint64_t done_pos, u
                        return 2;
                    }
                }
-                bool data_csum_valid = true;
-                if (!bs->dsk.csum_block_size)
-                {
-                    uint32_t data_crc32 = 0;
-                    for (auto & sd: small_write_data)
-                    {
-                        data_crc32 = crc32c(data_crc32, sd.iov_base, sd.iov_len);
-                    }
-                    data_csum_valid = data_crc32 == je->small_write.crc32_data;
-                    if (!data_csum_valid)
-                    {
-                        printf(
-                            "Journal entry data is corrupt for small_write%s oid=%lx:%lx ver=%lu offset=%u len=%u - data crc32 %x != %x\n",
-                            je->type == JE_SMALL_WRITE_INSTANT ? "_instant" : "",
-                            je->small_write.oid.inode, je->small_write.oid.stripe, je->small_write.version,
-                            je->small_write.offset, je->small_write.len,
-                            data_crc32, je->small_write.crc32_data
-                        );
-                    }
-                }
-                else if (je->small_write.len > 0)
-                {
-                    // FIXME: deduplicate with disk_tool_journal.cpp
-                    // like in enqueue_write()
-                    uint32_t start = je->small_write.offset / bs->dsk.csum_block_size;
-                    uint32_t end = (je->small_write.offset+je->small_write.len-1) / bs->dsk.csum_block_size;
-                    uint32_t data_csum_size = (end-start+1) * (bs->dsk.data_csum_type & 0xFF);
-                    uint32_t required_size = sizeof(journal_entry_small_write) + bs->dsk.clean_entry_bitmap_size + data_csum_size;
-                    if (je->size != required_size)
-                    {
-                        printf(
-                            "Journal entry data has invalid size for small_write%s oid=%lx:%lx ver=%lu offset=%u len=%u - should be %u bytes but is %u bytes\n",
-                            je->type == JE_SMALL_WRITE_INSTANT ? "_instant" : "",
-                            je->small_write.oid.inode, je->small_write.oid.stripe, je->small_write.version,
-                            je->small_write.offset, je->small_write.len,
-                            required_size, je->size
-                        );
-                        data_csum_valid = false;
-                    }
-                    else
-                    {
-                        int sd_num = 0;
-                        size_t sd_pos = 0;
-                        uint32_t *block_csums = (uint32_t*)((uint8_t*)je + sizeof(journal_entry_small_write) + bs->dsk.clean_entry_bitmap_size);
-                        for (uint32_t pos = start; pos <= end; pos++, block_csums++)
-                        {
-                            size_t block_left = (pos == start
-                                ? (start == end
-                                    ? je->small_write.len
-                                    : bs->dsk.csum_block_size - je->small_write.offset%bs->dsk.csum_block_size)
-                                : (pos < end
-                                    ? bs->dsk.csum_block_size
-                                    : (je->small_write.offset + je->small_write.len)%bs->dsk.csum_block_size));
-                            if (pos > start && pos == end && block_left == 0)
-                            {
-                                // full last block
-                                block_left = bs->dsk.csum_block_size;
-                            }
-                            uint32_t block_crc32 = 0;
-                            while (block_left > 0)
-                            {
-                                assert(sd_num < small_write_data.size());
-                                if (small_write_data[sd_num].iov_len >= sd_pos+block_left)
-                                {
-                                    block_crc32 = crc32c(block_crc32, (uint8_t*)small_write_data[sd_num].iov_base+sd_pos, block_left);
-                                    sd_pos += block_left;
-                                    break;
-                                }
-                                else
-                                {
-                                    block_crc32 = crc32c(block_crc32, (uint8_t*)small_write_data[sd_num].iov_base+sd_pos, small_write_data[sd_num].iov_len-sd_pos);
-                                    block_left -= (small_write_data[sd_num].iov_len-sd_pos);
-                                    sd_pos = 0;
-                                    sd_num++;
-                                }
-                            }
-                            if (block_crc32 != *block_csums)
-                            {
-                                printf(
-                                    "Journal entry data is corrupt for small_write%s oid=%lx:%lx ver=%lu offset=%u len=%u - block %u crc32 %x != %x\n",
-                                    je->type == JE_SMALL_WRITE_INSTANT ? "_instant" : "",
-                                    je->small_write.oid.inode, je->small_write.oid.stripe, je->small_write.version,
-                                    je->small_write.offset, je->small_write.len,
-                                    pos, block_crc32, *block_csums
-                                );
-                                data_csum_valid = false;
-                                break;
-                            }
-                        }
-                    }
-                }
-                if (!data_csum_valid)
+                if (data_crc32 != je->small_write.crc32_data)
                {
                    // journal entry is corrupt, stop here
                    // interesting thing is that we must clear the corrupt entry if we're not readonly,
                    // because we don't write next entries in the same journal block
+                    printf("Journal entry data is corrupt (data crc32 %x != %x)\n", data_crc32, je->small_write.crc32_data);
                    memset((uint8_t*)buf + proc_pos - done_pos + pos, 0, bs->journal.block_size - pos);
                    bs->journal.next_free = prev_free;
                    init_write_buf = (uint8_t*)buf + proc_pos - done_pos;
@@ -922,14 +755,11 @@ int blockstore_init_journal::handle_journal_part(void *buf, uint64_t done_pos, u
                        .oid = je->small_write.oid,
                        .version = je->small_write.version,
                    };
-                    uint64_t dyn_size = bs->dsk.dirty_dyn_size(je->small_write.offset, je->small_write.len);
-                    void *dyn = NULL;
-                    void *dyn_from = (uint8_t*)je + sizeof(journal_entry_small_write);
-                    if (!bs->alloc_dyn_data)
+                    void *bmp = NULL;
+                    void *bmp_from = (uint8_t*)je + sizeof(journal_entry_small_write);
+                    if (bs->dsk.clean_entry_bitmap_size <= sizeof(void*))
                    {
-                        // Bitmap without checksum is only 4 bytes for 128k objects, save it inline
-                        // It can even contain 4 byte bitmap + 4 byte CRC32 for 4 kb writes :)
-                        memcpy(&dyn, dyn_from, dyn_size);
+                        memcpy(&bmp, bmp_from, bs->dsk.clean_entry_bitmap_size);
                    }
                    else
                    {
@@ -937,9 +767,8 @@ int blockstore_init_journal::handle_journal_part(void *buf, uint64_t done_pos, u
                        // allocations for entry bitmaps. This can only be fixed by using
                        // a patched map with dynamic entry size, but not the btree_map,
                        // because it doesn't keep iterators valid all the time.
-                        dyn = malloc_or_die(dyn_size+sizeof(int));
-                        *((int*)dyn) = 1;
-                        memcpy((uint8_t*)dyn+sizeof(int), dyn_from, dyn_size);
+                        bmp = malloc_or_die(bs->dsk.clean_entry_bitmap_size);
+                        memcpy(bmp, bmp_from, bs->dsk.clean_entry_bitmap_size);
                    }
                    bs->dirty_db.emplace(ov, (dirty_entry){
                        .state = (BS_ST_SMALL_WRITE | BS_ST_SYNCED),
@@ -948,7 +777,7 @@ int blockstore_init_journal::handle_journal_part(void *buf, uint64_t done_pos, u
                        .offset = je->small_write.offset,
                        .len = je->small_write.len,
                        .journal_sector = proc_pos,
-                        .dyn_data = dyn,
+                        .bitmap = bmp,
                    });
                    bs->journal.used_sectors[proc_pos]++;
 #ifdef BLOCKSTORE_DEBUG
@@ -1007,13 +836,11 @@ int blockstore_init_journal::handle_journal_part(void *buf, uint64_t done_pos, u
                        .oid = je->big_write.oid,
                        .version = je->big_write.version,
                    };
-                    uint64_t dyn_size = bs->dsk.dirty_dyn_size(je->big_write.offset, je->big_write.len);
-                    void *dyn = NULL;
-                    void *dyn_from = (uint8_t*)je + sizeof(journal_entry_big_write);
-                    if (!bs->alloc_dyn_data)
+                    void *bmp = NULL;
+                    void *bmp_from = (uint8_t*)je + sizeof(journal_entry_big_write);
+                    if (bs->dsk.clean_entry_bitmap_size <= sizeof(void*))
                    {
-                        // Bitmap without checksum is only 4 bytes for 128k objects, save it inline
-                        memcpy(&dyn, dyn_from, dyn_size);
+                        memcpy(&bmp, bmp_from, bs->dsk.clean_entry_bitmap_size);
                    }
                    else
                    {
@@ -1021,9 +848,8 @@ int blockstore_init_journal::handle_journal_part(void *buf, uint64_t done_pos, u
                        // allocations for entry bitmaps. This can only be fixed by using
                        // a patched map with dynamic entry size, but not the btree_map,
                        // because it doesn't keep iterators valid all the time.
-                        dyn = malloc_or_die(dyn_size+sizeof(int));
-                        *((int*)dyn) = 1;
-                        memcpy((uint8_t*)dyn+sizeof(int), dyn_from, dyn_size);
+                        bmp = malloc_or_die(bs->dsk.clean_entry_bitmap_size);
+                        memcpy(bmp, bmp_from, bs->dsk.clean_entry_bitmap_size);
                    }
                    auto dirty_it = bs->dirty_db.emplace(ov, (dirty_entry){
                        .state = (BS_ST_BIG_WRITE | BS_ST_SYNCED),
@@ -1032,7 +858,7 @@ int blockstore_init_journal::handle_journal_part(void *buf, uint64_t done_pos, u
                        .offset = je->big_write.offset,
                        .len = je->big_write.len,
                        .journal_sector = proc_pos,
-                        .dyn_data = dyn,
+                        .bitmap = bmp,
                    }).first;
                    if (bs->data_alloc->get(je->big_write.location >> bs->dsk.block_order))
                    {
--- a/src/blockstore_init.h
+++ b/src/blockstore_init.h
@@ -50,7 +50,6 @@ class blockstore_init_journal
    uint64_t next_free;
    std::vector<bs_init_journal_done> done;
    std::vector<obj_ver_id> double_allocs;
-    std::vector<iovec> small_write_data;
    uint64_t journal_pos = 0;
    uint64_t continue_pos = 0;
    void *init_write_buf = NULL;
--- a/src/blockstore_journal.cpp
+++ b/src/blockstore_journal.cpp
@@ -17,7 +17,6 @@ blockstore_journal_check_t::blockstore_journal_check_t(blockstore_impl_t *bs)
 // Check if we can write <required> entries of <size> bytes and <data_after> data bytes after them to the journal
 int blockstore_journal_check_t::check_available(blockstore_op_t *op, int entries_required, int size, int data_after)
 {
-    uint64_t prev_next = next_sector;
    int required = entries_required;
    while (1)
    {
@@ -36,19 +35,11 @@ int blockstore_journal_check_t::check_available(blockstore_op_t *op, int entries
            }
            required -= fits;
            next_in_pos += fits * size;
-            if (next_sector != prev_next || !sectors_to_write)
-            {
-                // Except the previous call to this function
-                sectors_to_write++;
-            }
+            sectors_to_write++;
        }
        else if (bs->journal.sector_info[next_sector].dirty)
        {
-            if (next_sector != prev_next || !sectors_to_write)
-            {
-                // Except the previous call to this function
-                sectors_to_write++;
-            }
+            sectors_to_write++;
        }
        if (required <= 0)
        {
@@ -198,7 +189,6 @@ void blockstore_impl_t::prepare_journal_sector_write(int cur_sector, blockstore_
    priv->pending_ops++;
    if (!priv->min_flushed_journal_sector)
        priv->min_flushed_journal_sector = 1+cur_sector;
-    assert(priv->min_flushed_journal_sector <= journal.sector_count);
    priv->max_flushed_journal_sector = 1+cur_sector;
 }

@@ -299,31 +289,3 @@ void journal_t::dump_diagnostics()
        journal_used_it == used_sectors.end() ? 0 : journal_used_it->second
    );
 }
-
-static uint64_t zero_page[4096];
-
-uint32_t crc32c_pad(uint32_t prev_crc, const void *buf, size_t len, size_t left_pad, size_t right_pad)
-{
-    uint32_t r = prev_crc;
-    while (left_pad >= 4096)
-    {
-        r = crc32c(r, zero_page, 4096);
-        left_pad -= 4096;
-    }
-    if (left_pad > 0)
-        r = crc32c(r, zero_page, left_pad);
-    r = crc32c(r, buf, len);
-    while (right_pad >= 4096)
-    {
-        r = crc32c(r, zero_page, 4096);
-        right_pad -= 4096;
-    }
-    if (left_pad > 0)
-        r = crc32c(r, zero_page, right_pad);
-    return r;
-}
-
-uint32_t crc32c_nopad(uint32_t prev_crc, const void *buf, size_t len, size_t left_pad, size_t right_pad)
-{
-    return crc32c(0, buf, len);
-}
--- a/src/blockstore_journal.h
+++ b/src/blockstore_journal.h
@@ -8,8 +8,7 @@

 #define MIN_JOURNAL_SIZE 4*1024*1024
 #define JOURNAL_MAGIC 0x4A33
-#define JOURNAL_VERSION_V1 1
-#define JOURNAL_VERSION_V2 2
+#define JOURNAL_VERSION 1
 #define JOURNAL_BUFFER_SIZE 4*1024*1024
 #define JOURNAL_ENTRY_HEADER_SIZE 16

@@ -33,7 +32,7 @@
 #define JE_BIG_WRITE_INSTANT   0x08
 #define JE_MAX         0x08

-// crc32c comes first to ease calculation
+// crc32c comes first to ease calculation and is equal to crc32()
 struct __attribute__((__packed__)) journal_entry_start
 {
    uint32_t crc32;
@@ -43,12 +42,8 @@ struct __attribute__((__packed__)) journal_entry_start
    uint32_t reserved;
    uint64_t journal_start;
    uint64_t version;
-    uint32_t data_csum_type;
-    uint32_t csum_block_size;
 };
-#define JE_START_V0_SIZE 24
-#define JE_START_V1_SIZE 32
-#define JE_START_V2_SIZE 40
+#define JE_START_LEGACY_SIZE 24

 struct __attribute__((__packed__)) journal_entry_small_write
 {
@@ -64,12 +59,10 @@ struct __attribute__((__packed__)) journal_entry_small_write
    // small_write entries contain <len> bytes of data which is stored in next sectors
    // data_offset is its offset within journal
    uint64_t data_offset;
-    uint32_t crc32_data; // zero when data_csum_type != 0
+    uint32_t crc32_data;
    // small_write and big_write entries are followed by the "external" bitmap
    // its size is dynamic and included in journal entry's <size> field
    uint8_t bitmap[];
-    // and then data checksums if data_csum_type != 0
-    // uint32_t data_crc32c[];
 };

 struct __attribute__((__packed__)) journal_entry_big_write
@@ -87,8 +80,6 @@ struct __attribute__((__packed__)) journal_entry_big_write
    // small_write and big_write entries are followed by the "external" bitmap
    // its size is dynamic and included in journal entry's <size> field
    uint8_t bitmap[];
-    // and then data checksums if data_csum_type != 0
-    // uint32_t data_crc32c[];
 };

 struct __attribute__((__packed__)) journal_entry_stable
@@ -227,6 +218,3 @@ struct blockstore_journal_check_t
 };

 journal_entry* prefill_single_journal_entry(journal_t & journal, uint16_t type, uint32_t size);
-
-uint32_t crc32c_pad(uint32_t prev_crc, const void *buf, size_t len, size_t left_pad, size_t right_pad);
-uint32_t crc32c_nopad(uint32_t prev_crc, const void *buf, size_t len, size_t left_pad, size_t right_pad);
--- a/src/blockstore_open.cpp
+++ b/src/blockstore_open.cpp
@@ -4,25 +4,6 @@
 #include <sys/file.h>
 #include "blockstore_impl.h"

-static uint64_t parse_fsync_feedback(blockstore_config_t & config, uint64_t journal_len)
-{
-    uint64_t journal_fsync_feedback_limit = 0;
-    if (config.find("journal_min_free_bytes") == config.end() &&
-        config.find("journal_min_free_percent") == config.end())
-    {
-        journal_fsync_feedback_limit = 90 * journal_len / 100;
-    }
-    else
-    {
-        journal_fsync_feedback_limit = strtoull(config["journal_min_free_bytes"].c_str(), NULL, 10);
-        if (!journal_fsync_feedback_limit)
-        {
-            journal_fsync_feedback_limit = strtoull(config["journal_min_free_percent"].c_str(), NULL, 10) * journal_len / 100;
-        }
-    }
-    return journal_fsync_feedback_limit;
-}
-
 void blockstore_impl_t::parse_config(blockstore_config_t & config, bool init)
 {
    // Online-configurable options:
@@ -38,10 +19,6 @@ void blockstore_impl_t::parse_config(blockstore_config_t & config, bool init)
    throttle_target_mbs = strtoull(config["throttle_target_mbs"].c_str(), NULL, 10);
    throttle_target_parallelism = strtoull(config["throttle_target_parallelism"].c_str(), NULL, 10);
    throttle_threshold_us = strtoull(config["throttle_threshold_us"].c_str(), NULL, 10);
-    if (config.find("autosync_writes") != config.end())
-    {
-        autosync_writes = strtoull(config["autosync_writes"].c_str(), NULL, 10);
-    }
    if (!max_flusher_count)
    {
        max_flusher_count = 256;
@@ -72,8 +49,6 @@ void blockstore_impl_t::parse_config(blockstore_config_t & config, bool init)
    }
    if (!init)
    {
-        // has to be parsed after dsk.parse_config(), thus repeated here for online update
-        journal_fsync_feedback_limit = parse_fsync_feedback(config, journal.len);
        return;
    }
    // Offline-configurable options:
@@ -110,14 +85,11 @@ void blockstore_impl_t::parse_config(blockstore_config_t & config, bool init)
        immediate_commit = IMMEDIATE_SMALL;
    }
    metadata_buf_size = strtoull(config["meta_buf_size"].c_str(), NULL, 10);
-    inmemory_meta = config["inmemory_metadata"] != "false" && config["inmemory_metadata"] != "0" &&
-        config["inmemory_metadata"] != "no";
+    inmemory_meta = config["inmemory_metadata"] != "false";
    journal.sector_count = strtoull(config["journal_sector_buffer_count"].c_str(), NULL, 10);
    journal.no_same_sector_overwrites = config["journal_no_same_sector_overwrites"] == "true" ||
        config["journal_no_same_sector_overwrites"] == "1" || config["journal_no_same_sector_overwrites"] == "yes";
-    journal.inmemory = config["inmemory_journal"] != "false" && config["inmemory_journal"] != "0" &&
-        config["inmemory_journal"] != "no";
-    journal_fsync_feedback_limit = parse_fsync_feedback(config, journal.len);
+    journal.inmemory = config["inmemory_journal"] != "false";
    // Validate
    if (journal.sector_count < 2)
    {
@@ -161,24 +133,19 @@ void blockstore_impl_t::calc_lengths()
    {
        metadata_buffer = memalign(MEM_ALIGNMENT, dsk.meta_len);
        if (!metadata_buffer)
-            throw std::runtime_error("Failed to allocate memory for the metadata ("+std::to_string(dsk.meta_len/1024/1024)+" MB)");
+            throw std::runtime_error("Failed to allocate memory for the metadata");
    }
-    else if (dsk.clean_entry_bitmap_size || dsk.data_csum_type)
+    else if (dsk.clean_entry_bitmap_size)
    {
-        clean_bitmaps = (uint8_t*)malloc(dsk.block_count * 2 * dsk.clean_entry_bitmap_size);
-        if (!clean_bitmaps)
-        {
-            throw std::runtime_error(
-                "Failed to allocate memory for the metadata sparse write bitmap ("+
-                std::to_string(dsk.block_count * 2 * dsk.clean_entry_bitmap_size / 1024 / 1024)+" MB)"
-            );
-        }
+        clean_bitmap = (uint8_t*)malloc(dsk.block_count * 2*dsk.clean_entry_bitmap_size);
+        if (!clean_bitmap)
+            throw std::runtime_error("Failed to allocate memory for the metadata sparse write bitmap");
    }
    if (journal.inmemory)
    {
        journal.buffer = memalign(MEM_ALIGNMENT, journal.len);
        if (!journal.buffer)
-            throw std::runtime_error("Failed to allocate memory for journal ("+std::to_string(journal.len/1024/1024)+" MB)");
+            throw std::runtime_error("Failed to allocate memory for journal");
    }
    else
    {
--- a/src/blockstore_read.cpp
+++ b/src/blockstore_read.cpp
@@ -1,7 +1,6 @@
 // Copyright (c) Vitaliy Filippov, 2019+
 // License: VNPL-1.1 (see README.md for details)

-#include <limits.h>
 #include "blockstore_impl.h"

 int blockstore_impl_t::fulfill_read_push(blockstore_op_t *op, void *buf, uint64_t offset, uint64_t len,
@@ -9,7 +8,12 @@ int blockstore_impl_t::fulfill_read_push(blockstore_op_t *op, void *buf, uint64_
 {
    if (!len)
    {
-        // Zero-length read
+        // Zero-length version - skip
+        return 1;
+    }
+    else if (IS_IN_FLIGHT(item_state))
+    {
+        // Write not finished yet - skip
        return 1;
    }
    else if (IS_DELETE(item_state))
@@ -18,7 +22,6 @@ int blockstore_impl_t::fulfill_read_push(blockstore_op_t *op, void *buf, uint64_
        memset(buf, 0, len);
        return 1;
    }
-    assert(!IS_IN_FLIGHT(item_state));
    if (journal.inmemory && IS_JOURNAL(item_state))
    {
        memcpy(buf, (uint8_t*)journal.buffer + offset, len);
@@ -37,115 +40,59 @@ int blockstore_impl_t::fulfill_read_push(blockstore_op_t *op, void *buf, uint64_
    return 1;
 }

-void blockstore_impl_t::find_holes(std::vector<copy_buffer_t> & read_vec,
-    uint32_t item_start, uint32_t item_end,
-    std::function<int(int, bool, uint32_t, uint32_t)> callback)
+// FIXME: a bug has been observed in this function before; it needs dedicated test coverage
+int blockstore_impl_t::fulfill_read(blockstore_op_t *read_op, uint64_t &fulfilled, uint32_t item_start, uint32_t item_end,
+    uint32_t item_state, uint64_t item_version, uint64_t item_location, uint64_t journal_sector)
 {
-    auto cur_start = item_start;
-    int i = 0;
-    while (cur_start < item_end)
+    uint32_t cur_start = item_start;
+    if (cur_start < read_op->offset + read_op->len && item_end > read_op->offset)
    {
-        // COPY_BUF_CSUM_FILL items are fake items inserted in the end, their offsets aren't in order
-        if (i >= read_vec.size() || read_vec[i].copy_flags & COPY_BUF_CSUM_FILL || read_vec[i].offset >= item_end)
+        cur_start = cur_start < read_op->offset ? read_op->offset : cur_start;
+        item_end = item_end > read_op->offset + read_op->len ? read_op->offset + read_op->len : item_end;
+        auto it = PRIV(read_op)->read_vec.begin();
+        while (1)
        {
-            // Hole (at end): cur_start .. item_end
-            i += callback(i, false, cur_start, item_end);
-            break;
-        }
-        else if (read_vec[i].offset > cur_start)
-        {
-            // Hole: cur_start .. min(read_vec[i].offset, item_end)
-            auto cur_end = read_vec[i].offset > item_end ? item_end : read_vec[i].offset;
-            i += callback(i, false, cur_start, cur_end);
-            cur_start = cur_end;
-        }
-        else if (read_vec[i].offset + read_vec[i].len > cur_start)
-        {
-            // Allocated: cur_start .. min(read_vec[i].offset + read_vec[i].len, item_end)
-            auto cur_end = read_vec[i].offset + read_vec[i].len;
-            cur_end = cur_end > item_end ? item_end : cur_end;
-            i += callback(i, true, cur_start, cur_end);
-            cur_start = cur_end;
-            i++;
-        }
-        else
-            i++;
-    }
-}
-
-int blockstore_impl_t::fulfill_read(blockstore_op_t *read_op,
-    uint64_t &fulfilled, uint32_t item_start, uint32_t item_end, // FIXME: Rename item_* to dirty_*
-    uint32_t item_state, uint64_t item_version, uint64_t item_location,
-    uint64_t journal_sector, uint8_t *csum, int *dyn_data)
-{
-    int r = 1;
-    if (item_start < read_op->offset + read_op->len && item_end > read_op->offset)
-    {
-        auto & rv = PRIV(read_op)->read_vec;
-        auto rd_start = item_start < read_op->offset ? read_op->offset : item_start;
-        auto rd_end = item_end > read_op->offset + read_op->len ? read_op->offset + read_op->len : item_end;
-        find_holes(rv, rd_start, rd_end, [&](int pos, bool alloc, uint32_t start, uint32_t end)
-        {
-            if (!r || alloc)
-                return 0;
-            if (!journal.inmemory && dsk.csum_block_size > dsk.bitmap_granularity && IS_JOURNAL(item_state) && !IS_DELETE(item_state))
+            for (; it != PRIV(read_op)->read_vec.end(); it++)
            {
-                uint32_t blk_begin = (start/dsk.csum_block_size) * dsk.csum_block_size;
-                blk_begin = blk_begin < item_start ? item_start : blk_begin;
-                uint32_t blk_end = ((end-1) / dsk.csum_block_size + 1) * dsk.csum_block_size;
-                blk_end = blk_end > item_end ? item_end : blk_end;
-                rv.push_back((copy_buffer_t){
-                    .copy_flags = COPY_BUF_JOURNAL|COPY_BUF_CSUM_FILL,
-                    .offset = blk_begin,
-                    .len = blk_end-blk_begin,
-                    .csum_buf = (csum + (blk_begin/dsk.csum_block_size -
-                        item_start/dsk.csum_block_size) * (dsk.data_csum_type & 0xFF)),
-                    .dyn_data = dyn_data,
-                });
-                if (dyn_data)
+                if (it->offset >= cur_start)
                {
-                    (*dyn_data)++;
+                    break;
                }
-                // Submit the journal checksum block read
-                if (!read_checksum_block(read_op, 1, fulfilled, item_location - item_start))
+                else if (it->offset + it->len > cur_start)
                {
-                    r = 0;
+                    cur_start = it->offset + it->len;
+                    if (cur_start >= item_end)
+                    {
+                        goto endwhile;
+                    }
                }
-                return 0;
            }
-            copy_buffer_t el = {
-                .copy_flags = (IS_JOURNAL(item_state) ? COPY_BUF_JOURNAL : COPY_BUF_DATA),
-                .offset = start,
-                .len = end-start,
-                .disk_offset = item_location + start - item_start,
-                .journal_sector = (IS_JOURNAL(item_state) ? journal_sector : 0),
-                .csum_buf = !csum ? NULL : (csum + (start - item_start) / dsk.csum_block_size * (dsk.data_csum_type & 0xFF)),
-                .dyn_data = dyn_data,
-            };
-            if (dyn_data)
+            if (it == PRIV(read_op)->read_vec.end() || it->offset > cur_start)
            {
-                (*dyn_data)++;
+                fulfill_read_t el = {
+                    .offset = cur_start,
+                    .len = it == PRIV(read_op)->read_vec.end() || it->offset >= item_end ? item_end-cur_start : it->offset-cur_start,
+                    .journal_sector = journal_sector,
+                };
+                it = PRIV(read_op)->read_vec.insert(it, el);
+                if (!fulfill_read_push(read_op,
+                    (uint8_t*)read_op->buf + el.offset - read_op->offset,
+                    item_location + el.offset - item_start,
+                    el.len, item_state, item_version))
+                {
+                    return 0;
+                }
+                fulfilled += el.len;
            }
-            if (IS_BIG_WRITE(item_state))
+            cur_start = it->offset + it->len;
+            if (it == PRIV(read_op)->read_vec.end() || cur_start >= item_end)
            {
-                // If we don't track it then we may IN THEORY read another object's data:
-                // submit read -> remove the object -> flush remove -> overwrite with another object -> finish read
-                // Very improbable, but possible
-                PRIV(read_op)->clean_block_used = 1;
+                break;
            }
-            rv.insert(rv.begin() + pos, el);
-            fulfilled += el.len;
-            if (!fulfill_read_push(read_op,
-                (uint8_t*)read_op->buf + el.offset - read_op->offset,
-                item_location + el.offset - item_start,
-                el.len, item_state, item_version))
-            {
-                r = 0;
-            }
-            return 1;
-        });
+        }
    }
-    return r;
+endwhile:
+    return 1;
 }

 uint8_t* blockstore_impl_t::get_clean_entry_bitmap(uint64_t block_loc, int offset)
@@ -159,225 +106,10 @@ uint8_t* blockstore_impl_t::get_clean_entry_bitmap(uint64_t block_loc, int offse
        clean_entry_bitmap = ((uint8_t*)metadata_buffer + sector + pos*dsk.clean_entry_size + sizeof(clean_disk_entry) + offset);
    }
    else
-        clean_entry_bitmap = (uint8_t*)(clean_bitmaps + meta_loc*2*dsk.clean_entry_bitmap_size + offset);
+        clean_entry_bitmap = (uint8_t*)(clean_bitmap + meta_loc*2*dsk.clean_entry_bitmap_size + offset);
    return clean_entry_bitmap;
 }

-int blockstore_impl_t::fill_partial_checksum_blocks(std::vector<copy_buffer_t> & rv, uint64_t & fulfilled,
-    uint8_t *clean_entry_bitmap, int *dyn_data, bool from_journal, uint8_t *read_buf, uint64_t read_offset, uint64_t read_end)
-{
-    if (read_end == read_offset)
-        return 0;
-    int required = 0;
-    read_buf -= read_offset;
-    uint32_t last_block = (read_end-1)/dsk.csum_block_size;
-    uint32_t start_block = read_offset/dsk.csum_block_size;
-    uint32_t end_block = 0;
-    while (start_block <= last_block)
-    {
-        if (read_range_fulfilled(rv, fulfilled, read_buf, clean_entry_bitmap,
-            start_block*dsk.csum_block_size < read_offset ? read_offset : start_block*dsk.csum_block_size,
-            (start_block+1)*dsk.csum_block_size > read_end ? read_end : (start_block+1)*dsk.csum_block_size))
-        {
-            // read_range_fulfilled() also adds zero-filled areas
-            start_block++;
-        }
-        else
-        {
-            // Find a sequence of checksum blocks required to be read
-            end_block = start_block;
-            while ((end_block+1)*dsk.csum_block_size < read_end &&
-                !read_range_fulfilled(rv, fulfilled, read_buf, clean_entry_bitmap,
-                    (end_block+1)*dsk.csum_block_size < read_offset ? read_offset : (end_block+1)*dsk.csum_block_size,
-                    (end_block+2)*dsk.csum_block_size > read_end ? read_end : (end_block+2)*dsk.csum_block_size))
-            {
-                end_block++;
-            }
-            end_block++;
-            // OK, mark this range as required
-            rv.push_back((copy_buffer_t){
-                .copy_flags = COPY_BUF_CSUM_FILL | (from_journal ? COPY_BUF_JOURNALED_BIG : 0),
-                .offset = start_block*dsk.csum_block_size,
-                .len = (end_block-start_block)*dsk.csum_block_size,
-                // save clean_entry_bitmap if we're reading clean data from the journal
-                .csum_buf = from_journal ? clean_entry_bitmap : NULL,
-                .dyn_data = dyn_data,
-            });
-            if (dyn_data)
-            {
-                (*dyn_data)++;
-            }
-            start_block = end_block;
-            required++;
-        }
-    }
-    return required;
-}
-
-// read_buf should be == op->buf - op->offset
-bool blockstore_impl_t::read_range_fulfilled(std::vector<copy_buffer_t> & rv, uint64_t & fulfilled, uint8_t *read_buf,
-    uint8_t *clean_entry_bitmap, uint32_t item_start, uint32_t item_end)
-{
-    bool all_done = true;
-    find_holes(rv, item_start, item_end, [&](int pos, bool alloc, uint32_t cur_start, uint32_t cur_end)
-    {
-        if (alloc)
-            return 0;
-        int diff = 0;
-        uint32_t bmp_start = cur_start/dsk.bitmap_granularity;
-        uint32_t bmp_end = cur_end/dsk.bitmap_granularity;
-        uint32_t bmp_pos = bmp_start;
-        while (bmp_pos < bmp_end)
-        {
-            while (bmp_pos < bmp_end && !(clean_entry_bitmap[bmp_pos >> 3] & (1 << (bmp_pos & 0x7))))
-                bmp_pos++;
-            if (bmp_pos > bmp_start)
-            {
-                // zero fill
-                copy_buffer_t el = {
-                    .copy_flags = COPY_BUF_ZERO,
-                    .offset = bmp_start*dsk.bitmap_granularity,
-                    .len = (bmp_pos-bmp_start)*dsk.bitmap_granularity,
-                };
-                rv.insert(rv.begin() + pos, el);
-                if (read_buf)
-                    memset(read_buf + el.offset, 0, el.len);
-                fulfilled += el.len;
-                diff++;
-            }
-            bmp_start = bmp_pos;
-            while (bmp_pos < bmp_end && (clean_entry_bitmap[bmp_pos >> 3] & (1 << (bmp_pos & 0x7))))
-                bmp_pos++;
-            if (bmp_pos > bmp_start)
-            {
-                // something is to be read
-                all_done = false;
-            }
-            bmp_start = bmp_pos;
-        }
-        return diff;
-    });
-    return all_done;
-}
-
-bool blockstore_impl_t::read_checksum_block(blockstore_op_t *op, int rv_pos, uint64_t &fulfilled, uint64_t clean_loc)
-{
-    auto & rv = PRIV(op)->read_vec;
-    auto *vi = &rv[rv.size()-rv_pos];
-    uint32_t item_start = vi->offset, item_end = vi->offset+vi->len;
-    uint32_t fill_size = 0;
-    int n_iov = 0;
-    find_holes(rv, item_start, item_end, [&](int pos, bool alloc, uint32_t cur_start, uint32_t cur_end)
-    {
-        if (alloc)
-        {
-            fill_size += cur_end-cur_start;
-            n_iov++;
-        }
-        else
-        {
-            if (cur_start < op->offset)
-            {
-                fill_size += op->offset-cur_start;
-                n_iov++;
-                cur_start = op->offset;
-            }
-            if (cur_end > op->offset+op->len)
-            {
-                fill_size += cur_end-(op->offset+op->len);
-                n_iov++;
-                cur_end = op->offset+op->len;
-            }
-            if (cur_end > cur_start)
-            {
-                n_iov++;
-            }
-        }
-        return 0;
-    });
-    void *buf = memalign_or_die(MEM_ALIGNMENT, fill_size + n_iov*sizeof(struct iovec));
-    iovec *iov = (struct iovec*)((uint8_t*)buf+fill_size);
-    n_iov = 0;
-    fill_size = 0;
-    find_holes(rv, item_start, item_end, [&](int pos, bool alloc, uint32_t cur_start, uint32_t cur_end)
-    {
-        int res = 0;
-        if (alloc)
-        {
-            iov[n_iov++] = (struct iovec){ (uint8_t*)buf+fill_size, cur_end-cur_start };
-            fill_size += cur_end-cur_start;
-        }
-        else
-        {
-            if (cur_start < op->offset)
-            {
-                iov[n_iov++] = (struct iovec){ (uint8_t*)buf+fill_size, op->offset-cur_start };
-                fill_size += op->offset-cur_start;
-                cur_start = op->offset;
-            }
-            auto lim_end = cur_end > op->offset+op->len ? op->offset+op->len : cur_end;
-            if (lim_end > cur_start)
-            {
-                iov[n_iov++] = (struct iovec){ (uint8_t*)op->buf+cur_start-op->offset, lim_end-cur_start };
-                rv.insert(rv.begin() + pos, (copy_buffer_t){
-                    .copy_flags = COPY_BUF_DATA,
-                    .offset = cur_start,
-                    .len = lim_end-cur_start,
-                });
-                fulfilled += lim_end-cur_start;
-                res++;
-            }
-            if (cur_end > op->offset+op->len)
-            {
-                iov[n_iov++] = (struct iovec){ (uint8_t*)buf+fill_size, cur_end - (op->offset+op->len) };
-                fill_size += cur_end - (op->offset+op->len);
-                cur_end = op->offset+op->len;
-            }
-        }
-        return res;
-    });
-    vi = &rv[rv.size()-rv_pos];
-    // Save buf into read_vec too but in a creepy way
-    // FIXME: Shit, something else should be invented %)
-    *vi = (copy_buffer_t){
-        .copy_flags = vi->copy_flags,
-        .offset = vi->offset,
-        .len = ((uint64_t)n_iov << 32) | fill_size,
-        .disk_offset = clean_loc + item_start,
-        .buf = (uint8_t*)buf,
-        .csum_buf = vi->csum_buf,
-        .dyn_data = vi->dyn_data,
-    };
-    int submit_fd = (vi->copy_flags & COPY_BUF_JOURNAL ? dsk.journal_fd : dsk.data_fd);
-    uint64_t submit_offset = (vi->copy_flags & COPY_BUF_JOURNAL ? journal.offset : dsk.data_offset);
-    uint32_t d_pos = 0;
-    for (int n_pos = 0; n_pos < n_iov; n_pos += IOV_MAX)
-    {
-        int n_cur = n_iov-n_pos < IOV_MAX ? n_iov-n_pos : IOV_MAX;
-        BS_SUBMIT_GET_SQE(sqe, data);
-        PRIV(op)->pending_ops++;
-        my_uring_prep_readv(sqe, submit_fd, iov + n_pos, n_cur, submit_offset + clean_loc + item_start + d_pos);
-        data->callback = [this, op](ring_data_t *data) { handle_read_event(data, op); };
-        if (n_pos > 0 || n_pos + IOV_MAX < n_iov)
-        {
-            uint32_t d_len = 0;
-            for (int i = 0; i < IOV_MAX; i++)
-                d_len += iov[n_pos+i].iov_len;
-            data->iov.iov_len = d_len;
-            d_pos += d_len;
-        }
-        else
-            data->iov.iov_len = item_end-item_start;
-    }
-    if (!(vi->copy_flags & COPY_BUF_JOURNAL))
-    {
-        // Reads running parallel to flushes of the same clean block may read
-        // a mixture of old and new data. So we don't verify checksums for such blocks.
-        PRIV(op)->clean_block_used = 1;
-    }
-    return true;
-}
-
 int blockstore_impl_t::dequeue_read(blockstore_op_t *read_op)
 {
    auto & clean_db = clean_db_shard(read_op->oid);
@@ -399,8 +131,6 @@ int blockstore_impl_t::dequeue_read(blockstore_op_t *read_op)
    }
    uint64_t fulfilled = 0;
    PRIV(read_op)->pending_ops = 0;
-    PRIV(read_op)->clean_block_used = 0;
-    auto & rv = PRIV(read_op)->read_vec;
    uint64_t result_version = 0;
    if (dirty_found)
    {
@@ -418,36 +148,23 @@ int blockstore_impl_t::dequeue_read(blockstore_op_t *read_op)
                    FINISH_OP(read_op);
                    return 2;
                }
-                int *dyn_data = (int*)(dsk.csum_block_size > 0 && alloc_dyn_data ? dirty.dyn_data : NULL);
-                uint8_t *bmp_ptr = (alloc_dyn_data
-                    ? (uint8_t*)dirty.dyn_data + sizeof(int) : (uint8_t*)&dirty.dyn_data);
                if (!result_version)
                {
                    result_version = dirty_it->first.version;
                    if (read_op->bitmap)
                    {
+                        void *bmp_ptr = (dsk.clean_entry_bitmap_size > sizeof(void*) ? dirty_it->second.bitmap : &dirty_it->second.bitmap);
                        memcpy(read_op->bitmap, bmp_ptr, dsk.clean_entry_bitmap_size);
                    }
                }
                // If inmemory_journal is false, journal trim will have to wait until the read is completed
-                if (!IS_JOURNAL(dirty.state))
+                if (!fulfill_read(read_op, fulfilled, dirty.offset, dirty.offset + dirty.len,
+                    dirty.state, dirty_it->first.version, dirty.location + (IS_JOURNAL(dirty.state) ? 0 : dirty.offset),
+                    (IS_JOURNAL(dirty.state) ? dirty.journal_sector+1 : 0)))
                {
-                    // Read from data disk, possibly checking checksums
-                    if (!fulfill_clean_read(read_op, fulfilled, bmp_ptr, dyn_data,
-                        dirty.offset, dirty.offset+dirty.len, dirty.location, dirty_it->first.version))
-                    {
-                        goto undo_read;
-                    }
-                }
-                else
-                {
-                    // Copy from memory or read from journal, possibly checking checksums
-                    if (!fulfill_read(read_op, fulfilled, dirty.offset, dirty.offset + dirty.len,
-                        dirty.state, dirty_it->first.version, dirty.location, dirty.journal_sector+1,
-                        journal.inmemory ? NULL : bmp_ptr+dsk.clean_entry_bitmap_size, dyn_data))
-                    {
-                        goto undo_read;
-                    }
+                    // need to wait. undo added requests, don't dequeue op
+                    PRIV(read_op)->read_vec.clear();
+                    return 0;
                }
            }
            if (fulfilled == read_op->len || dirty_it == dirty_db.begin())
@@ -470,10 +187,50 @@ int blockstore_impl_t::dequeue_read(blockstore_op_t *read_op)
        }
        if (fulfilled < read_op->len)
        {
-            if (!fulfill_clean_read(read_op, fulfilled, NULL, NULL, 0, dsk.data_block_size,
-                clean_it->second.location, clean_it->second.version))
+            if (!dsk.clean_entry_bitmap_size)
            {
-                goto undo_read;
+                if (!fulfill_read(read_op, fulfilled, 0, dsk.data_block_size,
+                    (BS_ST_BIG_WRITE | BS_ST_STABLE), 0, clean_it->second.location, 0))
+                {
+                    // need to wait. undo added requests, don't dequeue op
+                    PRIV(read_op)->read_vec.clear();
+                    return 0;
+                }
+            }
+            else
+            {
+                uint8_t *clean_entry_bitmap = get_clean_entry_bitmap(clean_it->second.location, 0);
+                uint64_t bmp_start = 0, bmp_end = 0, bmp_size = dsk.data_block_size/dsk.bitmap_granularity;
+                while (bmp_start < bmp_size)
+                {
+                    while (bmp_end < bmp_size && !(clean_entry_bitmap[bmp_end >> 3] & (1 << (bmp_end & 0x7)))) // bounds check first: never index the bitmap at bmp_size
+                    {
+                        bmp_end++;
+                    }
+                    if (bmp_end > bmp_start)
+                    {
+                        // run of unset bitmap bits [bmp_start, bmp_end): fill it with zeroes
+                        assert(fulfill_read(read_op, fulfilled, bmp_start * dsk.bitmap_granularity,
+                            bmp_end * dsk.bitmap_granularity, (BS_ST_DELETE | BS_ST_STABLE), 0, 0, 0));
+                    }
+                    bmp_start = bmp_end;
+                    while (bmp_end < bmp_size && (clean_entry_bitmap[bmp_end >> 3] & (1 << (bmp_end & 0x7)))) // bounds check first, then scan the run of set bits
+                    {
+                        bmp_end++;
+                    }
+                    if (bmp_end > bmp_start)
+                    {
+                        if (!fulfill_read(read_op, fulfilled, bmp_start * dsk.bitmap_granularity,
+                            bmp_end * dsk.bitmap_granularity, (BS_ST_BIG_WRITE | BS_ST_STABLE), 0,
+                            clean_it->second.location + bmp_start * dsk.bitmap_granularity, 0))
+                        {
+                            // need to wait. undo added requests, don't dequeue op
+                            PRIV(read_op)->read_vec.clear();
+                            return 0;
+                        }
+                        bmp_start = bmp_end;
+                    }
+                }
            }
        }
    }
@@ -485,7 +242,11 @@ int blockstore_impl_t::dequeue_read(blockstore_op_t *read_op)
        FINISH_OP(read_op);
        return 2;
    }
-    assert(fulfilled == read_op->len);
+    if (fulfilled < read_op->len)
+    {
+        assert(fulfill_read(read_op, fulfilled, 0, dsk.data_block_size, (BS_ST_DELETE | BS_ST_STABLE), 0, 0, 0));
+        assert(fulfilled == read_op->len);
+    }
    read_op->version = result_version;
    if (!PRIV(read_op)->pending_ops)
    {
@@ -510,309 +271,6 @@ int blockstore_impl_t::dequeue_read(blockstore_op_t *read_op)
    }
    read_op->retval = 0;
    return 2;
-undo_read:
-    // need to wait. undo added requests, don't dequeue op
-    if (dsk.csum_block_size > dsk.bitmap_granularity)
-    {
-        for (auto & vec: rv)
-        {
-            if ((vec.copy_flags & COPY_BUF_CSUM_FILL) && vec.buf)
-            {
-                free(vec.buf);
-                vec.buf = NULL;
-            }
-            if (vec.dyn_data && --(*vec.dyn_data) == 0) // refcount
-            {
-                free(vec.dyn_data);
-                vec.dyn_data = NULL;
-            }
-        }
-    }
-    rv.clear();
-    return 0;
-}
-
-int blockstore_impl_t::pad_journal_read(std::vector<copy_buffer_t> & rv, copy_buffer_t & cp,
-    // FIXME Passing dirty_entry& would be nicer
-    uint64_t dirty_offset, uint64_t dirty_end, uint64_t dirty_loc, uint8_t *csum_ptr, int *dyn_data,
-    uint64_t offset, uint64_t submit_len, uint64_t & blk_begin, uint64_t & blk_end, uint8_t* & blk_buf)
-{
-    if (offset % dsk.csum_block_size || submit_len % dsk.csum_block_size)
-    {
-        if (offset < blk_end)
-        {
-            // Already being read as a part of the previous checksum block series
-            cp.buf = blk_buf + offset - blk_begin;
-            cp.copy_flags |= COPY_BUF_COALESCED;
-            if (offset+submit_len > blk_end)
-                cp.len = blk_end-offset;
-            return 2;
-        }
-        else
-        {
-            // We don't use fill_partial_checksum_blocks for journal because journal writes never have holes (internal bitmap)
-            blk_begin = (offset/dsk.csum_block_size) * dsk.csum_block_size;
-            blk_begin = blk_begin < dirty_offset ? dirty_offset : blk_begin;
-            blk_end = ((offset+submit_len-1)/dsk.csum_block_size + 1) * dsk.csum_block_size;
-            blk_end = blk_end > dirty_end ? dirty_end : blk_end;
-            if (blk_begin < offset || blk_end > offset+submit_len)
-            {
-                blk_buf = (uint8_t*)memalign_or_die(MEM_ALIGNMENT, blk_end-blk_begin);
-                cp.buf = blk_buf + offset - blk_begin;
-                cp.copy_flags |= COPY_BUF_COALESCED;
-                rv.push_back((copy_buffer_t){
-                    .copy_flags = COPY_BUF_JOURNAL|COPY_BUF_CSUM_FILL,
-                    .offset = blk_begin,
-                    .len = blk_end-blk_begin,
-                    .disk_offset = dirty_loc + blk_begin - dirty_offset,
-                    .buf = blk_buf,
-                    .csum_buf = (csum_ptr + (blk_begin/dsk.csum_block_size -
-                        dirty_offset/dsk.csum_block_size) * (dsk.data_csum_type & 0xFF)),
-                    .dyn_data = dyn_data,
-                });
-                if (dyn_data)
-                {
-                    (*dyn_data)++;
-                }
-                return 1;
-            }
-        }
-    }
-    return 0;
-}
-
-bool blockstore_impl_t::fulfill_clean_read(blockstore_op_t *read_op, uint64_t & fulfilled,
-    uint8_t *clean_entry_bitmap, int *dyn_data, uint32_t item_start, uint32_t item_end, uint64_t clean_loc, uint64_t clean_ver)
-{
-    bool from_journal = clean_entry_bitmap != NULL;
-    if (!clean_entry_bitmap)
-    {
-        // NULL clean_entry_bitmap means we're reading from data, not from the journal,
-        // and the bitmap location is obvious
-        clean_entry_bitmap = get_clean_entry_bitmap(clean_loc, 0);
-    }
-    if (dsk.csum_block_size > dsk.bitmap_granularity)
-    {
-        auto & rv = PRIV(read_op)->read_vec;
-        int req = fill_partial_checksum_blocks(rv, fulfilled, clean_entry_bitmap, dyn_data, from_journal,
-            (uint8_t*)read_op->buf, read_op->offset, read_op->offset+read_op->len);
-        if (!inmemory_meta && !from_journal && req > 0)
-        {
-            // Read checksums from disk
-            uint8_t *csum_buf = read_clean_meta_block(read_op, clean_loc, rv.size()-req);
-            for (int i = req; i > 0; i--)
-            {
-                rv[rv.size()-i].csum_buf = csum_buf;
-            }
-        }
-        for (int i = req; i > 0; i--)
-        {
-            if (!read_checksum_block(read_op, i, fulfilled, clean_loc))
-            {
-                return false;
-            }
-        }
-        PRIV(read_op)->clean_block_used = req > 0;
-    }
-    else if (from_journal)
-    {
-        // Don't scan bitmap - journal writes don't have holes (internal bitmap)!
-        uint8_t *csum = !dsk.csum_block_size ? 0 : (clean_entry_bitmap + dsk.clean_entry_bitmap_size +
-            item_start/dsk.csum_block_size*(dsk.data_csum_type & 0xFF));
-        if (!fulfill_read(read_op, fulfilled, item_start, item_end,
-            (BS_ST_BIG_WRITE | BS_ST_STABLE), 0, clean_loc + item_start, 0, csum, dyn_data))
-        {
-            return false;
-        }
-        if (item_start > 0 && fulfilled < read_op->len)
-        {
-            // fill with zeroes
-            assert(fulfill_read(read_op, fulfilled, 0, item_start, (BS_ST_DELETE | BS_ST_STABLE), 0, 0, 0, NULL, NULL));
-        }
-        if (item_end < dsk.data_block_size && fulfilled < read_op->len)
-        {
-            // fill with zeroes
-            assert(fulfill_read(read_op, fulfilled, item_end, dsk.data_block_size, (BS_ST_DELETE | BS_ST_STABLE), 0, 0, 0, NULL, NULL));
-        }
-    }
-    else
-    {
-        bool csum_done = !dsk.csum_block_size || inmemory_meta;
-        uint8_t *csum_buf = clean_entry_bitmap;
-        uint64_t bmp_start = 0, bmp_end = 0, bmp_size = dsk.data_block_size/dsk.bitmap_granularity;
-        while (bmp_start < bmp_size)
-        {
-            while (!(clean_entry_bitmap[bmp_end >> 3] & (1 << (bmp_end & 0x7))) && bmp_end < bmp_size)
-            {
-                bmp_end++;
-            }
-            if (bmp_end > bmp_start)
-            {
-                // fill with zeroes
-                assert(fulfill_read(read_op, fulfilled, bmp_start * dsk.bitmap_granularity,
-                    bmp_end * dsk.bitmap_granularity, (BS_ST_DELETE | BS_ST_STABLE), 0, 0, 0, NULL, NULL));
-            }
-            bmp_start = bmp_end;
-            while (clean_entry_bitmap[bmp_end >> 3] & (1 << (bmp_end & 0x7)) && bmp_end < bmp_size)
-            {
-                bmp_end++;
-            }
-            if (bmp_end > bmp_start)
-            {
-                if (!csum_done)
-                {
-                    // Read checksums from disk
-                    csum_buf = read_clean_meta_block(read_op, clean_loc, PRIV(read_op)->read_vec.size());
-                    csum_done = true;
-                }
-                uint8_t *csum = !dsk.csum_block_size ? 0 : (csum_buf + 2*dsk.clean_entry_bitmap_size + bmp_start*(dsk.data_csum_type & 0xFF));
-                if (!fulfill_read(read_op, fulfilled, bmp_start * dsk.bitmap_granularity,
-                    bmp_end * dsk.bitmap_granularity, (BS_ST_BIG_WRITE | BS_ST_STABLE), 0,
-                    clean_loc + bmp_start * dsk.bitmap_granularity, 0, csum, dyn_data))
-                {
-                    return false;
-                }
-                bmp_start = bmp_end;
-            }
-        }
-    }
-    // Increment reference counter if clean data is being read from the disk
-    if (PRIV(read_op)->clean_block_used)
-    {
-        auto & uo = used_clean_objects[clean_loc];
-        uo.refs++;
-        if (dsk.csum_block_size && flusher->is_mutated(clean_loc))
-            uo.was_changed = true;
-        PRIV(read_op)->clean_block_used = clean_loc;
-    }
-    return true;
-}
-
-uint8_t* blockstore_impl_t::read_clean_meta_block(blockstore_op_t *op, uint64_t clean_loc, int rv_pos)
-{
-    auto & rv = PRIV(op)->read_vec;
-    auto sector = ((clean_loc >> dsk.block_order) / (dsk.meta_block_size / dsk.clean_entry_size)) * dsk.meta_block_size;
-    auto pos = ((clean_loc >> dsk.block_order) % (dsk.meta_block_size / dsk.clean_entry_size)) * dsk.clean_entry_size;
-    uint8_t *buf = (uint8_t*)memalign_or_die(MEM_ALIGNMENT, dsk.meta_block_size);
-    rv.insert(rv.begin()+rv_pos, (copy_buffer_t){
-        .copy_flags = COPY_BUF_META_BLOCK|COPY_BUF_CSUM_FILL,
-        .offset = pos,
-        .buf = buf,
-    });
-    BS_SUBMIT_GET_SQE(sqe, data);
-    data->iov = (struct iovec){ buf, dsk.meta_block_size };
-    PRIV(op)->pending_ops++;
-    my_uring_prep_readv(sqe, dsk.meta_fd, &data->iov, 1, dsk.meta_offset + dsk.meta_block_size + sector);
-    data->callback = [this, op](ring_data_t *data) { handle_read_event(data, op); };
-    // return pointer to checksums + bitmap
-    return buf + pos + sizeof(clean_disk_entry);
-}
-
-bool blockstore_impl_t::verify_padded_checksums(uint8_t *clean_entry_bitmap, uint8_t *csum_buf, uint32_t offset,
-    iovec *iov, int n_iov, std::function<void(uint32_t, uint32_t, uint32_t)> bad_block_cb)
-{
-    assert(!(offset % dsk.csum_block_size));
-    uint32_t *csums = (uint32_t*)csum_buf;
-    uint32_t block_csum = 0;
-    uint32_t block_done = 0;
-    uint32_t block_num = clean_entry_bitmap ? offset/dsk.csum_block_size : 0;
-    uint32_t bmp_pos = offset/dsk.bitmap_granularity;
-    for (int i = 0; i < n_iov; i++)
-    {
-        uint32_t pos = 0;
-        while (pos < iov[i].iov_len)
-        {
-            uint32_t start = pos;
-            uint8_t bit = (clean_entry_bitmap[bmp_pos >> 3] >> (bmp_pos & 0x7)) & 1;
-            while (pos < iov[i].iov_len && ((clean_entry_bitmap[bmp_pos >> 3] >> (bmp_pos & 0x7)) & 1) == bit)
-            {
-                pos += dsk.bitmap_granularity;
-                bmp_pos++;
-            }
-            uint32_t len = pos-start;
-            auto buf = (uint8_t*)iov[i].iov_base+start;
-            while (block_done+len >= dsk.csum_block_size)
-            {
-                auto cur_len = dsk.csum_block_size-block_done;
-                block_csum = crc32c_pad(block_csum, buf, bit ? cur_len : 0, bit ? 0 : cur_len, 0);
-                if (block_csum != csums[block_num])
-                {
-                    if (bad_block_cb)
-                        bad_block_cb(block_num*dsk.csum_block_size, block_csum, csums[block_num]);
-                    else
-                        return false;
-                }
-                block_num++;
-                buf += cur_len;
-                len -= cur_len;
-                block_done = block_csum = 0;
-            }
-            if (len > 0)
-            {
-                block_csum = crc32c_pad(block_csum, buf, bit ? len : 0, bit ? 0 : len, 0);
-                block_done += len;
-            }
-        }
-    }
-    assert(!block_done);
-    return true;
-}
-
-bool blockstore_impl_t::verify_journal_checksums(uint8_t *csums, uint32_t offset,
-    iovec *iov, int n_iov, std::function<void(uint32_t, uint32_t, uint32_t)> bad_block_cb)
-{
-    uint32_t block_csum = 0;
-    uint32_t block_num = 0;
-    uint32_t block_done = offset%dsk.csum_block_size;
-    for (int i = 0; i < n_iov; i++)
-    {
-        uint32_t len = iov[i].iov_len;
-        auto buf = (uint8_t*)iov[i].iov_base;
-        while (block_done+len >= dsk.csum_block_size)
-        {
-            auto cur_len = dsk.csum_block_size-block_done;
-            block_csum = crc32c(block_csum, buf, cur_len);
-            if (block_csum != ((uint32_t*)csums)[block_num])
-            {
-                if (bad_block_cb)
-                    bad_block_cb(block_num*dsk.csum_block_size, block_csum, ((uint32_t*)csums)[block_num]);
-                else
-                    return false;
-            }
-            block_num++;
-            buf += cur_len;
-            len -= cur_len;
-            block_done = block_csum = 0;
-        }
-        if (len > 0)
-        {
-            block_csum = crc32c(block_csum, buf, len);
-            block_done += len;
-        }
-    }
-    if (block_done > 0 && block_csum != ((uint32_t*)csums)[block_num])
-    {
-        if (bad_block_cb)
-            bad_block_cb(block_num*dsk.csum_block_size, block_csum, ((uint32_t*)csums)[block_num]);
-        else
-            return false;
-    }
-    return true;
-}
-
-bool blockstore_impl_t::verify_clean_padded_checksums(blockstore_op_t *op, uint64_t clean_loc, uint8_t *dyn_data, bool from_journal,
-    iovec *iov, int n_iov, std::function<void(uint32_t, uint32_t, uint32_t)> bad_block_cb)
-{
-    uint32_t offset = clean_loc % dsk.data_block_size;
-    if (from_journal)
-        return verify_padded_checksums(dyn_data, dyn_data + dsk.clean_entry_bitmap_size, offset, iov, n_iov, bad_block_cb);
-    clean_loc = (clean_loc >> dsk.block_order) << dsk.block_order;
-    if (!dyn_data)
-    {
-        assert(inmemory_meta);
-        dyn_data = get_clean_entry_bitmap(clean_loc, 0);
-    }
-    return verify_padded_checksums(dyn_data, dyn_data + 2*dsk.clean_entry_bitmap_size, offset, iov, n_iov, bad_block_cb);
 }

 void blockstore_impl_t::handle_read_event(ring_data_t *data, blockstore_op_t *op)
@@ -826,139 +284,6 @@ void blockstore_impl_t::handle_read_event(ring_data_t *data, blockstore_op_t *op
    }
    if (PRIV(op)->pending_ops == 0)
    {
-        if (dsk.csum_block_size)
-        {
-            // verify checksums if required
-            auto & rv = PRIV(op)->read_vec;
-            void *meta_block = NULL;
-            if (dsk.csum_block_size > dsk.bitmap_granularity)
-            {
-                for (int i = rv.size()-1; i >= 0 && (rv[i].copy_flags & COPY_BUF_CSUM_FILL); i--)
-                {
-                    if (rv[i].copy_flags & COPY_BUF_META_BLOCK)
-                    {
-                        // Metadata read. Skip
-                        assert(!meta_block);
-                        meta_block = rv[i].buf;
-                        rv[i].buf = NULL;
-                        continue;
-                    }
-                    struct iovec *iov = (struct iovec*)((uint8_t*)rv[i].buf + (rv[i].len & 0xFFFFFFFF));
-                    int n_iov = rv[i].len >> 32;
-                    bool ok = true;
-                    if (rv[i].copy_flags & COPY_BUF_JOURNAL)
-                    {
-                        // SMALL_WRITE from journal
-                        verify_journal_checksums(
-                            rv[i].csum_buf, rv[i].offset, iov, n_iov,
-                            [&](uint32_t bad_block, uint32_t calc_csum, uint32_t stored_csum)
-                            {
-                                ok = false;
-                                printf(
-                                    "Checksum mismatch in object %lx:%lx v%lu in journal at 0x%lx, checksum block #%u: got %08x, expected %08x\n",
-                                    op->oid.inode, op->oid.stripe, op->version,
-                                    rv[i].disk_offset, bad_block / dsk.csum_block_size, calc_csum, stored_csum
-                                );
-                            }
-                        );
-                    }
-                    else
-                    {
-                        // BIG_WRITE from journal or clean data
-                        // Do not verify checksums if the data location is/was mutated by flushers
-                        auto & uo = used_clean_objects.at((rv[i].disk_offset >> dsk.block_order) << dsk.block_order);
-                        if (!uo.was_changed)
-                        {
-                            verify_clean_padded_checksums(
-                                op, rv[i].disk_offset, rv[i].csum_buf, (rv[i].copy_flags & COPY_BUF_JOURNALED_BIG), iov, n_iov,
-                                [&](uint32_t bad_block, uint32_t calc_csum, uint32_t stored_csum)
-                                {
-                                    ok = false;
-                                    printf(
-                                        "Checksum mismatch in object %lx:%lx v%lu in %s data at 0x%lx, checksum block #%u: got %08x, expected %08x\n",
-                                        op->oid.inode, op->oid.stripe, op->version,
-                                        (rv[i].copy_flags & COPY_BUF_JOURNALED_BIG ? "redirect-write" : "clean"),
-                                        rv[i].disk_offset, bad_block / dsk.csum_block_size, calc_csum, stored_csum
-                                    );
-                                }
-                            );
-                        }
-                    }
-                    if (!ok)
-                    {
-                        op->retval = -EDOM;
-                    }
-                    free(rv[i].buf);
-                    rv[i].buf = NULL;
-                    if (rv[i].dyn_data && --(*rv[i].dyn_data) == 0) // refcount
-                    {
-                        free(rv[i].dyn_data);
-                        rv[i].dyn_data = NULL;
-                    }
-                }
-            }
-            else
-            {
-                for (auto & vec: rv)
-                {
-                    if (vec.copy_flags & COPY_BUF_META_BLOCK)
-                    {
-                        // Metadata read. Skip
-                        assert(!meta_block);
-                        meta_block = vec.buf;
-                        vec.buf = NULL;
-                        continue;
-                    }
-                    if (vec.csum_buf)
-                    {
-                        uint32_t *csum = (uint32_t*)vec.csum_buf;
-                        for (size_t p = 0; p < vec.len; p += dsk.csum_block_size, csum++)
-                        {
-                            if (crc32c(0, (uint8_t*)op->buf + vec.offset - op->offset + p, dsk.csum_block_size) != *csum)
-                            {
-                                // checksum error
-                                printf(
-                                    "Checksum mismatch in object %lx:%lx v%lu in %s area at offset 0x%lx+0x%lx: %08x vs %08x\n",
-                                    op->oid.inode, op->oid.stripe, op->version,
-                                    (vec.copy_flags & COPY_BUF_JOURNAL) ? "journal" : "data", vec.disk_offset, p,
-                                    crc32c(0, (uint8_t*)op->buf + vec.offset - op->offset + p, dsk.csum_block_size), *csum
-                                );
-                                op->retval = -EDOM;
-                                break;
-                            }
-                        }
-                    }
-                    if (vec.dyn_data && --(*vec.dyn_data) == 0) // refcount
-                    {
-                        free(vec.dyn_data);
-                        vec.dyn_data = NULL;
-                    }
-                }
-            }
-            if (meta_block)
-            {
-                // Free after checking
-                free(meta_block);
-                meta_block = NULL;
-            }
-        }
-        if (PRIV(op)->clean_block_used)
-        {
-            // Release clean data block
-            auto uo_it = used_clean_objects.find(PRIV(op)->clean_block_used);
-            if (uo_it != used_clean_objects.end())
-            {
-                uo_it->second.refs--;
-                if (uo_it->second.refs <= 0)
-                {
-                    if (uo_it->second.was_freed)
-                    {
-                        data_alloc->set(PRIV(op)->clean_block_used, false);
-                    }
-                    used_clean_objects.erase(uo_it);
-                }
-            }
-        }
        if (!journal.inmemory)
        {
            // Release journal sector usage
@@ -999,9 +324,8 @@ int blockstore_impl_t::read_bitmap(object_id oid, uint64_t target_version, void
                    *result_version = dirty_it->first.version;
                if (bitmap)
                {
-                    void *dyn_ptr = (alloc_dyn_data
-                        ? (uint8_t*)dirty_it->second.dyn_data + sizeof(int) : (uint8_t*)&dirty_it->second.dyn_data);
-                    memcpy(bitmap, dyn_ptr, dsk.clean_entry_bitmap_size);
+                    void *bmp_ptr = (dsk.clean_entry_bitmap_size > sizeof(void*) ? dirty_it->second.bitmap : &dirty_it->second.bitmap);
+                    memcpy(bitmap, bmp_ptr, dsk.clean_entry_bitmap_size);
                }
                return 0;
            }
--- a/src/blockstore_rollback.cpp
+++ b/src/blockstore_rollback.cpp
@@ -218,7 +218,7 @@ void blockstore_impl_t::erase_dirty(blockstore_dirty_db_t::iterator dirty_start,
        auto used = --journal.used_sectors[dirty_it->second.journal_sector];
 #ifdef BLOCKSTORE_DEBUG
        printf(
-            "remove usage of journal offset %08lx by %lx:%lx v%lu (%lu refs)\n", dirty_it->second.journal_sector,
+            "remove usage of journal offset %08lx by %lx:%lx v%lu (%d refs)\n", dirty_it->second.journal_sector,
            dirty_it->first.oid.inode, dirty_it->first.oid.stripe, dirty_it->first.version, used
        );
 #endif
@@ -227,7 +227,11 @@ void blockstore_impl_t::erase_dirty(blockstore_dirty_db_t::iterator dirty_start,
            journal.used_sectors.erase(dirty_it->second.journal_sector);
            flusher->mark_trim_possible();
        }
-        free_dirty_dyn_data(dirty_it->second);
+        if (dsk.clean_entry_bitmap_size > sizeof(void*))
+        {
+            free(dirty_it->second.bitmap);
+            dirty_it->second.bitmap = NULL;
+        }
        if (dirty_it == dirty_start)
        {
            break;
@@ -236,18 +240,3 @@ void blockstore_impl_t::erase_dirty(blockstore_dirty_db_t::iterator dirty_start,
    }
    dirty_db.erase(dirty_start, dirty_end);
 }
-
-void blockstore_impl_t::free_dirty_dyn_data(dirty_entry & e)
-{
-    if (e.dyn_data)
-    {
-        if (alloc_dyn_data &&
-            --*((int*)e.dyn_data) == 0) // refcount
-        {
-            // dyn_data contains the bitmap and checksums
-            // free it if it doesn't refer to the in-memory journal
-            free(e.dyn_data);
-        }
-        e.dyn_data = NULL;
-    }
-}
--- a/src/blockstore_sync.cpp
+++ b/src/blockstore_sync.cpp
@@ -16,6 +16,7 @@ int blockstore_impl_t::continue_sync(blockstore_op_t *op)
 {
    if (immediate_commit == IMMEDIATE_ALL)
    {
+        // We can return immediately because sync is only dequeued after all previous writes
        op->retval = 0;
        FINISH_OP(op);
        return 2;
@@ -26,6 +27,8 @@ int blockstore_impl_t::continue_sync(blockstore_op_t *op)
        unsynced_big_write_count -= unsynced_big_writes.size();
        PRIV(op)->sync_big_writes.swap(unsynced_big_writes);
        PRIV(op)->sync_small_writes.swap(unsynced_small_writes);
+        PRIV(op)->sync_small_checked = 0;
+        PRIV(op)->sync_big_checked = 0;
        unsynced_big_writes.clear();
        unsynced_small_writes.clear();
        if (PRIV(op)->sync_big_writes.size() > 0)
@@ -75,23 +78,7 @@ int blockstore_impl_t::continue_sync(blockstore_op_t *op)
        // 2nd step: Data device is synced, prepare & write journal entries
        // Check space in the journal and journal memory buffers
        blockstore_journal_check_t space_check(this);
-        if (dsk.csum_block_size)
-        {
-            // More complex check because all journal entries have different lengths
-            int left = PRIV(op)->sync_big_writes.size();
-            for (auto & sbw: PRIV(op)->sync_big_writes)
-            {
-                left--;
-                auto & dirty_entry = dirty_db.at(sbw);
-                uint64_t dyn_size = dsk.dirty_dyn_size(dirty_entry.offset, dirty_entry.len);
-                if (!space_check.check_available(op, 1, sizeof(journal_entry_big_write) + dyn_size,
-                    left == 0 ? JOURNAL_STABILIZE_RESERVATION : 0))
-                {
-                    return 0;
-                }
-            }
-        }
-        else if (!space_check.check_available(op, PRIV(op)->sync_big_writes.size(),
+        if (!space_check.check_available(op, PRIV(op)->sync_big_writes.size(),
            sizeof(journal_entry_big_write) + dsk.clean_entry_bitmap_size, JOURNAL_STABILIZE_RESERVATION))
        {
            return 0;
@@ -103,17 +90,16 @@ int blockstore_impl_t::continue_sync(blockstore_op_t *op)
        int s = 0;
        while (it != PRIV(op)->sync_big_writes.end())
        {
-            auto & dirty_entry = dirty_db.at(*it);
-            uint64_t dyn_size = dsk.dirty_dyn_size(dirty_entry.offset, dirty_entry.len);
-            if (!journal.entry_fits(sizeof(journal_entry_big_write) + dyn_size) &&
+            if (!journal.entry_fits(sizeof(journal_entry_big_write) + dsk.clean_entry_bitmap_size) &&
                journal.sector_info[journal.cur_sector].dirty)
            {
                prepare_journal_sector_write(journal.cur_sector, op);
                s++;
            }
+            auto & dirty_entry = dirty_db.at(*it);
            journal_entry_big_write *je = (journal_entry_big_write*)prefill_single_journal_entry(
                journal, (dirty_entry.state & BS_ST_INSTANT) ? JE_BIG_WRITE_INSTANT : JE_BIG_WRITE,
-                sizeof(journal_entry_big_write) + dyn_size
+                sizeof(journal_entry_big_write) + dsk.clean_entry_bitmap_size
            );
            dirty_entry.journal_sector = journal.sector_info[journal.cur_sector].offset;
            journal.used_sectors[journal.sector_info[journal.cur_sector].offset]++;
@@ -129,8 +115,8 @@ int blockstore_impl_t::continue_sync(blockstore_op_t *op)
            je->offset = dirty_entry.offset;
            je->len = dirty_entry.len;
            je->location = dirty_entry.location;
-            memcpy((void*)(je+1), (alloc_dyn_data
-                ? (uint8_t*)dirty_entry.dyn_data+sizeof(int) : (uint8_t*)&dirty_entry.dyn_data), dyn_size);
+            memcpy((void*)(je+1), (dsk.clean_entry_bitmap_size > sizeof(void*)
+                ? dirty_entry.bitmap : &dirty_entry.bitmap), dsk.clean_entry_bitmap_size);
            je->crc32 = je_crc32((journal_entry*)je);
            journal.crc32_last = je->crc32;
            it++;
--- a/src/blockstore_write.cpp
+++ b/src/blockstore_write.cpp
@@ -8,21 +8,12 @@ bool blockstore_impl_t::enqueue_write(blockstore_op_t *op)
    // Check or assign version number
    bool found = false, deleted = false, unsynced = false, is_del = (op->opcode == BS_OP_DELETE);
    bool wait_big = false, wait_del = false;
-    void *dyn = NULL;
-    if (is_del)
-    {
-        op->len = 0;
-    }
-    size_t dyn_size = dsk.dirty_dyn_size(op->offset, op->len);
-    if (!is_del && alloc_dyn_data)
-    {
-        // FIXME: Working with `dyn_data` has to be refactored somehow but I first have to decide how :)
-        // +sizeof(int) = refcount
-        dyn = calloc_or_die(1, dyn_size+sizeof(int));
-        *((int*)dyn) = 1;
-    }
-    uint8_t *dyn_ptr = (uint8_t*)(alloc_dyn_data ? dyn+sizeof(int) : &dyn);
+    void *bmp = NULL;
    uint64_t version = 1;
+    if (!is_del && dsk.clean_entry_bitmap_size > sizeof(void*))
+    {
+        bmp = calloc_or_die(1, dsk.clean_entry_bitmap_size);
+    }
    if (dirty_db.size() > 0)
    {
        auto dirty_it = dirty_db.upper_bound((obj_ver_id){
@@ -42,9 +33,10 @@ bool blockstore_impl_t::enqueue_write(blockstore_op_t *op)
                : ((dirty_it->second.state & BS_ST_WORKFLOW_MASK) == BS_ST_WAIT_BIG);
            if (!is_del && !deleted)
            {
-                void *dyn_from = alloc_dyn_data
-                    ? (uint8_t*)dirty_it->second.dyn_data + sizeof(int) : (uint8_t*)&dirty_it->second.dyn_data;
-                memcpy(dyn_ptr, dyn_from, dsk.clean_entry_bitmap_size);
+                if (dsk.clean_entry_bitmap_size > sizeof(void*))
+                    memcpy(bmp, dirty_it->second.bitmap, dsk.clean_entry_bitmap_size);
+                else
+                    bmp = dirty_it->second.bitmap;
            }
        }
    }
@@ -58,7 +50,7 @@ bool blockstore_impl_t::enqueue_write(blockstore_op_t *op)
            if (!is_del)
            {
                void *bmp_ptr = get_clean_entry_bitmap(clean_it->second.location, dsk.clean_entry_bitmap_size);
-                memcpy(dyn_ptr, bmp_ptr, dsk.clean_entry_bitmap_size);
+                memcpy((dsk.clean_entry_bitmap_size > sizeof(void*) ? bmp : &bmp), bmp_ptr, dsk.clean_entry_bitmap_size);
            }
        }
        else
@@ -120,16 +112,15 @@ bool blockstore_impl_t::enqueue_write(blockstore_op_t *op)
            printf("Write %lx:%lx v%lu requested, but we already have v%lu\n", op->oid.inode, op->oid.stripe, op->version, version);
 #endif
            op->retval = -EEXIST;
-            if (!is_del && alloc_dyn_data)
+            if (!is_del && dsk.clean_entry_bitmap_size > sizeof(void*))
            {
-                free(dyn);
+                free(bmp);
            }
            return false;
        }
    }
-    bool imm = (op->len < dsk.data_block_size ? (immediate_commit != IMMEDIATE_NONE) : (immediate_commit == IMMEDIATE_ALL));
-    if (wait_big && !is_del && !deleted && op->len < dsk.data_block_size && !imm ||
-        !imm && unsynced_queued_ops >= autosync_writes)
+    if (wait_big && !is_del && !deleted && op->len < dsk.data_block_size &&
+        immediate_commit != IMMEDIATE_ALL)
    {
        // Issue an additional sync so that the previous big write can reach the journal
        blockstore_op_t *sync_op = new blockstore_op_t;
@@ -140,8 +131,6 @@ bool blockstore_impl_t::enqueue_write(blockstore_op_t *op)
        };
        enqueue_op(sync_op);
    }
-    else if (!imm)
-        unsynced_queued_ops++;
 #ifdef BLOCKSTORE_DEBUG
    if (is_del)
        printf("Delete %lx:%lx v%lu\n", op->oid.inode, op->oid.stripe, op->version);
@@ -169,50 +158,26 @@ bool blockstore_impl_t::enqueue_write(blockstore_op_t *op)
        if (op->bitmap)
        {
            // Only allow to overwrite part of the object bitmap respective to the write's offset/len
+            uint8_t *bmp_ptr = (uint8_t*)(dsk.clean_entry_bitmap_size > sizeof(void*) ? bmp : &bmp);
            uint32_t bit = op->offset/dsk.bitmap_granularity;
            uint32_t bits_left = op->len/dsk.bitmap_granularity;
            while (!(bit % 8) && bits_left >= 8)
            {
                // Copy bytes
-                dyn_ptr[bit/8] = ((uint8_t*)op->bitmap)[bit/8];
+                bmp_ptr[bit/8] = ((uint8_t*)op->bitmap)[bit/8];
                bit += 8;
                bits_left -= 8;
            }
            while (bits_left > 0)
            {
                // Copy bits
-                dyn_ptr[bit/8] = (dyn_ptr[bit/8] & ~(1 << (bit%8)))
+                bmp_ptr[bit/8] = (bmp_ptr[bit/8] & ~(1 << (bit%8)))
                    | (((uint8_t*)op->bitmap)[bit/8] & (1 << bit%8));
                bit++;
                bits_left--;
            }
        }
    }
-    // Calculate checksums
-    // FIXME: Allow to receive checksums from outside?
-    if (!is_del && dsk.data_csum_type && op->len > 0)
-    {
-        uint32_t *data_csums = (uint32_t*)(dyn_ptr + dsk.clean_entry_bitmap_size);
-        uint32_t start = op->offset / dsk.csum_block_size;
-        uint32_t end = (op->offset+op->len-1) / dsk.csum_block_size;
-        auto fn = state & BS_ST_BIG_WRITE ? crc32c_pad : crc32c_nopad;
-        if (start == end)
-            data_csums[0] = fn(0, op->buf, op->len, op->offset - start*dsk.csum_block_size, end*dsk.csum_block_size - (op->offset+op->len));
-        else
-        {
-            // First block
-            data_csums[0] = fn(0, op->buf, dsk.csum_block_size*(start+1)-op->offset, op->offset - start*dsk.csum_block_size, 0);
-            // Intermediate blocks
-            for (uint32_t i = start+1; i < end; i++)
-                data_csums[i-start] = crc32c(0, (uint8_t*)op->buf + dsk.csum_block_size*i-op->offset, dsk.csum_block_size);
-            // Last block
-            data_csums[end-start] = fn(
-                0, (uint8_t*)op->buf + end*dsk.csum_block_size - op->offset,
-                op->offset+op->len - end*dsk.csum_block_size,
-                0, (end+1)*dsk.csum_block_size - (op->offset+op->len)
-            );
-        }
-    }
    dirty_db.emplace((obj_ver_id){
        .oid = op->oid,
        .version = op->version,
@@ -223,7 +188,7 @@ bool blockstore_impl_t::enqueue_write(blockstore_op_t *op)
        .offset = is_del ? 0 : op->offset,
        .len = is_del ? 0 : op->len,
        .journal_sector = 0,
-        .dyn_data = dyn,
+        .bitmap = bmp,
    });
    return true;
 }
@@ -232,7 +197,8 @@ void blockstore_impl_t::cancel_all_writes(blockstore_op_t *op, blockstore_dirty_
 {
    while (dirty_it != dirty_db.end() && dirty_it->first.oid == op->oid)
    {
-        free_dirty_dyn_data(dirty_it->second);
+        if (dsk.clean_entry_bitmap_size > sizeof(void*))
+            free(dirty_it->second.bitmap);
        dirty_db.erase(dirty_it++);
    }
    bool found = false;
@@ -314,7 +280,7 @@ int blockstore_impl_t::dequeue_write(blockstore_op_t *op)
    {
        blockstore_journal_check_t space_check(this);
        if (!space_check.check_available(op, unsynced_big_write_count + 1,
-            sizeof(journal_entry_big_write) + dsk.clean_dyn_size,
+            sizeof(journal_entry_big_write) + dsk.clean_entry_bitmap_size,
            (dirty_it->second.state & BS_ST_INSTANT) ? JOURNAL_INSTANT_RESERVATION : JOURNAL_STABILIZE_RESERVATION))
        {
            return 0;
@@ -381,6 +347,7 @@ int blockstore_impl_t::dequeue_write(blockstore_op_t *op)
            sqe, dsk.data_fd, PRIV(op)->iov_zerofill, vcnt, dsk.data_offset + (loc << dsk.block_order) + op->offset - stripe_offset
        );
        PRIV(op)->pending_ops = 1;
+        PRIV(op)->min_flushed_journal_sector = PRIV(op)->max_flushed_journal_sector = 0;
        if (immediate_commit != IMMEDIATE_ALL)
        {
            // Increase the counter, but don't save into unsynced_writes yet (can't sync until the write is finished)
@@ -396,13 +363,12 @@ int blockstore_impl_t::dequeue_write(blockstore_op_t *op)
    {
        // Small (journaled) write
        // First check if the journal has sufficient space
-        uint64_t dyn_size = dsk.dirty_dyn_size(op->offset, op->len);
        blockstore_journal_check_t space_check(this);
        if (unsynced_big_write_count &&
            !space_check.check_available(op, unsynced_big_write_count,
-                sizeof(journal_entry_big_write) + dsk.clean_dyn_size, 0)
+                sizeof(journal_entry_big_write) + dsk.clean_entry_bitmap_size, 0)
            || !space_check.check_available(op, 1,
-                sizeof(journal_entry_small_write) + dyn_size,
+                sizeof(journal_entry_small_write) + dsk.clean_entry_bitmap_size,
                op->len + ((dirty_it->second.state & BS_ST_INSTANT) ? JOURNAL_INSTANT_RESERVATION : JOURNAL_STABILIZE_RESERVATION)))
        {
            return 0;
@@ -411,21 +377,27 @@ int blockstore_impl_t::dequeue_write(blockstore_op_t *op)
        BS_SUBMIT_CHECK_SQES(
            // Write current journal sector only if it's dirty and full, or in the immediate_commit mode
            (immediate_commit != IMMEDIATE_NONE ||
-                !journal.entry_fits(sizeof(journal_entry_small_write) + dyn_size) ? 1 : 0) +
+                !journal.entry_fits(sizeof(journal_entry_small_write) + dsk.clean_entry_bitmap_size) ? 1 : 0) +
            (op->len > 0 ? 1 : 0)
        );
        write_iodepth++;
        // Got SQEs. Prepare previous journal sector write if required
        auto cb = [this, op](ring_data_t *data) { handle_write_event(data, op); };
-        if (immediate_commit == IMMEDIATE_NONE &&
-            !journal.entry_fits(sizeof(journal_entry_small_write) + dyn_size))
+        if (immediate_commit == IMMEDIATE_NONE)
        {
-            prepare_journal_sector_write(journal.cur_sector, op);
+            if (!journal.entry_fits(sizeof(journal_entry_small_write) + dsk.clean_entry_bitmap_size))
+            {
+                prepare_journal_sector_write(journal.cur_sector, op);
+            }
+            else
+            {
+                PRIV(op)->min_flushed_journal_sector = PRIV(op)->max_flushed_journal_sector = 0;
+            }
        }
        // Then pre-fill journal entry
        journal_entry_small_write *je = (journal_entry_small_write*)prefill_single_journal_entry(
            journal, op->opcode == BS_OP_WRITE_STABLE ? JE_SMALL_WRITE_INSTANT : JE_SMALL_WRITE,
-            sizeof(journal_entry_small_write) + dyn_size
+            sizeof(journal_entry_small_write) + dsk.clean_entry_bitmap_size
        );
        dirty_it->second.journal_sector = journal.sector_info[journal.cur_sector].offset;
        journal.used_sectors[journal.sector_info[journal.cur_sector].offset]++;
@@ -459,9 +431,8 @@ int blockstore_impl_t::dequeue_write(blockstore_op_t *op)
        je->offset = op->offset;
        je->len = op->len;
        je->data_offset = journal.next_free;
-        je->crc32_data = dsk.csum_block_size ? 0 : crc32c(0, op->buf, op->len);
-        memcpy((void*)(je+1), (alloc_dyn_data
-            ? (uint8_t*)dirty_it->second.dyn_data+sizeof(int) : (uint8_t*)&dirty_it->second.dyn_data), dyn_size);
+        je->crc32_data = crc32c(0, op->buf, op->len);
+        memcpy((void*)(je+1), (dsk.clean_entry_bitmap_size > sizeof(void*) ? dirty_it->second.bitmap : &dirty_it->second.bitmap), dsk.clean_entry_bitmap_size);
        je->crc32 = je_crc32((journal_entry*)je);
        journal.crc32_last = je->crc32;
        if (immediate_commit != IMMEDIATE_NONE)
@@ -530,9 +501,9 @@ resume_2:
            .version = op->version,
        });
        assert(dirty_it != dirty_db.end());
-        uint64_t dyn_size = dsk.dirty_dyn_size(op->offset, op->len);
        blockstore_journal_check_t space_check(this);
-        if (!space_check.check_available(op, 1, sizeof(journal_entry_big_write) + dyn_size,
+        if (!space_check.check_available(op, 1,
+            sizeof(journal_entry_big_write) + dsk.clean_entry_bitmap_size,
            ((dirty_it->second.state & BS_ST_INSTANT) ? JOURNAL_INSTANT_RESERVATION : JOURNAL_STABILIZE_RESERVATION)))
        {
            return 0;
@@ -540,7 +511,7 @@ resume_2:
        BS_SUBMIT_CHECK_SQES(1);
        journal_entry_big_write *je = (journal_entry_big_write*)prefill_single_journal_entry(
            journal, op->opcode == BS_OP_WRITE_STABLE ? JE_BIG_WRITE_INSTANT : JE_BIG_WRITE,
-            sizeof(journal_entry_big_write) + dyn_size
+            sizeof(journal_entry_big_write) + dsk.clean_entry_bitmap_size
        );
        dirty_it->second.journal_sector = journal.sector_info[journal.cur_sector].offset;
        journal.used_sectors[journal.sector_info[journal.cur_sector].offset]++;
@@ -556,8 +527,7 @@ resume_2:
        je->offset = op->offset;
        je->len = op->len;
        je->location = dirty_it->second.location;
-        memcpy((void*)(je+1), (alloc_dyn_data
-            ? (uint8_t*)dirty_it->second.dyn_data+sizeof(int) : (uint8_t*)&dirty_it->second.dyn_data), dyn_size);
+        memcpy((void*)(je+1), (dsk.clean_entry_bitmap_size > sizeof(void*) ? dirty_it->second.bitmap : &dirty_it->second.bitmap), dsk.clean_entry_bitmap_size);
        je->crc32 = je_crc32((journal_entry*)je);
        journal.crc32_last = je->crc32;
        prepare_journal_sector_write(journal.cur_sector, op);
@@ -691,13 +661,8 @@ void blockstore_impl_t::release_journal_sectors(blockstore_op_t *op)
        uint64_t s = PRIV(op)->min_flushed_journal_sector;
        while (1)
        {
-            if (!journal.sector_info[s-1].dirty && journal.sector_info[s-1].flush_count == 0)
+            if (s != (1+journal.cur_sector) && journal.sector_info[s-1].flush_count == 0)
            {
-                if (s == (1+journal.cur_sector))
-                {
-                    // Forcibly move to the next sector and move dirty position
-                    journal.in_sector_pos = journal.block_size;
-                }
                // We know for sure that we won't write into this sector anymore
                uint64_t new_ds = journal.sector_info[s-1].offset + journal.block_size;
                if (new_ds >= journal.len)
@@ -746,11 +711,17 @@ int blockstore_impl_t::dequeue_del(blockstore_op_t *op)
    }
    write_iodepth++;
    // Prepare journal sector write
-    if (immediate_commit == IMMEDIATE_NONE &&
-        (dsk.journal_block_size - journal.in_sector_pos) < sizeof(journal_entry_del) &&
-        journal.sector_info[journal.cur_sector].dirty)
+    if (immediate_commit == IMMEDIATE_NONE)
    {
-        prepare_journal_sector_write(journal.cur_sector, op);
+        if ((dsk.journal_block_size - journal.in_sector_pos) < sizeof(journal_entry_del) &&
+            journal.sector_info[journal.cur_sector].dirty)
+        {
+            prepare_journal_sector_write(journal.cur_sector, op);
+        }
+        else
+        {
+            PRIV(op)->min_flushed_journal_sector = PRIV(op)->max_flushed_journal_sector = 0;
+        }
    }
    // Pre-fill journal entry
    journal_entry_del *je = (journal_entry_del*)prefill_single_journal_entry(
--- a/src/cli.cpp
+++ b/src/cli.cpp
@@ -349,7 +349,6 @@ static int run(cli_tool_t *p, json11::Json::object cfg)
                p->ringloop->wait();
        }
        // Destroy the client
-        p->cli->flush();
        delete p->cli;
        delete p->epmgr;
        delete p->ringloop;
@@ -358,8 +357,6 @@ static int run(cli_tool_t *p, json11::Json::object cfg)
        p->ringloop = NULL;
    }
    // Print result
-    fflush(stderr);
-    fflush(stdout);
    if (p->json_output && !result.data.is_null())
    {
        printf("%s\n", result.data.dump().c_str());
--- a/src/cli_alloc_osd.cpp
+++ b/src/cli_alloc_osd.cpp
@@ -77,8 +77,8 @@ struct alloc_osd_t
                    std::string key = base64_decode(kv["key"].string_value());
                    osd_num_t cur_osd;
                    char null_byte = 0;
-                    int scanned = sscanf(key.c_str() + parent->cli->st_cli.etcd_prefix.length(), "/osd/stats/%lu%c", &cur_osd, &null_byte);
-                    if (scanned != 1 || !cur_osd)
+                    sscanf(key.c_str() + parent->cli->st_cli.etcd_prefix.length(), "/osd/stats/%lu%c", &cur_osd, &null_byte);
+                    if (!cur_osd || null_byte != 0)
                    {
                        fprintf(stderr, "Invalid key in etcd: %s\n", key.c_str());
                        continue;
--- a/src/cli_df.cpp
+++ b/src/cli_df.cpp
@@ -67,8 +67,8 @@ resume_1:
            // pool ID
            pool_id_t pool_id;
            char null_byte = 0;
-            int scanned = sscanf(kv.key.substr(parent->cli->st_cli.etcd_prefix.length()).c_str(), "/pool/stats/%u%c", &pool_id, &null_byte);
-            if (scanned != 1 || !pool_id || pool_id >= POOL_ID_MAX)
+            sscanf(kv.key.substr(parent->cli->st_cli.etcd_prefix.length()).c_str(), "/pool/stats/%u%c", &pool_id, &null_byte);
+            if (!pool_id || pool_id >= POOL_ID_MAX || null_byte != 0)
            {
                fprintf(stderr, "Invalid key in etcd: %s\n", kv.key.c_str());
                continue;
@@ -82,8 +82,8 @@ resume_1:
            // osd ID
            osd_num_t osd_num;
            char null_byte = 0;
-            int scanned = sscanf(kv.key.substr(parent->cli->st_cli.etcd_prefix.length()).c_str(), "/osd/stats/%lu%c", &osd_num, &null_byte);
-            if (scanned != 1 || !osd_num || osd_num >= POOL_ID_MAX)
+            sscanf(kv.key.substr(parent->cli->st_cli.etcd_prefix.length()).c_str(), "/osd/stats/%lu%c", &osd_num, &null_byte);
+            if (!osd_num || osd_num >= POOL_ID_MAX || null_byte != 0)
            {
                fprintf(stderr, "Invalid key in etcd: %s\n", kv.key.c_str());
                continue;
--- a/src/cli_ls.cpp
+++ b/src/cli_ls.cpp
@@ -56,15 +56,14 @@ struct image_lister_t
            {
                continue;
            }
-            auto pool_it = parent->cli->st_cli.pool_config.find(INODE_POOL(ic.second.num));
-            bool good_pool = pool_it != parent->cli->st_cli.pool_config.end();
+            auto & pool_cfg = parent->cli->st_cli.pool_config.at(INODE_POOL(ic.second.num));
            auto item = json11::Json::object {
                { "name", ic.second.name },
                { "size", ic.second.size },
                { "used_size", 0 },
                { "readonly", ic.second.readonly },
                { "pool_id", (uint64_t)INODE_POOL(ic.second.num) },
-                { "pool_name", good_pool ? pool_it->second.name : "? (ID:"+std::to_string(INODE_POOL(ic.second.num))+")" },
+                { "pool_name", pool_cfg.name },
                { "inode_num", INODE_NO_POOL(ic.second.num) },
                { "inode_id", ic.second.num },
            };
@@ -133,8 +132,8 @@ resume_1:
            // pool ID
            pool_id_t pool_id;
            char null_byte = 0;
-            int scanned = sscanf(kv.key.substr(parent->cli->st_cli.etcd_prefix.length()).c_str(), "/pool/stats/%u%c", &pool_id, &null_byte);
-            if (scanned != 1 || !pool_id || pool_id >= POOL_ID_MAX)
+            sscanf(kv.key.substr(parent->cli->st_cli.etcd_prefix.length()).c_str(), "/pool/stats/%u%c", &pool_id, &null_byte);
+            if (!pool_id || pool_id >= POOL_ID_MAX || null_byte != 0)
            {
                fprintf(stderr, "Invalid key in etcd: %s\n", kv.key.c_str());
                continue;
@@ -149,9 +148,9 @@ resume_1:
            pool_id_t pool_id;
            inode_t only_inode_num;
            char null_byte = 0;
-            int scanned = sscanf(kv.key.substr(parent->cli->st_cli.etcd_prefix.length()).c_str(),
+            sscanf(kv.key.substr(parent->cli->st_cli.etcd_prefix.length()).c_str(),
                "/inode/stats/%u/%lu%c", &pool_id, &only_inode_num, &null_byte);
-            if (scanned != 2 || !pool_id || pool_id >= POOL_ID_MAX || INODE_POOL(only_inode_num) != 0)
+            if (!pool_id || pool_id >= POOL_ID_MAX || INODE_POOL(only_inode_num) != 0 || null_byte != 0)
            {
                fprintf(stderr, "Invalid key in etcd: %s\n", kv.key.c_str());
                continue;
@@ -248,8 +247,6 @@ resume_1:
        if (state == 1)
            goto resume_1;
        get_list();
-        if (state == 100)
-            return;
        if (show_stats)
        {
 resume_1:
@@ -272,7 +269,7 @@ resume_1:
            { "key", "name" },
            { "title", "NAME" },
        });
-        if (list_pool_name == "")
+        if (!list_pool_id)
        {
            cols.push_back(json11::Json::object{
                { "key", "pool_name" },
@@ -379,18 +376,16 @@ resume_1:

 std::string print_table(json11::Json items, json11::Json header, bool use_esc)
 {
-    int header_sizes[header.array_items().size()];
    std::vector<int> sizes;
    for (int i = 0; i < header.array_items().size(); i++)
    {
-        header_sizes[i] = utf8_length(header[i]["title"].string_value());
-        sizes.push_back(header_sizes[i]);
+        sizes.push_back(header[i]["title"].string_value().length());
    }
    for (auto & item: items.array_items())
    {
        for (int i = 0; i < header.array_items().size(); i++)
        {
-            int l = utf8_length(item[header[i]["key"].string_value()].as_string());
+            int l = item[header[i]["key"].string_value()].as_string().length();
            sizes[i] = sizes[i] < l ? l : sizes[i];
        }
    }
@@ -402,7 +397,7 @@ std::string print_table(json11::Json items, json11::Json header, bool use_esc)
            // Separator
            str += "  ";
        }
-        int pad = sizes[i]-header_sizes[i];
+        int pad = sizes[i]-header[i]["title"].string_value().length();
        if (header[i]["right"].bool_value())
        {
            // Align right
@@ -430,7 +425,7 @@ std::string print_table(json11::Json items, json11::Json header, bool use_esc)
                // Separator
                str += "  ";
            }
-            int pad = sizes[i] - utf8_length(item[header[i]["key"].string_value()].as_string());
+            int pad = sizes[i] - item[header[i]["key"].string_value()].as_string().length();
            if (header[i]["right"].bool_value())
            {
                // Align right
--- a/src/cli_merge.cpp
+++ b/src/cli_merge.cpp
@@ -53,7 +53,6 @@ struct snap_merger_t
    std::map<inode_t, std::vector<uint64_t>> layer_lists;
    std::map<inode_t, uint64_t> layer_block_size;
    std::map<inode_t, uint64_t> layer_list_pos;
-    std::vector<snap_rw_op_t*> continue_rwo, continue_rwo2;
    int in_flight = 0;
    uint64_t last_fsync_offset = 0;
    uint64_t last_written_offset = 0;
@@ -305,12 +304,6 @@ struct snap_merger_t
        oit = merge_offsets.begin();
    resume_5:
        // Now read, overwrite and optionally delete offsets one by one
-        continue_rwo2.swap(continue_rwo);
-        for (auto rwo: continue_rwo2)
-        {
-            next_write(rwo);
-        }
-        continue_rwo2.clear();
        while (in_flight < parent->iodepth*parent->parallel_osds &&
            oit != merge_offsets.end() && !rwo_error.size())
        {
@@ -471,8 +464,7 @@ struct snap_merger_t
                rwo->error_offset = op->offset;
                rwo->error_read = true;
            }
-            continue_rwo.push_back(rwo);
-            parent->ringloop->wakeup();
+            next_write(rwo);
        };
        parent->cli->execute(op);
    }
@@ -552,9 +544,11 @@ struct snap_merger_t
            }
            // Increment CAS version
            rwo->op.version = subop->version;
+            if (use_cas)
+                next_write(rwo);
+            else
+                autofree_op(rwo);
            delete subop;
-            continue_rwo.push_back(rwo);
-            parent->ringloop->wakeup();
        };
        parent->cli->execute(subop);
    }
--- a/src/cli_modify.cpp
+++ b/src/cli_modify.cpp
@@ -13,7 +13,7 @@ struct image_changer_t
    std::string image_name;
    std::string new_name;
    uint64_t new_size = 0;
-    bool force_size = false, inc_size = false;
+    bool force_size = false;
    bool set_readonly = false, set_readwrite = false, force = false;
    // interval between fsyncs
    int fsync_interval = 128;
@@ -81,14 +81,14 @@ struct image_changer_t
        }
        if ((!set_readwrite || !cfg.readonly) &&
            (!set_readonly || cfg.readonly) &&
-            (!new_size && !force_size || cfg.size == new_size || cfg.size >= new_size && inc_size) &&
+            (!new_size && !force_size || cfg.size == new_size) &&
            (new_name == "" || new_name == image_name))
        {
            result = (cli_result_t){ .text = "No change" };
            state = 100;
            return;
        }
-        if ((new_size != 0 || force_size) && (cfg.size < new_size || !inc_size))
+        if (new_size != 0 || force_size)
        {
            if (cfg.size >= new_size)
            {
@@ -233,7 +233,6 @@ std::function<bool(cli_result_t &)> cli_tool_t::start_modify(json11::Json cfg)
    changer->new_name = cfg["rename"].string_value();
    changer->new_size = parse_size(cfg["resize"].as_string());
    changer->force_size = cfg["force_size"].bool_value();
-    changer->inc_size = cfg["inc_size"].bool_value();
    changer->force = cfg["force"].bool_value();
    changer->set_readonly = cfg["readonly"].bool_value();
    changer->set_readwrite = cfg["readwrite"].bool_value();
--- a/src/cli_rm.cpp
+++ b/src/cli_rm.cpp
@@ -384,8 +384,8 @@ resume_100:
                pool_id_t pool_id = 0;
                inode_t inode = 0;
                char null_byte = 0;
-                int scanned = sscanf(kv.key.c_str() + parent->cli->st_cli.etcd_prefix.length()+13, "%u/%lu%c", &pool_id, &inode, &null_byte);
-                if (scanned != 2 || !inode)
+                sscanf(kv.key.c_str() + parent->cli->st_cli.etcd_prefix.length()+13, "%u/%lu%c", &pool_id, &inode, &null_byte);
+                if (!inode || null_byte != 0)
                {
                    result = (cli_result_t){ .err = EIO, .text = "Bad key returned from etcd: "+kv.key };
                    state = 100;
--- a/src/cli_status.cpp
+++ b/src/cli_status.cpp
@@ -132,8 +132,8 @@ resume_2:
            auto kv = parent->cli->st_cli.parse_etcd_kv(osd_stats[i]);
            osd_num_t stat_osd_num = 0;
            char null_byte = 0;
-            int scanned = sscanf(kv.key.c_str() + parent->cli->st_cli.etcd_prefix.size(), "/osd/stats/%lu%c", &stat_osd_num, &null_byte);
-            if (scanned != 1 || !stat_osd_num)
+            sscanf(kv.key.c_str() + parent->cli->st_cli.etcd_prefix.size(), "/osd/stats/%lu%c", &stat_osd_num, &null_byte);
+            if (!stat_osd_num || null_byte != 0)
            {
                fprintf(stderr, "Invalid key in etcd: %s\n", kv.key.c_str());
                continue;
--- a/src/cluster_client.cpp
+++ b/src/cluster_client.cpp
@@ -3,13 +3,21 @@

 #include <stdexcept>
 #include <assert.h>
-#include "cluster_client_impl.h"
-#include "http_client.h" // json_is_true
+#include "cluster_client.h"
+
+#define SCRAP_BUFFER_SIZE 4*1024*1024 // 4 MiB; NOTE(review): the expansion is unparenthesized, so avoid using it inside larger arithmetic expressions
+#define PART_SENT 1 // PART_*: distinct bit values kept in op->parts[i].flags
+#define PART_DONE 2
+#define PART_ERROR 4
+#define PART_RETRY 8 // assigned to parts that continue_rw() is going to send again
+#define CACHE_DIRTY 1 // CACHE_*: states of dirty_buffers entries; DIRTY = copied by copy_write(), not yet picked up by a SYNC
+#define CACHE_FLUSHING 2 // picked up by a running SYNC: freed when it succeeds, returned to DIRTY when it fails
+#define CACHE_REPEATING 3 // being re-sent by flush_buffer()
+#define OP_FLUSH_BUFFER 0x02 // op flag: internal write created by flush_buffer() to replay a buffer
+#define OP_IMMEDIATE_COMMIT 0x04 // op flag: set by execute() when the pool has immediate_commit == IMMEDIATE_ALL

 cluster_client_t::cluster_client_t(ring_loop_t *ringloop, timerfd_manager_t *tfd, json11::Json & config)
 {
-    wb = new writeback_cache_t();
-
    cli_config = config.object_items();
    file_config = osd_messenger_t::read_config(config);
    config = osd_messenger_t::merge_configs(cli_config, file_config, etcd_global_config, {});
@@ -29,14 +37,20 @@ cluster_client_t::cluster_client_t(ring_loop_t *ringloop, timerfd_manager_t *tfd
            continue_lists();
            continue_raw_ops(peer_osd);
        }
-        else
+        else if (dirty_buffers.size())
        {
            // peer_osd just dropped connection
            // determine WHICH dirty_buffers are now obsolete and repeat them
-            if (wb->repeat_ops_for(this, peer_osd) > 0)
+            for (auto & wr: dirty_buffers)
            {
-                continue_ops();
+                if (affects_osd(wr.first.inode, wr.first.stripe, wr.second.len, peer_osd) &&
+                    wr.second.state != CACHE_REPEATING)
+                {
+                    // FIXME: Flush in larger parts
+                    flush_buffer(wr.first, &wr.second);
+                }
            }
+            continue_ops();
        }
    };
    msgr.exec_op = [this](osd_op_t *op)
@@ -64,14 +78,16 @@ cluster_client_t::cluster_client_t(ring_loop_t *ringloop, timerfd_manager_t *tfd

 cluster_client_t::~cluster_client_t()
 {
-    msgr.repeer_pgs = [this](osd_num_t){};
+    for (auto bp: dirty_buffers) // release every saved write buffer; note that each map entry is copied by value here, only its buf pointer is used
+    {
+        free(bp.second.buf); // buffers are allocated with malloc_or_die() in copy_write()
+    }
+    dirty_buffers.clear(); // drop the entries whose buf pointers were just freed
    if (ringloop)
    {
        ringloop->unregister_consumer(&consumer);
    }
    free(scrap_buffer);
-    delete wb;
-    wb = NULL;
 }

 cluster_op_t::~cluster_op_t()
@@ -120,19 +136,6 @@ void cluster_client_t::init_msgr()
    }
 }

-void cluster_client_t::unshift_op(cluster_op_t *op)
-{
-    op->next = op_queue_head;
-    if (op_queue_head)
-    {
-        op_queue_head->prev = op;
-        op_queue_head = op;
-    }
-    else
-        op_queue_tail = op_queue_head = op;
-    inc_wait(op->opcode, op->flags, op->next, 1);
-}
-
 void cluster_client_t::calc_wait(cluster_op_t *op)
 {
    op->prev_wait = 0;
@@ -153,7 +156,7 @@ void cluster_client_t::calc_wait(cluster_op_t *op)
    {
        for (auto prev = op->prev; prev; prev = prev->prev)
        {
-            if (prev->opcode == OSD_OP_SYNC || prev->opcode == OSD_OP_WRITE && (!(prev->flags & OP_IMMEDIATE_COMMIT) || enable_writeback))
+            if (prev->opcode == OSD_OP_SYNC || prev->opcode == OSD_OP_WRITE && !(prev->flags & OP_IMMEDIATE_COMMIT))
            {
                op->prev_wait++;
            }
@@ -163,7 +166,21 @@ void cluster_client_t::calc_wait(cluster_op_t *op)
    }
    else /* if (op->opcode == OSD_OP_READ || op->opcode == OSD_OP_READ_BITMAP || op->opcode == OSD_OP_READ_CHAIN_BITMAP) */
    {
-        continue_rw(op);
+        for (auto prev = op_queue_head; prev && prev != op; prev = prev->next)
+        {
+            if (prev->opcode == OSD_OP_WRITE && (prev->flags & OP_FLUSH_BUFFER))
+            {
+                op->prev_wait++;
+            }
+            else if (prev->opcode == OSD_OP_WRITE || prev->opcode == OSD_OP_READ ||
+                prev->opcode == OSD_OP_READ_BITMAP || prev->opcode == OSD_OP_READ_CHAIN_BITMAP)
+            {
+                // Flushes are always in the beginning (we're scanning from the beginning of the queue)
+                break;
+            }
+        }
+        if (!op->prev_wait)
+            continue_rw(op);
    }
 }

@@ -174,8 +191,10 @@ void cluster_client_t::inc_wait(uint64_t opcode, uint64_t flags, cluster_op_t *n
        while (next)
        {
            auto n2 = next->next;
-            if (next->opcode == OSD_OP_SYNC && (!(flags & OP_IMMEDIATE_COMMIT) || enable_writeback) ||
-                next->opcode == OSD_OP_WRITE && (flags & OP_FLUSH_BUFFER) && !(next->flags & OP_FLUSH_BUFFER))
+            if (next->opcode == OSD_OP_SYNC && !(flags & OP_IMMEDIATE_COMMIT) ||
+                next->opcode == OSD_OP_WRITE && (flags & OP_FLUSH_BUFFER) && !(next->flags & OP_FLUSH_BUFFER) ||
+                (next->opcode == OSD_OP_READ || next->opcode == OSD_OP_READ_BITMAP ||
+                    next->opcode == OSD_OP_READ_CHAIN_BITMAP) && (flags & OP_FLUSH_BUFFER))
            {
                next->prev_wait += inc;
                assert(next->prev_wait >= 0);
@@ -226,37 +245,13 @@ void cluster_client_t::erase_op(cluster_op_t *op)
        op_queue_tail = op->prev;
    op->next = op->prev = NULL;
    if (flags & OP_FLUSH_BUFFER)
-    {
-        // Completed flushes change writeback buffer states,
-        // so the callback should be run before inc_wait()
-        // which may continue following SYNCs, but these SYNCs
-        // should know about the changed buffer state
-        // This is ugly but this is the way we do it
        std::function<void(cluster_op_t*)>(op->callback)(op);
-    }
-    if (!(flags & OP_IMMEDIATE_COMMIT) || enable_writeback)
-    {
+    if (!(flags & OP_IMMEDIATE_COMMIT))
        inc_wait(opcode, flags, next, -1);
-    }
+    // Call callback at the end to avoid inconsistencies in prev_wait
+    // if the callback adds more operations itself
    if (!(flags & OP_FLUSH_BUFFER))
-    {
-        // Call callback at the end to avoid inconsistencies in prev_wait
-        // if the callback adds more operations itself
        std::function<void(cluster_op_t*)>(op->callback)(op);
-    }
-    if (flags & OP_FLUSH_BUFFER)
-    {
-        int i = 0;
-        while (i < wb->writeback_overflow.size() && wb->writebacks_active < client_max_writeback_iodepth)
-        {
-            execute_internal(wb->writeback_overflow[i]);
-            i++;
-        }
-        if (i > 0)
-        {
-            wb->writeback_overflow.erase(wb->writeback_overflow.begin(), wb->writeback_overflow.begin()+i);
-        }
-    }
 }

 void cluster_client_t::continue_ops(bool up_retry)
@@ -300,7 +295,6 @@ void cluster_client_t::on_load_config_hook(json11::Json::object & etcd_global_co
 {
    this->etcd_global_config = etcd_global_config;
    config = osd_messenger_t::merge_configs(cli_config, file_config, etcd_global_config, {});
-    // client_max_dirty_bytes/client_dirty_limit
    if (config.find("client_max_dirty_bytes") != config.end())
    {
        client_max_dirty_bytes = config["client_max_dirty_bytes"].uint64_value();
@@ -316,34 +310,11 @@ void cluster_client_t::on_load_config_hook(json11::Json::object & etcd_global_co
    {
        client_max_dirty_bytes = DEFAULT_CLIENT_MAX_DIRTY_BYTES;
    }
-    // client_max_dirty_ops
    client_max_dirty_ops = config["client_max_dirty_ops"].uint64_value();
    if (!client_max_dirty_ops)
    {
        client_max_dirty_ops = DEFAULT_CLIENT_MAX_DIRTY_OPS;
    }
-    // client_enable_writeback
-    enable_writeback = json_is_true(config["client_enable_writeback"]) &&
-        json_is_true(config["client_writeback_allowed"]);
-    // client_max_buffered_bytes
-    client_max_buffered_bytes = config["client_max_buffered_bytes"].uint64_value();
-    if (!client_max_buffered_bytes)
-    {
-        client_max_buffered_bytes = DEFAULT_CLIENT_MAX_BUFFERED_BYTES;
-    }
-    // client_max_buffered_ops
-    client_max_buffered_ops = config["client_max_buffered_ops"].uint64_value();
-    if (!client_max_buffered_ops)
-    {
-        client_max_buffered_ops = DEFAULT_CLIENT_MAX_BUFFERED_OPS;
-    }
-    // client_max_writeback_iodepth
-    client_max_writeback_iodepth = config["client_max_writeback_iodepth"].uint64_value();
-    if (!client_max_writeback_iodepth)
-    {
-        client_max_writeback_iodepth = DEFAULT_CLIENT_MAX_WRITEBACK_IODEPTH;
-    }
-    // up_wait_retry_interval
    up_wait_retry_interval = config["up_wait_retry_interval"].uint64_value();
    if (!up_wait_retry_interval)
    {
@@ -403,8 +374,6 @@ void cluster_client_t::on_change_hook(std::map<std::string, etcd_kv_t> & changes

 bool cluster_client_t::get_immediate_commit(uint64_t inode)
 {
-    if (enable_writeback)
-        return false;
    pool_id_t pool_id = INODE_POOL(inode);
    if (!pool_id)
        return true;
@@ -439,41 +408,6 @@ void cluster_client_t::on_ready(std::function<void(void)> fn)
    }
 }

-bool cluster_client_t::flush()
-{
-    if (!ringloop)
-    {
-        if (wb->writeback_queue.size())
-        {
-            wb->start_writebacks(this, 0);
-            cluster_op_t *sync = new cluster_op_t;
-            sync->opcode = OSD_OP_SYNC;
-            sync->callback = [this](cluster_op_t *sync)
-            {
-                delete sync;
-            };
-            execute(sync);
-        }
-        return op_queue_head == NULL;
-    }
-    bool sync_done = false;
-    cluster_op_t *sync = new cluster_op_t;
-    sync->opcode = OSD_OP_SYNC;
-    sync->callback = [this, &sync_done](cluster_op_t *sync)
-    {
-        delete sync;
-        sync_done = true;
-    };
-    execute(sync);
-    while (!sync_done)
-    {
-        ringloop->loop();
-        if (!sync_done)
-            ringloop->wait();
-    }
-    return true;
-}
-
 /**
 * How writes are synced when immediate_commit is false
 *
@@ -494,9 +428,6 @@ bool cluster_client_t::flush()
 * 3) if yes, send all SYNCs. otherwise, leave current SYNC as is.
 * 4) if any of them fail due to disconnected peers, repeat SYNC after repeating all writes
 * 5) if any of them fail due to other errors, fail the SYNC operation
- *
- * If writeback caching is turned on and writeback limit is not exhausted:
- * data is just copied and the write is confirmed to the client.
 */
 void cluster_client_t::execute(cluster_op_t *op)
 {
@@ -512,73 +443,67 @@ void cluster_client_t::execute(cluster_op_t *op)
        offline_ops.push_back(op);
        return;
    }
-    op->flags = op->flags & OSD_OP_IGNORE_READONLY; // the only allowed flag
-    execute_internal(op);
-}
-
-void cluster_client_t::execute_internal(cluster_op_t *op)
-{
    op->cur_inode = op->inode;
    op->retval = 0;
-    // check alignment, readonly flag and so on
-    if (!check_rw(op))
+    op->flags = op->flags & OSD_OP_IGNORE_READONLY; // single allowed flag
+    if (op->opcode != OSD_OP_SYNC)
    {
-        return;
-    }
-    if (op->opcode == OSD_OP_WRITE && enable_writeback && !(op->flags & OP_FLUSH_BUFFER) &&
-        !op->version /* FIXME no CAS writeback */)
-    {
-        if (wb->writebacks_active >= client_max_writeback_iodepth)
+        pool_id_t pool_id = INODE_POOL(op->cur_inode);
+        if (!pool_id)
        {
-            // Writeback queue is full, postpone the operation
-            wb->writeback_overflow.push_back(op);
+            op->retval = -EINVAL;
+            std::function<void(cluster_op_t*)>(op->callback)(op);
            return;
        }
-        // Just copy and acknowledge the operation
-        wb->copy_write(op, CACHE_DIRTY);
-        while (wb->writeback_bytes + op->len > client_max_buffered_bytes || wb->writeback_queue_size > client_max_buffered_ops)
+        auto pool_it = st_cli.pool_config.find(pool_id);
+        if (pool_it == st_cli.pool_config.end() || pool_it->second.real_pg_count == 0)
        {
-            // Initiate some writeback (asynchronously)
-            wb->start_writebacks(this, 1);
+            // Pools are loaded, but this one is unknown
+            op->retval = -EINVAL;
+            std::function<void(cluster_op_t*)>(op->callback)(op);
+            return;
+        }
+        // Check alignment
+        if (!op->len && (op->opcode == OSD_OP_READ || op->opcode == OSD_OP_READ_BITMAP || op->opcode == OSD_OP_READ_CHAIN_BITMAP || op->opcode == OSD_OP_WRITE) ||
+            op->offset % pool_it->second.bitmap_granularity || op->len % pool_it->second.bitmap_granularity)
+        {
+            op->retval = -EINVAL;
+            std::function<void(cluster_op_t*)>(op->callback)(op);
+            return;
+        }
+        if (pool_it->second.immediate_commit == IMMEDIATE_ALL)
+        {
+            op->flags |= OP_IMMEDIATE_COMMIT;
        }
-        op->retval = op->len;
-        std::function<void(cluster_op_t*)>(op->callback)(op);
-        return;
    }
    if (op->opcode == OSD_OP_WRITE && !(op->flags & OP_IMMEDIATE_COMMIT))
    {
-        if (!(op->flags & OP_FLUSH_BUFFER))
-        {
-            wb->copy_write(op, CACHE_WRITTEN);
-        }
        if (dirty_bytes >= client_max_dirty_bytes || dirty_ops >= client_max_dirty_ops)
        {
            // Push an extra SYNC operation to flush previous writes
            cluster_op_t *sync_op = new cluster_op_t;
            sync_op->opcode = OSD_OP_SYNC;
-            sync_op->flags = OP_FLUSH_BUFFER;
            sync_op->callback = [](cluster_op_t* sync_op)
            {
                delete sync_op;
            };
-            execute_internal(sync_op);
+            sync_op->prev = op_queue_tail;
+            if (op_queue_tail)
+            {
+                op_queue_tail->next = sync_op;
+                op_queue_tail = sync_op;
+            }
+            else
+                op_queue_tail = op_queue_head = sync_op;
+            dirty_bytes = 0;
+            dirty_ops = 0;
+            calc_wait(sync_op);
        }
        dirty_bytes += op->len;
        dirty_ops++;
    }
    else if (op->opcode == OSD_OP_SYNC)
    {
-        // Flush the whole write-back queue first
-        if (!(op->flags & OP_FLUSH_BUFFER) && wb->writeback_overflow.size() > 0)
-        {
-            // Writeback queue is full, postpone the operation
-            wb->writeback_overflow.push_back(op);
-            return;
-        }
-        if (wb->writeback_queue.size())
-        {
-            wb->start_writebacks(this, 0);
-        }
        dirty_bytes = 0;
        dirty_ops = 0;
    }
@@ -590,7 +515,7 @@ void cluster_client_t::execute_internal(cluster_op_t *op)
    }
    else
        op_queue_tail = op_queue_head = op;
-    if (!(op->flags & OP_IMMEDIATE_COMMIT) || enable_writeback)
+    if (!(op->flags & OP_IMMEDIATE_COMMIT))
        calc_wait(op);
    else
    {
@@ -601,52 +526,6 @@ void cluster_client_t::execute_internal(cluster_op_t *op)
    }
 }

-bool cluster_client_t::check_rw(cluster_op_t *op)
-{
-    if (op->opcode == OSD_OP_SYNC)
-    {
-        return true;
-    }
-    pool_id_t pool_id = INODE_POOL(op->cur_inode);
-    if (!pool_id)
-    {
-        op->retval = -EINVAL;
-        std::function<void(cluster_op_t*)>(op->callback)(op);
-        return false;
-    }
-    auto pool_it = st_cli.pool_config.find(pool_id);
-    if (pool_it == st_cli.pool_config.end() || pool_it->second.real_pg_count == 0)
-    {
-        // Pools are loaded, but this one is unknown
-        op->retval = -EINVAL;
-        std::function<void(cluster_op_t*)>(op->callback)(op);
-        return false;
-    }
-    // Check alignment
-    if (!op->len && (op->opcode == OSD_OP_READ || op->opcode == OSD_OP_READ_BITMAP || op->opcode == OSD_OP_READ_CHAIN_BITMAP || op->opcode == OSD_OP_WRITE) ||
-        op->offset % pool_it->second.bitmap_granularity || op->len % pool_it->second.bitmap_granularity)
-    {
-        op->retval = -EINVAL;
-        std::function<void(cluster_op_t*)>(op->callback)(op);
-        return false;
-    }
-    if (pool_it->second.immediate_commit == IMMEDIATE_ALL)
-    {
-        op->flags |= OP_IMMEDIATE_COMMIT;
-    }
-    if ((op->opcode == OSD_OP_WRITE || op->opcode == OSD_OP_DELETE) && !(op->flags & OSD_OP_IGNORE_READONLY))
-    {
-        auto ino_it = st_cli.inode_config.find(op->inode);
-        if (ino_it != st_cli.inode_config.end() && ino_it->second.readonly)
-        {
-            op->retval = -EROFS;
-            std::function<void(cluster_op_t*)>(op->callback)(op);
-            return false;
-        }
-    }
-    return true;
-}
-
 void cluster_client_t::execute_raw(osd_num_t osd_num, osd_op_t *op)
 {
    auto fd_it = msgr.osd_peer_fds.find(osd_num);
@@ -664,6 +543,114 @@ void cluster_client_t::execute_raw(osd_num_t osd_num, osd_op_t *op)
    }
 }

+void cluster_client_t::copy_write(cluster_op_t *op, std::map<object_id, cluster_buffer_t> & dirty_buffers)
+{
+    // Copy the payload of write <op> into <dirty_buffers>: buffers that already overlap the written range are overwritten in place, gaps get newly allocated buffers.
+    // The operation is saved for replay when one of PGs goes out of sync (primary OSD drops our connection in this case)
+    auto dirty_it = dirty_buffers.lower_bound((object_id){ // first buffer whose key is not less than (inode, offset); presumably object_id orders by inode, then stripe - confirm
+        .inode = op->inode,
+        .stripe = op->offset,
+    });
+    while (dirty_it != dirty_buffers.begin()) // step back over preceding buffers that still overlap the start of the write
+    {
+        dirty_it--;
+        if (dirty_it->first.inode != op->inode ||
+            (dirty_it->first.stripe + dirty_it->second.len) <= op->offset) // previous buffer is from another inode or ends at/before op->offset
+        {
+            dirty_it++; // undo the step: that buffer does not overlap the write
+            break;
+        }
+    }
+    uint64_t pos = op->offset, len = op->len, iov_idx = 0, iov_pos = 0; // pos/len: range still to be copied; iov_idx/iov_pos: read cursor inside op->iov
+    while (len > 0) // NOTE(review): len only decreases while iovecs remain; if op->iov holds less than op->len bytes this loop looks unable to progress - confirm callers guarantee it
+    {
+        uint64_t new_len = 0; // size of the uncovered gap at pos; stays 0 when dirty_it already covers pos
+        if (dirty_it == dirty_buffers.end())
+        {
+            new_len = len; // no more buffers: the whole remainder is a gap
+        }
+        else if (dirty_it->first.inode != op->inode || dirty_it->first.stripe > pos)
+        {
+            new_len = dirty_it->first.stripe - pos; // gap up to the next buffer's start; NOTE(review): also taken when that buffer belongs to another inode - if its stripe equals pos the gap becomes 0 and the copy below targets the foreign buffer, verify
+            if (new_len > len)
+            {
+                new_len = len; // clamp the gap to what is left of the write
+            }
+        }
+        if (new_len > 0)
+        {
+            dirty_it = dirty_buffers.emplace_hint(dirty_it, (object_id){ // create a buffer that covers exactly the gap
+                .inode = op->inode,
+                .stripe = pos,
+            }, (cluster_buffer_t){
+                .buf = malloc_or_die(new_len),
+                .len = new_len,
+            });
+        }
+        // FIXME: Split big buffers into smaller ones on overwrites. But this will require refcounting
+        dirty_it->second.state = CACHE_DIRTY; // the buffer now holds data that the next SYNC has to cover
+        uint64_t cur_len = (dirty_it->first.stripe + dirty_it->second.len - pos); // bytes from pos up to the end of the current buffer
+        if (cur_len > len)
+        {
+            cur_len = len;
+        }
+        while (cur_len > 0 && iov_idx < op->iov.count) // copy cur_len bytes from the scatter list into the buffer
+        {
+            unsigned iov_len = (op->iov.buf[iov_idx].iov_len - iov_pos); // unread bytes left in the current iovec
+            if (iov_len <= cur_len) // the rest of this iovec fits: consume it and advance to the next one
+            {
+                memcpy((uint8_t*)dirty_it->second.buf + pos - dirty_it->first.stripe,
+                    (uint8_t*)op->iov.buf[iov_idx].iov_base + iov_pos, iov_len);
+                pos += iov_len;
+                len -= iov_len;
+                cur_len -= iov_len;
+                iov_pos = 0;
+                iov_idx++;
+            }
+            else // the iovec holds more than the buffer can take: copy cur_len bytes and stay on the same iovec
+            {
+                memcpy((uint8_t*)dirty_it->second.buf + pos - dirty_it->first.stripe,
+                    (uint8_t*)op->iov.buf[iov_idx].iov_base + iov_pos, cur_len);
+                pos += cur_len;
+                len -= cur_len;
+                iov_pos += cur_len;
+                cur_len = 0;
+            }
+        }
+        dirty_it++; // continue with the buffer that follows the one just filled
+    }
+}
+
+void cluster_client_t::flush_buffer(const object_id & oid, cluster_buffer_t *wr) // re-send the saved write buffer <wr> located at <oid> as an internal write op placed at the head of the queue
+{
+    wr->state = CACHE_REPEATING; // mark the buffer as being replayed
+    cluster_op_t *op = new cluster_op_t; // deleted by the callback below
+    op->flags = OSD_OP_IGNORE_READONLY|OP_FLUSH_BUFFER; // continue_rw() skips both the readonly check and copy_write() for ops with these flags
+    op->opcode = OSD_OP_WRITE;
+    op->cur_inode = op->inode = oid.inode;
+    op->offset = oid.stripe;
+    op->len = wr->len; // the whole buffer is written again
+    op->iov.push_back(wr->buf, wr->len); // presumably references wr->buf in place rather than copying it - confirm
+    op->callback = [wr](cluster_op_t* op) // NOTE(review): captures the raw buffer pointer, so the dirty_buffers entry has to stay alive until the op completes - confirm
+    {
+        if (wr->state == CACHE_REPEATING)
+        {
+            wr->state = CACHE_DIRTY; // replay finished: the buffer waits for a SYNC again
+        }
+        delete op;
+    };
+    op->next = op_queue_head; // link the op in front of everything already queued
+    if (op_queue_head)
+    {
+        op_queue_head->prev = op;
+        op_queue_head = op;
+    }
+    else
+        op_queue_tail = op_queue_head = op; // the queue was empty
+    inc_wait(op->opcode, op->flags, op->next, 1); // let inc_wait() raise prev_wait of the following ops that have to wait for this flush
+    continue_rw(op); // start processing the flush right away
+}
+
 int cluster_client_t::continue_rw(cluster_op_t *op)
 {
    if (op->state == 0)
@@ -672,7 +659,27 @@ int cluster_client_t::continue_rw(cluster_op_t *op)
        goto resume_1;
    else if (op->state == 2)
        goto resume_2;
+    else if (op->state == 3)
+        goto resume_3;
 resume_0:
+    if (op->opcode == OSD_OP_WRITE || op->opcode == OSD_OP_DELETE)
+    {
+        if (!(op->flags & OSD_OP_IGNORE_READONLY))
+        {
+            auto ino_it = st_cli.inode_config.find(op->inode);
+            if (ino_it != st_cli.inode_config.end() && ino_it->second.readonly)
+            {
+                op->retval = -EINVAL;
+                erase_op(op);
+                return 1;
+            }
+        }
+        if (op->opcode == OSD_OP_WRITE && !(op->flags & OP_IMMEDIATE_COMMIT) && !(op->flags & OP_FLUSH_BUFFER))
+        {
+            copy_write(op, dirty_buffers);
+        }
+    }
+resume_1:
    // Slice the operation into parts
    slice_rw(op);
    op->needs_reslice = false;
@@ -683,9 +690,9 @@ resume_0:
        erase_op(op);
        return 1;
    }
-resume_1:
+resume_2:
    // Send unsent parts, if they're not subject to change
-    op->state = 2;
+    op->state = 3;
    if (op->needs_reslice)
    {
        for (int i = 0; i < op->parts.size(); i++)
@@ -695,7 +702,7 @@ resume_1:
                op->retval = -EPIPE;
            }
        }
-        goto resume_2;
+        goto resume_3;
    }
    for (int i = 0; i < op->parts.size(); i++)
    {
@@ -716,18 +723,18 @@ resume_1:
                        });
                    }
                }
-                op->state = 1;
+                op->state = 2;
            }
        }
    }
-    if (op->state == 1)
+    if (op->state == 2)
    {
        return 0;
    }
-resume_2:
+resume_3:
    if (op->inflight_count > 0)
    {
-        op->state = 2;
+        op->state = 3;
        return 0;
    }
    if (op->done_count >= op->parts.size())
@@ -755,7 +762,7 @@ resume_2:
                op->cur_inode = ino_it->second.parent_id;
                op->parts.clear();
                op->done_count = 0;
-                goto resume_0;
+                goto resume_1;
            }
        }
        op->retval = op->len;
@@ -767,8 +774,7 @@ resume_2:
        erase_op(op);
        return 1;
    }
-    else if (op->retval != 0 && !(op->flags & OP_FLUSH_BUFFER) &&
-        op->retval != -EPIPE && op->retval != -EIO && op->retval != -ENOSPC)
+    else if (op->retval != 0 && op->retval != -EPIPE && op->retval != -EIO && op->retval != -ENOSPC)
    {
        // Fatal error (neither -EPIPE, -EIO nor -ENOSPC)
        // FIXME: Add a parameter to allow to not wait for EIOs (incomplete or corrupted objects) to heal
@@ -783,7 +789,7 @@ resume_2:
        {
            op->parts.clear();
            op->done_count = 0;
-            goto resume_0;
+            goto resume_1;
        }
        else
        {
@@ -794,7 +800,7 @@ resume_2:
                    op->parts[i].flags = PART_RETRY;
                }
            }
-            goto resume_1;
+            goto resume_2;
        }
    }
    return 0;
@@ -868,11 +874,6 @@ void cluster_client_t::slice_rw(cluster_op_t *op)
    int iov_idx = 0;
    size_t iov_pos = 0;
    int i = 0;
-    // We also have to return reads from CACHE_REPEATING buffers - they are not
-    // guaranteed to be present on target OSDs at the moment of repeating
-    // And we're also free to return data from other cached buffers just
-    // because it's faster
-    bool dirty_copied = wb->read_from_cache(op, pool_cfg.bitmap_granularity);
    for (uint64_t stripe = first_stripe; stripe <= last_stripe; stripe += pg_block_size)
    {
        pg_num_t pg_num = (stripe/pool_cfg.pg_stripe_size) % pool_cfg.real_pg_count + 1; // like map_to_pg()
@@ -880,8 +881,7 @@ void cluster_client_t::slice_rw(cluster_op_t *op)
        uint64_t end = (op->offset + op->len) > (stripe + pg_block_size)
            ? (stripe + pg_block_size) : (op->offset + op->len);
        op->parts[i].iov.reset();
-        op->parts[i].flags = 0;
-        if (op->cur_inode != op->inode || op->opcode == OSD_OP_READ && dirty_copied)
+        if (op->cur_inode != op->inode)
        {
            // Read remaining parts from upper layers
            uint64_t prev = begin, cur = begin;
@@ -918,10 +918,7 @@ void cluster_client_t::slice_rw(cluster_op_t *op)
            else
                add_iov(cur-prev, skip_prev, op, iov_idx, iov_pos, op->parts[i].iov, scrap_buffer, scrap_buffer_size);
            if (end == begin)
-            {
                op->done_count++;
-                op->parts[i].flags = PART_DONE;
-            }
        }
        else if (op->opcode != OSD_OP_READ_BITMAP && op->opcode != OSD_OP_READ_CHAIN_BITMAP && op->opcode != OSD_OP_DELETE)
        {
@@ -933,6 +930,7 @@ void cluster_client_t::slice_rw(cluster_op_t *op)
            op->opcode == OSD_OP_DELETE ? 0 : (uint32_t)(end - begin);
        op->parts[i].pg_num = pg_num;
        op->parts[i].osd_num = 0;
+        op->parts[i].flags = 0;
        i++;
    }
 }
@@ -1044,7 +1042,13 @@ int cluster_client_t::continue_sync(cluster_op_t *op)
            do_it++;
    }
    // Post sync to affected OSDs
-    wb->fsync_start();
+    for (auto & prev_op: dirty_buffers)
+    {
+        if (prev_op.second.state == CACHE_DIRTY)
+        {
+            prev_op.second.state = CACHE_FLUSHING;
+        }
+    }
    op->parts.resize(dirty_osds.size());
    op->retval = 0;
    {
@@ -1069,7 +1073,13 @@ resume_1:
    }
    if (op->retval != 0)
    {
-        wb->fsync_error();
+        for (auto uw_it = dirty_buffers.begin(); uw_it != dirty_buffers.end(); uw_it++)
+        {
+            if (uw_it->second.state == CACHE_FLUSHING)
+            {
+                uw_it->second.state = CACHE_DIRTY;
+            }
+        }
        if (op->retval == -EPIPE || op->retval == -EIO || op->retval == -ENOSPC)
        {
            // Retry later
@@ -1083,7 +1093,16 @@ resume_1:
    }
    else
    {
-        wb->fsync_ok();
+        for (auto uw_it = dirty_buffers.begin(); uw_it != dirty_buffers.end(); )
+        {
+            if (uw_it->second.state == CACHE_FLUSHING)
+            {
+                free(uw_it->second.buf);
+                dirty_buffers.erase(uw_it++);
+            }
+            else
+                uw_it++;
+        }
    }
    erase_op(op);
    return 1;
--- a/src/cluster_client.h
+++ b/src/cluster_client.h
@@ -8,9 +8,6 @@

 #define DEFAULT_CLIENT_MAX_DIRTY_BYTES 32*1024*1024
 #define DEFAULT_CLIENT_MAX_DIRTY_OPS 1024
-#define DEFAULT_CLIENT_MAX_BUFFERED_BYTES 32*1024*1024
-#define DEFAULT_CLIENT_MAX_BUFFERED_OPS 1024
-#define DEFAULT_CLIENT_MAX_WRITEBACK_IODEPTH 256
 #define INODE_LIST_DONE 1
 #define INODE_LIST_HAS_UNSTABLE 2
 #define OSD_OP_READ_BITMAP OSD_OP_SEC_READ_BMP
@@ -67,12 +64,17 @@ protected:
    cluster_op_t *prev = NULL, *next = NULL;
    int prev_wait = 0;
    friend class cluster_client_t;
-    friend class writeback_cache_t;
+};
+
+struct cluster_buffer_t
+{
+    void *buf;
+    uint64_t len;
+    int state;
 };

 struct inode_list_t;
 struct inode_list_osd_t;
-class writeback_cache_t;

 // FIXME: Split into public and private interfaces
 class cluster_client_t
@@ -81,23 +83,16 @@ class cluster_client_t
    ring_loop_t *ringloop;

    std::map<pool_id_t, uint64_t> pg_counts;
-    // client_max_dirty_* is actually "max unsynced", for the case when immediate_commit is off
+    // FIXME: Implement inmemory_commit mode. Note that it requires to return overlapping reads from memory.
    uint64_t client_max_dirty_bytes = 0;
    uint64_t client_max_dirty_ops = 0;
-    // writeback improves (1) small consecutive writes and (2) Q1 writes without fsync
-    bool enable_writeback = false;
-    // client_max_buffered_* is the real "dirty limit" - maximum amount of writes buffered in memory
-    uint64_t client_max_buffered_bytes = 0;
-    uint64_t client_max_buffered_ops = 0;
-    uint64_t client_max_writeback_iodepth = 0;
-
    int log_level;
    int up_wait_retry_interval = 500; // ms

    int retry_timeout_id = 0;
    std::vector<cluster_op_t*> offline_ops;
    cluster_op_t *op_queue_head = NULL, *op_queue_tail = NULL;
-    writeback_cache_t *wb = NULL;
+    std::map<object_id, cluster_buffer_t> dirty_buffers;
    std::set<osd_num_t> dirty_osds;
    uint64_t dirty_bytes = 0, dirty_ops = 0;

@@ -127,10 +122,10 @@ public:
    void execute_raw(osd_num_t osd_num, osd_op_t *op);
    bool is_ready();
    void on_ready(std::function<void(void)> fn);
-    bool flush();

    bool get_immediate_commit(uint64_t inode);

+    static void copy_write(cluster_op_t *op, std::map<object_id, cluster_buffer_t> & dirty_buffers);
    void continue_ops(bool up_retry = false);
    inode_list_t *list_inode_start(inode_t inode,
        std::function<void(inode_list_t* lst, std::set<object_id>&& objects, pg_num_t pg_num, osd_num_t primary_osd, int status)> callback);
@@ -143,14 +138,12 @@ public:

 protected:
    bool affects_osd(uint64_t inode, uint64_t offset, uint64_t len, osd_num_t osd);
+    void flush_buffer(const object_id & oid, cluster_buffer_t *wr);
    void on_load_config_hook(json11::Json::object & config);
    void on_load_pgs_hook(bool success);
    void on_change_hook(std::map<std::string, etcd_kv_t> & changes);
    void on_change_osd_state_hook(uint64_t peer_osd);
-    void execute_internal(cluster_op_t *op);
-    void unshift_op(cluster_op_t *op);
    int continue_rw(cluster_op_t *op);
-    bool check_rw(cluster_op_t *op);
    void slice_rw(cluster_op_t *op);
    bool try_send(cluster_op_t *op, int i);
    int continue_sync(cluster_op_t *op);
@@ -164,6 +157,4 @@ protected:
    void continue_listing(inode_list_t *lst);
    void send_list(inode_list_osd_t *cur_list);
    void continue_raw_ops(osd_num_t peer_osd);
-
-    friend class writeback_cache_t;
 };
--- a/src/cluster_client_impl.h
+++ b/src/cluster_client_impl.h
@@ -1,57 +0,0 @@
-// Copyright (c) Vitaliy Filippov, 2019+
-// License: VNPL-1.1 or GNU GPL-2.0+ (see README.md for details)
-
-#pragma once
-
-#include "cluster_client.h"
-
-#define SCRAP_BUFFER_SIZE 4*1024*1024
-#define PART_SENT 1
-#define PART_DONE 2
-#define PART_ERROR 4
-#define PART_RETRY 8
-#define CACHE_DIRTY 1
-#define CACHE_WRITTEN 2
-#define CACHE_FLUSHING 3
-#define CACHE_REPEATING 4
-#define OP_FLUSH_BUFFER 0x02
-#define OP_IMMEDIATE_COMMIT 0x04
-
-struct cluster_buffer_t
-{
-    uint8_t *buf;
-    uint64_t len;
-    int state;
-    uint64_t flush_id;
-    uint64_t *refcnt;
-};
-
-typedef std::map<object_id, cluster_buffer_t>::iterator dirty_buf_it_t;
-
-class writeback_cache_t
-{
-public:
-    uint64_t writeback_bytes = 0;
-    int writeback_queue_size = 0;
-    int writebacks_active = 0;
-    uint64_t last_flush_id = 0;
-
-    std::map<object_id, cluster_buffer_t> dirty_buffers;
-    std::vector<cluster_op_t*> writeback_overflow;
-    std::vector<object_id> writeback_queue;
-    std::multimap<uint64_t, uint64_t*> flushed_buffers; // flush_id => refcnt
-
-    ~writeback_cache_t();
-    dirty_buf_it_t find_dirty(uint64_t inode, uint64_t offset);
-    bool is_left_merged(dirty_buf_it_t dirty_it);
-    bool is_right_merged(dirty_buf_it_t dirty_it);
-    bool is_merged(const dirty_buf_it_t & dirty_it);
-    void copy_write(cluster_op_t *op, int state);
-    int repeat_ops_for(cluster_client_t *cli, osd_num_t peer_osd);
-    void start_writebacks(cluster_client_t *cli, int count);
-    bool read_from_cache(cluster_op_t *op, uint32_t bitmap_granularity);
-    void flush_buffers(cluster_client_t *cli, dirty_buf_it_t from_it, dirty_buf_it_t to_it);
-    void fsync_start();
-    void fsync_error();
-    void fsync_ok();
-};
--- a/src/cluster_client_wb.cpp
+++ b/src/cluster_client_wb.cpp
@@ -1,498 +0,0 @@
-// Copyright (c) Vitaliy Filippov, 2019+
-// License: VNPL-1.1 or GNU GPL-2.0+ (see README.md for details)
-
-#include <cassert>
-
-#include "cluster_client_impl.h"
-
-writeback_cache_t::~writeback_cache_t()
-{
-    for (auto & bp: dirty_buffers)
-    {
-        if (!--(*bp.second.refcnt))
-        {
-            free(bp.second.refcnt); // refcnt is allocated with the buffer
-        }
-    }
-    dirty_buffers.clear();
-}
-
-dirty_buf_it_t writeback_cache_t::find_dirty(uint64_t inode, uint64_t offset)
-{
-    auto dirty_it = dirty_buffers.lower_bound((object_id){
-        .inode = inode,
-        .stripe = offset,
-    });
-    while (dirty_it != dirty_buffers.begin())
-    {
-        dirty_it--;
-        if (dirty_it->first.inode != inode ||
-            (dirty_it->first.stripe + dirty_it->second.len) <= offset)
-        {
-            dirty_it++;
-            break;
-        }
-    }
-    return dirty_it;
-}
-
-bool writeback_cache_t::is_left_merged(dirty_buf_it_t dirty_it)
-{
-    if (dirty_it != dirty_buffers.begin())
-    {
-        auto prev_it = dirty_it;
-        prev_it--;
-        if (prev_it->first.inode == dirty_it->first.inode &&
-            prev_it->first.stripe+prev_it->second.len == dirty_it->first.stripe &&
-            prev_it->second.state == CACHE_DIRTY)
-        {
-            return true;
-        }
-    }
-    return false;
-}
-
-bool writeback_cache_t::is_right_merged(dirty_buf_it_t dirty_it)
-{
-    auto next_it = dirty_it;
-    next_it++;
-    if (next_it != dirty_buffers.end() &&
-        next_it->first.inode == dirty_it->first.inode &&
-        next_it->first.stripe == dirty_it->first.stripe+dirty_it->second.len &&
-        next_it->second.state == CACHE_DIRTY)
-    {
-        return true;
-    }
-    return false;
-}
-
-bool writeback_cache_t::is_merged(const dirty_buf_it_t & dirty_it)
-{
-    return is_left_merged(dirty_it) || is_right_merged(dirty_it);
-}
-
-void writeback_cache_t::copy_write(cluster_op_t *op, int state)
-{
-    // Save operation for replay when one of PGs goes out of sync
-    // (primary OSD drops our connection in this case)
-    // ...or just save it for writeback if write buffering is enabled
-    if (op->len == 0)
-    {
-        return;
-    }
-    auto dirty_it = find_dirty(op->inode, op->offset);
-    auto new_end = op->offset + op->len;
-    while (dirty_it != dirty_buffers.end() &&
-        dirty_it->first.inode == op->inode &&
-        dirty_it->first.stripe < op->offset+op->len)
-    {
-        assert(dirty_it->first.stripe + dirty_it->second.len > op->offset);
-        // Remove overlapping part(s) of buffers
-        auto old_end = dirty_it->first.stripe + dirty_it->second.len;
-        if (dirty_it->first.stripe < op->offset)
-        {
-            if (old_end > new_end)
-            {
-                // Split into end and start
-                dirty_it->second.len = op->offset - dirty_it->first.stripe;
-                dirty_it = dirty_buffers.emplace_hint(dirty_it, (object_id){
-                    .inode = op->inode,
-                    .stripe = new_end,
-                }, (cluster_buffer_t){
-                    .buf = dirty_it->second.buf + new_end - dirty_it->first.stripe,
-                    .len = old_end - new_end,
-                    .state = dirty_it->second.state,
-                    .flush_id = dirty_it->second.flush_id,
-                    .refcnt = dirty_it->second.refcnt,
-                });
-                (*dirty_it->second.refcnt)++;
-                if (dirty_it->second.state == CACHE_DIRTY)
-                {
-                    writeback_bytes -= op->len;
-                    writeback_queue_size++;
-                }
-                break;
-            }
-            else
-            {
-                // Only leave the beginning
-                if (dirty_it->second.state == CACHE_DIRTY)
-                {
-                    writeback_bytes -= old_end - op->offset;
-                    if (is_left_merged(dirty_it) && !is_right_merged(dirty_it))
-                    {
-                        writeback_queue_size++;
-                    }
-                }
-                dirty_it->second.len = op->offset - dirty_it->first.stripe;
-                dirty_it++;
-            }
-        }
-        else if (old_end > new_end)
-        {
-            // Only leave the end
-            if (dirty_it->second.state == CACHE_DIRTY)
-            {
-                writeback_bytes -= new_end - dirty_it->first.stripe;
-                if (!is_left_merged(dirty_it) && is_right_merged(dirty_it))
-                {
-                    writeback_queue_size++;
-                }
-            }
-            auto new_dirty_it = dirty_buffers.emplace_hint(dirty_it, (object_id){
-                .inode = op->inode,
-                .stripe = new_end,
-            }, (cluster_buffer_t){
-                .buf = dirty_it->second.buf + new_end - dirty_it->first.stripe,
-                .len = old_end - new_end,
-                .state = dirty_it->second.state,
-                .flush_id = dirty_it->second.flush_id,
-                .refcnt = dirty_it->second.refcnt,
-            });
-            dirty_buffers.erase(dirty_it);
-            dirty_it = new_dirty_it;
-            break;
-        }
-        else
-        {
-            // Remove the whole buffer
-            if (dirty_it->second.state == CACHE_DIRTY && !is_merged(dirty_it))
-            {
-                writeback_bytes -= dirty_it->second.len;
-                assert(writeback_queue_size > 0);
-                writeback_queue_size--;
-            }
-            if (!--(*dirty_it->second.refcnt))
-            {
-                free(dirty_it->second.refcnt);
-            }
-            dirty_buffers.erase(dirty_it++);
-        }
-    }
-    // Overlapping buffers are removed, just insert the new one
-    uint64_t *refcnt = (uint64_t*)malloc_or_die(sizeof(uint64_t) + op->len);
-    uint8_t *buf = (uint8_t*)refcnt + sizeof(uint64_t);
-    *refcnt = 1;
-    dirty_it = dirty_buffers.emplace_hint(dirty_it, (object_id){
-        .inode = op->inode,
-        .stripe = op->offset,
-    }, (cluster_buffer_t){
-        .buf = buf,
-        .len = op->len,
-        .state = state,
-        .refcnt = refcnt,
-    });
-    if (state == CACHE_DIRTY)
-    {
-        writeback_bytes += op->len;
-        // Track consecutive write-back operations
-        if (!is_merged(dirty_it))
-        {
-            // <writeback_queue> is OK to contain more than actual number of consecutive
-            // requests as long as it doesn't miss anything. But <writeback_queue_size>
-            // is always calculated correctly.
-            writeback_queue_size++;
-            writeback_queue.push_back((object_id){
-                .inode = op->inode,
-                .stripe = op->offset,
-            });
-        }
-    }
-    uint64_t pos = 0, len = op->len, iov_idx = 0;
-    while (len > 0 && iov_idx < op->iov.count)
-    {
-        auto & iov = op->iov.buf[iov_idx];
-        memcpy(buf + pos, iov.iov_base, iov.iov_len);
-        pos += iov.iov_len;
-        iov_idx++;
-    }
-}
-
-int writeback_cache_t::repeat_ops_for(cluster_client_t *cli, osd_num_t peer_osd)
-{
-    int repeated = 0;
-    if (dirty_buffers.size())
-    {
-        // peer_osd just dropped connection
-        // determine WHICH dirty_buffers are now obsolete and repeat them
-        for (auto wr_it = dirty_buffers.begin(), flush_it = wr_it, last_it = wr_it; ; )
-        {
-            bool end = wr_it == dirty_buffers.end();
-            bool flush_this = !end && wr_it->second.state != CACHE_REPEATING &&
-                cli->affects_osd(wr_it->first.inode, wr_it->first.stripe, wr_it->second.len, peer_osd);
-            if (flush_it != wr_it && (end || !flush_this ||
-                wr_it->first.inode != flush_it->first.inode ||
-                wr_it->first.stripe != last_it->first.stripe+last_it->second.len))
-            {
-                repeated++;
-                flush_buffers(cli, flush_it, wr_it);
-                flush_it = wr_it;
-            }
-            if (end)
-                break;
-            last_it = wr_it;
-            wr_it++;
-            if (!flush_this)
-                flush_it = wr_it;
-        }
-    }
-    return repeated;
-}
-
-void writeback_cache_t::flush_buffers(cluster_client_t *cli, dirty_buf_it_t from_it, dirty_buf_it_t to_it)
-{
-    auto prev_it = to_it;
-    prev_it--;
-    bool is_writeback = from_it->second.state == CACHE_DIRTY;
-    cluster_op_t *op = new cluster_op_t;
-    op->flags = OSD_OP_IGNORE_READONLY|OP_FLUSH_BUFFER;
-    op->opcode = OSD_OP_WRITE;
-    op->cur_inode = op->inode = from_it->first.inode;
-    op->offset = from_it->first.stripe;
-    op->len = prev_it->first.stripe + prev_it->second.len - from_it->first.stripe;
-    uint32_t calc_len = 0;
-    uint64_t flush_id = ++last_flush_id;
-    for (auto it = from_it; it != to_it; it++)
-    {
-        it->second.state = CACHE_REPEATING;
-        it->second.flush_id = flush_id;
-        (*it->second.refcnt)++;
-        flushed_buffers.emplace(flush_id, it->second.refcnt);
-        op->iov.push_back(it->second.buf, it->second.len);
-        calc_len += it->second.len;
-    }
-    assert(calc_len == op->len);
-    writebacks_active++;
-    op->callback = [this, cli, flush_id](cluster_op_t* op)
-    {
-        // Buffer flushes should be always retried, regardless of the error,
-        // so they should never result in an error here
-        assert(op->retval == op->len);
-        for (auto fl_it = flushed_buffers.find(flush_id);
-            fl_it != flushed_buffers.end() && fl_it->first == flush_id; )
-        {
-            if (!--(*fl_it->second)) // refcnt
-            {
-                free(fl_it->second);
-            }
-            flushed_buffers.erase(fl_it++);
-        }
-        for (auto dirty_it = find_dirty(op->inode, op->offset);
-            dirty_it != dirty_buffers.end() && dirty_it->first.inode == op->inode &&
-            dirty_it->first.stripe < op->offset+op->len; dirty_it++)
-        {
-            if (dirty_it->second.flush_id == flush_id && dirty_it->second.state == CACHE_REPEATING)
-            {
-                dirty_it->second.flush_id = 0;
-                dirty_it->second.state = CACHE_WRITTEN;
-            }
-        }
-        delete op;
-        writebacks_active--;
-        // We can't call execute_internal because it affects an invalid copy of the list here
-        // (erase_op remembers `next` after writeback callback)
-    };
-    if (is_writeback)
-    {
-        cli->execute_internal(op);
-    }
-    else
-    {
-        // Insert repeated flushes into the beginning
-        cli->unshift_op(op);
-        cli->continue_rw(op);
-    }
-}
-
-void writeback_cache_t::start_writebacks(cluster_client_t *cli, int count)
-{
-    if (!writeback_queue.size())
-    {
-        return;
-    }
-    std::vector<object_id> queue_copy;
-    queue_copy.swap(writeback_queue);
-    int started = 0, i = 0;
-    for (i = 0; i < queue_copy.size() && (!count || started < count); i++)
-    {
-        object_id & req = queue_copy[i];
-        auto dirty_it = find_dirty(req.inode, req.stripe);
-        if (dirty_it == dirty_buffers.end() ||
-            dirty_it->first.inode != req.inode ||
-            dirty_it->second.state != CACHE_DIRTY)
-        {
-            continue;
-        }
-        auto from_it = dirty_it;
-        uint64_t off = dirty_it->first.stripe;
-        while (from_it != dirty_buffers.begin())
-        {
-            from_it--;
-            if (from_it->second.state != CACHE_DIRTY ||
-                from_it->first.inode != req.inode ||
-                from_it->first.stripe+from_it->second.len != off)
-            {
-                from_it++;
-                break;
-            }
-            off = from_it->first.stripe;
-        }
-        off = dirty_it->first.stripe + dirty_it->second.len;
-        auto to_it = dirty_it;
-        to_it++;
-        while (to_it != dirty_buffers.end())
-        {
-            if (to_it->second.state != CACHE_DIRTY ||
-                to_it->first.inode != req.inode ||
-                to_it->first.stripe != off)
-            {
-                break;
-            }
-            off = to_it->first.stripe + to_it->second.len;
-            to_it++;
-        }
-        started++;
-        assert(writeback_queue_size > 0);
-        writeback_queue_size--;
-        writeback_bytes -= off - from_it->first.stripe;
-        flush_buffers(cli, from_it, to_it);
-    }
-    queue_copy.erase(queue_copy.begin(), queue_copy.begin()+i);
-    if (writeback_queue.size())
-    {
-        queue_copy.insert(queue_copy.end(), writeback_queue.begin(), writeback_queue.end());
-    }
-    queue_copy.swap(writeback_queue);
-}
-
-static void copy_to_op(cluster_op_t *op, uint64_t offset, uint8_t *buf, uint64_t len, uint32_t bitmap_granularity)
-{
-    if (op->opcode == OSD_OP_READ)
-    {
-        // Not OSD_OP_READ_BITMAP or OSD_OP_READ_CHAIN_BITMAP
-        int iov_idx = 0;
-        uint64_t cur_offset = op->offset;
-        while (iov_idx < op->iov.count && cur_offset+op->iov.buf[iov_idx].iov_len <= offset)
-        {
-            cur_offset += op->iov.buf[iov_idx].iov_len;
-            iov_idx++;
-        }
-        while (iov_idx < op->iov.count && cur_offset < offset+len)
-        {
-            auto & v = op->iov.buf[iov_idx];
-            auto begin = (cur_offset < offset ? offset : cur_offset);
-            auto end = (cur_offset+v.iov_len > offset+len ? offset+len : cur_offset+v.iov_len);
-            memcpy(
-                v.iov_base + begin - cur_offset,
-                buf + (cur_offset <= offset ? 0 : cur_offset-offset),
-                end - begin
-            );
-            cur_offset += v.iov_len;
-            iov_idx++;
-        }
-    }
-    // Set bitmap bits
-    int start_bit = (offset-op->offset)/bitmap_granularity;
-    int end_bit = (offset-op->offset+len)/bitmap_granularity;
-    for (int bit = start_bit; bit < end_bit;)
-    {
-        if (!(bit%8) && bit <= end_bit-8)
-        {
-            ((uint8_t*)op->bitmap_buf)[bit/8] = 0xFF;
-            bit += 8;
-        }
-        else
-        {
-            ((uint8_t*)op->bitmap_buf)[bit/8] |= (1 << (bit%8));
-            bit++;
-        }
-    }
-}
-
-bool writeback_cache_t::read_from_cache(cluster_op_t *op, uint32_t bitmap_granularity)
-{
-    bool dirty_copied = false;
-    if (dirty_buffers.size() && (op->opcode == OSD_OP_READ ||
-        op->opcode == OSD_OP_READ_BITMAP || op->opcode == OSD_OP_READ_CHAIN_BITMAP))
-    {
-        // We also have to return reads from CACHE_REPEATING buffers - they are not
-        // guaranteed to be present on target OSDs at the moment of repeating
-        // And we're also free to return data from other cached buffers just
-        // because it's faster
-        auto dirty_it = find_dirty(op->cur_inode, op->offset);
-        while (dirty_it != dirty_buffers.end() && dirty_it->first.inode == op->cur_inode &&
-            dirty_it->first.stripe < op->offset+op->len)
-        {
-            uint64_t begin = dirty_it->first.stripe, end = dirty_it->first.stripe + dirty_it->second.len;
-            if (begin < op->offset)
-                begin = op->offset;
-            if (end > op->offset+op->len)
-                end = op->offset+op->len;
-            bool skip_prev = true;
-            uint64_t cur = begin, prev = begin;
-            while (cur < end)
-            {
-                unsigned bmp_loc = (cur - op->offset)/bitmap_granularity;
-                bool skip = (((*((uint8_t*)op->bitmap_buf + bmp_loc/8)) >> (bmp_loc%8)) & 0x1);
-                if (skip_prev != skip)
-                {
-                    if (cur > prev && !skip)
-                    {
-                        // Copy data
-                        dirty_copied = true;
-                        copy_to_op(op, prev, dirty_it->second.buf + prev - dirty_it->first.stripe, cur-prev, bitmap_granularity);
-                    }
-                    skip_prev = skip;
-                    prev = cur;
-                }
-                cur += bitmap_granularity;
-            }
-            assert(cur > prev);
-            if (!skip_prev)
-            {
-                // Copy data
-                dirty_copied = true;
-                copy_to_op(op, prev, dirty_it->second.buf + prev - dirty_it->first.stripe, cur-prev, bitmap_granularity);
-            }
-            dirty_it++;
-        }
-    }
-    return dirty_copied;
-}
-
-void writeback_cache_t::fsync_start()
-{
-    for (auto & prev_op: dirty_buffers)
-    {
-        if (prev_op.second.state == CACHE_WRITTEN)
-        {
-            prev_op.second.state = CACHE_FLUSHING;
-        }
-    }
-}
-
-void writeback_cache_t::fsync_error()
-{
-    for (auto & prev_op: dirty_buffers)
-    {
-        if (prev_op.second.state == CACHE_FLUSHING)
-        {
-            prev_op.second.state = CACHE_WRITTEN;
-        }
-    }
-}
-
-void writeback_cache_t::fsync_ok()
-{
-    for (auto uw_it = dirty_buffers.begin(); uw_it != dirty_buffers.end(); )
-    {
-        if (uw_it->second.state == CACHE_FLUSHING)
-        {
-            if (!--(*uw_it->second.refcnt))
-                free(uw_it->second.refcnt);
-            dirty_buffers.erase(uw_it++);
-        }
-        else
-            uw_it++;
-    }
-}
--- a/src/disk_simple_offsets.cpp
+++ b/src/disk_simple_offsets.cpp
@@ -10,7 +10,6 @@
 #include "json11/json11.hpp"
 #include "str_util.h"
 #include "blockstore.h"
-#include "blockstore_disk.h"

 // Calculate offsets for a block device and print OSD command line parameters
 void disk_tool_simple_offsets(json11::Json cfg, bool json_output)
@@ -21,39 +20,23 @@ void disk_tool_simple_offsets(json11::Json cfg, bool json_output)
        fprintf(stderr, "Device path is missing\n");
        exit(1);
    }
-    uint64_t data_block_size = parse_size(cfg["object_size"].string_value());
+    uint64_t object_size = parse_size(cfg["object_size"].string_value());
    uint64_t bitmap_granularity = parse_size(cfg["bitmap_granularity"].string_value());
    uint64_t journal_size = parse_size(cfg["journal_size"].string_value());
    uint64_t device_block_size = parse_size(cfg["device_block_size"].string_value());
    uint64_t journal_offset = parse_size(cfg["journal_offset"].string_value());
    uint64_t device_size = parse_size(cfg["device_size"].string_value());
-    uint32_t csum_block_size = parse_size(cfg["csum_block_size"].string_value());
-    uint32_t data_csum_type = BLOCKSTORE_CSUM_NONE;
-    if (cfg["data_csum_type"] == "crc32c")
-        data_csum_type = BLOCKSTORE_CSUM_CRC32C;
-    else if (cfg["data_csum_type"].string_value() != "" && cfg["data_csum_type"].string_value() != "none")
-    {
-        fprintf(
-            stderr, "data_csum_type=%s is unsupported, only \"crc32c\" and \"none\" are supported",
-            cfg["data_csum_type"].string_value().c_str()
-        );
-        exit(1);
-    }
    std::string format = cfg["format"].string_value();
    if (json_output)
        format = "json";
-    if (!data_block_size)
-        data_block_size = 1 << DEFAULT_DATA_BLOCK_ORDER;
+    if (!object_size)
+        object_size = 1 << DEFAULT_DATA_BLOCK_ORDER;
    if (!bitmap_granularity)
        bitmap_granularity = DEFAULT_BITMAP_GRANULARITY;
    if (!journal_size)
        journal_size = 16*1024*1024;
    if (!device_block_size)
        device_block_size = 4096;
-    if (!data_csum_type)
-        csum_block_size = 0;
-    else if (!csum_block_size)
-        csum_block_size = bitmap_granularity;
    uint64_t orig_device_size = device_size;
    if (!device_size)
    {
@@ -102,30 +85,22 @@ void disk_tool_simple_offsets(json11::Json cfg, bool json_output)
        fprintf(stderr, "Invalid device block size specified: %lu\n", device_block_size);
        exit(1);
    }
-    if (data_block_size < device_block_size || data_block_size > MAX_DATA_BLOCK_SIZE ||
-        data_block_size & (data_block_size-1) != 0)
+    if (object_size < device_block_size || object_size > MAX_DATA_BLOCK_SIZE ||
+        object_size & (object_size-1) != 0)
    {
-        fprintf(stderr, "Invalid object size specified: %lu\n", data_block_size);
+        fprintf(stderr, "Invalid object size specified: %lu\n", object_size);
        exit(1);
    }
-    if (bitmap_granularity < device_block_size || bitmap_granularity > data_block_size ||
+    if (bitmap_granularity < device_block_size || bitmap_granularity > object_size ||
        bitmap_granularity & (bitmap_granularity-1) != 0)
    {
        fprintf(stderr, "Invalid bitmap granularity specified: %lu\n", bitmap_granularity);
        exit(1);
    }
-    if (csum_block_size && (data_block_size % csum_block_size))
-    {
-        fprintf(stderr, "csum_block_size must be a divisor of data_block_size\n");
-        exit(1);
-    }
    journal_offset = ((journal_offset+device_block_size-1)/device_block_size)*device_block_size;
    uint64_t meta_offset = journal_offset + ((journal_size+device_block_size-1)/device_block_size)*device_block_size;
-    uint64_t data_csum_size = (data_csum_type ? data_block_size/csum_block_size*(data_csum_type & 0xFF) : 0);
-    uint64_t clean_entry_bitmap_size = data_block_size/bitmap_granularity/8;
-    uint64_t clean_entry_size = 24 /*sizeof(clean_disk_entry)*/ + 2*clean_entry_bitmap_size + data_csum_size + 4 /*entry_csum*/;
-    uint64_t entries_per_block = device_block_size / clean_entry_size;
-    uint64_t object_count = ((device_size-meta_offset)/data_block_size);
+    uint64_t entries_per_block = (device_block_size / (24 + 2*object_size/bitmap_granularity/8));
+    uint64_t object_count = ((device_size-meta_offset)/object_size);
    uint64_t meta_size = (1 + (object_count+entries_per_block-1)/entries_per_block) * device_block_size;
    uint64_t data_offset = meta_offset + meta_size;
    if (format == "json")
--- a/src/disk_tool.cpp
+++ b/src/disk_tool.cpp
@@ -59,8 +59,6 @@ static const char *help_text =
    "    --journal_size 32M/1G      Set journal size (area or partition size)\n"
    "    --block_size 128k/1M       Set blockstore object size\n"
    "    --bitmap_granularity 4k    Set bitmap granularity\n"
-    "    --data_csum_type none      Set data checksum type (crc32c or none)\n"
-    "    --csum_block_size 4k       Set data checksum block size\n"
    "    --data_device_block 4k     Override data device block size\n"
    "    --meta_device_block 4k     Override metadata device block size\n"
    "    --journal_device_block 4k  Override journal device block size\n"
@@ -74,9 +72,8 @@ static const char *help_text =
    "  If it doesn't succeed it issues a warning in the system log.\n"
    "  \n"
    "  You can also pass other OSD options here as arguments and they'll be persisted\n"
-    "  in the superblock: data_io, meta_io, journal_io,\n"
-    "  inmemory_metadata, inmemory_journal, max_write_iodepth,\n"
-    "  min_flusher_count, max_flusher_count, journal_sector_buffer_count,\n"
+    "  to the superblock: max_write_iodepth, max_write_iodepth, min_flusher_count,\n"
+    "  max_flusher_count, inmemory_metadata, inmemory_journal, journal_sector_buffer_count,\n"
    "  journal_no_same_sector_overwrites, throttle_small_writes, throttle_target_iops,\n"
    "  throttle_target_mbs, throttle_target_parallelism, throttle_threshold_us.\n"
    "\n"
@@ -164,8 +161,6 @@ static const char *help_text =
    "    --object_size 128k       Set blockstore block size\n"
    "    --bitmap_granularity 4k  Set bitmap granularity\n"
    "    --journal_size 16M       Set journal size\n"
-    "    --data_csum_type none    Set data checksum type (crc32c or none)\n"
-    "    --csum_block_size 4k     Set data checksum block size\n"
    "    --device_block_size 4k   Set device block size\n"
    "    --journal_offset 0       Set journal offset\n"
    "    --device_size 0          Set device size\n"
@@ -275,19 +270,6 @@ int main(int argc, char *argv[])
            fprintf(stderr, "Invalid JSON: %s\n", json_err.c_str());
            return 1;
        }
-        if (entries[0]["type"] == "start")
-        {
-            self.dsk.data_csum_type = csum_type_from_str(entries[0]["data_csum_type"].string_value());
-            self.dsk.csum_block_size = entries[0]["csum_block_size"].uint64_value();
-        }
-        if (self.options["data_csum_type"] != "")
-        {
-            self.dsk.data_csum_type = csum_type_from_str(self.options["data_csum_type"]);
-        }
-        if (self.options["csum_block_size"] != "")
-        {
-            self.dsk.csum_block_size = stoull_full(self.options["csum_block_size"], 0);
-        }
        return self.write_json_journal(entries);
    }
    else if (!strcmp(cmd[0], "dump-meta"))
--- a/src/disk_tool.h
+++ b/src/disk_tool.h
@@ -64,19 +64,17 @@ struct disk_tool_t
    ring_loop_t *ringloop;
    ring_consumer_t ring_consumer;
    int remap_active;
-    journal_entry_start je_start;
    uint8_t *new_journal_buf, *new_meta_buf, *new_journal_ptr, *new_journal_data;
    uint64_t new_journal_in_pos;
    int64_t data_idx_diff;
    uint64_t total_blocks, free_first, free_last;
-    uint64_t new_clean_entry_bitmap_size, new_data_csum_size, new_clean_entry_size, new_entries_per_block;
+    uint64_t new_clean_entry_bitmap_size, new_clean_entry_size, new_entries_per_block;
    int new_journal_fd, new_meta_fd;
    resizer_data_moving_t *moving_blocks;

    bool started;
    void *small_write_data;
    uint32_t data_crc32;
-    bool data_csum_valid;
    uint32_t crc32_last;
    uint32_t new_crc32_prev;

@@ -86,11 +84,11 @@ struct disk_tool_t
    void dump_journal_entry(int num, journal_entry *je, bool json);
    int process_journal(std::function<int(void*)> block_fn);
    int process_journal_block(void *buf, std::function<void(int, journal_entry*)> iter_fn);
-    int process_meta(std::function<void(blockstore_meta_header_v2_t *)> hdr_fn,
+    int process_meta(std::function<void(blockstore_meta_header_v1_t *)> hdr_fn,
        std::function<void(uint64_t, clean_disk_entry*, uint8_t*)> record_fn);

    int dump_meta();
-    void dump_meta_header(blockstore_meta_header_v2_t *hdr);
+    void dump_meta_header(blockstore_meta_header_v1_t *hdr);
    void dump_meta_entry(uint64_t block_num, clean_disk_entry *entry, uint8_t *bitmap);

    int write_json_journal(json11::Json entries);
@@ -98,7 +96,7 @@ struct disk_tool_t

    int resize_data();
    int resize_parse_params();
-    void resize_init(blockstore_meta_header_v2_t *hdr);
+    void resize_init(blockstore_meta_header_v1_t *hdr);
    int resize_remap_blocks();
    int resize_copy_data();
    int resize_rewrite_journal();
@@ -143,5 +141,3 @@ json11::Json read_parttable(std::string dev);
 uint64_t dev_size_from_parttable(json11::Json pt);
 uint64_t free_from_parttable(json11::Json pt);
 int fix_partition_type(std::string dev_by_uuid);
-std::string csum_type_str(uint32_t data_csum_type);
-uint32_t csum_type_from_str(std::string data_csum_type);
--- a/src/disk_tool_journal.cpp
+++ b/src/disk_tool_journal.cpp
@@ -55,23 +55,6 @@ int disk_tool_t::dump_journal()
                    printf("offset %08lx:\n", journal_pos);
                else
                    printf(",\"entries\":[\n");
-                if (journal_pos == 0)
-                {
-                    // Fill journal header to know checksum type & size
-                    journal_entry *je = (journal_entry*)journal_buf;
-                    if (je->magic == JOURNAL_MAGIC && je->type == JE_START &&
-                        (je->start.version == JOURNAL_VERSION_V1 || je->start.version == JOURNAL_VERSION_V2))
-                    {
-                        memcpy(&je_start, je, sizeof(je_start));
-                        if (je_start.size == JE_START_V0_SIZE)
-                            je_start.version = 0;
-                        if (je_start.version < JOURNAL_VERSION_V2)
-                        {
-                            je_start.data_csum_type = 0;
-                            je_start.csum_block_size = 0;
-                        }
-                    }
-                }
                first_entry = true;
                process_journal_block(journal_buf, [this](int num, journal_entry *je) { dump_journal_entry(num, je, json); });
                if (json)
@@ -137,22 +120,8 @@ int disk_tool_t::process_journal(std::function<int(void*)> block_fn)
        fprintf(stderr, "offset %08lx: journal superblock is invalid\n", journal_pos);
        r = 1;
    }
-    else if (je->start.size != JE_START_V0_SIZE && je->start.version != JOURNAL_VERSION_V1 && je->start.version != JOURNAL_VERSION_V2)
-    {
-        fprintf(stderr, "offset %08lx: journal superblock contains version %lu, but I only understand 0, 1 and 2\n",
-            journal_pos, je->start.size == JE_START_V0_SIZE ? 0 : je->start.version);
-        r = 1;
-    }
    else
    {
-        memcpy(&je_start, je, sizeof(je_start));
-        if (je_start.size == JE_START_V0_SIZE)
-            je_start.version = 0;
-        if (je_start.version < JOURNAL_VERSION_V2)
-        {
-            je_start.data_csum_type = 0;
-            je_start.csum_block_size = 0;
-        }
        started = false;
        crc32_last = 0;
        block_fn(data);
@@ -214,49 +183,7 @@ int disk_tool_t::process_journal_block(void *buf, std::function<void(int, journa
            }
            small_write_data = memalign_or_die(MEM_ALIGNMENT, je->small_write.len);
            assert(pread(dsk.journal_fd, small_write_data, je->small_write.len, dsk.journal_offset+je->small_write.data_offset) == je->small_write.len);
-            data_crc32 = je_start.csum_block_size ? 0 : crc32c(0, small_write_data, je->small_write.len);
-            data_csum_valid = (data_crc32 == je->small_write.crc32_data);
-            if (je_start.csum_block_size && je->small_write.len > 0)
-            {
-                // like in enqueue_write()
-                uint32_t start = je->small_write.offset / je_start.csum_block_size;
-                uint32_t end = (je->small_write.offset+je->small_write.len-1) / je_start.csum_block_size;
-                uint32_t data_csum_size = (end-start+1) * (je_start.data_csum_type & 0xFF);
-                if (je->size < sizeof(journal_entry_small_write) + data_csum_size)
-                {
-                    data_csum_valid = false;
-                }
-                else
-                {
-                    uint32_t calc_csum = 0;
-                    uint32_t *block_csums = (uint32_t*)((uint8_t*)je + je->size - data_csum_size);
-                    if (start == end)
-                    {
-                        calc_csum = crc32c(0, (uint8_t*)small_write_data, je->small_write.len);
-                        data_csum_valid = data_csum_valid && (calc_csum == *block_csums++);
-                    }
-                    else
-                    {
-                        // First block
-                        calc_csum = crc32c(0, (uint8_t*)small_write_data,
-                            je_start.csum_block_size*(start+1)-je->small_write.offset);
-                        data_csum_valid = data_csum_valid && (calc_csum == *block_csums++);
-                        // Intermediate blocks
-                        for (uint32_t i = start+1; i < end; i++)
-                        {
-                            calc_csum = crc32c(0, (uint8_t*)small_write_data +
-                                je_start.csum_block_size*i-je->small_write.offset, je_start.csum_block_size);
-                            data_csum_valid = data_csum_valid && (calc_csum == *block_csums++);
-                        }
-                        // Last block
-                        calc_csum = crc32c(
-                            0, (uint8_t*)small_write_data + end*je_start.csum_block_size - je->small_write.offset,
-                            je->small_write.offset+je->small_write.len - end*je_start.csum_block_size
-                        );
-                        data_csum_valid = data_csum_valid && (calc_csum == *block_csums++);
-                    }
-                }
-            }
+            data_crc32 = crc32c(0, small_write_data, je->small_write.len);
        }
        iter_fn(entry, je);
        if (je->type == JE_SMALL_WRITE || je->type == JE_SMALL_WRITE_INSTANT)
@@ -296,40 +223,29 @@ void disk_tool_t::dump_journal_entry(int num, journal_entry *je, bool json)
    if (je->type == JE_START)
    {
        printf(
-            json ? ",\"type\":\"start\",\"start\":\"0x%lx\"" : "je_start start=%08lx",
+            json ? ",\"type\":\"start\",\"start\":\"0x%lx\"}" : "je_start start=%08lx\n",
            je->start.journal_start
        );
-        if (je->start.data_csum_type)
-        {
-            printf(
-                json ? ",\"data_csum_type\":\"%s\",\"csum_block_size\":%u" : " data_csum_type=%s csum_block_size=%u",
-                csum_type_str(je->start.data_csum_type).c_str(), je->start.csum_block_size
-            );
-        }
-        printf(json ? "}" : "\n");
    }
    else if (je->type == JE_SMALL_WRITE || je->type == JE_SMALL_WRITE_INSTANT)
    {
-        auto & sw = je->small_write;
        printf(
            json ? ",\"type\":\"small_write%s\",\"inode\":\"0x%lx\",\"stripe\":\"0x%lx\",\"ver\":\"%lu\",\"offset\":%u,\"len\":%u,\"loc\":\"0x%lx\""
                : "je_small_write%s oid=%lx:%lx ver=%lu offset=%u len=%u loc=%08lx",
            je->type == JE_SMALL_WRITE_INSTANT ? "_instant" : "",
-            sw.oid.inode, sw.oid.stripe, sw.version, sw.offset, sw.len, sw.data_offset
+            je->small_write.oid.inode, je->small_write.oid.stripe,
+            je->small_write.version, je->small_write.offset, je->small_write.len,
+            je->small_write.data_offset
        );
-        if (journal_calc_data_pos != sw.data_offset)
+        if (journal_calc_data_pos != je->small_write.data_offset)
        {
            printf(json ? ",\"bad_loc\":true,\"calc_loc\":\"0x%lx\""
                : " (mismatched, calculated = %lu)", journal_pos);
        }
-        uint32_t data_csum_size = (!je_start.csum_block_size
-            ? 0
-            : ((sw.offset + sw.len - 1)/je_start.csum_block_size - sw.offset/je_start.csum_block_size + 1)
-                *(je_start.data_csum_type & 0xFF));
-        if (je->size > sizeof(journal_entry_small_write) + data_csum_size)
+        if (je->small_write.size > sizeof(journal_entry_small_write))
        {
            printf(json ? ",\"bitmap\":\"" : " (bitmap: ");
-            for (int i = sizeof(journal_entry_small_write); i < je->size - data_csum_size; i++)
+            for (int i = sizeof(journal_entry_small_write); i < je->small_write.size; i++)
            {
                printf("%02x", ((uint8_t*)je)[i]);
            }
@@ -338,56 +254,34 @@ void disk_tool_t::dump_journal_entry(int num, journal_entry *je, bool json)
        if (dump_with_data)
        {
            printf(json ? ",\"data\":\"" : " (data: ");
-            for (int i = 0; i < sw.len; i++)
+            for (int i = 0; i < je->small_write.len; i++)
            {
                printf("%02x", ((uint8_t*)small_write_data)[i]);
            }
            printf(json ? "\"" : ")");
        }
-        if (data_csum_size > 0 && je->size >= sizeof(journal_entry_small_write) + data_csum_size)
-        {
-            printf(json ? ",\"block_csums\":\"" : " block_csums=");
-            uint8_t *block_csums = (uint8_t*)je + je->size - data_csum_size;
-            for (int i = 0; i < data_csum_size; i++)
-                printf("%02x", block_csums[i]);
-            printf(json ? "\"" : "");
-        }
-        else
-        {
-            printf(json ? ",\"data_crc32\":\"%08x\"" : " data_crc32=%08x", sw.crc32_data);
-        }
        printf(
-            json ? ",\"data_valid\":%s}" : "%s\n",
-            (data_csum_valid
-                ? (json ? "true" : " (valid)")
-                : (json ? "false" : " (invalid)"))
+            json ? ",\"data_crc32\":\"%08x\",\"data_valid\":%s}" : " data_crc32=%08x%s\n",
+            je->small_write.crc32_data,
+            (data_crc32 != je->small_write.crc32_data
+                ? (json ? "false" : " (invalid)")
+                : (json ? "true" : " (valid)"))
        );
    }
    else if (je->type == JE_BIG_WRITE || je->type == JE_BIG_WRITE_INSTANT)
    {
-        auto & bw = je->big_write;
        printf(
            json ? ",\"type\":\"big_write%s\",\"inode\":\"0x%lx\",\"stripe\":\"0x%lx\",\"ver\":\"%lu\",\"offset\":%u,\"len\":%u,\"loc\":\"0x%lx\""
                : "je_big_write%s oid=%lx:%lx ver=%lu offset=%u len=%u loc=%08lx",
            je->type == JE_BIG_WRITE_INSTANT ? "_instant" : "",
-            bw.oid.inode, bw.oid.stripe, bw.version, bw.offset, bw.len, bw.location
+            je->big_write.oid.inode, je->big_write.oid.stripe,
+            je->big_write.version, je->big_write.offset, je->big_write.len,
+            je->big_write.location
        );
-        uint32_t data_csum_size = (!je_start.csum_block_size
-            ? 0
-            : ((bw.offset + bw.len - 1)/je_start.csum_block_size - bw.offset/je_start.csum_block_size + 1)
-                *(je_start.data_csum_type & 0xFF));
-        if (data_csum_size > 0 && je->size >= sizeof(journal_entry_big_write) + data_csum_size)
-        {
-            printf(json ? ",\"block_csums\":\"" : " block_csums=");
-            uint8_t *block_csums = (uint8_t*)je + je->size - data_csum_size;
-            for (int i = 0; i < data_csum_size; i++)
-                printf("%02x", block_csums[i]);
-            printf(json ? "\"" : "");
-        }
-        if (bw.size > sizeof(journal_entry_big_write) + data_csum_size)
+        if (je->big_write.size > sizeof(journal_entry_big_write))
        {
            printf(json ? ",\"bitmap\":\"" : " (bitmap: ");
-            for (int i = sizeof(journal_entry_big_write); i < bw.size - data_csum_size; i++)
+            for (int i = sizeof(journal_entry_big_write); i < je->big_write.size; i++)
            {
                printf("%02x", ((uint8_t*)je)[i]);
            }
@@ -444,9 +338,7 @@ int disk_tool_t::write_json_journal(json11::Json entries)
        .type = JE_START,
        .size = sizeof(journal_entry_start),
        .journal_start = dsk.journal_block_size,
-        .version = JOURNAL_VERSION_V2,
-        .data_csum_type = dsk.data_csum_type,
-        .csum_block_size = dsk.csum_block_size,
+        .version = JOURNAL_VERSION,
    };
    ((journal_entry*)new_journal_buf)->crc32 = je_crc32((journal_entry*)new_journal_buf);
    new_journal_ptr += dsk.journal_block_size;
@@ -466,11 +358,9 @@ int disk_tool_t::write_json_journal(json11::Json entries)
        uint32_t entry_size = (type == JE_START
            ? sizeof(journal_entry_start)
            : (type == JE_SMALL_WRITE || type == JE_SMALL_WRITE_INSTANT
-                ? sizeof(journal_entry_small_write) + dsk.clean_entry_bitmap_size +
-                    (dsk.data_csum_type ? rec["len"].uint64_value()/dsk.csum_block_size*(dsk.data_csum_type & 0xFF) : 0)
+                ? sizeof(journal_entry_small_write) + dsk.clean_entry_bitmap_size
                : (type == JE_BIG_WRITE || type == JE_BIG_WRITE_INSTANT
-                    ? sizeof(journal_entry_big_write) + dsk.clean_entry_bitmap_size +
-                        (dsk.data_csum_type ? rec["len"].uint64_value()/dsk.csum_block_size*(dsk.data_csum_type & 0xFF) : 0)
+                    ? sizeof(journal_entry_big_write) + dsk.clean_entry_bitmap_size
                    : sizeof(journal_entry_del))));
        if (dsk.journal_block_size < new_journal_in_pos + entry_size)
        {
@@ -512,24 +402,12 @@ int disk_tool_t::write_json_journal(json11::Json entries)
                .offset = (uint32_t)rec["offset"].uint64_value(),
                .len = (uint32_t)rec["len"].uint64_value(),
                .data_offset = (uint64_t)(new_journal_data-new_journal_buf),
-                .crc32_data = !dsk.data_csum_type ? 0 : (uint32_t)sscanf_json("%x", rec["data_crc32"]),
+                .crc32_data = (uint32_t)sscanf_json("%x", rec["data_crc32"]),
            };
-            uint32_t data_csum_size = !dsk.data_csum_type ? 0 : ne->small_write.len/dsk.csum_block_size*(dsk.data_csum_type & 0xFF);
-            fromhexstr(rec["bitmap"].string_value(), dsk.clean_entry_bitmap_size, ((uint8_t*)ne) + sizeof(journal_entry_small_write) + data_csum_size);
+            fromhexstr(rec["bitmap"].string_value(), dsk.clean_entry_bitmap_size, ((uint8_t*)ne) + sizeof(journal_entry_small_write));
            fromhexstr(rec["data"].string_value(), ne->small_write.len, new_journal_data);
-            if (dsk.data_csum_type)
-                fromhexstr(rec["block_csums"].string_value(), data_csum_size, ((uint8_t*)ne) + sizeof(journal_entry_small_write));
            if (rec["data"].is_string())
-            {
-                if (!dsk.data_csum_type)
-                    ne->small_write.crc32_data = crc32c(0, new_journal_data, ne->small_write.len);
-                else if (dsk.data_csum_type == BLOCKSTORE_CSUM_CRC32C)
-                {
-                    uint32_t *block_csums = (uint32_t*)(((uint8_t*)ne) + sizeof(journal_entry_small_write));
-                    for (uint32_t i = 0; i < ne->small_write.len; i += dsk.csum_block_size, block_csums++)
-                        *block_csums = crc32c(0, new_journal_data+i, dsk.csum_block_size);
-                }
-            }
+                ne->small_write.crc32_data = crc32c(0, new_journal_data, ne->small_write.len);
            new_journal_data += ne->small_write.len;
        }
        else if (type == JE_BIG_WRITE || type == JE_BIG_WRITE_INSTANT)
@@ -548,10 +426,7 @@ int disk_tool_t::write_json_journal(json11::Json entries)
                .len = (uint32_t)rec["len"].uint64_value(),
                .location = sscanf_json(NULL, rec["loc"]),
            };
-            uint32_t data_csum_size = !dsk.data_csum_type ? 0 : ne->big_write.len/dsk.csum_block_size*(dsk.data_csum_type & 0xFF);
-            fromhexstr(rec["bitmap"].string_value(), dsk.clean_entry_bitmap_size, ((uint8_t*)ne) + sizeof(journal_entry_big_write) + data_csum_size);
-            if (dsk.data_csum_type)
-                fromhexstr(rec["block_csums"].string_value(), data_csum_size, ((uint8_t*)ne) + sizeof(journal_entry_big_write));
+            fromhexstr(rec["bitmap"].string_value(), dsk.clean_entry_bitmap_size, ((uint8_t*)ne) + sizeof(journal_entry_big_write));
        }
        else if (type == JE_STABLE || type == JE_ROLLBACK || type == JE_DELETE)
        {
--- a/src/disk_tool_meta.cpp
+++ b/src/disk_tool_meta.cpp
@@ -5,7 +5,7 @@
 #include "rw_blocking.h"
 #include "osd_id.h"

-int disk_tool_t::process_meta(std::function<void(blockstore_meta_header_v2_t *)> hdr_fn,
+int disk_tool_t::process_meta(std::function<void(blockstore_meta_header_v1_t *)> hdr_fn,
    std::function<void(uint64_t, clean_disk_entry*, uint8_t*)> record_fn)
 {
    if (dsk.meta_block_size % DIRECT_IO_ALIGNMENT)
@@ -28,38 +28,12 @@ int disk_tool_t::process_meta(std::function<void(blockstore_meta_header_v2_t *)>
    lseek64(dsk.meta_fd, dsk.meta_offset, 0);
    read_blocking(dsk.meta_fd, data, dsk.meta_block_size);
    // Check superblock
-    blockstore_meta_header_v2_t *hdr = (blockstore_meta_header_v2_t *)data;
-    if (hdr->zero == 0 && hdr->magic == BLOCKSTORE_META_MAGIC_V1)
+    blockstore_meta_header_v1_t *hdr = (blockstore_meta_header_v1_t *)data;
+    if (hdr->zero == 0 &&
+        hdr->magic == BLOCKSTORE_META_MAGIC_V1 &&
+        hdr->version == BLOCKSTORE_META_VERSION_V1)
    {
-        if (hdr->version == BLOCKSTORE_META_FORMAT_V1)
-        {
-            // Vitastor 0.6-0.8 - static array of clean_disk_entry with bitmaps
-            hdr->data_csum_type = 0;
-            hdr->csum_block_size = 0;
-            hdr->header_csum = 0;
-        }
-        else if (hdr->version == BLOCKSTORE_META_FORMAT_V2)
-        {
-            // Vitastor 0.9 - static array of clean_disk_entry with bitmaps and checksums
-            if (hdr->data_csum_type != 0 &&
-                hdr->data_csum_type != BLOCKSTORE_CSUM_CRC32C)
-            {
-                fprintf(stderr, "I don't know checksum format %u, the only supported format is crc32c = %u.\n", hdr->data_csum_type, BLOCKSTORE_CSUM_CRC32C);
-                free(data);
-                close(dsk.meta_fd);
-                dsk.meta_fd = -1;
-                return 1;
-            }
-        }
-        else
-        {
-            // Unsupported version
-            fprintf(stderr, "Metadata format is too new for me (stored version is %lu, max supported %u).\n", hdr->version, BLOCKSTORE_META_FORMAT_V2);
-            free(data);
-            close(dsk.meta_fd);
-            dsk.meta_fd = -1;
-            return 1;
-        }
+        // Vitastor 0.6-0.7 - static array of clean_disk_entry with bitmaps
        if (hdr->meta_block_size != dsk.meta_block_size)
        {
            fprintf(stderr, "Using block size of %u bytes based on information from the superblock\n", hdr->meta_block_size);
@@ -71,24 +45,14 @@ int disk_tool_t::process_meta(std::function<void(blockstore_meta_header_v2_t *)>
                memcpy(new_data, data, dsk.meta_block_size);
                free(data);
                data = new_data;
-                hdr = (blockstore_meta_header_v2_t *)data;
+                hdr = (blockstore_meta_header_v1_t *)data;
            }
        }
-        dsk.meta_format = hdr->version;
-        dsk.data_block_size = hdr->data_block_size;
-        dsk.csum_block_size = hdr->csum_block_size;
-        dsk.data_csum_type = hdr->data_csum_type;
        dsk.bitmap_granularity = hdr->bitmap_granularity;
-        dsk.clean_entry_bitmap_size = (hdr->data_block_size / hdr->bitmap_granularity + 7) / 8;
-        dsk.clean_entry_size = sizeof(clean_disk_entry) + 2*dsk.clean_entry_bitmap_size
-            + (hdr->data_csum_type
-                ? ((hdr->data_block_size+hdr->csum_block_size-1)/hdr->csum_block_size
-                    *(hdr->data_csum_type & 0xff))
-                : 0)
-            + (dsk.meta_format == BLOCKSTORE_META_FORMAT_V2 ? 4 /*entry_csum*/ : 0);
+        dsk.clean_entry_bitmap_size = hdr->data_block_size / hdr->bitmap_granularity / 8;
+        dsk.clean_entry_size = sizeof(clean_disk_entry) + 2*dsk.clean_entry_bitmap_size;
        uint64_t block_num = 0;
        hdr_fn(hdr);
-        hdr = NULL;
        meta_pos = dsk.meta_block_size;
        lseek64(dsk.meta_fd, dsk.meta_offset+meta_pos, 0);
        while (meta_pos < dsk.meta_len)
@@ -103,15 +67,6 @@ int disk_tool_t::process_meta(std::function<void(blockstore_meta_header_v2_t *)>
                    clean_disk_entry *entry = (clean_disk_entry*)((uint8_t*)data + blk + ioff);
                    if (entry->oid.inode)
                    {
-                        if (dsk.data_csum_type)
-                        {
-                            uint32_t *entry_csum = (uint32_t*)((uint8_t*)entry + dsk.clean_entry_size - 4);
-                            if (*entry_csum != crc32c(0, entry, dsk.clean_entry_size - 4))
-                            {
-                                fprintf(stderr, "Metadata entry %lu is corrupt (checksum mismatch), skipping\n", block_num);
-                                continue;
-                            }
-                        }
                        record_fn(block_num, entry, entry->bitmap);
                    }
                }
@@ -152,35 +107,21 @@ int disk_tool_t::process_meta(std::function<void(blockstore_meta_header_v2_t *)>
 int disk_tool_t::dump_meta()
 {
    int r = process_meta(
-        [this](blockstore_meta_header_v2_t *hdr) { dump_meta_header(hdr); },
+        [this](blockstore_meta_header_v1_t *hdr) { dump_meta_header(hdr); },
        [this](uint64_t block_num, clean_disk_entry *entry, uint8_t *bitmap) { dump_meta_entry(block_num, entry, bitmap); }
    );
-    if (r == 0)
-        printf("\n]}\n");
+    printf("\n]}\n");
    return r;
 }

-void disk_tool_t::dump_meta_header(blockstore_meta_header_v2_t *hdr)
+void disk_tool_t::dump_meta_header(blockstore_meta_header_v1_t *hdr)
 {
    if (hdr)
    {
-        if (hdr->version == BLOCKSTORE_META_FORMAT_V1)
-        {
-            printf(
-                "{\"version\":\"0.6\",\"meta_block_size\":%u,\"data_block_size\":%u,\"bitmap_granularity\":%u,"
-                "\"entries\":[\n",
-                hdr->meta_block_size, hdr->data_block_size, hdr->bitmap_granularity
-            );
-        }
-        else if (hdr->version == BLOCKSTORE_META_FORMAT_V2)
-        {
-            printf(
-                "{\"version\":\"0.9\",\"meta_block_size\":%u,\"data_block_size\":%u,\"bitmap_granularity\":%u,"
-                "\"data_csum_type\":%s,\"csum_block_size\":%u,\"entries\":[\n",
-                hdr->meta_block_size, hdr->data_block_size, hdr->bitmap_granularity,
-                csum_type_str(hdr->data_csum_type).c_str(), hdr->csum_block_size
-            );
-        }
+        printf(
+            "{\"version\":\"0.6\",\"meta_block_size\":%u,\"data_block_size\":%u,\"bitmap_granularity\":%u,\"entries\":[\n",
+            hdr->meta_block_size, hdr->data_block_size, hdr->bitmap_granularity
+        );
    }
    else
    {
@@ -210,15 +151,6 @@ void disk_tool_t::dump_meta_entry(uint64_t block_num, clean_disk_entry *entry, u
        {
            printf("%02x", bitmap[dsk.clean_entry_bitmap_size + i]);
        }
-        if (dsk.csum_block_size && dsk.data_csum_type)
-        {
-            uint8_t *csums = bitmap + dsk.clean_entry_bitmap_size*2;
-            printf("\",\"block_csums\":\"");
-            for (uint64_t i = 0; i < (dsk.data_block_size+dsk.csum_block_size-1)/dsk.csum_block_size*(dsk.data_csum_type & 0xFF); i++)
-            {
-                printf("%02x", csums[i]);
-            }
-        }
        printf("\"}");
    }
    else
@@ -232,30 +164,18 @@ int disk_tool_t::write_json_meta(json11::Json meta)
 {
    new_meta_buf = (uint8_t*)memalign_or_die(MEM_ALIGNMENT, new_meta_len);
    memset(new_meta_buf, 0, new_meta_len);
-    blockstore_meta_header_v2_t *new_hdr = (blockstore_meta_header_v2_t *)new_meta_buf;
+    blockstore_meta_header_v1_t *new_hdr = (blockstore_meta_header_v1_t *)new_meta_buf;
    new_hdr->zero = 0;
    new_hdr->magic = BLOCKSTORE_META_MAGIC_V1;
-    new_hdr->version = meta["version"].uint64_value() == BLOCKSTORE_META_FORMAT_V1
-        ? BLOCKSTORE_META_FORMAT_V1 : BLOCKSTORE_META_FORMAT_V2;
+    new_hdr->version = BLOCKSTORE_META_VERSION_V1;
    new_hdr->meta_block_size = meta["meta_block_size"].uint64_value()
        ? meta["meta_block_size"].uint64_value() : 4096;
    new_hdr->data_block_size = meta["data_block_size"].uint64_value()
        ? meta["data_block_size"].uint64_value() : 131072;
    new_hdr->bitmap_granularity = meta["bitmap_granularity"].uint64_value()
        ? meta["bitmap_granularity"].uint64_value() : 4096;
-    new_hdr->data_csum_type = meta["data_csum_type"].is_number()
-        ? meta["data_csum_type"].uint64_value()
-        : (meta["data_csum_type"].string_value() == "crc32c"
-            ? BLOCKSTORE_CSUM_CRC32C
-            : BLOCKSTORE_CSUM_NONE);
-    new_hdr->csum_block_size = meta["csum_block_size"].uint64_value();
-    uint32_t new_clean_entry_header_size = (new_hdr->version == BLOCKSTORE_META_FORMAT_V1
-        ? sizeof(clean_disk_entry) : sizeof(clean_disk_entry) + 4 /*entry_csum*/);
-    new_clean_entry_bitmap_size = (new_hdr->data_block_size / new_hdr->bitmap_granularity + 7) / 8;
-    new_data_csum_size = (new_hdr->data_csum_type
-        ? ((new_hdr->data_block_size+new_hdr->csum_block_size-1)/new_hdr->csum_block_size*(new_hdr->data_csum_type & 0xFF))
-        : 0);
-    new_clean_entry_size = new_clean_entry_header_size + 2*new_clean_entry_bitmap_size + new_data_csum_size;
+    new_clean_entry_bitmap_size = new_hdr->data_block_size / new_hdr->bitmap_granularity / 8;
+    new_clean_entry_size = sizeof(clean_disk_entry) + 2*new_clean_entry_bitmap_size;
    new_entries_per_block = new_hdr->meta_block_size / new_clean_entry_size;
    for (const auto & e: meta["entries"].array_items())
    {
@@ -274,21 +194,8 @@ int disk_tool_t::write_json_meta(json11::Json meta)
        new_entry->oid.inode = (sscanf_json(NULL, e["pool"]) << (64-POOL_ID_BITS)) | sscanf_json(NULL, e["inode"]);
        new_entry->oid.stripe = sscanf_json(NULL, e["stripe"]);
        new_entry->version = sscanf_json(NULL, e["version"]);
-        fromhexstr(e["bitmap"].string_value(), new_clean_entry_bitmap_size,
-            ((uint8_t*)new_entry) + sizeof(clean_disk_entry));
-        fromhexstr(e["ext_bitmap"].string_value(), new_clean_entry_bitmap_size,
-            ((uint8_t*)new_entry) + sizeof(clean_disk_entry) + new_clean_entry_bitmap_size);
-        if (new_hdr->version == BLOCKSTORE_META_FORMAT_V2)
-        {
-            if (new_hdr->data_csum_type != 0)
-            {
-                fromhexstr(e["data_csum"].string_value(), new_data_csum_size,
-                    ((uint8_t*)new_entry) + sizeof(clean_disk_entry) + 2*new_clean_entry_bitmap_size);
-            }
-            uint32_t *new_entry_csum = (uint32_t*)(((uint8_t*)new_entry) + sizeof(clean_disk_entry) +
-                2*new_clean_entry_bitmap_size + new_data_csum_size);
-            *new_entry_csum = crc32c(0, new_entry, new_clean_entry_size - 4);
-        }
+        fromhexstr(e["bitmap"].string_value(), new_clean_entry_bitmap_size, ((uint8_t*)new_entry) + sizeof(clean_disk_entry));
+        fromhexstr(e["ext_bitmap"].string_value(), new_clean_entry_bitmap_size, ((uint8_t*)new_entry) + sizeof(clean_disk_entry) + new_clean_entry_bitmap_size);
    }
    int r = resize_write_new_meta();
    free(new_meta_buf);
--- a/src/disk_tool_prepare.cpp
+++ b/src/disk_tool_prepare.cpp
@@ -8,9 +8,6 @@
 int disk_tool_t::prepare_one(std::map<std::string, std::string> options, int is_hdd)
 {
    static const char *allow_additional_params[] = {
-        "data_io",
-        "meta_io",
-        "journal_io",
        "max_write_iodepth",
        "max_write_iodepth",
        "min_flusher_count",
@@ -102,16 +99,15 @@ int disk_tool_t::prepare_one(std::map<std::string, std::string> options, int is_
    if (options["journal_size"] == "")
    {
        if (options["journal_device"] == "")
-            options["journal_size"] = is_hdd ? "128M" : "32M";
+            options["journal_size"] = "32M";
        else if (is_hdd)
            options["journal_size"] = DEFAULT_HYBRID_JOURNAL;
    }
-    bool is_hybrid = is_hdd && options["journal_device"] != "" && options["journal_device"] != options["data_device"];
    if (is_hdd)
    {
        if (options["block_size"] == "")
            options["block_size"] = "1M";
-        if (is_hybrid && options["throttle_small_writes"] == "")
+        if (options["throttle_small_writes"] == "")
            options["throttle_small_writes"] = "1";
    }
    json11::Json::object sb;
@@ -119,7 +115,6 @@ int disk_tool_t::prepare_one(std::map<std::string, std::string> options, int is_
    try
    {
        dsk.parse_config(options);
-        dsk.data_io = dsk.meta_io = dsk.journal_io = "direct";
        dsk.open_data();
        dsk.open_meta();
        dsk.open_journal();
@@ -139,7 +134,7 @@ int disk_tool_t::prepare_one(std::map<std::string, std::string> options, int is_
            { "meta_offset", 4096 + (dsk.meta_device == dsk.journal_device ? dsk.journal_len : 0) },
            { "data_offset", 4096 + (dsk.data_device == dsk.meta_device ? dsk.meta_len : 0) +
                (dsk.data_device == dsk.journal_device ? dsk.journal_len : 0) },
-            { "journal_no_same_sector_overwrites", !is_hdd || is_hybrid },
+            { "journal_no_same_sector_overwrites", true },
            { "journal_sector_buffer_count", 1024 },
            { "disable_data_fsync", json_is_true(options["disable_data_fsync"]) },
            { "disable_meta_fsync", json_is_true(options["disable_meta_fsync"]) },
@@ -151,7 +146,7 @@ int disk_tool_t::prepare_one(std::map<std::string, std::string> options, int is_
        for (int i = 0; i < sizeof(allow_additional_params)/sizeof(allow_additional_params[0]); i++)
        {
            auto it = options.find(allow_additional_params[i]);
-            if (it != options.end() && it->second != "")
+            if (it != options.end())
            {
                sb[it->first] = it->second;
            }
@@ -483,7 +478,6 @@ int disk_tool_t::get_meta_partition(std::vector<vitastor_dev_info_t> & ssds, std
    {
        blockstore_disk_t dsk;
        dsk.parse_config(options);
-        dsk.data_io = dsk.meta_io = dsk.journal_io = "direct";
        dsk.open_data();
        dsk.open_meta();
        dsk.open_journal();
@@ -626,7 +620,7 @@ int disk_tool_t::prepare(std::vector<std::string> devices)
                    }
                }
                // Treat all disks as SSDs if not in the hybrid mode
-                prepare_one(options, dev.is_hdd ? 1 : 0);
+                prepare_one(options, hybrid && dev.is_hdd ? 1 : 0);
                if (hybrid)
                {
                    options.erase("journal_device");
--- a/src/disk_tool_resize.cpp
+++ b/src/disk_tool_resize.cpp
@@ -29,7 +29,7 @@ int disk_tool_t::resize_data()
    fprintf(stderr, "Reading metadata\n");
    data_alloc = new allocator((new_data_len < dsk.data_len ? dsk.data_len : new_data_len) / dsk.data_block_size);
    r = process_meta(
-        [this](blockstore_meta_header_v2_t *hdr)
+        [this](blockstore_meta_header_v1_t *hdr)
        {
            resize_init(hdr);
        },
@@ -91,7 +91,6 @@ int disk_tool_t::resize_parse_params()
    try
    {
        dsk.parse_config(options);
-        dsk.data_io = dsk.meta_io = dsk.journal_io = "direct";
        dsk.open_data();
        dsk.open_meta();
        dsk.open_journal();
@@ -140,7 +139,7 @@ int disk_tool_t::resize_parse_params()
    return 0;
 }

-void disk_tool_t::resize_init(blockstore_meta_header_v2_t *hdr)
+void disk_tool_t::resize_init(blockstore_meta_header_v1_t *hdr)
 {
    if (hdr && dsk.data_block_size != hdr->data_block_size)
    {
@@ -150,15 +149,6 @@ void disk_tool_t::resize_init(blockstore_meta_header_v2_t *hdr)
        }
        dsk.data_block_size = hdr->data_block_size;
    }
-    if (hdr && (dsk.data_csum_type != hdr->data_csum_type || dsk.csum_block_size != hdr->csum_block_size))
-    {
-        if (dsk.data_csum_type)
-        {
-            fprintf(stderr, "Using data checksum type %s from metadata superblock\n", csum_type_str(hdr->data_csum_type).c_str());
-        }
-        dsk.data_csum_type = hdr->data_csum_type;
-        dsk.csum_block_size = hdr->csum_block_size;
-    }
    if (((new_data_len-dsk.data_len) % dsk.data_block_size) ||
        ((new_data_offset-dsk.data_offset) % dsk.data_block_size))
    {
@@ -170,12 +160,8 @@ void disk_tool_t::resize_init(blockstore_meta_header_v2_t *hdr)
    free_last = (new_data_offset+new_data_len < dsk.data_offset+dsk.data_len)
        ? (dsk.data_offset+dsk.data_len-new_data_offset-new_data_len) / dsk.data_block_size
        : 0;
-    uint32_t new_clean_entry_header_size = sizeof(clean_disk_entry) + 4 /*entry_csum*/;
    new_clean_entry_bitmap_size = dsk.data_block_size / (hdr ? hdr->bitmap_granularity : 4096) / 8;
-    new_data_csum_size = (dsk.data_csum_type
-        ? ((dsk.data_block_size+dsk.csum_block_size-1)/dsk.csum_block_size*(dsk.data_csum_type & 0xFF))
-        : 0);
-    new_clean_entry_size = new_clean_entry_header_size + 2*new_clean_entry_bitmap_size + new_data_csum_size;
+    new_clean_entry_size = sizeof(clean_disk_entry) + 2 * new_clean_entry_bitmap_size;
    new_entries_per_block = dsk.meta_block_size/new_clean_entry_size;
    uint64_t new_meta_blocks = 1 + (new_data_len/dsk.data_block_size + new_entries_per_block-1) / new_entries_per_block;
    if (!new_meta_len)
@@ -363,25 +349,13 @@ int disk_tool_t::resize_rewrite_journal()
        {
            if (je->type == JE_START)
            {
-                if (je_start.data_csum_type != dsk.data_csum_type ||
-                    je_start.csum_block_size != dsk.csum_block_size)
-                {
-                    fprintf(
-                        stderr, "Error: journal header has different checksum parameters: %s/%u vs %s/%u\n",
-                        csum_type_str(je_start.data_csum_type).c_str(), je_start.csum_block_size,
-                        csum_type_str(dsk.data_csum_type).c_str(), dsk.csum_block_size
-                    );
-                    exit(1);
-                }
                journal_entry *ne = (journal_entry*)(new_journal_ptr + new_journal_in_pos);
                *((journal_entry_start*)ne) = (journal_entry_start){
                    .magic = JOURNAL_MAGIC,
                    .type = JE_START,
                    .size = sizeof(journal_entry_start),
                    .journal_start = dsk.journal_block_size,
-                    .version = JOURNAL_VERSION_V2,
-                    .data_csum_type = dsk.data_csum_type,
-                    .csum_block_size = dsk.csum_block_size,
+                    .version = JOURNAL_VERSION,
                };
                ne->crc32 = je_crc32(ne);
                new_journal_ptr += dsk.journal_block_size;
@@ -462,17 +436,15 @@ int disk_tool_t::resize_rewrite_meta()
    new_meta_buf = (uint8_t*)memalign_or_die(MEM_ALIGNMENT, new_meta_len);
    memset(new_meta_buf, 0, new_meta_len);
    int r = process_meta(
-        [this](blockstore_meta_header_v2_t *hdr)
+        [this](blockstore_meta_header_v1_t *hdr)
        {
-            blockstore_meta_header_v2_t *new_hdr = (blockstore_meta_header_v2_t *)new_meta_buf;
+            blockstore_meta_header_v1_t *new_hdr = (blockstore_meta_header_v1_t *)new_meta_buf;
            new_hdr->zero = 0;
            new_hdr->magic = BLOCKSTORE_META_MAGIC_V1;
-            new_hdr->version = BLOCKSTORE_META_FORMAT_V1;
+            new_hdr->version = BLOCKSTORE_META_VERSION_V1;
            new_hdr->meta_block_size = dsk.meta_block_size;
            new_hdr->data_block_size = dsk.data_block_size;
            new_hdr->bitmap_granularity = dsk.bitmap_granularity ? dsk.bitmap_granularity : 4096;
-            new_hdr->data_csum_type = dsk.data_csum_type;
-            new_hdr->csum_block_size = dsk.csum_block_size;
        },
        [this](uint64_t block_num, clean_disk_entry *entry, uint8_t *bitmap)
        {
@@ -491,7 +463,7 @@ int disk_tool_t::resize_rewrite_meta()
            new_entry->oid = entry->oid;
            new_entry->version = entry->version;
            if (bitmap)
-                memcpy(new_entry->bitmap, bitmap, 2*new_clean_entry_bitmap_size + new_data_csum_size);
+                memcpy(new_entry->bitmap, bitmap, 2*new_clean_entry_bitmap_size);
            else
                memset(new_entry->bitmap, 0xff, 2*new_clean_entry_bitmap_size);
        }
--- a/src/disk_tool_utils.cpp
+++ b/src/disk_tool_utils.cpp
@@ -264,7 +264,6 @@ int write_zero(int fd, uint64_t offset, uint64_t size)
 {
    uint64_t buf_len = 1024*1024;
    void *zero_buf = memalign_or_die(MEM_ALIGNMENT, buf_len);
-    memset(zero_buf, 0, buf_len);
    ssize_t r;
    while (size > 0)
    {
@@ -374,22 +373,3 @@ int fix_partition_type(std::string dev_by_uuid)
    std::string out;
    return shell_exec({ "sfdisk", "--no-reread", "--force", "/dev/"+parent_dev }, script, &out, NULL);
 }
-
-std::string csum_type_str(uint32_t data_csum_type)
-{
-    std::string csum_type;
-    if (data_csum_type == BLOCKSTORE_CSUM_NONE)
-        csum_type = "none";
-    else if (data_csum_type == BLOCKSTORE_CSUM_CRC32C)
-        csum_type = "crc32c";
-    else
-        csum_type = std::to_string(data_csum_type);
-    return csum_type;
-}
-
-uint32_t csum_type_from_str(std::string data_csum_type)
-{
-    if (data_csum_type == "crc32c")
-        return BLOCKSTORE_CSUM_CRC32C;
-    return stoull_full(data_csum_type, 0);
-}
--- a/src/etcd_state_client.cpp
+++ b/src/etcd_state_client.cpp
@@ -187,30 +187,22 @@ void etcd_state_client_t::add_etcd_url(std::string addr)
            check_addr = addr;
        if (pos == std::string::npos)
            addr += "/v3";
-        bool local = false;
        int i;
        for (i = 0; i < local_ips.size(); i++)
        {
            if (local_ips[i] == check_addr)
            {
-                local = true;
+                this->etcd_local.push_back(addr);
                break;
            }
        }
-        auto & to = local ? this->etcd_local : this->etcd_addresses;
-        for (i = 0; i < to.size(); i++)
-        {
-            if (to[i] == addr)
-                break;
-        }
-        if (i >= to.size())
-            to.push_back(addr);
+        if (i >= local_ips.size())
+            this->etcd_addresses.push_back(addr);
    }
 }

 void etcd_state_client_t::parse_config(const json11::Json & config)
 {
-    this->etcd_local.clear();
    this->etcd_addresses.clear();
    if (config["etcd_address"].is_string())
    {
@@ -357,7 +349,7 @@ void etcd_state_client_t::start_etcd_watcher()
                        watch_id == ETCD_OSD_STATE_WATCH_ID)
                        etcd_watches_initialised++;
                    if (etcd_watches_initialised == 4 && this->log_level > 0)
-                        fprintf(stderr, "Successfully subscribed to etcd at %s\n", cur_addr.c_str());
+                        fprintf(stderr, "Successfully subscribed to etcd at %s\n", selected_etcd_address.c_str());
                }
                if (data["result"]["canceled"].bool_value())
                {
@@ -368,17 +360,15 @@ void etcd_state_client_t::start_etcd_watcher()
                        // so we should restart from the beginning if we can
                        if (on_reload_hook != NULL)
                        {
-                            // check to not trigger on_reload_hook multiple times
-                            if (etcd_watch_ws != NULL)
+                            fprintf(stderr, "Revisions before %lu were compacted by etcd, reloading state\n",
+                                data["result"]["compact_revision"].uint64_value());
+                            if (etcd_watch_ws)
                            {
-                                fprintf(stderr, "Revisions before %lu were compacted by etcd, reloading state\n",
-                                    data["result"]["compact_revision"].uint64_value());
                                http_close(etcd_watch_ws);
                                etcd_watch_ws = NULL;
-                                etcd_watch_revision = 0;
-                                on_reload_hook();
                            }
-                            return;
+                            etcd_watch_revision = 0;
+                            on_reload_hook();
                        }
                        else
                        {
@@ -425,9 +415,13 @@ void etcd_state_client_t::start_etcd_watcher()
        }
        if (msg->eof)
        {
-            fprintf(stderr, "Disconnected from etcd %s\n", cur_addr.c_str());
            if (cur_addr == selected_etcd_address)
+            {
+                fprintf(stderr, "Disconnected from etcd %s\n", selected_etcd_address.c_str());
                selected_etcd_address = "";
+            }
+            else
+                fprintf(stderr, "Disconnected from etcd\n");
            if (etcd_watch_ws)
            {
                http_close(etcd_watch_ws);
@@ -444,7 +438,6 @@ void etcd_state_client_t::start_etcd_watcher()
            else if (etcd_watches_initialised > 0)
            {
                // Connection was live, retry immediately
-                etcd_watches_initialised = 0;
                start_etcd_watcher();
            }
        }
@@ -684,8 +677,8 @@ void etcd_state_client_t::parse_state(const etcd_kv_t & kv)
            // ID
            pool_id_t pool_id;
            char null_byte = 0;
-            int scanned = sscanf(pool_item.first.c_str(), "%u%c", &pool_id, &null_byte);
-            if (scanned != 1 || !pool_id || pool_id >= POOL_ID_MAX)
+            sscanf(pool_item.first.c_str(), "%u%c", &pool_id, &null_byte);
+            if (!pool_id || pool_id >= POOL_ID_MAX || null_byte != 0)
            {
                fprintf(stderr, "Pool ID %s is invalid (must be a number less than 0x%x), skipping pool\n", pool_item.first.c_str(), POOL_ID_MAX);
                continue;
@@ -829,8 +822,8 @@ void etcd_state_client_t::parse_state(const etcd_kv_t & kv)
        {
            pool_id_t pool_id;
            char null_byte = 0;
-            int scanned = sscanf(pool_item.first.c_str(), "%u%c", &pool_id, &null_byte);
-            if (scanned != 1 || !pool_id || pool_id >= POOL_ID_MAX)
+            sscanf(pool_item.first.c_str(), "%u%c", &pool_id, &null_byte);
+            if (!pool_id || pool_id >= POOL_ID_MAX || null_byte != 0)
            {
                fprintf(stderr, "Pool ID %s is invalid in PG configuration (must be a number less than 0x%x), skipping pool\n", pool_item.first.c_str(), POOL_ID_MAX);
                continue;
@@ -838,8 +831,8 @@ void etcd_state_client_t::parse_state(const etcd_kv_t & kv)
            for (auto & pg_item: pool_item.second.object_items())
            {
                pg_num_t pg_num = 0;
-                int scanned = sscanf(pg_item.first.c_str(), "%u%c", &pg_num, &null_byte);
-                if (scanned != 1 || !pg_num)
+                sscanf(pg_item.first.c_str(), "%u%c", &pg_num, &null_byte);
+                if (!pg_num || null_byte != 0)
                {
                    fprintf(stderr, "Bad key in pool %u PG configuration: %s (must be a number), skipped\n", pool_id, pg_item.first.c_str());
                    continue;
@@ -889,8 +882,8 @@ void etcd_state_client_t::parse_state(const etcd_kv_t & kv)
        pool_id_t pool_id = 0;
        pg_num_t pg_num = 0;
        char null_byte = 0;
-        int scanned = sscanf(key.c_str() + etcd_prefix.length()+12, "%u/%u%c", &pool_id, &pg_num, &null_byte);
-        if (scanned != 2 || !pool_id || pool_id >= POOL_ID_MAX || !pg_num)
+        sscanf(key.c_str() + etcd_prefix.length()+12, "%u/%u%c", &pool_id, &pg_num, &null_byte);
+        if (!pool_id || pool_id >= POOL_ID_MAX || !pg_num || null_byte != 0)
        {
            fprintf(stderr, "Bad etcd key %s, ignoring\n", key.c_str());
        }
@@ -944,8 +937,8 @@ void etcd_state_client_t::parse_state(const etcd_kv_t & kv)
        pool_id_t pool_id = 0;
        pg_num_t pg_num = 0;
        char null_byte = 0;
-        int scanned = sscanf(key.c_str() + etcd_prefix.length()+10, "%u/%u%c", &pool_id, &pg_num, &null_byte);
-        if (scanned != 2 || !pool_id || pool_id >= POOL_ID_MAX || !pg_num)
+        sscanf(key.c_str() + etcd_prefix.length()+10, "%u/%u%c", &pool_id, &pg_num, &null_byte);
+        if (!pool_id || pool_id >= POOL_ID_MAX || !pg_num || null_byte != 0)
        {
            fprintf(stderr, "Bad etcd key %s, ignoring\n", key.c_str());
        }
@@ -1015,8 +1008,8 @@ void etcd_state_client_t::parse_state(const etcd_kv_t & kv)
        uint64_t pool_id = 0;
        uint64_t inode_num = 0;
        char null_byte = 0;
-        int scanned = sscanf(key.c_str() + etcd_prefix.length()+14, "%lu/%lu%c", &pool_id, &inode_num, &null_byte);
-        if (scanned != 2 || !pool_id || pool_id >= POOL_ID_MAX || !inode_num || (inode_num >> (64-POOL_ID_BITS)))
+        sscanf(key.c_str() + etcd_prefix.length()+14, "%lu/%lu%c", &pool_id, &inode_num, &null_byte);
+        if (!pool_id || pool_id >= POOL_ID_MAX || !inode_num || (inode_num >> (64-POOL_ID_BITS)) || null_byte != 0)
        {
            fprintf(stderr, "Bad etcd key %s, ignoring\n", key.c_str());
        }
--- a/src/fio_cluster.cpp
+++ b/src/fio_cluster.cpp
@@ -24,7 +24,6 @@
 #include <netinet/tcp.h>

 #include <vector>
-#include <string>

 #include "vitastor_c.h"
 #include "fio_headers.h"
@@ -204,15 +203,6 @@ static void watch_callback(void *opaque, long watch)
    bsd->watch = (void*)watch;
 }

-static void opt_push(std::vector<char *> & options, const char *opt, const char *value)
-{
-    if (value)
-    {
-        options.push_back(strdup(opt));
-        options.push_back(strdup(value));
-    }
-}
-
 static int sec_setup(struct thread_data *td)
 {
    sec_options *o = (sec_options*)td->eo;
@@ -264,27 +254,8 @@ static int sec_setup(struct thread_data *td)
    {
        o->inode = 0;
    }
-    std::vector<char *> options;
-    opt_push(options, "config_path", o->config_path);
-    opt_push(options, "etcd_address", o->etcd_host);
-    opt_push(options, "etcd_prefix", o->etcd_prefix);
-    if (o->use_rdma != -1)
-        opt_push(options, "use_rdma", std::to_string(o->use_rdma).c_str());
-    opt_push(options, "rdma_device", o->rdma_device);
-    if (o->rdma_port_num)
-        opt_push(options, "rdma_port_num", std::to_string(o->rdma_port_num).c_str());
-    if (o->rdma_gid_index)
-        opt_push(options, "rdma_gid_index", std::to_string(o->rdma_gid_index).c_str());
-    if (o->rdma_mtu)
-        opt_push(options, "rdma_mtu", std::to_string(o->rdma_mtu).c_str());
-    if (o->cluster_log)
-        opt_push(options, "log_level", std::to_string(o->cluster_log).c_str());
-    // allow writeback caching if -direct is not set
-    opt_push(options, "client_writeback_allowed", td->o.odirect ? "0" : "1");
-    bsd->cli = vitastor_c_create_uring_json((const char**)options.data(), options.size());
-    for (auto opt: options)
-        free(opt);
-    options.clear();
+    bsd->cli = vitastor_c_create_uring(o->config_path, o->etcd_host, o->etcd_prefix,
+        o->use_rdma, o->rdma_device, o->rdma_port_num, o->rdma_gid_index, o->rdma_mtu, o->cluster_log);
    if (o->image)
    {
        bsd->watch = NULL;
--- a/src/fio_sec_osd.cpp
+++ b/src/fio_sec_osd.cpp
@@ -242,7 +242,6 @@ static enum fio_q_status sec_queue(struct thread_data *td, struct io_u *io)
            op.sec_rw.version = UINT64_MAX; // last unstable
            op.sec_rw.offset = io->offset % bsd->block_size;
            op.sec_rw.len = io->xfer_buflen;
-            op.sec_rw.attr_len = 0;
        }
        else
        {
@@ -264,7 +263,6 @@ static enum fio_q_status sec_queue(struct thread_data *td, struct io_u *io)
            op.sec_rw.version = 0; // assign automatically
            op.sec_rw.offset = io->offset % bsd->block_size;
            op.sec_rw.len = io->xfer_buflen;
-            op.sec_rw.attr_len = 0;
        }
        else
        {
--- a/src/messenger.cpp
+++ b/src/messenger.cpp
@@ -11,9 +11,6 @@

 #include "addr_util.h"
 #include "messenger.h"
-#ifdef WITH_RDMA
-#include "msgr_rdma.h"
-#endif

 void osd_messenger_t::init()
 {
@@ -395,27 +392,24 @@ void osd_messenger_t::check_peer_config(osd_client_t *cl)
            },
        },
    };
-    json11::Json::object payload;
-    if (this->osd_num)
-    {
-        payload["osd_num"] = this->osd_num;
-    }
 #ifdef WITH_RDMA
    if (rdma_context)
    {
        cl->rdma_conn = msgr_rdma_connection_t::create(rdma_context, rdma_max_send, rdma_max_recv, rdma_max_sge, rdma_max_msg);
        if (cl->rdma_conn)
        {
-            payload["connect_rdma"] = cl->rdma_conn->addr.to_string();
-            payload["rdma_max_msg"] = cl->rdma_conn->max_msg;
+            json11::Json payload = json11::Json::object {
+                { "connect_rdma", cl->rdma_conn->addr.to_string() },
+                { "rdma_max_msg", cl->rdma_conn->max_msg },
+            };
+            std::string payload_str = payload.dump();
+            op->req.show_conf.json_len = payload_str.size();
+            op->buf = malloc_or_die(payload_str.size());
+            op->iov.push_back(op->buf, payload_str.size());
+            memcpy(op->buf, payload_str.c_str(), payload_str.size());
        }
    }
 #endif
-    std::string payload_str = json11::Json(payload).dump();
-    op->req.show_conf.json_len = payload_str.size();
-    op->buf = malloc_or_die(payload_str.size());
-    op->iov.push_back(op->buf, payload_str.size());
-    memcpy(op->buf, payload_str.c_str(), payload_str.size());
    op->callback = [this, cl](osd_op_t *op)
    {
        std::string json_err;
--- a/src/messenger.h
+++ b/src/messenger.h
@@ -18,6 +18,10 @@
 #include "timerfd_manager.h"
 #include <ringloop.h>

+#ifdef WITH_RDMA
+#include "msgr_rdma.h"
+#endif
+
 #define CL_READ_HDR 1
 #define CL_READ_DATA 2
 #define CL_READ_REPLY_DATA 3
@@ -40,11 +44,6 @@ struct msgr_sendp_t
    int flags;
 };

-#ifdef WITH_RDMA
-struct msgr_rdma_connection_t;
-struct msgr_rdma_context_t;
-#endif
-
 struct osd_client_t
 {
    int refs = 0;
@@ -133,6 +132,7 @@ protected:
    uint64_t rdma_max_msg = 0;
 #endif

+    bool has_send_loop = false;
    std::vector<int> read_ready_clients;
    std::vector<int> write_ready_clients;
    // We don't use ringloop->set_immediate here because we may have no ringloop in client :)
@@ -160,6 +160,7 @@ public:
    std::function<bool(osd_client_t*, json11::Json)> check_config_hook;
    void read_requests();
    void send_replies();
+    bool can_send();
    void accept_connections(int listen_fd);
    ~osd_messenger_t();

--- a/Show More
+++ b/Show More