Change etcd_host to etcd_address in QEMU driver, add etcd_host and etcd aliases for convenience

2021-07-09 02:12:43 +03:00
70 changed files with 550 additions and 3705 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -2,6 +2,6 @@ cmake_minimum_required(VERSION 2.8)

 project(vitastor)

-set(VERSION "0.6.6")
+set(VERSION "0.6.4")

 add_subdirectory(src)
--- a/README-ru.md
+++ b/README-ru.md
@@ -40,7 +40,7 @@ Vitastor на данный момент находится в статусе п
 - Драйвер диска для QEMU (собирается вне дерева исходников QEMU)
 - Драйвер диска для утилиты тестирования производительности fio (также собирается вне дерева исходников fio)
 - NBD-прокси для монтирования образов ядром ("блочное устройство в режиме пользователя")
- Утилита для удаления образов/инодов (vitastor-cli rm)
+- Утилита удаления образов/инодов (vitastor-rm)
 - Пакеты для Debian и CentOS
 - Статистика операций ввода/вывода и занятого места в разрезе инодов
 - Именование инодов через хранение их метаданных в etcd
@@ -48,15 +48,12 @@ Vitastor на данный момент находится в статусе п
 - Сглаживание производительности случайной записи в SSD+HDD конфигурациях
 - Поддержка RDMA/RoCEv2 через libibverbs
 - CSI-плагин для Kubernetes
- Базовая поддержка OpenStack: драйвер Cinder, патчи для Nova и libvirt
- Слияние снапшотов (vitastor-cli {snap-rm,flatten,merge})

 ## Планы развития

- Поддержка удаления снапшотов (слияния слоёв)
 - Более корректные скрипты разметки дисков и автоматического запуска OSD
 - Другие инструменты администрирования
- Плагины для OpenNebula, Proxmox и других облачных систем
+- Плагины для OpenStack, OpenNebula, Proxmox и других облачных систем
 - iSCSI-прокси
 - Более быстрое переключение при отказах
 - Фоновая проверка целостности без контрольных сумм (сверка реплик)
@@ -373,7 +370,7 @@ Vitastor с однопоточной NBD прокси на том же стен
 - Установите gcc и g++ 8.x или новее.
 - Склонируйте данный репозиторий с подмодулями: `git clone https://yourcmc.ru/git/vitalif/vitastor/`.
 - Желательно пересобрать QEMU с патчем, который делает необязательным запуск через LD_PRELOAD.
-  См `patches/qemu-*.*-vitastor.patch` - выберите версию, наиболее близкую вашей версии QEMU.
+  См `qemu-*.*-vitastor.patch` - выберите версию, наиболее близкую вашей версии QEMU.
 - Установите QEMU 3.0 или новее, возьмите исходные коды установленного пакета, начните его пересборку,
  через некоторое время остановите её и скопируйте следующие заголовки:
   - `<qemu>/include` &rarr; `<vitastor>/qemu/include`
@@ -492,10 +489,10 @@ qemu-system-x86_64 -enable-kvm -m 1024

 ### Удалить образ

-Используйте утилиту vitastor-cli rm. Например:
+Используйте утилиту vitastor-rm. Например:

 ```
-vitastor-cli rm --etcd_address 10.115.0.10:2379/v3 --pool 1 --inode 1 --parallel_osds 16 --iodepth 32
+vitastor-rm --etcd_address 10.115.0.10:2379/v3 --pool 1 --inode 1 --parallel_osds 16 --iodepth 32
 ```

 ### NBD
--- a/README.md
+++ b/README.md
@@ -34,7 +34,7 @@ breaking changes in the future. However, the following is implemented:
 - QEMU driver (built out-of-tree)
 - Loadable fio engine for benchmarks (also built out-of-tree)
 - NBD proxy for kernel mounts
- Inode removal tool (vitastor-cli rm)
+- Inode removal tool (vitastor-rm)
 - Packaging for Debian and CentOS
 - Per-inode I/O and space usage statistics
 - Inode metadata storage in etcd
@@ -42,15 +42,12 @@ breaking changes in the future. However, the following is implemented:
 - Write throttling to smooth random write workloads in SSD+HDD configurations
 - RDMA/RoCEv2 support via libibverbs
 - CSI plugin for Kubernetes
- Basic OpenStack support: Cinder driver, Nova and libvirt patches
- Snapshot merge tool (vitastor-cli {snap-rm,flatten,merge})

 ## Roadmap

- Snapshot deletion (layer merge) support
 - Better OSD creation and auto-start tools
 - Other administrative tools
- Plugins for OpenNebula, Proxmox and other cloud systems
+- Plugins for OpenStack, OpenNebula, Proxmox and other cloud systems
 - iSCSI proxy
 - Faster failover
 - Scrubbing without checksums (verification of replicas)
@@ -341,7 +338,7 @@ Vitastor with single-thread NBD on the same hardware:
      * For QEMU 2.0+: `<qemu>/qapi-types.h` &rarr; `<vitastor>/qemu/b/qemu/qapi-types.h`
   - `config-host.h` and `qapi` are required because they contain generated headers
 - You can also rebuild QEMU with a patch that makes LD_PRELOAD unnecessary to load vitastor driver.
-  See `patches/qemu-*.*-vitastor.patch`.
+  See `qemu-*.*-vitastor.patch`.
 - Install fio 3.7 or later, get its source and symlink it into `<vitastor>/fio`.
 - Build & install Vitastor with `mkdir build && cd build && cmake .. && make -j8 && make install`.
  Pay attention to the `QEMU_PLUGINDIR` cmake option - it must be set to `qemu-kvm` on RHEL.
--- a/cinder-driver/libvirt-5.0-vitastor.diff
+++ b/cinder-driver/libvirt-5.0-vitastor.diff
--- a/cinder-driver/libvirt-7.0-vitastor.diff
+++ b/cinder-driver/libvirt-7.0-vitastor.diff
--- a/cinder-driver/libvirt-7.5-vitastor.diff
+++ b/cinder-driver/libvirt-7.5-vitastor.diff
--- a/cinder-driver/libvirt-example.xml
+++ b/cinder-driver/libvirt-example.xml
--- a/csi/Makefile
+++ b/csi/Makefile
@@ -1,4 +1,4 @@
-VERSION ?= v0.6.6
+VERSION ?= v0.6.4

 all: build push

--- a/csi/deploy/004-csi-nodeplugin.yaml
+++ b/csi/deploy/004-csi-nodeplugin.yaml
@@ -49,7 +49,7 @@ spec:
            capabilities:
              add: ["SYS_ADMIN"]
            allowPrivilegeEscalation: true
-          image: vitalif/vitastor-csi:v0.6.6
+          image: vitalif/vitastor-csi:v0.6.4
          args:
            - "--node=$(NODE_ID)"
            - "--endpoint=$(CSI_ENDPOINT)"
--- a/csi/deploy/007-csi-provisioner.yaml
+++ b/csi/deploy/007-csi-provisioner.yaml
@@ -116,7 +116,7 @@ spec:
            privileged: true
            capabilities:
              add: ["SYS_ADMIN"]
-          image: vitalif/vitastor-csi:v0.6.6
+          image: vitalif/vitastor-csi:v0.6.4
          args:
            - "--node=$(NODE_ID)"
            - "--endpoint=$(CSI_ENDPOINT)"
--- a/csi/src/config.go
+++ b/csi/src/config.go
@@ -5,7 +5,7 @@ package vitastor

 const (
    vitastorCSIDriverName    = "csi.vitastor.io"
-    vitastorCSIDriverVersion = "0.6.6"
+    vitastorCSIDriverVersion = "0.6.4"
 )

 // Config struct fills the parameters of request or user input
--- a/csi/src/controllerserver.go
+++ b/csi/src/controllerserver.go
@@ -354,9 +354,9 @@ func (cs *ControllerServer) DeleteVolume(ctx context.Context, req *csi.DeleteVol
        return nil, status.Error(codes.Internal, "invalid "+inodeCfgKey+" key in etcd: "+err.Error())
    }

-    // Delete inode data by invoking vitastor-cli
+    // Delete inode data by invoking vitastor-rm
    args := []string{
-        "rm", "--etcd_address", strings.Join(etcdUrl, ","),
+        "--etcd_address", strings.Join(etcdUrl, ","),
        "--pool", fmt.Sprintf("%d", idx.PoolId),
        "--inode", fmt.Sprintf("%d", idx.Id),
    }
@@ -364,7 +364,7 @@ func (cs *ControllerServer) DeleteVolume(ctx context.Context, req *csi.DeleteVol
    {
        args = append(args, "--config_path", ctxVars["configPath"])
    }
-    c := exec.Command("/usr/bin/vitastor-cli", args...)
+    c := exec.Command("/usr/bin/vitastor-rm", args...)
    var stderr bytes.Buffer
    c.Stdout = nil
    c.Stderr = &stderr
@@ -372,7 +372,7 @@ func (cs *ControllerServer) DeleteVolume(ctx context.Context, req *csi.DeleteVol
    stderrStr := string(stderr.Bytes())
    if (err != nil)
    {
-        klog.Errorf("vitastor-cli rm failed: %s, status %s\n", stderrStr, err)
+        klog.Errorf("vitastor-rm failed: %s, status %s\n", stderrStr, err)
        return nil, status.Error(codes.Internal, stderrStr+" (status "+err.Error()+")")
    }

--- a/debian/changelog
+++ b/debian/changelog
@@ -1,4 +1,4 @@
-vitastor (0.6.6-1) unstable; urgency=medium
+vitastor (0.6.4-1) unstable; urgency=medium

  * RDMA support
  * Bugfixes
--- a/debian/patched-qemu.Dockerfile
+++ b/debian/patched-qemu.Dockerfile
@@ -11,10 +11,6 @@ RUN if [ "$REL" = "buster" ]; then \
        echo 'Package: *' >> /etc/apt/preferences; \
        echo 'Pin: release a=buster-backports' >> /etc/apt/preferences; \
        echo 'Pin-Priority: 500' >> /etc/apt/preferences; \
-        echo >> /etc/apt/preferences; \
-        echo 'Package: libglvnd* libgles* libglx* libgl1 libegl* libopengl* mesa*' >> /etc/apt/preferences; \
-        echo 'Pin: release a=buster-backports' >> /etc/apt/preferences; \
-        echo 'Pin-Priority: 50' >> /etc/apt/preferences; \
    fi; \
    grep '^deb ' /etc/apt/sources.list | perl -pe 's/^deb/deb-src/' >> /etc/apt/sources.list; \
    echo 'APT::Install-Recommends false;' >> /etc/apt/apt.conf; \
@@ -24,22 +20,20 @@ RUN apt-get update
 RUN apt-get -y install qemu fio liburing1 liburing-dev libgoogle-perftools-dev devscripts
 RUN apt-get -y build-dep qemu
 RUN apt-get -y build-dep fio
-# To build a custom version
-#RUN cp /root/packages/qemu-orig/* /root
 RUN apt-get --download-only source qemu
 RUN apt-get --download-only source fio

-ADD patches/qemu-5.0-vitastor.patch patches/qemu-5.1-vitastor.patch /root/vitastor/patches/
+ADD qemu-5.0-vitastor.patch qemu-5.1-vitastor.patch /root/vitastor/
 RUN set -e; \
    mkdir -p /root/packages/qemu-$REL; \
    rm -rf /root/packages/qemu-$REL/*; \
    cd /root/packages/qemu-$REL; \
    dpkg-source -x /root/qemu*.dsc; \
    if [ -d /root/packages/qemu-$REL/qemu-5.0 ]; then \
-        cp /root/vitastor/patches/qemu-5.0-vitastor.patch /root/packages/qemu-$REL/qemu-5.0/debian/patches; \
+        cp /root/vitastor/qemu-5.0-vitastor.patch /root/packages/qemu-$REL/qemu-5.0/debian/patches; \
        echo qemu-5.0-vitastor.patch >> /root/packages/qemu-$REL/qemu-5.0/debian/patches/series; \
    else \
-        cp /root/vitastor/patches/qemu-5.1-vitastor.patch /root/packages/qemu-$REL/qemu-*/debian/patches; \
+        cp /root/vitastor/qemu-5.1-vitastor.patch /root/packages/qemu-$REL/qemu-*/debian/patches; \
        P=`ls -d /root/packages/qemu-$REL/qemu-*/debian/patches`; \
        echo qemu-5.1-vitastor.patch >> $P/series; \
    fi; \
--- a/debian/vitastor.Dockerfile
+++ b/debian/vitastor.Dockerfile
@@ -40,10 +40,10 @@ RUN set -e -x; \
    mkdir -p /root/packages/vitastor-$REL; \
    rm -rf /root/packages/vitastor-$REL/*; \
    cd /root/packages/vitastor-$REL; \
-    cp -r /root/vitastor vitastor-0.6.6; \
-    ln -s /root/packages/qemu-$REL/qemu-*/ vitastor-0.6.6/qemu; \
-    ln -s /root/fio-build/fio-*/ vitastor-0.6.6/fio; \
-    cd vitastor-0.6.6; \
+    cp -r /root/vitastor vitastor-0.6.4; \
+    ln -s /root/packages/qemu-$REL/qemu-*/ vitastor-0.6.4/qemu; \
+    ln -s /root/fio-build/fio-*/ vitastor-0.6.4/fio; \
+    cd vitastor-0.6.4; \
    FIO=$(head -n1 fio/debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
    QEMU=$(head -n1 qemu/debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
    sh copy-qemu-includes.sh; \
@@ -59,8 +59,8 @@ RUN set -e -x; \
    echo "dep:fio=$FIO" > debian/substvars; \
    echo "dep:qemu=$QEMU" >> debian/substvars; \
    cd /root/packages/vitastor-$REL; \
-    tar --sort=name --mtime='2020-01-01' --owner=0 --group=0 --exclude=debian -cJf vitastor_0.6.6.orig.tar.xz vitastor-0.6.6; \
-    cd vitastor-0.6.6; \
+    tar --sort=name --mtime='2020-01-01' --owner=0 --group=0 --exclude=debian -cJf vitastor_0.6.4.orig.tar.xz vitastor-0.6.4; \
+    cd vitastor-0.6.4; \
    V=$(head -n1 debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
    DEBFULLNAME="Vitaliy Filippov <vitalif@yourcmc.ru>" dch -D $REL -v "$V""$REL" "Rebuild for $REL"; \
    DEB_BUILD_OPTIONS=nocheck dpkg-buildpackage --jobs=auto -sa; \
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -1,9 +0,0 @@
-# Build Docker image with Vitastor packages
-
-FROM debian:bullseye
-
-ADD vitastor.list /etc/apt/sources.list.d
-ADD vitastor.gpg /etc/apt/trusted.gpg.d
-ADD vitastor.pref /etc/apt/preferences.d
-ADD apt.conf /etc/apt/
-RUN apt-get update && apt-get -y install vitastor qemu-system-x86 qemu-system-common && apt-get clean
--- a/docker/apt.conf
+++ b/docker/apt.conf
@@ -1 +0,0 @@
-APT::Install-Recommends false;
--- a/docker/vitastor.gpg
+++ b/docker/vitastor.gpg
--- a/docker/vitastor.list
+++ b/docker/vitastor.list
@@ -1 +0,0 @@
-deb http://vitastor.io/debian bullseye main
--- a/docker/vitastor.pref
+++ b/docker/vitastor.pref
@@ -1,3 +0,0 @@
-Package: *
-Pin: origin "vitastor.io"
-Pin-Priority: 1000
--- a/patches/cinder-vitastor.py
+++ b/patches/cinder-vitastor.py
@@ -1,948 +0,0 @@
-# Vitastor Driver for OpenStack Cinder
-#
-# --------------------------------------------
-# Install as cinder/volume/drivers/vitastor.py
-# --------------------------------------------
-#
-# Copyright 2020 Vitaliy Filippov
-#
-# Licensed under the Apache License, Version 2.0 (the "License"); you may
-# not use this file except in compliance with the License. You may obtain
-# a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
-# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
-# License for the specific language governing permissions and limitations
-# under the License.
-"""Cinder Vitastor Driver"""
-
-import binascii
-import base64
-import errno
-import json
-import math
-import os
-import tempfile
-
-from castellan import key_manager
-from oslo_config import cfg
-from oslo_log import log as logging
-from oslo_service import loopingcall
-from oslo_concurrency import processutils
-from oslo_utils import encodeutils
-from oslo_utils import excutils
-from oslo_utils import fileutils
-from oslo_utils import units
-import six
-from six.moves.urllib import request
-
-from cinder import exception
-from cinder.i18n import _
-from cinder.image import image_utils
-from cinder import interface
-from cinder import objects
-from cinder.objects import fields
-from cinder import utils
-from cinder.volume import configuration
-from cinder.volume import driver
-from cinder.volume import volume_utils
-
-VERSION = '0.6.6'
-
-LOG = logging.getLogger(__name__)
-
-VITASTOR_OPTS = [
-    cfg.StrOpt(
-        'vitastor_config_path',
-        default='/etc/vitastor/vitastor.conf',
-        help='Vitastor configuration file path'
-    ),
-    cfg.StrOpt(
-        'vitastor_etcd_address',
-        default='',
-        help='Vitastor etcd address(es)'),
-    cfg.StrOpt(
-        'vitastor_etcd_prefix',
-        default='/vitastor',
-        help='Vitastor etcd prefix'
-    ),
-    cfg.StrOpt(
-        'vitastor_pool_id',
-        default='',
-        help='Vitastor pool ID to use for volumes'
-    ),
-    # FIXME exclusive_cinder_pool ?
-]
-
-CONF = cfg.CONF
-CONF.register_opts(VITASTOR_OPTS, group = configuration.SHARED_CONF_GROUP)
-
-class VitastorDriverException(exception.VolumeDriverException):
-    message = _("Vitastor Cinder driver failure: %(reason)s")
-
-@interface.volumedriver
-class VitastorDriver(driver.CloneableImageVD,
-    driver.ManageableVD, driver.ManageableSnapshotsVD,
-    driver.BaseVD):
-    """Implements Vitastor volume commands."""
-
-    cfg = {}
-    _etcd_urls = []
-
-    def __init__(self, active_backend_id = None, *args, **kwargs):
-        super(VitastorDriver, self).__init__(*args, **kwargs)
-        self.configuration.append_config_values(VITASTOR_OPTS)
-
-    @classmethod
-    def get_driver_options(cls):
-        additional_opts = cls._get_oslo_driver_opts(
-            'reserved_percentage',
-            'max_over_subscription_ratio',
-            'volume_dd_blocksize'
-        )
-        return VITASTOR_OPTS + additional_opts
-
-    def do_setup(self, context):
-        """Performs initialization steps that could raise exceptions."""
-        super(VitastorDriver, self).do_setup(context)
-        # Make sure configuration is in UTF-8
-        for attr in [ 'config_path', 'etcd_address', 'etcd_prefix', 'pool_id' ]:
-            val = self.configuration.safe_get('vitastor_'+attr)
-            if val is not None:
-                self.cfg[attr] = utils.convert_str(val)
-        self.cfg = self._load_config(self.cfg)
-
-    def _load_config(self, cfg):
-        # Try to load configuration file
-        try:
-            f = open(cfg['config_path'] or '/etc/vitastor/vitastor.conf')
-            conf = json.loads(f.read())
-            f.close()
-            for k in conf:
-                cfg[k] = cfg.get(k, conf[k])
-        except:
-            pass
-        if isinstance(cfg['etcd_address'], str):
-            cfg['etcd_address'] = cfg['etcd_address'].split(',')
-        # Sanitize etcd URLs
-        for i, etcd_url in enumerate(cfg['etcd_address']):
-            ssl = False
-            if etcd_url.lower().startswith('http://'):
-                etcd_url = etcd_url[7:]
-            elif etcd_url.lower().startswith('https://'):
-                etcd_url = etcd_url[8:]
-                ssl = True
-            if etcd_url.find('/') < 0:
-                etcd_url += '/v3'
-            if ssl:
-                etcd_url = 'https://'+etcd_url
-            else:
-                etcd_url = 'http://'+etcd_url
-            cfg['etcd_address'][i] = etcd_url
-        return cfg
-
-    def check_for_setup_error(self):
-        """Returns an error if prerequisites aren't met."""
-
-    def _encode_etcd_key(self, key):
-        if not isinstance(key, bytes):
-            key = str(key).encode('utf-8')
-        return base64.b64encode(self.cfg['etcd_prefix'].encode('utf-8')+b'/'+key).decode('utf-8')
-
-    def _encode_etcd_value(self, value):
-        if not isinstance(value, bytes):
-            value = str(value).encode('utf-8')
-        return base64.b64encode(value).decode('utf-8')
-
-    def _encode_etcd_requests(self, obj):
-        for v in obj:
-            for rt in v:
-                if 'key' in v[rt]:
-                    v[rt]['key'] = self._encode_etcd_key(v[rt]['key'])
-                if 'range_end' in v[rt]:
-                    v[rt]['range_end'] = self._encode_etcd_key(v[rt]['range_end'])
-                if 'value' in v[rt]:
-                    v[rt]['value'] = self._encode_etcd_value(v[rt]['value'])
-
-    def _etcd_txn(self, params):
-        if 'compare' in params:
-            for v in params['compare']:
-                if 'key' in v:
-                    v['key'] = self._encode_etcd_key(v['key'])
-        if 'failure' in params:
-            self._encode_etcd_requests(params['failure'])
-        if 'success' in params:
-            self._encode_etcd_requests(params['success'])
-        body = json.dumps(params).encode('utf-8')
-        headers = {
-            'Content-Type': 'application/json'
-        }
-        err = None
-        for etcd_url in self.cfg['etcd_address']:
-            try:
-                resp = request.urlopen(request.Request(etcd_url+'/kv/txn', body, headers), timeout = 5)
-                data = json.loads(resp.read())
-                if 'responses' not in data:
-                    data['responses'] = []
-                for i, resp in enumerate(data['responses']):
-                    if 'response_range' in resp:
-                        if 'kvs' not in resp['response_range']:
-                            resp['response_range']['kvs'] = []
-                        for kv in resp['response_range']['kvs']:
-                            kv['key'] = base64.b64decode(kv['key'].encode('utf-8')).decode('utf-8')
-                            if kv['key'].startswith(self.cfg['etcd_prefix']+'/'):
-                                kv['key'] = kv['key'][len(self.cfg['etcd_prefix'])+1 : ]
-                            kv['value'] = json.loads(base64.b64decode(kv['value'].encode('utf-8')))
-                    if len(resp.keys()) != 1:
-                        LOG.exception('unknown responses['+str(i)+'] format: '+json.dumps(resp))
-                    else:
-                        resp = data['responses'][i] = resp[list(resp.keys())[0]]
-                return data
-            except Exception as e:
-                LOG.exception('error calling etcd transaction: '+body.decode('utf-8')+'\nerror: '+str(e))
-                err = e
-        raise err
-
-    def _etcd_foreach(self, prefix, add_fn):
-        total = 0
-        batch = 1000
-        begin = prefix+'/'
-        while True:
-            resp = self._etcd_txn({ 'success': [
-                { 'request_range': {
-                    'key': begin,
-                    'range_end': prefix+'0',
-                    'limit': batch+1,
-                } },
-            ] })
-            i = 0
-            while i < batch and i < len(resp['responses'][0]['kvs']):
-                kv = resp['responses'][0]['kvs'][i]
-                add_fn(kv)
-                i += 1
-            if len(resp['responses'][0]['kvs']) <= batch:
-                break
-            begin = resp['responses'][0]['kvs'][batch]['key']
-        return total
-
-    def _update_volume_stats(self):
-        location_info = json.dumps({
-            'config': self.configuration.vitastor_config_path,
-            'etcd_address': self.configuration.vitastor_etcd_address,
-            'etcd_prefix': self.configuration.vitastor_etcd_prefix,
-            'pool_id': self.configuration.vitastor_pool_id,
-        })
-
-        stats = {
-            'vendor_name': 'Vitastor',
-            'driver_version': self.VERSION,
-            'storage_protocol': 'vitastor',
-            'total_capacity_gb': 'unknown',
-            'free_capacity_gb': 'unknown',
-            # FIXME check if safe_get is required
-            'reserved_percentage': self.configuration.safe_get('reserved_percentage'),
-            'multiattach': True,
-            'thin_provisioning_support': True,
-            'max_over_subscription_ratio': self.configuration.safe_get('max_over_subscription_ratio'),
-            'location_info': location_info,
-            'backend_state': 'down',
-            'volume_backend_name': self.configuration.safe_get('volume_backend_name') or 'vitastor',
-            'replication_enabled': False,
-        }
-
-        try:
-            pool_stats = self._etcd_txn({ 'success': [
-                { 'request_range': { 'key': 'pool/stats/'+str(self.cfg['pool_id']) } }
-            ] })
-            total_provisioned = 0
-            def add_total(kv):
-                nonlocal total_provisioned
-                if kv['key'].find('@') >= 0:
-                    total_provisioned += kv['value']['size']
-            self._etcd_foreach('config/inode/'+str(self.cfg['pool_id']), lambda kv: add_total(kv))
-            stats['provisioned_capacity_gb'] = round(total_provisioned/1024.0/1024.0/1024.0, 2)
-            pool_stats = pool_stats['responses'][0]['kvs']
-            if len(pool_stats):
-                pool_stats = pool_stats[0]
-                stats['free_capacity_gb'] = round(1024.0*(pool_stats['total_raw_tb']-pool_stats['used_raw_tb'])/pool_stats['raw_to_usable'], 2)
-                stats['total_capacity_gb'] = round(1024.0*pool_stats['total_raw_tb'], 2)
-            stats['backend_state'] = 'up'
-        except Exception as e:
-            # just log and return unknown capacities
-            LOG.exception('error getting vitastor pool stats: '+str(e))
-
-        self._stats = stats
-
-    def _next_id(self, resp):
-        if len(resp['kvs']) == 0:
-            return (1, 0)
-        else:
-            return (1 + resp['kvs'][0]['value'], resp['kvs'][0]['mod_revision'])
-
-    def create_volume(self, volume):
-        """Creates a logical volume."""
-
-        size = int(volume.size) * units.Gi
-        # FIXME: Check if convert_str is really required
-        vol_name = utils.convert_str(volume.name)
-        if vol_name.find('@') >= 0 or vol_name.find('/') >= 0:
-            raise exception.VolumeBackendAPIException(data = '@ and / are forbidden in volume and snapshot names')
-
-        LOG.debug("creating volume '%s'", vol_name)
-
-        self._create_image(vol_name, { 'size': size })
-
-        if volume.encryption_key_id:
-            self._create_encrypted_volume(volume, volume.obj_context)
-
-        volume_update = {}
-        return volume_update
-
-    def _create_encrypted_volume(self, volume, context):
-        """Create a new LUKS encrypted image directly in Vitastor."""
-        vol_name = utils.convert_str(volume.name)
-        f, opts = self._encrypt_opts(volume, context)
-        # FIXME: Check if it works at all :-)
-        self._execute(
-            'qemu-img', 'convert', '-f', 'luks', *opts,
-            'vitastor:image='+vol_name.replace(':', '\\:')+self._qemu_args(),
-            '%sM' % (volume.size * 1024)
-        )
-        f.close()
-
-    def _encrypt_opts(self, volume, context):
-        encryption = volume_utils.check_encryption_provider(self.db, volume, context)
-        # Fetch the key associated with the volume and decode the passphrase
-        keymgr = key_manager.API(CONF)
-        key = keymgr.get(context, encryption['encryption_key_id'])
-        passphrase = binascii.hexlify(key.get_encoded()).decode('utf-8')
-        # Decode the dm-crypt style cipher spec into something qemu-img can use
-        cipher_spec = image_utils.decode_cipher(encryption['cipher'], encryption['key_size'])
-        tmp_dir = volume_utils.image_conversion_dir()
-        f = tempfile.NamedTemporaryFile(prefix = 'luks_', dir = tmp_dir)
-        f.write(passphrase)
-        f.flush()
-        return (f, [
-            '--object', 'secret,id=luks_sec,format=raw,file=%(passfile)s' % {'passfile': f.name},
-            '-o', 'key-secret=luks_sec,cipher-alg=%(cipher_alg)s,cipher-mode=%(cipher_mode)s,ivgen-alg=%(ivgen_alg)s' % cipher_spec,
-        ])
-
-    def create_snapshot(self, snapshot):
-        """Creates a volume snapshot."""
-
-        vol_name = utils.convert_str(snapshot.volume_name)
-        snap_name = utils.convert_str(snapshot.name)
-        if snap_name.find('@') >= 0 or snap_name.find('/') >= 0:
-            raise exception.VolumeBackendAPIException(data = '@ and / are forbidden in volume and snapshot names')
-        self._create_snapshot(vol_name, vol_name+'@'+snap_name)
-
-    def snapshot_revert_use_temp_snapshot(self):
-        """Disable the use of a temporary snapshot on revert."""
-        return False
-
-    def revert_to_snapshot(self, context, volume, snapshot):
-        """Revert a volume to a given snapshot."""
-
-        # FIXME Delete the image, then recreate it from the snapshot
-
-    def delete_snapshot(self, snapshot):
-        """Deletes a snapshot."""
-
-        vol_name = utils.convert_str(snapshot.volume_name)
-        snap_name = utils.convert_str(snapshot.name)
-
-        # Find the snapshot
-        resp = self._etcd_txn({ 'success': [
-            { 'request_range': { 'key': 'index/image/'+vol_name+'@'+snap_name } },
-        ] })
-        if len(resp['responses'][0]['kvs']) == 0:
-            raise exception.SnapshotNotFound(snapshot_id = snap_name)
-        inode_id = int(resp['responses'][0]['kvs'][0]['value']['id'])
-        pool_id = int(resp['responses'][0]['kvs'][0]['value']['pool_id'])
-        parents = {}
-        parents[(pool_id << 48) | (inode_id & 0xffffffffffff)] = True
-
-        # Check if there are child volumes
-        children = self._child_count(parents)
-        if children > 0:
-            raise exception.SnapshotIsBusy(snapshot_name = snap_name)
-
-        # FIXME: We can't delete snapshots because we can't merge layers yet
-        raise exception.VolumeBackendAPIException(data = 'Snapshot delete (layer merge) is not implemented yet')
-
-    def _child_count(self, parents):
-        children = 0
-        def add_child(kv):
-            nonlocal children
-            children += self._check_parent(kv, parents)
-        self._etcd_foreach('config/inode', lambda kv: add_child(kv))
-        return children
-
-    def _check_parent(self, kv, parents):
-        if 'parent_id' not in kv['value']:
-            return 0
-        parent_id = kv['value']['parent_id']
-        _, _, pool_id, inode_id = kv['key'].split('/')
-        parent_pool_id = pool_id
-        if 'parent_pool_id' in kv['value'] and kv['value']['parent_pool_id']:
-            parent_pool_id = kv['value']['parent_pool_id']
-        inode = (int(pool_id) << 48) | (int(inode_id) & 0xffffffffffff)
-        parent = (int(parent_pool_id) << 48) | (int(parent_id) & 0xffffffffffff)
-        if parent in parents and inode not in parents:
-            return 1
-        return 0
-
-    def create_cloned_volume(self, volume, src_vref):
-        """Create a cloned volume from another volume."""
-
-        size = int(volume.size) * units.Gi
-        src_name = utils.convert_str(src_vref.name)
-        dest_name = utils.convert_str(volume.name)
-        if dest_name.find('@') >= 0 or dest_name.find('/') >= 0:
-            raise exception.VolumeBackendAPIException(data = '@ and / are forbidden in volume and snapshot names')
-
-        # FIXME Do full copy if requested (cfg.disable_clone)
-
-        if src_vref.admin_metadata.get('readonly') == 'True':
-            # source volume is a volume-image cache entry or other readonly volume
-            # clone without intermediate snapshot
-            src = self._get_image(src_name)
-            LOG.debug("creating image '%s' from '%s'", dest_name, src_name)
-            new_cfg = self._create_image(dest_name, {
-                'size': size,
-                'parent_id': src['idx']['id'],
-                'parent_pool_id': src['idx']['pool_id'],
-            })
-            return {}
-
-        clone_snap = "%s@%s.clone_snap" % (src_name, dest_name)
-        make_img = True
-        if (volume.display_name and
-            volume.display_name.startswith('image-') and
-            src_vref.project_id != volume.project_id):
-            # idiotic openstack creates image-volume cache entries
-            # as clones of normal VM volumes... :-X prevent it :-D
-            clone_snap = dest_name
-            make_img = False
-
-        LOG.debug("creating layer '%s' under '%s'", clone_snap, src_name)
-        new_cfg = self._create_snapshot(src_name, clone_snap, True)
-        if make_img:
-            # Then create a clone from it
-            new_cfg = self._create_image(dest_name, {
-                'size': size,
-                'parent_id': new_cfg['parent_id'],
-                'parent_pool_id': new_cfg['parent_pool_id'],
-            })
-
-        return {}
-
-    def create_volume_from_snapshot(self, volume, snapshot):
-        """Creates a cloned volume from an existing snapshot."""
-
-        vol_name = utils.convert_str(volume.name)
-        snap_name = utils.convert_str(snapshot.name)
-
-        snap = self._get_image(vol_name+'@'+snap_name)
-        if not snap:
-            raise exception.SnapshotNotFound(snapshot_id = snap_name)
-        snap_inode_id = int(resp['responses'][0]['kvs'][0]['value']['id'])
-        snap_pool_id = int(resp['responses'][0]['kvs'][0]['value']['pool_id'])
-
-        size = snap['cfg']['size']
-        if int(volume.size):
-            size = int(volume.size) * units.Gi
-        new_cfg = self._create_image(vol_name, {
-            'size': size,
-            'parent_id': snap['idx']['id'],
-            'parent_pool_id': snap['idx']['pool_id'],
-        })
-
-        return {}
-
-    def _vitastor_args(self):
-        args = []
-        for k in [ 'config_path', 'etcd_address', 'etcd_prefix' ]:
-            v = self.configuration.safe_get('vitastor_'+k)
-            if v:
-                args.extend(['--'+k, v])
-        return args
-
-    def _qemu_args(self):
-        args = ''
-        for k in [ 'config_path', 'etcd_address', 'etcd_prefix' ]:
-            v = self.configuration.safe_get('vitastor_'+k)
-            kk = k
-            if kk == 'etcd_address':
-                # FIXME use etcd_address in qemu driver
-                kk = 'etcd_host'
-            if v:
-                args += ':'+kk+'='+v.replace(':', '\\:')
-        return args
-
-    def delete_volume(self, volume):
-        """Deletes a logical volume."""
-
-        vol_name = utils.convert_str(volume.name)
-
-        # Find the volume and all its snapshots
-        range_end = b'index/image/' + vol_name.encode('utf-8')
-        range_end = range_end[0 : len(range_end)-1] + six.int2byte(range_end[len(range_end)-1] + 1)
-        resp = self._etcd_txn({ 'success': [
-            { 'request_range': { 'key': 'index/image/'+vol_name, 'range_end': range_end } },
-        ] })
-        if len(resp['responses'][0]['kvs']) == 0:
-            # already deleted
-            LOG.info("volume %s no longer exists in backend", vol_name)
-            return
-        layers = resp['responses'][0]['kvs']
-        layer_ids = {}
-        for kv in layers:
-            inode_id = int(kv['value']['id'])
-            pool_id = int(kv['value']['pool_id'])
-            inode_pool_id = (pool_id << 48) | (inode_id & 0xffffffffffff)
-            layer_ids[inode_pool_id] = True
-
-        # Check if the volume has clones and raise 'busy' if so
-        children = self._child_count(layer_ids)
-        if children > 0:
-            raise exception.VolumeIsBusy(volume_name = vol_name)
-
-        # Clear data
-        for kv in layers:
-            args = [
-                'vitastor-cli', 'rm', '--pool', str(kv['value']['pool_id']),
-                '--inode', str(kv['value']['id']), '--progress', '0',
-                *(self._vitastor_args())
-            ]
-            try:
-                self._execute(*args)
-            except processutils.ProcessExecutionError as exc:
-                LOG.error("Failed to remove layer "+kv['key']+": "+exc)
-                raise exception.VolumeBackendAPIException(data = exc.stderr)
-
-        # Delete all layers from etcd
-        requests = []
-        for kv in layers:
-            requests.append({ 'request_delete_range': { 'key': kv['key'] } })
-            requests.append({ 'request_delete_range': { 'key': 'config/inode/'+str(kv['value']['pool_id'])+'/'+str(kv['value']['id']) } })
-        self._etcd_txn({ 'success': requests })
-
-    def retype(self, context, volume, new_type, diff, host):
-        """Change extra type specifications for a volume."""
-
-        # FIXME Maybe (in the future) support multiple pools as different types
-        return True, {}
-
-    def ensure_export(self, context, volume):
-        """Synchronously recreates an export for a logical volume."""
-        pass
-
-    def create_export(self, context, volume, connector):
-        """Exports the volume."""
-        pass
-
-    def remove_export(self, context, volume):
-        """Removes an export for a logical volume."""
-        pass
-
-    def _create_image(self, vol_name, cfg):
-        pool_s = str(self.cfg['pool_id'])
-        image_id = 0
-        while image_id == 0:
-            # check if the image already exists and find a free ID
-            resp = self._etcd_txn({ 'success': [
-                { 'request_range': { 'key': 'index/image/'+vol_name } },
-                { 'request_range': { 'key': 'index/maxid/'+pool_s } },
-            ] })
-            if len(resp['responses'][0]['kvs']) > 0:
-                # already exists
-                raise exception.VolumeBackendAPIException(data = 'Volume '+vol_name+' already exists')
-            image_id, id_mod = self._next_id(resp['responses'][1])
-            # try to create the image
-            resp = self._etcd_txn({ 'compare': [
-                { 'target': 'MOD', 'mod_revision': id_mod, 'key': 'index/maxid/'+pool_s },
-                { 'target': 'VERSION', 'version': 0, 'key': 'index/image/'+vol_name },
-                { 'target': 'VERSION', 'version': 0, 'key': 'config/inode/'+pool_s+'/'+str(image_id) },
-            ], 'success': [
-                { 'request_put': { 'key': 'index/maxid/'+pool_s, 'value': image_id } },
-                { 'request_put': { 'key': 'index/image/'+vol_name, 'value': json.dumps({
-                    'id': image_id, 'pool_id': self.cfg['pool_id']
-                }) } },
-                { 'request_put': { 'key': 'config/inode/'+pool_s+'/'+str(image_id), 'value': json.dumps({
-                    **cfg, 'name': vol_name,
-                }) } },
-            ] })
-            if not resp.get('succeeded'):
-                # repeat
-                image_id = 0
-
-    def _create_snapshot(self, vol_name, snap_vol_name, allow_existing = False):
-        while True:
-            # check if the image already exists and snapshot doesn't
-            resp = self._etcd_txn({ 'success': [
-                { 'request_range': { 'key': 'index/image/'+vol_name } },
-                { 'request_range': { 'key': 'index/image/'+snap_vol_name } },
-            ] })
-            if len(resp['responses'][0]['kvs']) == 0:
-                raise exception.VolumeBackendAPIException(data = 'Volume '+vol_name+' does not exist')
-            if len(resp['responses'][1]['kvs']) > 0:
-                if allow_existing:
-                    snap_idx = resp['responses'][1]['kvs'][0]['value']
-                    resp = self._etcd_txn({ 'success': [
-                        { 'request_range': { 'key': 'config/inode/'+str(snap_idx['pool_id'])+'/'+str(snap_idx['id']) } },
-                    ] })
-                    if len(resp['responses'][0]['kvs']) == 0:
-                        raise exception.VolumeBackendAPIException(data =
-                            'Volume '+snap_vol_name+' is already indexed, but does not exist'
-                        )
-                    return resp['responses'][0]['kvs'][0]['value']
-                raise exception.VolumeBackendAPIException(
-                    data = 'Volume '+snap_vol_name+' already exists'
-                )
-            vol_idx = resp['responses'][0]['kvs'][0]['value']
-            vol_idx_mod = resp['responses'][0]['kvs'][0]['mod_revision']
-            # get image inode config and find a new ID
-            resp = self._etcd_txn({ 'success': [
-                { 'request_range': { 'key': 'config/inode/'+str(vol_idx['pool_id'])+'/'+str(vol_idx['id']) } },
-                { 'request_range': { 'key': 'index/maxid/'+str(self.cfg['pool_id']) } },
-            ] })
-            if len(resp['responses'][0]['kvs']) == 0:
-                raise exception.VolumeBackendAPIException(data = 'Volume '+vol_name+' does not exist')
-            vol_cfg = resp['responses'][0]['kvs'][0]['value']
-            vol_mod = resp['responses'][0]['kvs'][0]['mod_revision']
-            new_id, id_mod = self._next_id(resp['responses'][1])
-            # try to redirect image to the new inode
-            new_cfg = {
-                **vol_cfg, 'name': vol_name, 'parent_id': vol_idx['id'], 'parent_pool_id': vol_idx['pool_id']
-            }
-            resp = self._etcd_txn({ 'compare': [
-                { 'target': 'MOD', 'mod_revision': vol_idx_mod, 'key': 'index/image/'+vol_name },
-                { 'target': 'MOD', 'mod_revision': vol_mod, 'key': 'config/inode/'+str(vol_idx['pool_id'])+'/'+str(vol_idx['id']) },
-                { 'target': 'MOD', 'mod_revision': id_mod, 'key': 'index/maxid/'+str(self.cfg['pool_id']) },
-                { 'target': 'VERSION', 'version': 0, 'key': 'index/image/'+snap_vol_name },
-                { 'target': 'VERSION', 'version': 0, 'key': 'config/inode/'+str(self.cfg['pool_id'])+'/'+str(new_id) },
-            ], 'success': [
-                { 'request_put': { 'key': 'index/maxid/'+str(self.cfg['pool_id']), 'value': new_id } },
-                { 'request_put': { 'key': 'index/image/'+vol_name, 'value': json.dumps({
-                    'id': new_id, 'pool_id': self.cfg['pool_id']
-                }) } },
-                { 'request_put': { 'key': 'config/inode/'+str(self.cfg['pool_id'])+'/'+str(new_id), 'value': json.dumps(new_cfg) } },
-                { 'request_put': { 'key': 'index/image/'+snap_vol_name, 'value': json.dumps({
-                    'id': vol_idx['id'], 'pool_id': vol_idx['pool_id']
-                }) } },
-                { 'request_put': { 'key': 'config/inode/'+str(vol_idx['pool_id'])+'/'+str(vol_idx['id']), 'value': json.dumps({
-                    **vol_cfg, 'name': snap_vol_name, 'readonly': True
-                }) } }
-            ] })
-            if resp.get('succeeded'):
-                return new_cfg
-
-    def initialize_connection(self, volume, connector):
-        data = {
-            'driver_volume_type': 'vitastor',
-            'data': {
-                'config_path': self.configuration.vitastor_config_path,
-                'etcd_address': self.configuration.vitastor_etcd_address,
-                'etcd_prefix': self.configuration.vitastor_etcd_prefix,
-                'name': volume.name,
-                'logical_block_size': 512,
-                'physical_block_size': 4096,
-            }
-        }
-        LOG.debug('connection data: %s', data)
-        return data
-
-    def terminate_connection(self, volume, connector, **kwargs):
-        pass
-
-    def clone_image(self, context, volume, image_location, image_meta, image_service):
-        if image_location:
-            # Note: image_location[0] is glance image direct_url.
-            # image_location[1] contains the list of all locations (including
-            # direct_url) or None if show_multiple_locations is False in
-            # glance configuration.
-            if image_location[1]:
-                url_locations = [location['url'] for location in image_location[1]]
-            else:
-                url_locations = [image_location[0]]
-            # iterate all locations to look for a cloneable one.
-            for url_location in url_locations:
-                if url_location and url_location.startswith('cinder://'):
-                    # The idea is to use cinder://<volume-id> Glance volumes as base images
-                    base_vol = self.db.volume_get(context, url_location[len('cinder://') : ])
-                    if not base_vol or base_vol.volume_type_id != volume.volume_type_id:
-                        continue
-                    size = int(volume.size) * units.Gi
-                    dest_name = utils.convert_str(volume.name)
-                    # Find or create the base snapshot
-                    snap_cfg = self._create_snapshot(base_vol.name, base_vol.name+'@.clone_snap', True)
-                    # Then create a clone from it
-                    new_cfg = self._create_image(dest_name, {
-                        'size': size,
-                        'parent_id': snap_cfg['parent_id'],
-                        'parent_pool_id': snap_cfg['parent_pool_id'],
-                    })
-                    return ({}, True)
-        return ({}, False)
-
-    def copy_image_to_encrypted_volume(self, context, volume, image_service, image_id):
-        self.copy_image_to_volume(context, volume, image_service, image_id, encrypted = True)
-
-    def copy_image_to_volume(self, context, volume, image_service, image_id, encrypted = False):
-        tmp_dir = volume_utils.image_conversion_dir()
-        with tempfile.NamedTemporaryFile(dir = tmp_dir) as tmp:
-            image_utils.fetch_to_raw(
-                context, image_service, image_id, tmp.name,
-                self.configuration.volume_dd_blocksize, size = volume.size
-            )
-            out_format = [ '-O', 'raw' ]
-            if encrypted:
-                key_file, opts = self._encrypt_opts(volume, context)
-                out_format = [ '-O', 'luks', *opts ]
-            dest_name = utils.convert_str(volume.name)
-            self._try_execute(
-                'qemu-img', 'convert', '-f', 'raw', tmp.name, *out_format,
-                'vitastor:image='+dest_name.replace(':', '\\:')+self._qemu_args()
-            )
-            if encrypted:
-                key_file.close()
-
-    def copy_volume_to_image(self, context, volume, image_service, image_meta):
-        tmp_dir = volume_utils.image_conversion_dir()
-        tmp_file = os.path.join(tmp_dir, volume.name + '-' + image_meta['id'])
-        with fileutils.remove_path_on_error(tmp_file):
-            vol_name = utils.convert_str(volume.name)
-            self._try_execute(
-                'qemu-img', 'convert', '-f', 'raw',
-                'vitastor:image='+vol_name.replace(':', '\\:')+self._qemu_args(),
-                '-O', 'raw', tmp_file
-            )
-            # FIXME: Copy directly if the destination image is also in Vitastor
-            volume_utils.upload_volume(context, image_service, image_meta, tmp_file, volume)
-        os.unlink(tmp_file)
-
-    def _get_image(self, vol_name):
-        # find the image
-        resp = self._etcd_txn({ 'success': [
-            { 'request_range': { 'key': 'index/image/'+vol_name } },
-        ] })
-        if len(resp['responses'][0]['kvs']) == 0:
-            return None
-        vol_idx = resp['responses'][0]['kvs'][0]['value']
-        vol_idx_mod = resp['responses'][0]['kvs'][0]['mod_revision']
-        # get image inode config
-        resp = self._etcd_txn({ 'success': [
-            { 'request_range': { 'key': 'config/inode/'+str(vol_idx['pool_id'])+'/'+str(vol_idx['id']) } },
-        ] })
-        if len(resp['responses'][0]['kvs']) == 0:
-            return None
-        vol_cfg = resp['responses'][0]['kvs'][0]['value']
-        vol_cfg_mod = resp['responses'][0]['kvs'][0]['mod_revision']
-        return {
-            'cfg': vol_cfg,
-            'cfg_mod': vol_cfg_mod,
-            'idx': vol_idx,
-            'idx_mod': vol_idx_mod,
-        }
-
-    def extend_volume(self, volume, new_size):
-        """Extend an existing volume."""
-        vol_name = utils.convert_str(volume.name)
-        while True:
-            vol = self._get_image(vol_name)
-            if not vol:
-                raise exception.VolumeBackendAPIException(data = 'Volume '+vol_name+' does not exist')
-            # change size
-            size = int(new_size) * units.Gi
-            if size == vol['cfg']['size']:
-                break
-            resp = self._etcd_txn({ 'compare': [ {
-                'target': 'MOD',
-                'mod_revision': vol['cfg_mod'],
-                'key': 'config/inode/'+str(vol['idx']['pool_id'])+'/'+str(vol['idx']['id']),
-            } ], 'success': [
-                { 'request_put': {
-                    'key': 'config/inode/'+str(vol['idx']['pool_id'])+'/'+str(vol['idx']['id']),
-                    'value': json.dumps({ **vol['cfg'], 'size': size }),
-                } },
-            ] })
-            if resp.get('succeeded'):
-                break
-        LOG.debug(
-            "Extend volume from %(old_size)s GB to %(new_size)s GB.",
-            {'old_size': volume.size, 'new_size': new_size}
-        )
-
-    def _add_manageable_volume(self, kv, manageable_volumes, cinder_ids):
-        cfg = kv['value']
-        if kv['key'].find('@') >= 0:
-            # snapshot
-            return
-        image_id = volume_utils.extract_id_from_volume_name(cfg['name'])
-        image_info = {
-            'reference': {'source-name': image_name},
-            'size': int(math.ceil(float(cfg['size']) / units.Gi)),
-            'cinder_id': None,
-            'extra_info': None,
-        }
-        if image_id in cinder_ids:
-            image_info['cinder_id'] = image_id
-            image_info['safe_to_manage'] = False
-            image_info['reason_not_safe'] = 'already managed'
-        else:
-            image_info['safe_to_manage'] = True
-            image_info['reason_not_safe'] = None
-        manageable_volumes.append(image_info)
-
-    def get_manageable_volumes(self, cinder_volumes, marker, limit, offset, sort_keys, sort_dirs):
-        manageable_volumes = []
-        cinder_ids = [resource['id'] for resource in cinder_volumes]
-
-        # List all volumes
-        # FIXME: It's possible to use pagination in our case, but.. do we want it?
-        self._etcd_foreach('config/inode/'+str(self.cfg['pool_id']),
-            lambda kv: self._add_manageable_volume(kv, manageable_volumes, cinder_ids))
-
-        return volume_utils.paginate_entries_list(
-            manageable_volumes, marker, limit, offset, sort_keys, sort_dirs)
-
-    def _get_existing_name(existing_ref):
-        if not isinstance(existing_ref, dict):
-            existing_ref = {"source-name": existing_ref}
-        if 'source-name' not in existing_ref:
-            reason = _('Reference must contain source-name element.')
-            raise exception.ManageExistingInvalidReference(existing_ref=existing_ref, reason=reason)
-        src_name = utils.convert_str(existing_ref['source-name'])
-        if not src_name:
-            reason = _('Reference must contain source-name element.')
-            raise exception.ManageExistingInvalidReference(existing_ref=existing_ref, reason=reason)
-        return src_name
-
-    def manage_existing_get_size(self, volume, existing_ref):
-        """Return size of an existing image for manage_existing.
-
-        :param volume: volume ref info to be set
-        :param existing_ref: {'source-name': <image name>}
-        """
-        src_name = self._get_existing_name(existing_ref)
-        vol = self._get_image(src_name)
-        if not vol:
-            raise exception.VolumeBackendAPIException(data = 'Volume '+src_name+' does not exist')
-        return int(math.ceil(float(vol['cfg']['size']) / units.Gi))
-
-    def manage_existing(self, volume, existing_ref):
-        """Manages an existing image.
-
-        Renames the image name to match the expected name for the volume.
-
-        :param volume: volume ref info to be set
-        :param existing_ref: {'source-name': <image name>}
-        """
-        from_name = self._get_existing_name(existing_ref)
-        to_name = utils.convert_str(volume.name)
-        self._rename(from_name, to_name)
-
-    def _rename(self, from_name, to_name):
-        while True:
-            vol = self._get_image(from_name)
-            if not vol:
-                raise exception.VolumeBackendAPIException(data = 'Volume '+from_name+' does not exist')
-            to = self._get_image(to_name)
-            if to:
-                raise exception.VolumeBackendAPIException(data = 'Volume '+to_name+' already exists')
-            resp = self._etcd_txn({ 'compare': [
-                { 'target': 'MOD', 'mod_revision': vol['idx_mod'], 'key': 'index/image/'+vol['cfg']['name'] },
-                { 'target': 'MOD', 'mod_revision': vol['cfg_mod'], 'key': 'config/inode/'+str(vol['idx']['pool_id'])+'/'+str(vol['idx']['id']) },
-                { 'target': 'VERSION', 'version': 0, 'key': 'index/image/'+to_name },
-            ], 'success': [
-                { 'request_delete_range': { 'key': 'index/image/'+vol['cfg']['name'] } },
-                { 'request_put': { 'key': 'index/image/'+to_name, 'value': json.dumps(vol['idx']) } },
-                { 'request_put': { 'key': 'config/inode/'+str(vol['idx']['pool_id'])+'/'+str(vol['idx']['id']),
-                    'value': json.dumps({ **vol['cfg'], 'name': to_name }) } },
-            ] })
-            if resp.get('succeeded'):
-                break
-
-    def unmanage(self, volume):
-        pass
-
-    def _add_manageable_snapshot(self, kv, manageable_snapshots, cinder_ids):
-        cfg = kv['value']
-        dog = kv['key'].find('@')
-        if dog < 0:
-            # snapshot
-            return
-        image_name = kv['key'][0 : dog]
-        snap_name = kv['key'][dog+1 : ]
-        snapshot_id = volume_utils.extract_id_from_snapshot_name(snap_name)
-        snapshot_info = {
-            'reference': {'source-name': snap_name},
-            'size': int(math.ceil(float(cfg['size']) / units.Gi)),
-            'cinder_id': None,
-            'extra_info': None,
-            'safe_to_manage': False,
-            'reason_not_safe': None,
-            'source_reference': {'source-name': image_name}
-        }
-        if snapshot_id in cinder_ids:
-            # Exclude snapshots already managed.
-            snapshot_info['reason_not_safe'] = ('already managed')
-            snapshot_info['cinder_id'] = snapshot_id
-        elif snap_name.endswith('.clone_snap'):
-            # Exclude clone snapshot.
-            snapshot_info['reason_not_safe'] = ('used for clone snap')
-        else:
-            snapshot_info['safe_to_manage'] = True
-        manageable_snapshots.append(snapshot_info)
-
-    def get_manageable_snapshots(self, cinder_snapshots, marker, limit, offset, sort_keys, sort_dirs):
-        """List manageable snapshots in Vitastor."""
-        manageable_snapshots = []
-        cinder_snapshot_ids = [resource['id'] for resource in cinder_snapshots]
-        # List all volumes
-        # FIXME: It's possible to use pagination in our case, but.. do we want it?
-        self._etcd_foreach('config/inode/'+str(self.cfg['pool_id']),
-            lambda kv: self._add_manageable_volume(kv, manageable_snapshots, cinder_snapshot_ids))
-        return volume_utils.paginate_entries_list(
-            manageable_snapshots, marker, limit, offset, sort_keys, sort_dirs)
-
-    def manage_existing_snapshot_get_size(self, snapshot, existing_ref):
-        """Return size of an existing image for manage_existing.
-
-        :param snapshot: snapshot ref info to be set
-        :param existing_ref: {'source-name': <name of snapshot>}
-        """
-        vol_name = utils.convert_str(snapshot.volume_name)
-        snap_name = self._get_existing_name(existing_ref)
-        vol = self._get_image(vol_name+'@'+snap_name)
-        if not vol:
-            raise exception.ManageExistingInvalidReference(
-                existing_ref=snapshot_name, reason='Specified snapshot does not exist.'
-            )
-        return int(math.ceil(float(vol['cfg']['size']) / units.Gi))
-
-    def manage_existing_snapshot(self, snapshot, existing_ref):
-        """Manages an existing snapshot.
-
-        Renames the snapshot name to match the expected name for the snapshot.
-        Error checking done by manage_existing_get_size is not repeated.
-
-        :param snapshot: snapshot ref info to be set
-        :param existing_ref: {'source-name': <name of snapshot>}
-        """
-        vol_name = utils.convert_str(snapshot.volume_name)
-        snap_name = self._get_existing_name(existing_ref)
-        from_name = vol_name+'@'+snap_name
-        to_name = vol_name+'@'+utils.convert_str(snapshot.name)
-        self._rename(from_name, to_name)
-
-    def unmanage_snapshot(self, snapshot):
-        """Removes the specified snapshot from Cinder management."""
-        pass
-
-    def _dumps(self, obj):
-        return json.dumps(obj, separators=(',', ':'), sort_keys=True)
--- a/patches/devstack-local.conf
+++ b/patches/devstack-local.conf
@@ -1,23 +0,0 @@
-# Devstack configuration for bridged networking
-
-[[local|localrc]]
-ADMIN_PASSWORD=secret
-DATABASE_PASSWORD=$ADMIN_PASSWORD
-RABBIT_PASSWORD=$ADMIN_PASSWORD
-SERVICE_PASSWORD=$ADMIN_PASSWORD
-HOST_IP=10.0.2.15
-Q_USE_SECGROUP=True
-FLOATING_RANGE="10.0.2.0/24"
-IPV4_ADDRS_SAFE_TO_USE="10.0.5.0/24"
-Q_FLOATING_ALLOCATION_POOL=start=10.0.2.50,end=10.0.2.100
-PUBLIC_NETWORK_GATEWAY=10.0.2.2
-PUBLIC_INTERFACE=ens3
-Q_USE_PROVIDERNET_FOR_PUBLIC=True
-Q_AGENT=linuxbridge
-Q_ML2_PLUGIN_MECHANISM_DRIVERS=linuxbridge
-LB_PHYSICAL_INTERFACE=ens3
-PUBLIC_PHYSICAL_NETWORK=default
-LB_INTERFACE_MAPPINGS=default:ens3
-Q_SERVICE_PLUGIN_CLASSES=
-Q_ML2_PLUGIN_TYPE_DRIVERS=flat
-Q_ML2_PLUGIN_EXT_DRIVERS=
--- a/patches/nova-20.diff
+++ b/patches/nova-20.diff
@@ -1,287 +0,0 @@
-diff --git a/nova/virt/image/model.py b/nova/virt/image/model.py
-index 971f7e9c07..70ed70d5e2 100644
--- a/nova/virt/image/model.py
-+++ b/nova/virt/image/model.py
-@@ -129,3 +129,22 @@ class RBDImage(Image):
-         self.user = user
-         self.password = password
-         self.servers = servers
-+
-+
-+class VitastorImage(Image):
-+    """Class for images in a remote Vitastor cluster"""
-+
-+    def __init__(self, name, etcd_address = None, etcd_prefix = None, config_path = None):
-+        """Create a new Vitastor image object
-+
-+        :param name: name of the image
-+        :param etcd_address: etcd URL(s) (optional)
-+        :param etcd_prefix: etcd prefix (optional)
-+        :param config_path: path to the configuration (optional)
-+        """
-+        super(RBDImage, self).__init__(FORMAT_RAW)
-+
-+        self.name = name
-+        self.etcd_address = etcd_address
-+        self.etcd_prefix = etcd_prefix
-+        self.config_path = config_path
-diff --git a/nova/virt/images.py b/nova/virt/images.py
-index 5358f3766a..ebe3d6effb 100644
--- a/nova/virt/images.py
-+++ b/nova/virt/images.py
-@@ -41,7 +41,7 @@ IMAGE_API = glance.API()
- 
- def qemu_img_info(path, format=None):
-     """Return an object containing the parsed output from qemu-img info."""
-    if not os.path.exists(path) and not path.startswith('rbd:'):
-+    if not os.path.exists(path) and not path.startswith('rbd:') and not path.startswith('vitastor:'):
-         raise exception.DiskNotFound(location=path)
- 
-     info = nova.privsep.qemu.unprivileged_qemu_img_info(path, format=format)
-@@ -50,7 +50,7 @@ def qemu_img_info(path, format=None):
- 
- def privileged_qemu_img_info(path, format=None, output_format='json'):
-     """Return an object containing the parsed output from qemu-img info."""
-    if not os.path.exists(path) and not path.startswith('rbd:'):
-+    if not os.path.exists(path) and not path.startswith('rbd:') and not path.startswith('vitastor:'):
-         raise exception.DiskNotFound(location=path)
- 
-     info = nova.privsep.qemu.privileged_qemu_img_info(path, format=format)
-diff --git a/nova/virt/libvirt/config.py b/nova/virt/libvirt/config.py
-index f9475776b3..51573fe41d 100644
--- a/nova/virt/libvirt/config.py
-+++ b/nova/virt/libvirt/config.py
-@@ -1060,6 +1060,8 @@ class LibvirtConfigGuestDisk(LibvirtConfigGuestDevice):
-         self.driver_iommu = False
-         self.source_path = None
-         self.source_protocol = None
-+        self.source_query = None
-+        self.source_config = None
-         self.source_name = None
-         self.source_hosts = []
-         self.source_ports = []
-@@ -1186,7 +1188,8 @@ class LibvirtConfigGuestDisk(LibvirtConfigGuestDevice):
-         elif self.source_type == "mount":
-             dev.append(etree.Element("source", dir=self.source_path))
-         elif self.source_type == "network" and self.source_protocol:
-            source = etree.Element("source", protocol=self.source_protocol)
-+            source = etree.Element("source", protocol=self.source_protocol,
-+                query=self.source_query, config=self.source_config)
-             if self.source_name is not None:
-                 source.set('name', self.source_name)
-             hosts_info = zip(self.source_hosts, self.source_ports)
-diff --git a/nova/virt/libvirt/driver.py b/nova/virt/libvirt/driver.py
-index 391231c527..34dc60dcdd 100644
--- a/nova/virt/libvirt/driver.py
-+++ b/nova/virt/libvirt/driver.py
-@@ -179,6 +179,7 @@ VOLUME_DRIVERS = {
-     'local': 'nova.virt.libvirt.volume.volume.LibvirtVolumeDriver',
-     'fake': 'nova.virt.libvirt.volume.volume.LibvirtFakeVolumeDriver',
-     'rbd': 'nova.virt.libvirt.volume.net.LibvirtNetVolumeDriver',
-+    'vitastor': 'nova.virt.libvirt.volume.vitastor.LibvirtVitastorVolumeDriver',
-     'nfs': 'nova.virt.libvirt.volume.nfs.LibvirtNFSVolumeDriver',
-     'smbfs': 'nova.virt.libvirt.volume.smbfs.LibvirtSMBFSVolumeDriver',
-     'fibre_channel': 'nova.virt.libvirt.volume.fibrechannel.LibvirtFibreChannelVolumeDriver',  # noqa:E501
-@@ -385,10 +386,10 @@ class LibvirtDriver(driver.ComputeDriver):
-         # This prevents the risk of one test setting a capability
-         # which bleeds over into other tests.
- 
-        # LVM and RBD require raw images. If we are not configured to
-+        # LVM, RBD, Vitastor require raw images. If we are not configured to
-         # force convert images into raw format, then we _require_ raw
-         # images only.
-        raw_only = ('rbd', 'lvm')
-+        raw_only = ('rbd', 'lvm', 'vitastor')
-         requires_raw_image = (CONF.libvirt.images_type in raw_only and
-                               not CONF.force_raw_images)
-         requires_ploop_image = CONF.libvirt.virt_type == 'parallels'
-@@ -775,12 +776,12 @@ class LibvirtDriver(driver.ComputeDriver):
-         # Some imagebackends are only able to import raw disk images,
-         # and will fail if given any other format. See the bug
-         # https://bugs.launchpad.net/nova/+bug/1816686 for more details.
-        if CONF.libvirt.images_type in ('rbd',):
-+        if CONF.libvirt.images_type in ('rbd', 'vitastor'):
-             if not CONF.force_raw_images:
-                 msg = _("'[DEFAULT]/force_raw_images = False' is not "
-                        "allowed with '[libvirt]/images_type = rbd'. "
-+                        "allowed with '[libvirt]/images_type = rbd' or 'vitastor'. "
-                         "Please check the two configs and if you really "
-                        "do want to use rbd as images_type, set "
-+                        "do want to use rbd or vitastor as images_type, set "
-                         "force_raw_images to True.")
-                 raise exception.InvalidConfiguration(msg)
- 
-@@ -2603,6 +2604,16 @@ class LibvirtDriver(driver.ComputeDriver):
-                     if connection_info['data'].get('auth_enabled'):
-                         username = connection_info['data']['auth_username']
-                         path = f"rbd:{volume_name}:id={username}"
-+                elif connection_info['driver_volume_type'] == 'vitastor':
-+                    volume_name = connection_info['data']['name']
-+                    path = 'vitastor:image='+volume_name.replace(':', '\\:')
-+                    for k in [ 'config_path', 'etcd_address', 'etcd_prefix' ]:
-+                        if k in connection_info['data']:
-+                            kk = k
-+                            if kk == 'etcd_address':
-+                                # FIXME use etcd_address in qemu driver
-+                                kk = 'etcd_host'
-+                            path += ":"+kk+"="+connection_info['data'][k].replace(':', '\\:')
-                 else:
-                     path = 'unknown'
-                     raise exception.DiskNotFound(location='unknown')
-@@ -2827,8 +2838,8 @@ class LibvirtDriver(driver.ComputeDriver):
- 
-         image_format = CONF.libvirt.snapshot_image_format or source_type
- 
-        # NOTE(bfilippov): save lvm and rbd as raw
-        if image_format == 'lvm' or image_format == 'rbd':
-+        # NOTE(bfilippov): save lvm and rbd and vitastor as raw
-+        if image_format == 'lvm' or image_format == 'rbd' or image_format == 'vitastor':
-             image_format = 'raw'
- 
-         metadata = self._create_snapshot_metadata(instance.image_meta,
-@@ -2899,7 +2910,7 @@ class LibvirtDriver(driver.ComputeDriver):
-                               expected_state=task_states.IMAGE_UPLOADING)
- 
-             # TODO(nic): possibly abstract this out to the root_disk
-            if source_type == 'rbd' and live_snapshot:
-+            if (source_type == 'rbd' or source_type == 'vitastor') and live_snapshot:
-                 # Standard snapshot uses qemu-img convert from RBD which is
-                 # not safe to run with live_snapshot.
-                 live_snapshot = False
-@@ -4099,7 +4110,7 @@ class LibvirtDriver(driver.ComputeDriver):
-         # cleanup rescue volume
-         lvm.remove_volumes([lvmdisk for lvmdisk in self._lvm_disks(instance)
-                                 if lvmdisk.endswith('.rescue')])
-        if CONF.libvirt.images_type == 'rbd':
-+        if CONF.libvirt.images_type == 'rbd' or CONF.libvirt.images_type == 'vitastor':
-             filter_fn = lambda disk: (disk.startswith(instance.uuid) and
-                                       disk.endswith('.rescue'))
-             rbd_utils.RBDDriver().cleanup_volumes(filter_fn)
-@@ -4356,6 +4367,8 @@ class LibvirtDriver(driver.ComputeDriver):
-         # TODO(mikal): there is a bug here if images_type has
-         # changed since creation of the instance, but I am pretty
-         # sure that this bug already exists.
-+        if CONF.libvirt.images_type == 'vitastor':
-+            return 'vitastor'
-         return 'rbd' if CONF.libvirt.images_type == 'rbd' else 'raw'
- 
-     @staticmethod
-@@ -4764,10 +4777,10 @@ class LibvirtDriver(driver.ComputeDriver):
-                 finally:
-                     # NOTE(mikal): if the config drive was imported into RBD,
-                     # then we no longer need the local copy
-                    if CONF.libvirt.images_type == 'rbd':
-+                    if CONF.libvirt.images_type == 'rbd' or CONF.libvirt.images_type == 'vitastor':
-                         LOG.info('Deleting local config drive %(path)s '
-                                 'because it was imported into RBD.',
-                                 {'path': config_disk_local_path},
-+                                 'because it was imported into %(type).',
-+                                 {'path': config_disk_local_path, 'type': CONF.libvirt.images_type},
-                                  instance=instance)
-                         os.unlink(config_disk_local_path)
- 
-diff --git a/nova/virt/libvirt/utils.py b/nova/virt/libvirt/utils.py
-index da2a6e8b8a..52c02e72f1 100644
--- a/nova/virt/libvirt/utils.py
-+++ b/nova/virt/libvirt/utils.py
-@@ -340,6 +340,10 @@ def find_disk(guest: libvirt_guest.Guest) -> ty.Tuple[str, ty.Optional[str]]:
-             disk_path = disk.source_name
-             if disk_path:
-                 disk_path = 'rbd:' + disk_path
-+        elif not disk_path and disk.source_protocol == 'vitastor':
-+            disk_path = disk.source_name
-+            if disk_path:
-+                disk_path = 'vitastor:' + disk_path
- 
-     if not disk_path:
-         raise RuntimeError(_("Can't retrieve root device path "
-@@ -354,6 +358,8 @@ def get_disk_type_from_path(path: str) -> ty.Optional[str]:
-         return 'lvm'
-     elif path.startswith('rbd:'):
-         return 'rbd'
-+    elif path.startswith('vitastor:'):
-+        return 'vitastor'
-     elif (os.path.isdir(path) and
-           os.path.exists(os.path.join(path, "DiskDescriptor.xml"))):
-         return 'ploop'
-diff --git a/nova/virt/libvirt/volume/vitastor.py b/nova/virt/libvirt/volume/vitastor.py
-new file mode 100644
-index 0000000000..0256df62c1
--- /dev/null
-+++ b/nova/virt/libvirt/volume/vitastor.py
-@@ -0,0 +1,75 @@
-+# Copyright (c) 2021+, Vitaliy Filippov <vitalif@yourcmc.ru>
-+#
-+#    Licensed under the Apache License, Version 2.0 (the "License"); you may
-+#    not use this file except in compliance with the License. You may obtain
-+#    a copy of the License at
-+#
-+#         http://www.apache.org/licenses/LICENSE-2.0
-+#
-+#    Unless required by applicable law or agreed to in writing, software
-+#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
-+#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
-+#    License for the specific language governing permissions and limitations
-+#    under the License.
-+
-+from os_brick import exception as os_brick_exception
-+from os_brick import initiator
-+from os_brick.initiator import connector
-+from oslo_log import log as logging
-+
-+import nova.conf
-+from nova import utils
-+from nova.virt.libvirt.volume import volume as libvirt_volume
-+
-+
-+CONF = nova.conf.CONF
-+LOG = logging.getLogger(__name__)
-+
-+
-+class LibvirtVitastorVolumeDriver(libvirt_volume.LibvirtBaseVolumeDriver):
-+    """Driver to attach Vitastor volumes to libvirt."""
-+    def __init__(self, host):
-+        super(LibvirtVitastorVolumeDriver, self).__init__(host, is_block_dev=False)
-+
-+    def connect_volume(self, connection_info, instance):
-+        pass
-+
-+    def disconnect_volume(self, connection_info, instance):
-+        pass
-+
-+    def get_config(self, connection_info, disk_info):
-+        """Returns xml for libvirt."""
-+        conf = super(LibvirtVitastorVolumeDriver, self).get_config(connection_info, disk_info)
-+        conf.source_type = 'network'
-+        conf.source_protocol = 'vitastor'
-+        conf.source_name = connection_info['data'].get('name')
-+        conf.source_query = connection_info['data'].get('etcd_prefix') or None
-+        conf.source_config = connection_info['data'].get('config_path') or None
-+        conf.source_hosts = []
-+        conf.source_ports = []
-+        addresses = connection_info['data'].get('etcd_address', '')
-+        if addresses:
-+            if not isinstance(addresses, list):
-+                addresses = addresses.split(',')
-+            for addr in addresses:
-+                if addr.startswith('https://'):
-+                    raise NotImplementedError('Vitastor block driver does not support SSL for etcd communication yet')
-+                if addr.startswith('http://'):
-+                    addr = addr[7:]
-+                addr = addr.rstrip('/')
-+                if addr.endswith('/v3'):
-+                    addr = addr[0:-3]
-+                p = addr.find('/')
-+                if p > 0:
-+                    raise NotImplementedError('libvirt does not support custom URL paths for Vitastor etcd yet. Use /etc/vitastor/vitastor.conf')
-+                p = addr.find(':')
-+                port = '2379'
-+                if p > 0:
-+                    port = addr[p+1:]
-+                    addr = addr[0:p]
-+                conf.source_hosts.append(addr)
-+                conf.source_ports.append(port)
-+        return conf
-+
-+    def extend_volume(self, connection_info, instance, requested_size):
-+        raise NotImplementedError
--- a/patches/qemu-3.1-vitastor.patch
+++ b/patches/qemu-3.1-vitastor.patch
@@ -24,7 +24,7 @@ Index: qemu-3.1+dfsg/qapi/block-core.json
 +# @pool:        Pool ID
 +# @size:        Desired image size in bytes
 +# @config_path: Path to Vitastor configuration
-+# @etcd_host:   etcd connection address(es)
+# @etcd_address: etcd connection address(es)
 +# @etcd_prefix: etcd key/value prefix
 +##
 +{ 'struct': 'BlockdevOptionsVitastor',
@@ -33,7 +33,7 @@ Index: qemu-3.1+dfsg/qapi/block-core.json
 +            '*size': 'uint64',
 +            '*image': 'str',
 +            '*config_path': 'str',
-+            '*etcd_host': 'str',
+            '*etcd_address': 'str',
 +            '*etcd_prefix': 'str' } }
 +
 +##
--- a/patches/qemu-4.2-vitastor.patch
+++ b/patches/qemu-4.2-vitastor.patch
@@ -24,7 +24,7 @@ Index: qemu/qapi/block-core.json
 +# @pool:        Pool ID
 +# @size:        Desired image size in bytes
 +# @config_path: Path to Vitastor configuration
-+# @etcd_host:   etcd connection address(es)
+# @etcd_address: etcd connection address(es)
 +# @etcd_prefix: etcd key/value prefix
 +##
 +{ 'struct': 'BlockdevOptionsVitastor',
@@ -33,7 +33,7 @@ Index: qemu/qapi/block-core.json
 +            '*size': 'uint64',
 +            '*image': 'str',
 +            '*config_path': 'str',
-+            '*etcd_host': 'str',
+            '*etcd_address': 'str',
 +            '*etcd_prefix': 'str' } }
 +
 +##
--- a/patches/qemu-5.0-vitastor.patch
+++ b/patches/qemu-5.0-vitastor.patch
@@ -24,7 +24,7 @@ Index: qemu/qapi/block-core.json
 +# @pool:        Pool ID
 +# @size:        Desired image size in bytes
 +# @config_path: Path to Vitastor configuration
-+# @etcd_host:   etcd connection address(es)
+# @etcd_address: etcd connection address(es)
 +# @etcd_prefix: etcd key/value prefix
 +##
 +{ 'struct': 'BlockdevOptionsVitastor',
@@ -33,7 +33,7 @@ Index: qemu/qapi/block-core.json
 +            '*size': 'uint64',
 +            '*image': 'str',
 +            '*config_path': 'str',
-+            '*etcd_host': 'str',
+            '*etcd_address': 'str',
 +            '*etcd_prefix': 'str' } }
 +
 +##
--- a/patches/qemu-5.1-vitastor.patch
+++ b/patches/qemu-5.1-vitastor.patch
@@ -24,7 +24,7 @@ Index: qemu-5.1+dfsg/qapi/block-core.json
 +# @pool:        Pool ID
 +# @size:        Desired image size in bytes
 +# @config_path: Path to Vitastor configuration
-+# @etcd_host:   etcd connection address(es)
+# @etcd_address: etcd connection address(es)
 +# @etcd_prefix: etcd key/value prefix
 +##
 +{ 'struct': 'BlockdevOptionsVitastor',
@@ -33,7 +33,7 @@ Index: qemu-5.1+dfsg/qapi/block-core.json
 +            '*size': 'uint64',
 +            '*image': 'str',
 +            '*config_path': 'str',
-+            '*etcd_host': 'str',
+            '*etcd_address': 'str',
 +            '*etcd_prefix': 'str' } }
 +
 +##
--- a/rpm/build-tarball.sh
+++ b/rpm/build-tarball.sh
@@ -48,4 +48,4 @@ FIO=`rpm -qi fio | perl -e 'while(<>) { /^Epoch[\s:]+(\S+)/ && print "$1:"; /^Ve
 QEMU=`rpm -qi qemu qemu-kvm | perl -e 'while(<>) { /^Epoch[\s:]+(\S+)/ && print "$1:"; /^Version[\s:]+(\S+)/ && print $1; /^Release[\s:]+(\S+)/ && print "-$1"; }'`
 perl -i -pe 's/(Requires:\s*fio)([^\n]+)?/$1 = '$FIO'/' $VITASTOR/rpm/vitastor-el$EL.spec
 perl -i -pe 's/(Requires:\s*qemu(?:-kvm)?)([^\n]+)?/$1 = '$QEMU'/' $VITASTOR/rpm/vitastor-el$EL.spec
-tar --transform 's#^#vitastor-0.6.6/#' --exclude 'rpm/*.rpm' -czf $VITASTOR/../vitastor-0.6.6$(rpm --eval '%dist').tar.gz *
+tar --transform 's#^#vitastor-0.6.4/#' --exclude 'rpm/*.rpm' -czf $VITASTOR/../vitastor-0.6.4$(rpm --eval '%dist').tar.gz *
--- a/rpm/qemu-el8.Dockerfile
+++ b/rpm/qemu-el8.Dockerfile
@@ -11,7 +11,7 @@ RUN rm -rf /var/lib/dnf/*; dnf download --disablerepo='*' --enablerepo='centos-a
 RUN rpm --nomd5 -i qemu*.src.rpm
 RUN cd ~/rpmbuild/SPECS && dnf builddep -y --enablerepo=PowerTools --spec qemu-kvm.spec

-ADD patches/qemu-*-vitastor.patch /root/vitastor/patches/
+ADD qemu-*-vitastor.patch /root/vitastor/

 RUN set -e; \
    mkdir -p /root/packages/qemu-el8; \
@@ -25,7 +25,7 @@ RUN set -e; \
    echo "Patch$((PN+1)): qemu-4.2-vitastor.patch" >> qemu-kvm.spec; \
    tail -n +2 xx01 >> qemu-kvm.spec; \
    perl -i -pe 's/(^Release:\s*\d+)/$1.vitastor/' qemu-kvm.spec; \
-    cp /root/vitastor/patches/qemu-4.2-vitastor.patch ~/rpmbuild/SOURCES; \
+    cp /root/vitastor/qemu-4.2-vitastor.patch ~/rpmbuild/SOURCES; \
    rpmbuild --nocheck -ba qemu-kvm.spec; \
    cp ~/rpmbuild/RPMS/*/*qemu* /root/packages/qemu-el8/; \
    cp ~/rpmbuild/SRPMS/*qemu* /root/packages/qemu-el8/
--- a/rpm/vitastor-el7.Dockerfile
+++ b/rpm/vitastor-el7.Dockerfile
@@ -15,8 +15,8 @@ RUN yumdownloader --disablerepo=centos-sclo-rh --source fio
 RUN rpm --nomd5 -i qemu*.src.rpm
 RUN rpm --nomd5 -i fio*.src.rpm
 RUN rm -f /etc/yum.repos.d/CentOS-Media.repo
-RUN cd ~/rpmbuild/SPECS && yum-builddep -y qemu-kvm.spec
-RUN cd ~/rpmbuild/SPECS && yum-builddep -y fio.spec
+RUN cd ~/rpmbuild/SPECS && yum-builddep -y --enablerepo='*' --disablerepo=centos-sclo-rh --disablerepo=centos-sclo-rh-source --disablerepo=centos-sclo-sclo-testing qemu-kvm.spec
+RUN cd ~/rpmbuild/SPECS && yum-builddep -y --enablerepo='*' --disablerepo=centos-sclo-rh --disablerepo=centos-sclo-rh-source --disablerepo=centos-sclo-sclo-testing fio.spec
 RUN yum -y install rdma-core-devel

 ADD https://vitastor.io/rpms/liburing-el7/liburing-0.7-2.el7.src.rpm /root
@@ -38,7 +38,7 @@ ADD . /root/vitastor
 RUN set -e; \
    cd /root/vitastor/rpm; \
    sh build-tarball.sh; \
-    cp /root/vitastor-0.6.6.el7.tar.gz ~/rpmbuild/SOURCES; \
+    cp /root/vitastor-0.6.4.el7.tar.gz ~/rpmbuild/SOURCES; \
    cp vitastor-el7.spec ~/rpmbuild/SPECS/vitastor.spec; \
    cd ~/rpmbuild/SPECS/; \
    rpmbuild -ba vitastor.spec; \
--- a/rpm/vitastor-el7.spec
+++ b/rpm/vitastor-el7.spec
@@ -1,11 +1,11 @@
 Name:           vitastor
-Version:        0.6.6
+Version:        0.6.4
 Release:        1%{?dist}
 Summary:        Vitastor, a fast software-defined clustered block storage

 License:        Vitastor Network Public License 1.1
 URL:            https://vitastor.io/
-Source0:        vitastor-0.6.6.el7.tar.gz
+Source0:        vitastor-0.6.4.el7.tar.gz

 BuildRequires:  liburing-devel >= 0.6
 BuildRequires:  gperftools-devel
@@ -57,7 +57,6 @@ cp -r mon %buildroot/usr/lib/vitastor/mon
 %_bindir/vitastor-dump-journal
 %_bindir/vitastor-nbd
 %_bindir/vitastor-osd
-%_bindir/vitastor-cli
 %_bindir/vitastor-rm
 %_libdir/qemu-kvm/block-vitastor.so
 %_libdir/libfio_vitastor.so
--- a/rpm/vitastor-el8.Dockerfile
+++ b/rpm/vitastor-el8.Dockerfile
@@ -15,7 +15,7 @@ RUN rpm --nomd5 -i qemu*.src.rpm
 RUN rpm --nomd5 -i fio*.src.rpm
 RUN cd ~/rpmbuild/SPECS && dnf builddep -y --enablerepo=powertools --spec qemu-kvm.spec
 RUN cd ~/rpmbuild/SPECS && dnf builddep -y --enablerepo=powertools --spec fio.spec && dnf install -y cmake
-RUN yum -y install libibverbs-devel libarchive
+RUN yum -y install libibverbs-devel

 ADD https://vitastor.io/rpms/liburing-el7/liburing-0.7-2.el7.src.rpm /root

@@ -36,7 +36,7 @@ ADD . /root/vitastor
 RUN set -e; \
    cd /root/vitastor/rpm; \
    sh build-tarball.sh; \
-    cp /root/vitastor-0.6.6.el8.tar.gz ~/rpmbuild/SOURCES; \
+    cp /root/vitastor-0.6.4.el8.tar.gz ~/rpmbuild/SOURCES; \
    cp vitastor-el8.spec ~/rpmbuild/SPECS/vitastor.spec; \
    cd ~/rpmbuild/SPECS/; \
    rpmbuild -ba vitastor.spec; \
--- a/rpm/vitastor-el8.spec
+++ b/rpm/vitastor-el8.spec
@@ -1,11 +1,11 @@
 Name:           vitastor
-Version:        0.6.6
+Version:        0.6.4
 Release:        1%{?dist}
 Summary:        Vitastor, a fast software-defined clustered block storage

 License:        Vitastor Network Public License 1.1
 URL:            https://vitastor.io/
-Source0:        vitastor-0.6.6.el8.tar.gz
+Source0:        vitastor-0.6.4.el8.tar.gz

 BuildRequires:  liburing-devel >= 0.6
 BuildRequires:  gperftools-devel
@@ -54,7 +54,6 @@ cp -r mon %buildroot/usr/lib/vitastor
 %_bindir/vitastor-dump-journal
 %_bindir/vitastor-nbd
 %_bindir/vitastor-osd
-%_bindir/vitastor-cli
 %_bindir/vitastor-rm
 %_libdir/qemu-kvm/block-vitastor.so
 %_libdir/libfio_vitastor.so
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -15,8 +15,8 @@ if("${CMAKE_INSTALL_PREFIX}" MATCHES "^/usr/local/?$")
 	set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}")
 endif()

-add_definitions(-DVERSION="0.6.6")
-add_definitions(-Wall -Wno-sign-compare -Wno-comment -Wno-parentheses -Wno-pointer-arith -fdiagnostics-color=always -I ${CMAKE_SOURCE_DIR}/src)
+add_definitions(-DVERSION="0.6.4")
+add_definitions(-Wall -Wno-sign-compare -Wno-comment -Wno-parentheses -Wno-pointer-arith -I ${CMAKE_SOURCE_DIR}/src)
 if (${WITH_ASAN})
 	add_definitions(-fsanitize=address -fno-omit-frame-pointer)
 	add_link_options(-fsanitize=address -fno-omit-frame-pointer)
@@ -36,11 +36,6 @@ string(REGEX REPLACE "([\\/\\-]D) *NDEBUG" "" CMAKE_C_FLAGS_RELEASE "${CMAKE_C_F
 string(REGEX REPLACE "([\\/\\-]D) *NDEBUG" "" CMAKE_C_FLAGS_MINSIZEREL "${CMAKE_C_FLAGS_MINSIZEREL}")
 string(REGEX REPLACE "([\\/\\-]D) *NDEBUG" "" CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO}")

-macro(install_symlink filepath sympath)
-	install(CODE "execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink ${filepath} ${sympath})")
-	install(CODE "message(\"-- Created symlink: ${sympath} -> ${filepath}\")")
-endmacro(install_symlink)
-
 find_package(PkgConfig)
 pkg_check_modules(LIBURING REQUIRED liburing)
 if (${WITH_QEMU})
@@ -121,7 +116,6 @@ endif (${WITH_FIO})
 # libvitastor_client.so
 add_library(vitastor_client SHARED
 	cluster_client.cpp
-	cluster_client_list.cpp
 	vitastor_c.cpp
 )
 set_target_properties(vitastor_client PROPERTIES PUBLIC_HEADER "vitastor_c.h")
@@ -151,11 +145,11 @@ target_link_libraries(vitastor-nbd
 	vitastor_client
 )

-# vitastor-cli
-add_executable(vitastor-cli
-	cli.cpp cli_flatten.cpp cli_merge.cpp cli_rm.cpp cli_snap_rm.cpp
+# vitastor-rm
+add_executable(vitastor-rm
+	rm_inode.cpp
 )
-target_link_libraries(vitastor-cli
+target_link_libraries(vitastor-rm
 	vitastor_client
 )

@@ -226,7 +220,7 @@ target_link_libraries(test_cas
 # test_cluster_client
 add_executable(test_cluster_client
 	test_cluster_client.cpp
-	pg_states.cpp osd_ops.cpp cluster_client.cpp cluster_client_list.cpp msgr_op.cpp mock/messenger.cpp msgr_stop.cpp
+	pg_states.cpp osd_ops.cpp cluster_client.cpp msgr_op.cpp mock/messenger.cpp msgr_stop.cpp
 	etcd_state_client.cpp timerfd_manager.cpp ../json11/json11.cpp
 )
 target_compile_definitions(test_cluster_client PUBLIC -D__MOCK__)
@@ -240,8 +234,7 @@ target_include_directories(test_cluster_client PUBLIC ${CMAKE_SOURCE_DIR}/src/mo

 ### Install

-install(TARGETS vitastor-osd vitastor-dump-journal vitastor-nbd vitastor-cli RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
-install_symlink(${CMAKE_INSTALL_BINDIR}/vitastor-rm vitastor-cli)
+install(TARGETS vitastor-osd vitastor-dump-journal vitastor-nbd vitastor-rm RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
 install(
 	TARGETS vitastor_blk vitastor_client
 	LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
--- a/src/blockstore.cpp
+++ b/src/blockstore.cpp
@@ -48,11 +48,6 @@ std::map<uint64_t, uint64_t> & blockstore_t::get_inode_space_stats()
    return impl->inode_space_stats;
 }

-void blockstore_t::dump_diagnostics()
-{
-    return impl->dump_diagnostics();
-}
-
 uint32_t blockstore_t::get_block_size()
 {
    return impl->get_block_size();
--- a/src/blockstore.h
+++ b/src/blockstore.h
@@ -186,9 +186,6 @@ public:
    // Get per-inode space usage statistics
    std::map<uint64_t, uint64_t> & get_inode_space_stats();

-    // Print diagnostics to stdout
-    void dump_diagnostics();
-
    // FIXME rename to object_size
    uint32_t get_block_size();
    uint64_t get_block_count();
--- a/src/blockstore_flush.cpp
+++ b/src/blockstore_flush.cpp
@@ -182,75 +182,6 @@ void journal_flusher_t::release_trim()
    trim_wanted--;
 }

-void journal_flusher_t::dump_diagnostics()
-{
-    const char *unflushable_type = "";
-    obj_ver_id unflushable = { 0 };
-    // Try to find out if there is a flushable object for information
-    for (object_id cur_oid: flush_queue)
-    {
-        obj_ver_id cur = { .oid = cur_oid, .version = flush_versions[cur_oid] };
-        auto dirty_end = bs->dirty_db.find(cur);
-        if (dirty_end == bs->dirty_db.end())
-        {
-            // Already flushed
-            continue;
-        }
-        auto repeat_it = sync_to_repeat.find(cur.oid);
-        if (repeat_it != sync_to_repeat.end())
-        {
-            // Someone is already flushing it
-            unflushable_type = "locked,";
-            unflushable = cur;
-            break;
-        }
-        if (dirty_end->second.journal_sector >= bs->journal.dirty_start &&
-            (bs->journal.dirty_start >= bs->journal.used_start ||
-            dirty_end->second.journal_sector < bs->journal.used_start))
-        {
-            // Object is more recent than possible to flush
-            bool found = try_find_older(dirty_end, cur);
-            if (!found)
-            {
-                unflushable_type = "dirty,";
-                unflushable = cur;
-                break;
-            }
-        }
-        unflushable_type = "ok,";
-        unflushable = cur;
-        break;
-    }
-    printf(
-        "Flusher: queued=%ld first=%s%lx:%lx trim_wanted=%d dequeuing=%d trimming=%d cur=%d target=%d active=%d syncing=%d\n",
-        flush_queue.size(), unflushable_type, unflushable.oid.inode, unflushable.oid.stripe,
-        trim_wanted, dequeuing, trimming, cur_flusher_count, target_flusher_count,
-        active_flushers, syncing_flushers
-    );
-}
-
-bool journal_flusher_t::try_find_older(std::map<obj_ver_id, dirty_entry>::iterator & dirty_end, obj_ver_id & cur)
-{
-    bool found = false;
-    while (dirty_end != bs->dirty_db.begin())
-    {
-        dirty_end--;
-        if (dirty_end->first.oid != cur.oid)
-        {
-            break;
-        }
-        if (!(dirty_end->second.journal_sector >= bs->journal.dirty_start &&
-            (bs->journal.dirty_start >= bs->journal.used_start ||
-            dirty_end->second.journal_sector < bs->journal.used_start)))
-        {
-            found = true;
-            cur.version = dirty_end->first.version;
-            break;
-        }
-    }
-    return found;
-}
-
 #define await_sqe(label) \
    resume_##label:\
        sqe = bs->get_sqe();\
@@ -355,15 +286,30 @@ stop_flusher:
            // And it may even block writes if we don't flush the older version
            // (if it's in the beginning of the journal)...
            // So first try to find an older version of the same object to flush.
-            bool found = flusher->try_find_older(dirty_end, cur);
+            bool found = false;
+            while (dirty_end != bs->dirty_db.begin())
+            {
+                dirty_end--;
+                if (dirty_end->first.oid != cur.oid)
+                {
+                    break;
+                }
+                if (!(dirty_end->second.journal_sector >= bs->journal.dirty_start &&
+                    (bs->journal.dirty_start >= bs->journal.used_start ||
+                    dirty_end->second.journal_sector < bs->journal.used_start)))
+                {
+                    found = true;
+                    cur.version = dirty_end->first.version;
+                    break;
+                }
+            }
            if (!found)
            {
                // Try other objects
                flusher->sync_to_repeat.erase(cur.oid);
                int search_left = flusher->flush_queue.size() - 1;
 #ifdef BLOCKSTORE_DEBUG
-                printf("Flusher overran writers (%lx:%lx v%lu, dirty_start=%08lx) - searching for older flushes (%d left)\n",
-                    cur.oid.inode, cur.oid.stripe, cur.version, bs->journal.dirty_start, search_left);
+                printf("Flusher overran writers (dirty_start=%08lx) - searching for older flushes (%d left)\n", bs->journal.dirty_start, search_left);
 #endif
                while (search_left > 0)
                {
@@ -386,12 +332,7 @@ stop_flusher:
                        else
                        {
                            repeat_it = flusher->sync_to_repeat.find(cur.oid);
-                            if (repeat_it != flusher->sync_to_repeat.end())
-                            {
-                                if (repeat_it->second < cur.version)
-                                    repeat_it->second = cur.version;
-                            }
-                            else
+                            if (repeat_it == flusher->sync_to_repeat.end())
                            {
                                flusher->sync_to_repeat[cur.oid] = 0;
                                break;
--- a/src/blockstore_flush.h
+++ b/src/blockstore_flush.h
@@ -97,9 +97,6 @@ class journal_flusher_t
    std::map<uint64_t, meta_sector_t> meta_sectors;
    std::deque<object_id> flush_queue;
    std::map<object_id, uint64_t> flush_versions;
-
-    bool try_find_older(std::map<obj_ver_id, dirty_entry>::iterator & dirty_end, obj_ver_id & cur);
-
 public:
    journal_flusher_t(blockstore_impl_t *bs);
    ~journal_flusher_t();
@@ -111,5 +108,4 @@ public:
    void enqueue_flush(obj_ver_id oid);
    void unshift_flush(obj_ver_id oid, bool force);
    void remove_flush(object_id oid);
-    void dump_diagnostics();
 };
--- a/src/blockstore_impl.cpp
+++ b/src/blockstore_impl.cpp
@@ -595,9 +595,3 @@ void blockstore_impl_t::process_list(blockstore_op_t *op)
    op->buf = stable;
    FINISH_OP(op);
 }
-
-void blockstore_impl_t::dump_diagnostics()
-{
-    journal.dump_diagnostics();
-    flusher->dump_diagnostics();
-}
--- a/src/blockstore_impl.h
+++ b/src/blockstore_impl.h
@@ -361,9 +361,6 @@ public:
    // Space usage statistics
    std::map<uint64_t, uint64_t> inode_space_stats;

-    // Print diagnostics to stdout
-    void dump_diagnostics();
-
    inline uint32_t get_block_size() { return block_size; }
    inline uint64_t get_block_count() { return block_count; }
    inline uint64_t get_free_block_count() { return data_alloc->get_free_count(); }
--- a/src/blockstore_journal.cpp
+++ b/src/blockstore_journal.cpp
@@ -218,19 +218,3 @@ uint64_t journal_t::get_trim_pos()
    // Can't trim journal
    return used_start;
 }
-
-void journal_t::dump_diagnostics()
-{
-    auto journal_used_it = used_sectors.lower_bound(used_start);
-    if (journal_used_it == used_sectors.end())
-    {
-        // Journal is cleared to its end, restart from the beginning
-        journal_used_it = used_sectors.begin();
-    }
-    printf(
-        "Journal: used_start=%08lx next_free=%08lx dirty_start=%08lx trim_to=%08lx trim_to_refs=%ld\n",
-        used_start, next_free, dirty_start,
-        journal_used_it == used_sectors.end() ? 0 : journal_used_it->first,
-        journal_used_it == used_sectors.end() ? 0 : journal_used_it->second
-    );
-}
--- a/src/blockstore_journal.h
+++ b/src/blockstore_journal.h
@@ -180,7 +180,6 @@ struct journal_t
    ~journal_t();
    bool trim();
    uint64_t get_trim_pos();
-    void dump_diagnostics();
    inline bool entry_fits(int size)
    {
        return !(block_size - in_sector_pos < size ||
--- a/src/blockstore_write.cpp
+++ b/src/blockstore_write.cpp
@@ -478,15 +478,15 @@ resume_2:
    }
 resume_4:
    // Switch object state
+#ifdef BLOCKSTORE_DEBUG
+    printf("Ack write %lx:%lx v%lu = state 0x%x\n", op->oid.inode, op->oid.stripe, op->version, dirty_it->second.state);
+#endif
    {
        auto dirty_it = dirty_db.find((obj_ver_id){
            .oid = op->oid,
            .version = op->version,
        });
        assert(dirty_it != dirty_db.end());
-#ifdef BLOCKSTORE_DEBUG
-        printf("Ack write %lx:%lx v%lu = state 0x%x\n", op->oid.inode, op->oid.stripe, op->version, dirty_it->second.state);
-#endif
        bool is_big = (dirty_it->second.state & BS_ST_TYPE_MASK) == BS_ST_BIG_WRITE;
        bool imm = is_big ? (immediate_commit == IMMEDIATE_ALL) : (immediate_commit != IMMEDIATE_NONE);
        if (imm)
--- a/src/cli.cpp
+++ b/src/cli.cpp
@@ -1,251 +0,0 @@
-// Copyright (c) Vitaliy Filippov, 2019+
-// License: VNPL-1.1 (see README.md for details)
-
-/**
- * CLI tool
- * Currently can (a) remove inodes and (b) merge snapshot/clone layers
- */
-
-#include <vector>
-#include <algorithm>
-
-#include "cli.h"
-#include "epoll_manager.h"
-#include "cluster_client.h"
-#include "pg_states.h"
-#include "base64.h"
-
-static const char *exe_name = NULL;
-
-json11::Json::object cli_tool_t::parse_args(int narg, const char *args[])
-{
-    json11::Json::object cfg;
-    json11::Json::array cmd;
-    cfg["progress"] = "1";
-    for (int i = 1; i < narg; i++)
-    {
-        if (!strcmp(args[i], "-h") || !strcmp(args[i], "--help"))
-        {
-            help();
-        }
-        else if (args[i][0] == '-' && args[i][1] == '-')
-        {
-            const char *opt = args[i]+2;
-            cfg[opt] = !strcmp(opt, "json") || !strcmp(opt, "wait-list") || i == narg-1 ? "1" : args[++i];
-        }
-        else
-        {
-            cmd.push_back(std::string(args[i]));
-        }
-    }
-    if (!cmd.size())
-    {
-        std::string exe(exe_name);
-        if (exe.substr(exe.size()-11) == "vitastor-rm")
-        {
-            cmd.push_back("rm-data");
-        }
-    }
-    cfg["command"] = cmd;
-    return cfg;
-}
-
-void cli_tool_t::help()
-{
-    printf(
-        "Vitastor command-line tool\n"
-        "(c) Vitaliy Filippov, 2019+ (VNPL-1.1)\n\n"
-        "USAGE:\n"
-        "%s rm-data [OPTIONS] --pool <pool> --inode <inode> [--wait-list]\n"
-        "  Remove inode data without changing metadata.\n"
-        "  --wait-list means first retrieve objects listings and then remove it.\n"
-        "  --wait-list requires more memory, but allows to show correct stats.\n"
-        "\n"
-        "%s merge-data [OPTIONS] <from> <to> [--target <target>]\n"
-        "  Merge layer data without changing metadata. Merge <from>..<to> to <target>.\n"
-        "  <to> must be a child of <from> and <target> may be one of the layers between\n"
-        "  <from> and <to>, including <from> and <to>.\n"
-        "\n"
-        "%s flatten [OPTIONS] <layer>\n"
-        "  Flatten a layer, i.e. merge data and detach it from parents\n"
-        "\n"
-        "%s rm [OPTIONS] <from> [<to>] [--writers-stopped 1]\n"
-        "  Remove <from> or all layers between <from> and <to> (<to> must be a child of <from>),\n"
-        "  rebasing all their children accordingly. One of deleted parents may be renamed to one\n"
-        "  of children \"to be rebased\", but only if that child itself is readonly or if\n"
-        "  --writers-stopped 1 is specified\n"
-        "\n"
-        "OPTIONS (global):\n"
-        "  --etcd_address <etcd_address>\n"
-        "  --iodepth N         Send N operations in parallel to each OSD when possible (default 32)\n"
-        "  --parallel_osds M   Work with M osds in parallel when possible (default 4)\n"
-        "  --progress 1|0      Report progress (default 1)\n"
-        "  --cas 1|0           Use online CAS writes when possible (default auto)\n"
-        ,
-        exe_name, exe_name, exe_name, exe_name
-    );
-    exit(0);
-}
-
-void cli_tool_t::change_parent(inode_t cur, inode_t new_parent)
-{
-    auto cur_cfg_it = cli->st_cli.inode_config.find(cur);
-    if (cur_cfg_it == cli->st_cli.inode_config.end())
-    {
-        fprintf(stderr, "Inode 0x%lx disappeared\n", cur);
-        exit(1);
-    }
-    inode_config_t new_cfg = cur_cfg_it->second;
-    std::string cur_name = new_cfg.name;
-    std::string cur_cfg_key = base64_encode(cli->st_cli.etcd_prefix+
-        "/config/inode/"+std::to_string(INODE_POOL(cur))+
-        "/"+std::to_string(INODE_NO_POOL(cur)));
-    new_cfg.parent_id = new_parent;
-    json11::Json::object cur_cfg_json = cli->st_cli.serialize_inode_cfg(&new_cfg);
-    waiting++;
-    cli->st_cli.etcd_txn(json11::Json::object {
-        { "compare", json11::Json::array {
-            json11::Json::object {
-                { "target", "MOD" },
-                { "key", cur_cfg_key },
-                { "result", "LESS" },
-                { "mod_revision", new_cfg.mod_revision+1 },
-            },
-        } },
-        { "success", json11::Json::array {
-            json11::Json::object {
-                { "request_put", json11::Json::object {
-                    { "key", cur_cfg_key },
-                    { "value", base64_encode(json11::Json(cur_cfg_json).dump()) },
-                } }
-            },
-        } },
-    }, ETCD_SLOW_TIMEOUT, [this, new_parent, cur, cur_name](std::string err, json11::Json res)
-    {
-        if (err != "")
-        {
-            fprintf(stderr, "Error changing parent of %s: %s\n", cur_name.c_str(), err.c_str());
-            exit(1);
-        }
-        if (!res["succeeded"].bool_value())
-        {
-            fprintf(stderr, "Inode %s was modified during snapshot deletion\n", cur_name.c_str());
-            exit(1);
-        }
-        if (new_parent)
-        {
-            auto new_parent_it = cli->st_cli.inode_config.find(new_parent);
-            std::string new_parent_name = new_parent_it != cli->st_cli.inode_config.end()
-                ? new_parent_it->second.name : "<unknown>";
-            printf(
-                "Parent of layer %s (inode %lu in pool %u) changed to %s (inode %lu in pool %u)\n",
-                cur_name.c_str(), INODE_NO_POOL(cur), INODE_POOL(cur),
-                new_parent_name.c_str(), INODE_NO_POOL(new_parent), INODE_POOL(new_parent)
-            );
-        }
-        else
-        {
-            printf(
-                "Parent of layer %s (inode %lu in pool %u) detached\n",
-                cur_name.c_str(), INODE_NO_POOL(cur), INODE_POOL(cur)
-            );
-        }
-        waiting--;
-        ringloop->wakeup();
-    });
-}
-
-inode_config_t* cli_tool_t::get_inode_cfg(const std::string & name)
-{
-    for (auto & ic: cli->st_cli.inode_config)
-    {
-        if (ic.second.name == name)
-        {
-            return &ic.second;
-        }
-    }
-    fprintf(stderr, "Layer %s not found\n", name.c_str());
-    exit(1);
-}
-
-void cli_tool_t::run(json11::Json cfg)
-{
-    json11::Json::array cmd = cfg["command"].array_items();
-    if (!cmd.size())
-    {
-        fprintf(stderr, "command is missing\n");
-        exit(1);
-    }
-    else if (cmd[0] == "rm-data")
-    {
-        // Delete inode data
-        action_cb = start_rm(cfg);
-    }
-    else if (cmd[0] == "merge-data")
-    {
-        // Merge layer data without affecting metadata
-        action_cb = start_merge(cfg);
-    }
-    else if (cmd[0] == "flatten")
-    {
-        // Merge layer data without affecting metadata
-        action_cb = start_flatten(cfg);
-    }
-    else if (cmd[0] == "rm")
-    {
-        // Remove multiple snapshots and rebase their children
-        action_cb = start_snap_rm(cfg);
-    }
-    else
-    {
-        fprintf(stderr, "unknown command: %s\n", cmd[0].string_value().c_str());
-        exit(1);
-    }
-    iodepth = cfg["iodepth"].uint64_value();
-    if (!iodepth)
-        iodepth = 32;
-    parallel_osds = cfg["parallel_osds"].uint64_value();
-    if (!parallel_osds)
-        parallel_osds = 4;
-    log_level = cfg["log_level"].int64_value();
-    progress = cfg["progress"].uint64_value() ? true : false;
-    list_first = cfg["wait-list"].uint64_value() ? true : false;
-    // Create client
-    ringloop = new ring_loop_t(512);
-    epmgr = new epoll_manager_t(ringloop);
-    cli = new cluster_client_t(ringloop, epmgr->tfd, cfg);
-    cli->on_ready([this]()
-    {
-        // Initialize job
-        consumer.loop = [this]()
-        {
-            if (action_cb != NULL)
-            {
-                bool done = action_cb();
-                if (done)
-                {
-                    action_cb = NULL;
-                }
-            }
-            ringloop->submit();
-        };
-        ringloop->register_consumer(&consumer);
-        consumer.loop();
-    });
-    // Loop until it completes
-    while (action_cb != NULL)
-    {
-        ringloop->loop();
-        ringloop->wait();
-    }
-}
-
-int main(int narg, const char *args[])
-{
-    setvbuf(stdout, NULL, _IONBF, 0);
-    setvbuf(stderr, NULL, _IONBF, 0);
-    exe_name = args[0];
-    cli_tool_t *p = new cli_tool_t();
-    p->run(cli_tool_t::parse_args(narg, args));
-    return 0;
-}
--- a/src/cli.h
+++ b/src/cli.h
@@ -1,56 +0,0 @@
-// Copyright (c) Vitaliy Filippov, 2019+
-// License: VNPL-1.1 (see README.md for details)
-
-// Common CLI tool header
-
-#pragma once
-
-#include "json11/json11.hpp"
-#include "object_id.h"
-#include "ringloop.h"
-#include <functional>
-
-struct rm_inode_t;
-struct snap_merger_t;
-struct snap_flattener_t;
-struct snap_remover_t;
-
-class epoll_manager_t;
-class cluster_client_t;
-struct inode_config_t;
-
-class cli_tool_t
-{
-public:
-    uint64_t iodepth = 0, parallel_osds = 0;
-    bool progress = true;
-    bool list_first = false;
-    int log_level = 0;
-    int mode = 0;
-
-    ring_loop_t *ringloop = NULL;
-    epoll_manager_t *epmgr = NULL;
-    cluster_client_t *cli = NULL;
-
-    int waiting = 0;
-    ring_consumer_t consumer;
-    std::function<bool(void)> action_cb;
-
-    void run(json11::Json cfg);
-
-    void change_parent(inode_t cur, inode_t new_parent);
-    inode_config_t* get_inode_cfg(const std::string & name);
-
-    static json11::Json::object parse_args(int narg, const char *args[]);
-    static void help();
-
-    friend struct rm_inode_t;
-    friend struct snap_merger_t;
-    friend struct snap_flattener_t;
-    friend struct snap_remover_t;
-
-    std::function<bool(void)> start_rm(json11::Json);
-    std::function<bool(void)> start_merge(json11::Json);
-    std::function<bool(void)> start_flatten(json11::Json);
-    std::function<bool(void)> start_snap_rm(json11::Json);
-};
--- a/src/cli_flatten.cpp
+++ b/src/cli_flatten.cpp
@@ -1,124 +0,0 @@
-// Copyright (c) Vitaliy Filippov, 2019+
-// License: VNPL-1.1 (see README.md for details)
-
-#include "cli.h"
-#include "cluster_client.h"
-
-// Flatten a layer: merge all parents into a layer and break the connection completely
-struct snap_flattener_t
-{
-    cli_tool_t *parent;
-
-    // target to flatten
-    std::string target_name;
-    // writers are stopped, we can safely change writable layers
-    bool writers_stopped = false;
-    // use CAS writes (0 = never, 1 = auto, 2 = always)
-    int use_cas = 1;
-    // interval between fsyncs
-    int fsync_interval = 128;
-
-    std::string top_parent_name;
-    inode_t target_id = 0;
-    int state = 0;
-    std::function<bool(void)> merger_cb;
-
-    void get_merge_parents()
-    {
-        // Get all parents of target
-        inode_config_t *target_cfg = parent->get_inode_cfg(target_name);
-        target_id = target_cfg->num;
-        std::vector<inode_t> chain_list;
-        inode_config_t *cur = target_cfg;
-        chain_list.push_back(cur->num);
-        while (cur->parent_id != 0 && cur->parent_id != target_cfg->num)
-        {
-            auto it = parent->cli->st_cli.inode_config.find(cur->parent_id);
-            if (it == parent->cli->st_cli.inode_config.end())
-            {
-                fprintf(stderr, "Parent inode of layer %s (id %ld) not found\n", cur->name.c_str(), cur->parent_id);
-                exit(1);
-            }
-            cur = &it->second;
-            chain_list.push_back(cur->num);
-        }
-        if (cur->parent_id != 0)
-        {
-            fprintf(stderr, "Layer %s has a loop in parents\n", target_name.c_str());
-            exit(1);
-        }
-        top_parent_name = cur->name;
-    }
-
-    bool is_done()
-    {
-        return state == 5;
-    }
-
-    void loop()
-    {
-        if (state == 1)
-            goto resume_1;
-        else if (state == 2)
-            goto resume_2;
-        else if (state == 3)
-            goto resume_3;
-        // Get parent layers
-        get_merge_parents();
-        // Start merger
-        merger_cb = parent->start_merge(json11::Json::object {
-            { "command", json11::Json::array{ "merge-data", top_parent_name, target_name } },
-            { "target", target_name },
-            { "delete-source", false },
-            { "cas", use_cas },
-            { "fsync-interval", fsync_interval },
-        });
-        // Wait for it
-resume_1:
-        while (!merger_cb())
-        {
-            state = 1;
-            return;
-        }
-        merger_cb = NULL;
-        // Change parent
-        parent->change_parent(target_id, 0);
-        // Wait for it to complete
-        state = 2;
-resume_2:
-        if (parent->waiting > 0)
-            return;
-        state = 3;
-resume_3:
-        // Done
-        return;
-    }
-};
-
-std::function<bool(void)> cli_tool_t::start_flatten(json11::Json cfg)
-{
-    json11::Json::array cmd = cfg["command"].array_items();
-    auto flattener = new snap_flattener_t();
-    flattener->parent = this;
-    flattener->target_name = cmd.size() > 1 ? cmd[1].string_value() : "";
-    if (flattener->target_name == "")
-    {
-        fprintf(stderr, "Layer to flatten argument is missing\n");
-        exit(1);
-    }
-    flattener->fsync_interval = cfg["fsync-interval"].uint64_value();
-    if (!flattener->fsync_interval)
-        flattener->fsync_interval = 128;
-    if (!cfg["cas"].is_null())
-        flattener->use_cas = cfg["cas"].uint64_value() ? 2 : 0;
-    return [flattener]()
-    {
-        flattener->loop();
-        if (flattener->is_done())
-        {
-            delete flattener;
-            return true;
-        }
-        return false;
-    };
-}
--- a/src/cli_merge.cpp
+++ b/src/cli_merge.cpp
@@ -1,583 +0,0 @@
-// Copyright (c) Vitaliy Filippov, 2019+
-// License: VNPL-1.1 (see README.md for details)
-
-#include "cli.h"
-#include "cluster_client.h"
-#include "cpp-btree/safe_btree_set.h"
-
-struct snap_rw_op_t
-{
-    uint64_t offset = 0;
-    void *buf = NULL;
-    cluster_op_t op;
-    int todo = 0;
-    uint32_t start = 0, end = 0;
-};
-
-// Layer merge is the base for multiple operations:
-// 1) Delete snapshot "up" = merge child layer into the parent layer, remove the child
-//    and rename the parent to the child
-// 2) Delete snapshot "down" = merge parent layer into the child layer and remove the parent
-// 3) Flatten image = merge parent layers into the child layer and break the connection
-struct snap_merger_t
-{
-    cli_tool_t *parent;
-
-    // -- CONFIGURATION --
-    // merge from..to into target (target may be one of from..to)
-    std::string from_name, to_name, target_name;
-    // inode=>rank (bigger rank means child layers)
-    std::map<inode_t,int> sources;
-    // delete merged source inode data during merge
-    bool delete_source = false;
-    // use CAS writes (0 = never, 1 = auto, 2 = always)
-    int use_cas = 1;
-    // don't necessarily delete source data, but perform checks as if we were to do it
-    bool check_delete_source = false;
-    // interval between fsyncs
-    int fsync_interval = 128;
-
-    // -- STATE --
-    inode_t target;
-    int target_rank;
-    bool inside_continue = false;
-    int state = 0;
-    int lists_todo = 0;
-    uint64_t target_block_size = 0;
-    btree::safe_btree_set<uint64_t> merge_offsets;
-    btree::safe_btree_set<uint64_t>::iterator oit;
-    std::map<inode_t, std::vector<uint64_t>> layer_lists;
-    std::map<inode_t, uint64_t> layer_block_size;
-    std::map<inode_t, uint64_t> layer_list_pos;
-    int in_flight = 0;
-    uint64_t last_fsync_offset = 0;
-    uint64_t last_written_offset = 0;
-    int deleted_unsynced = 0;
-    uint64_t processed = 0, to_process = 0;
-
-    void start_merge()
-    {
-        check_delete_source = delete_source || check_delete_source;
-        inode_config_t *from_cfg = parent->get_inode_cfg(from_name);
-        inode_config_t *to_cfg = parent->get_inode_cfg(to_name);
-        inode_config_t *target_cfg = target_name == "" ? from_cfg : parent->get_inode_cfg(target_name);
-        if (to_cfg->num == from_cfg->num)
-        {
-            fprintf(stderr, "Only one layer specified, nothing to merge\n");
-            exit(1);
-        }
-        // Check that to_cfg is actually a child of from_cfg and target_cfg is somewhere between them
-        std::vector<inode_t> chain_list;
-        inode_config_t *cur = to_cfg;
-        chain_list.push_back(cur->num);
-        layer_block_size[cur->num] = get_block_size(cur->num);
-        while (cur->parent_id != from_cfg->num &&
-            cur->parent_id != to_cfg->num &&
-            cur->parent_id != 0)
-        {
-            auto it = parent->cli->st_cli.inode_config.find(cur->parent_id);
-            if (it == parent->cli->st_cli.inode_config.end())
-            {
-                fprintf(stderr, "Parent inode of layer %s (id %ld) not found\n", cur->name.c_str(), cur->parent_id);
-                exit(1);
-            }
-            cur = &it->second;
-            chain_list.push_back(cur->num);
-            layer_block_size[cur->num] = get_block_size(cur->num);
-        }
-        if (cur->parent_id != from_cfg->num)
-        {
-            fprintf(stderr, "Layer %s is not a child of %s\n", to_name.c_str(), from_name.c_str());
-            exit(1);
-        }
-        chain_list.push_back(from_cfg->num);
-        layer_block_size[from_cfg->num] = get_block_size(from_cfg->num);
-        int i = chain_list.size()-1;
-        for (inode_t item: chain_list)
-        {
-            sources[item] = i--;
-        }
-        if (sources.find(target_cfg->num) == sources.end())
-        {
-            fprintf(stderr, "Layer %s is not between %s and %s\n", target_name.c_str(), to_name.c_str(), from_name.c_str());
-            exit(1);
-        }
-        target = target_cfg->num;
-        target_rank = sources.at(target);
-        int to_rank = sources.at(to_cfg->num);
-        bool to_has_children = false;
-        // Check that there are no other inodes dependent on altered layers
-        //
-        // 1) everything between <target> and <to> except <to> is not allowed
-        //    to have children other than <to> if <to> is a child of <target>:
-        //
-        //    <target> - <layer 3> - <to>
-        //            \- <layer 4> <--------X--------- NOT ALLOWED
-        //
-        // 2) everything between <from> and <target>, except <target>, is not allowed
-        //    to have children other than <target> if sources are to be deleted after merging:
-        //
-        //    <from> - <layer 1> - <target> - <to>
-        //          \- <layer 2> <---------X-------- NOT ALLOWED
-        for (auto & ic: parent->cli->st_cli.inode_config)
-        {
-            auto it = sources.find(ic.second.num);
-            if (it == sources.end() && ic.second.parent_id != 0)
-            {
-                it = sources.find(ic.second.parent_id);
-                if (it != sources.end())
-                {
-                    int parent_rank = it->second;
-                    if (parent_rank < to_rank && (parent_rank >= target_rank || check_delete_source))
-                    {
-                        fprintf(
-                            stderr, "Layers at or above %s, but below %s are not allowed"
-                                " to have other children, but %s is a child of %s\n",
-                            (check_delete_source ? from_name.c_str() : target_name.c_str()),
-                            to_name.c_str(), ic.second.name.c_str(),
-                            parent->cli->st_cli.inode_config.at(ic.second.parent_id).name.c_str()
-                        );
-                        exit(1);
-                    }
-                    if (parent_rank >= to_rank)
-                    {
-                        to_has_children = true;
-                    }
-                }
-            }
-        }
-        if ((target_rank < to_rank || to_has_children) && use_cas == 1)
-        {
-            // <to> has children itself, no need for CAS
-            use_cas = 0;
-        }
-        sources.erase(target);
-        printf(
-            "Merging %ld layer(s) into target %s%s (inode %lu in pool %u)\n",
-            sources.size(), target_cfg->name.c_str(),
-            use_cas ? " online (with CAS)" : "", INODE_NO_POOL(target), INODE_POOL(target)
-        );
-        target_block_size = get_block_size(target);
-    }
-
-    uint64_t get_block_size(inode_t inode)
-    {
-        auto & pool_cfg = parent->cli->st_cli.pool_config.at(INODE_POOL(inode));
-        uint64_t pg_data_size = (pool_cfg.scheme == POOL_SCHEME_REPLICATED ? 1 : pool_cfg.pg_size-pool_cfg.parity_chunks);
-        return parent->cli->get_bs_block_size() * pg_data_size;
-    }
-
-    void continue_merge_reent()
-    {
-        if (!inside_continue)
-        {
-            inside_continue = true;
-            continue_merge();
-            inside_continue = false;
-        }
-    }
-
-    bool is_done()
-    {
-        return state == 6;
-    }
-
-    void continue_merge()
-    {
-        if (state == 1)
-            goto resume_1;
-        else if (state == 2)
-            goto resume_2;
-        else if (state == 3)
-            goto resume_3;
-        else if (state == 4)
-            goto resume_4;
-        else if (state == 5)
-            goto resume_5;
-        else if (state == 6)
-            goto resume_6;
-        // Get parents and so on
-        start_merge();
-        // First list lower layers
-        list_layers(true);
-        state = 1;
-    resume_1:
-        while (lists_todo > 0)
-        {
-            // Wait for lists
-            return;
-        }
-        if (merge_offsets.size() > 0)
-        {
-            state = 2;
-            oit = merge_offsets.begin();
-            processed = 0;
-            to_process = merge_offsets.size();
-    resume_2:
-            // Then remove blocks already filled in target by issuing zero-length reads and checking bitmaps
-            while (in_flight < parent->iodepth*parent->parallel_osds && oit != merge_offsets.end())
-            {
-                in_flight++;
-                check_if_full(*oit);
-                oit++;
-                processed++;
-                if (parent->progress && !(processed % 128))
-                {
-                    printf("\rFiltering target blocks: %lu/%lu", processed, to_process);
-                }
-            }
-            if (in_flight > 0 || oit != merge_offsets.end())
-            {
-                // Wait until reads finish
-                return;
-            }
-            if (parent->progress)
-            {
-                printf("\r%lu full blocks of target filtered out\n", to_process-merge_offsets.size());
-            }
-        }
-        state = 3;
-    resume_3:
-        // Then list upper layers
-        list_layers(false);
-        state = 4;
-    resume_4:
-        while (lists_todo > 0)
-        {
-            // Wait for lists
-            return;
-        }
-        state = 5;
-        processed = 0;
-        to_process = merge_offsets.size();
-        oit = merge_offsets.begin();
-    resume_5:
-        // Now read, overwrite and optionally delete offsets one by one
-        while (in_flight < parent->iodepth*parent->parallel_osds && oit != merge_offsets.end())
-        {
-            in_flight++;
-            read_and_write(*oit);
-            oit++;
-            processed++;
-            if (parent->progress && !(processed % 128))
-            {
-                printf("\rOverwriting blocks: %lu/%lu", processed, to_process);
-            }
-        }
-        if (in_flight > 0 || oit != merge_offsets.end())
-        {
-            // Wait until overwrites finish
-            return;
-        }
-        if (parent->progress)
-        {
-            printf("\rOverwriting blocks: %lu/%lu\n", to_process, to_process);
-        }
-        // Done
-        printf("Done, layers from %s to %s merged into %s\n", from_name.c_str(), to_name.c_str(), target_name.c_str());
-        state = 6;
-    resume_6:
-        return;
-    }
-
-    void list_layers(bool lower)
-    {
-        for (auto & sp: sources)
-        {
-            inode_t src = sp.first;
-            if (lower ? (sp.second < target_rank) : (sp.second > target_rank))
-            {
-                lists_todo++;
-                inode_list_t* lst = parent->cli->list_inode_start(src, [this, src](
-                    inode_list_t *lst, std::set<object_id>&& objects, pg_num_t pg_num, osd_num_t primary_osd, int status)
-                {
-                    uint64_t layer_block = layer_block_size.at(src);
-                    for (object_id obj: objects)
-                    {
-                        merge_offsets.insert(obj.stripe - obj.stripe % target_block_size);
-                        for (int i = target_block_size; i < layer_block; i += target_block_size)
-                        {
-                            merge_offsets.insert(obj.stripe - obj.stripe % target_block_size + i);
-                        }
-                    }
-                    if (delete_source)
-                    {
-                        // Also store individual lists
-                        auto & layer_list = layer_lists[src];
-                        int pos = layer_list.size();
-                        layer_list.resize(pos + objects.size());
-                        for (object_id obj: objects)
-                        {
-                            layer_list[pos++] = obj.stripe;
-                        }
-                    }
-                    if (status & INODE_LIST_DONE)
-                    {
-                        auto & name = parent->cli->st_cli.inode_config.at(src).name;
-                        printf("Got listing of layer %s (inode %lu in pool %u)\n", name.c_str(), INODE_NO_POOL(src), INODE_POOL(src));
-                        if (delete_source)
-                        {
-                            // Sort the inode listing
-                            std::sort(layer_lists[src].begin(), layer_lists[src].end());
-                        }
-                        lists_todo--;
-                        continue_merge_reent();
-                    }
-                    else
-                    {
-                        parent->cli->list_inode_next(lst, 1);
-                    }
-                });
-                parent->cli->list_inode_next(lst, parent->parallel_osds);
-            }
-        }
-    }
-
-    // Check if <offset> is fully written in <target> and remove it from merge_offsets if so
-    void check_if_full(uint64_t offset)
-    {
-        cluster_op_t *op = new cluster_op_t;
-        op->opcode = OSD_OP_READ_BITMAP;
-        op->inode = target;
-        op->offset = offset;
-        op->len = 0;
-        op->callback = [this](cluster_op_t *op)
-        {
-            if (op->retval < 0)
-            {
-                fprintf(stderr, "error reading target bitmap at offset %lx: %s\n", op->offset, strerror(-op->retval));
-            }
-            else
-            {
-                uint64_t bitmap_bytes = target_block_size/parent->cli->get_bs_bitmap_granularity()/8;
-                int i;
-                for (i = 0; i < bitmap_bytes; i++)
-                {
-                    if (((uint8_t*)op->bitmap_buf)[i] != 0xff)
-                    {
-                        break;
-                    }
-                }
-                if (i == bitmap_bytes)
-                {
-                    // full
-                    merge_offsets.erase(op->offset);
-                }
-            }
-            delete op;
-            in_flight--;
-            continue_merge_reent();
-        };
-        parent->cli->execute(op);
-    }
-
-    // Read <offset> from <to>, write it to <target> and optionally delete it
-    // from all layers except <target> after fsync'ing
-    void read_and_write(uint64_t offset)
-    {
-        snap_rw_op_t *rwo = new snap_rw_op_t;
-        // Initialize counter to 1 to later allow write_subop() to return immediately
-        // (even though it shouldn't really do that)
-        rwo->todo = 1;
-        rwo->buf = malloc(target_block_size);
-        rwo->offset = offset;
-        rwo_read(rwo);
-    }
-
-    void rwo_read(snap_rw_op_t *rwo)
-    {
-        cluster_op_t *op = &rwo->op;
-        op->opcode = OSD_OP_READ;
-        op->inode = target;
-        op->offset = rwo->offset;
-        op->len = target_block_size;
-        op->iov.push_back(rwo->buf, target_block_size);
-        op->callback = [this, rwo](cluster_op_t *op)
-        {
-            if (op->retval != op->len)
-            {
-                fprintf(stderr, "error reading target at offset %lx: %s\n", op->offset, strerror(-op->retval));
-                exit(1);
-            }
-            next_write(rwo);
-        };
-        parent->cli->execute(op);
-    }
-
-    void next_write(snap_rw_op_t *rwo)
-    {
-        // Write each non-empty range using an individual operation
-        // FIXME: Allow to use single write with "holes" (OSDs don't allow it yet)
-        uint32_t gran = parent->cli->get_bs_bitmap_granularity();
-        uint64_t bitmap_size = target_block_size / gran;
-        while (rwo->end < bitmap_size)
-        {
-            auto bit = ((*(uint8_t*)(rwo->op.bitmap_buf + (rwo->end >> 3))) & (1 << (rwo->end & 0x7)));
-            if (!bit)
-            {
-                if (rwo->end > rwo->start)
-                {
-                    // write start->end
-                    rwo->todo++;
-                    write_subop(rwo, rwo->start*gran, rwo->end*gran, use_cas ? 1+rwo->op.version : 0);
-                    rwo->start = rwo->end;
-                    if (use_cas)
-                    {
-                        // Submit one by one if using CAS writes
-                        return;
-                    }
-                }
-                rwo->start = rwo->end = rwo->end+1;
-            }
-            else
-            {
-                rwo->end++;
-            }
-        }
-        if (rwo->end > rwo->start)
-        {
-            // write start->end
-            rwo->todo++;
-            write_subop(rwo, rwo->start*gran, rwo->end*gran, use_cas ? 1+rwo->op.version : 0);
-            rwo->start = rwo->end;
-            if (use_cas)
-            {
-                return;
-            }
-        }
-        rwo->todo--;
-        // Just in case, if everything is done
-        autofree_op(rwo);
-    }
-
-    void write_subop(snap_rw_op_t *rwo, uint32_t start, uint32_t end, uint64_t version)
-    {
-        cluster_op_t *subop = new cluster_op_t;
-        subop->opcode = OSD_OP_WRITE;
-        subop->inode = target;
-        subop->offset = rwo->offset+start;
-        subop->len = end-start;
-        subop->version = version;
-        subop->flags = OSD_OP_IGNORE_READONLY;
-        subop->iov.push_back(rwo->buf+start, end-start);
-        subop->callback = [this, rwo](cluster_op_t *subop)
-        {
-            rwo->todo--;
-            if (subop->retval != subop->len)
-            {
-                if (use_cas && subop->retval == -EINTR)
-                {
-                    // CAS failure - reread and repeat optimistically
-                    rwo->start = subop->offset - rwo->offset;
-                    rwo_read(rwo);
-                    delete subop;
-                    return;
-                }
-                fprintf(stderr, "error writing target at offset %lx: %s\n", subop->offset, strerror(-subop->retval));
-                exit(1);
-            }
-            // Increment CAS version
-            rwo->op.version++;
-            if (use_cas)
-                next_write(rwo);
-            else
-                autofree_op(rwo);
-            delete subop;
-        };
-        parent->cli->execute(subop);
-    }
-
-    void delete_offset(inode_t inode_num, uint64_t offset)
-    {
-        cluster_op_t *subop = new cluster_op_t;
-        subop->opcode = OSD_OP_DELETE;
-        subop->inode = inode_num;
-        subop->offset = offset;
-        subop->len = 0;
-        subop->flags = OSD_OP_IGNORE_READONLY;
-        subop->callback = [this](cluster_op_t *subop)
-        {
-            if (subop->retval != 0)
-            {
-                fprintf(stderr, "error deleting from layer 0x%lx at offset %lx: %s", subop->inode, subop->offset, strerror(-subop->retval));
-            }
-            delete subop;
-        };
-        parent->cli->execute(subop);
-    }
-
-    void autofree_op(snap_rw_op_t *rwo)
-    {
-        if (!rwo->todo)
-        {
-            if (last_written_offset < rwo->op.offset+target_block_size)
-            {
-                last_written_offset = rwo->op.offset+target_block_size;
-            }
-            if (delete_source)
-            {
-                deleted_unsynced++;
-                if (deleted_unsynced >= fsync_interval)
-                {
-                    uint64_t from = last_fsync_offset, to = last_written_offset;
-                    cluster_op_t *subop = new cluster_op_t;
-                    subop->opcode = OSD_OP_SYNC;
-                    subop->callback = [this, from, to](cluster_op_t *subop)
-                    {
-                        delete subop;
-                        // We can now delete source data between <from> and <to>
-                        // But to do this we have to keep all object lists in memory :-(
-                        for (auto & lp: layer_list_pos)
-                        {
-                            auto & layer_list = layer_lists.at(lp.first);
-                            uint64_t layer_block = layer_block_size.at(lp.first);
-                            int cur_pos = lp.second;
-                            while (cur_pos < layer_list.size() && layer_list[cur_pos]+layer_block < to)
-                            {
-                                delete_offset(lp.first, layer_list[cur_pos]);
-                                cur_pos++;
-                            }
-                            lp.second = cur_pos;
-                        }
-                    };
-                    parent->cli->execute(subop);
-                }
-            }
-            free(rwo->buf);
-            delete rwo;
-            in_flight--;
-            continue_merge_reent();
-        }
-    }
-};
-
-std::function<bool(void)> cli_tool_t::start_merge(json11::Json cfg)
-{
-    json11::Json::array cmd = cfg["command"].array_items();
-    auto merger = new snap_merger_t();
-    merger->parent = this;
-    merger->from_name = cmd.size() > 1 ? cmd[1].string_value() : "";
-    merger->to_name = cmd.size() > 2 ? cmd[2].string_value() : "";
-    merger->target_name = cfg["target"].string_value();
-    if (merger->from_name == "" || merger->to_name == "")
-    {
-        fprintf(stderr, "Beginning or end of the merge sequence is missing\n");
-        exit(1);
-    }
-    merger->delete_source = cfg["delete-source"].string_value() != "";
-    merger->fsync_interval = cfg["fsync-interval"].uint64_value();
-    if (!merger->fsync_interval)
-        merger->fsync_interval = 128;
-    if (!cfg["cas"].is_null())
-        merger->use_cas = cfg["cas"].uint64_value() ? 2 : 0;
-    return [merger]()
-    {
-        merger->continue_merge_reent();
-        if (merger->is_done())
-        {
-            delete merger;
-            return true;
-        }
-        return false;
-    };
-}
--- a/src/cli_rm.cpp
+++ b/src/cli_rm.cpp
@@ -1,195 +0,0 @@
-// Copyright (c) Vitaliy Filippov, 2019+
-// License: VNPL-1.1 (see README.md for details)
-
-#include "cli.h"
-#include "cluster_client.h"
-
-#define RM_LISTING 1
-#define RM_REMOVING 2
-#define RM_END 3
-
-struct rm_pg_t
-{
-    pg_num_t pg_num;
-    osd_num_t rm_osd_num;
-    std::set<object_id> objects;
-    std::set<object_id>::iterator obj_pos;
-    uint64_t obj_count = 0, obj_done = 0, obj_prev_done = 0;
-    int state = 0;
-    int in_flight = 0;
-};
-
-struct rm_inode_t
-{
-    uint64_t inode = 0;
-    pool_id_t pool_id = 0;
-
-    cli_tool_t *parent = NULL;
-    inode_list_t *lister = NULL;
-    std::vector<rm_pg_t*> lists;
-    uint64_t total_count = 0, total_done = 0, total_prev_pct = 0;
-    uint64_t pgs_to_list = 0;
-    bool lists_done = false;
-    int state = 0;
-
-    void start_delete()
-    {
-        lister = parent->cli->list_inode_start(inode, [this](inode_list_t *lst,
-            std::set<object_id>&& objects, pg_num_t pg_num, osd_num_t primary_osd, int status)
-        {
-            rm_pg_t *rm = new rm_pg_t((rm_pg_t){
-                .pg_num = pg_num,
-                .rm_osd_num = primary_osd,
-                .objects = objects,
-                .obj_count = objects.size(),
-                .obj_done = 0,
-                .obj_prev_done = 0,
-            });
-            rm->obj_pos = rm->objects.begin();
-            lists.push_back(rm);
-            if (parent->list_first)
-            {
-                parent->cli->list_inode_next(lister, 1);
-            }
-            if (status & INODE_LIST_DONE)
-            {
-                lists_done = true;
-            }
-            pgs_to_list--;
-            continue_delete();
-        });
-        if (!lister)
-        {
-            fprintf(stderr, "Failed to list inode %lu from pool %u objects\n", INODE_NO_POOL(inode), INODE_POOL(inode));
-            exit(1);
-        }
-        pgs_to_list = parent->cli->list_pg_count(lister);
-        parent->cli->list_inode_next(lister, parent->parallel_osds);
-    }
-
-    void send_ops(rm_pg_t *cur_list)
-    {
-        if (parent->cli->msgr.osd_peer_fds.find(cur_list->rm_osd_num) ==
-            parent->cli->msgr.osd_peer_fds.end())
-        {
-            // Initiate connection
-            parent->cli->msgr.connect_peer(cur_list->rm_osd_num, parent->cli->st_cli.peer_states[cur_list->rm_osd_num]);
-            return;
-        }
-        while (cur_list->in_flight < parent->iodepth && cur_list->obj_pos != cur_list->objects.end())
-        {
-            osd_op_t *op = new osd_op_t();
-            op->op_type = OSD_OP_OUT;
-            op->peer_fd = parent->cli->msgr.osd_peer_fds[cur_list->rm_osd_num];
-            op->req = (osd_any_op_t){
-                .rw = {
-                    .header = {
-                        .magic = SECONDARY_OSD_OP_MAGIC,
-                        .id = parent->cli->next_op_id(),
-                        .opcode = OSD_OP_DELETE,
-                    },
-                    .inode = cur_list->obj_pos->inode,
-                    .offset = cur_list->obj_pos->stripe,
-                    .len = 0,
-                },
-            };
-            op->callback = [this, cur_list](osd_op_t *op)
-            {
-                cur_list->in_flight--;
-                if (op->reply.hdr.retval < 0)
-                {
-                    fprintf(stderr, "Failed to remove object %lx:%lx from PG %u (OSD %lu) (retval=%ld)\n",
-                        op->req.rw.inode, op->req.rw.offset,
-                        cur_list->pg_num, cur_list->rm_osd_num, op->reply.hdr.retval);
-                }
-                delete op;
-                cur_list->obj_done++;
-                total_done++;
-                continue_delete();
-            };
-            cur_list->obj_pos++;
-            cur_list->in_flight++;
-            parent->cli->msgr.outbox_push(op);
-        }
-    }
-
-    void continue_delete()
-    {
-        if (parent->list_first && !lists_done)
-        {
-            return;
-        }
-        for (int i = 0; i < lists.size(); i++)
-        {
-            if (!lists[i]->in_flight && lists[i]->obj_pos == lists[i]->objects.end())
-            {
-                delete lists[i];
-                lists.erase(lists.begin()+i, lists.begin()+i+1);
-                i--;
-                if (!lists_done)
-                {
-                    parent->cli->list_inode_next(lister, 1);
-                }
-            }
-            else
-            {
-                send_ops(lists[i]);
-            }
-        }
-        if (parent->progress && total_count > 0 && total_done*1000/total_count != total_prev_pct)
-        {
-            printf("\rRemoved %lu/%lu objects, %lu more PGs to list...", total_done, total_count, pgs_to_list);
-            total_prev_pct = total_done*1000/total_count;
-        }
-        if (lists_done && !lists.size())
-        {
-            printf("Done, inode %lu in pool %u data removed\n", INODE_NO_POOL(inode), pool_id);
-            state = 2;
-        }
-    }
-
-    bool loop()
-    {
-        if (state == 0)
-        {
-            start_delete();
-            state = 1;
-        }
-        else if (state == 1)
-        {
-            continue_delete();
-        }
-        else if (state == 2)
-        {
-            return true;
-        }
-        return false;
-    }
-};
-
-std::function<bool(void)> cli_tool_t::start_rm(json11::Json cfg)
-{
-    auto remover = new rm_inode_t();
-    remover->parent = this;
-    remover->inode = cfg["inode"].uint64_value();
-    remover->pool_id = cfg["pool"].uint64_value();
-    if (remover->pool_id)
-    {
-        remover->inode = (remover->inode & ((1l << (64-POOL_ID_BITS)) - 1)) | (((uint64_t)remover->pool_id) << (64-POOL_ID_BITS));
-    }
-    remover->pool_id = INODE_POOL(remover->inode);
-    if (!remover->pool_id)
-    {
-        fprintf(stderr, "pool is missing\n");
-        exit(1);
-    }
-    return [remover]()
-    {
-        if (remover->loop())
-        {
-            delete remover;
-            return true;
-        }
-        return false;
-    };
-}
--- a/src/cli_snap_rm.cpp
+++ b/src/cli_snap_rm.cpp
@@ -1,565 +0,0 @@
-// Copyright (c) Vitaliy Filippov, 2019+
-// License: VNPL-1.1 (see README.md for details)
-
-#include "cli.h"
-#include "cluster_client.h"
-#include "base64.h"
-
-// Remove layer(s): similar to merge, but alters metadata and processes multiple merge targets
-//
-// Exactly one child of the requested layers may be merged using the "inverted" workflow,
-// where we merge it "down" into one of the "to-be-removed" layers and then rename the
-// "to-be-removed" layer to the child. It may be done either if all writers are stopped
-// before trying to delete layers (which is signaled by --writers-stopped) or if that child
-// is a read-only layer (snapshot) itself.
-//
-// This "inverted" workflow trades copying data of one of the deleted layers for copying
-// data of one child of the chain which is also a child of the "traded" layer. So we
-// choose the (parent,child) pair which has the largest difference between "parent" and
-// "child" inode sizes.
-//
-// All other children of the chain are processed by iterating though them, merging removed
-// parents into them and rebasing them to the last layer which isn't a member of the removed
-// chain.
-//
-// Example:
-//
-// <parent> - <from> - <layer 2> - <to> - <child 1>
-//                 \           \       \- <child 2>
-//                  \           \- <child 3>
-//                   \-<child 4>
-//
-// 1) Find optimal pair for the "reverse" scenario
-//    Imagine that it's (<layer 2>, <child 1>) in this example
-// 2) Process all children except <child 1>:
-//    - Merge <from>..<to> to <child 2>
-//    - Set <child 2> parent to <parent>
-//    - Repeat for others
-// 3) Process <child 1>:
-//    - Merge <from>..<child 1> to <layer 2>
-//    - Set <layer 2> parent to <parent>
-//    - Rename <layer 2> to <child 1>
-// 4) Delete other layers of the chain (<from>, <to>)
-struct snap_remover_t
-{
-    cli_tool_t *parent;
-
-    // remove from..to
-    std::string from_name, to_name;
-    // writers are stopped, we can safely change writable layers
-    bool writers_stopped = false;
-    // use CAS writes (0 = never, 1 = auto, 2 = always)
-    int use_cas = 1;
-    // interval between fsyncs
-    int fsync_interval = 128;
-
-    std::map<inode_t,int> sources;
-    std::map<inode_t,uint64_t> inode_used;
-    std::vector<inode_t> merge_children;
-    std::vector<inode_t> chain_list;
-    std::map<inode_t,int> inverse_candidates;
-    inode_t inverse_parent = 0, inverse_child = 0;
-    inode_t new_parent = 0;
-    int state = 0;
-    int current_child = 0;
-    std::function<bool(void)> cb;
-
-    bool is_done()
-    {
-        return state == 9;
-    }
-
-    void loop()
-    {
-        if (state == 1)
-            goto resume_1;
-        else if (state == 2)
-            goto resume_2;
-        else if (state == 3)
-            goto resume_3;
-        else if (state == 4)
-            goto resume_4;
-        else if (state == 5)
-            goto resume_5;
-        else if (state == 6)
-            goto resume_6;
-        else if (state == 7)
-            goto resume_7;
-        else if (state == 8)
-            goto resume_8;
-        else if (state == 9)
-            goto resume_9;
-        // Get children to merge
-        get_merge_children();
-        // Try to select an inode for the "inverse" optimized scenario
-        // Read statistics from etcd to do it
-        read_stats();
-        state = 1;
-resume_1:
-        if (parent->waiting > 0)
-            return;
-        choose_inverse_candidate();
-        // Merge children one by one, except our "inverse" child
-        for (current_child = 0; current_child < merge_children.size(); current_child++)
-        {
-            if (merge_children[current_child] == inverse_child)
-                continue;
-            start_merge_child(merge_children[current_child], merge_children[current_child]);
-resume_2:
-            while (!cb())
-            {
-                state = 2;
-                return;
-            }
-            cb = NULL;
-            parent->change_parent(merge_children[current_child], new_parent);
-            state = 3;
-resume_3:
-            if (parent->waiting > 0)
-                return;
-        }
-        // Merge our "inverse" child into our "inverse" parent
-        if (inverse_child != 0)
-        {
-            start_merge_child(inverse_child, inverse_parent);
-resume_4:
-            while (!cb())
-            {
-                state = 4;
-                return;
-            }
-            cb = NULL;
-            // Delete "inverse" child data
-            start_delete_source(inverse_child);
-resume_5:
-            while (!cb())
-            {
-                state = 5;
-                return;
-            }
-            cb = NULL;
-            // Delete "inverse" child metadata, rename parent over it,
-            // and also change parent links of the previous "inverse" child
-            rename_inverse_parent();
-            state = 6;
-resume_6:
-            if (parent->waiting > 0)
-                return;
-        }
-        // Delete parents, except the "inverse" one
-        for (current_child = 0; current_child < chain_list.size(); current_child++)
-        {
-            if (chain_list[current_child] == inverse_parent)
-                continue;
-            start_delete_source(chain_list[current_child]);
-resume_7:
-            while (!cb())
-            {
-                state = 7;
-                return;
-            }
-            cb = NULL;
-            delete_inode_config(chain_list[current_child]);
-            state = 8;
-resume_8:
-            if (parent->waiting > 0)
-                return;
-        }
-        state = 9;
-resume_9:
-        // Done
-        return;
-    }
-
-    void get_merge_children()
-    {
-        // Get all children of from..to
-        inode_config_t *from_cfg = parent->get_inode_cfg(from_name);
-        inode_config_t *to_cfg = parent->get_inode_cfg(to_name);
-        // Check that to_cfg is actually a child of from_cfg
-        // FIXME de-copypaste the following piece of code with snap_merger_t
-        inode_config_t *cur = to_cfg;
-        chain_list.push_back(cur->num);
-        while (cur->num != from_cfg->num && cur->parent_id != 0)
-        {
-            auto it = parent->cli->st_cli.inode_config.find(cur->parent_id);
-            if (it == parent->cli->st_cli.inode_config.end())
-            {
-                fprintf(stderr, "Parent inode of layer %s (id %ld) not found\n", cur->name.c_str(), cur->parent_id);
-                exit(1);
-            }
-            cur = &it->second;
-            chain_list.push_back(cur->num);
-        }
-        if (cur->num != from_cfg->num)
-        {
-            fprintf(stderr, "Layer %s is not a child of %s\n", to_name.c_str(), from_name.c_str());
-            exit(1);
-        }
-        new_parent = from_cfg->parent_id;
-        // Calculate ranks
-        int i = chain_list.size()-1;
-        for (inode_t item: chain_list)
-        {
-            sources[item] = i--;
-        }
-        for (auto & ic: parent->cli->st_cli.inode_config)
-        {
-            if (!ic.second.parent_id)
-            {
-                continue;
-            }
-            auto it = sources.find(ic.second.parent_id);
-            if (it != sources.end() && sources.find(ic.second.num) == sources.end())
-            {
-                merge_children.push_back(ic.second.num);
-                if (ic.second.readonly || writers_stopped)
-                {
-                    inverse_candidates[ic.second.num] = it->second;
-                }
-            }
-        }
-    }
-
-    void read_stats()
-    {
-        if (inverse_candidates.size() == 0)
-        {
-            return;
-        }
-        json11::Json::array reads;
-        for (auto cp: inverse_candidates)
-        {
-            inode_t inode = cp.first;
-            reads.push_back(json11::Json::object {
-                { "request_range", json11::Json::object {
-                    { "key", base64_encode(
-                        parent->cli->st_cli.etcd_prefix+
-                        "/inode/stats/"+std::to_string(INODE_POOL(inode))+
-                        "/"+std::to_string(INODE_NO_POOL(inode))
-                    ) },
-                } }
-            });
-        }
-        for (auto cp: sources)
-        {
-            inode_t inode = cp.first;
-            reads.push_back(json11::Json::object {
-                { "request_range", json11::Json::object {
-                    { "key", base64_encode(
-                        parent->cli->st_cli.etcd_prefix+
-                        "/inode/stats/"+std::to_string(INODE_POOL(inode))+
-                        "/"+std::to_string(INODE_NO_POOL(inode))
-                    ) },
-                } }
-            });
-        }
-        parent->waiting++;
-        parent->cli->st_cli.etcd_txn(json11::Json::object {
-            { "success", reads },
-        }, ETCD_SLOW_TIMEOUT, [this](std::string err, json11::Json data)
-        {
-            parent->waiting--;
-            if (err != "")
-            {
-                fprintf(stderr, "Error reading layer statistics from etcd: %s\n", err.c_str());
-                exit(1);
-            }
-            for (auto inode_result: data["responses"].array_items())
-            {
-                auto kv = parent->cli->st_cli.parse_etcd_kv(inode_result["kvs"][0]);
-                pool_id_t pool_id = 0;
-                inode_t inode = 0;
-                char null_byte = 0;
-                sscanf(kv.key.c_str() + parent->cli->st_cli.etcd_prefix.length()+13, "%u/%lu%c", &pool_id, &inode, &null_byte);
-                if (!inode || null_byte != 0)
-                {
-                    fprintf(stderr, "Bad key returned from etcd: %s\n", kv.key.c_str());
-                    exit(1);
-                }
-                auto pool_cfg_it = parent->cli->st_cli.pool_config.find(pool_id);
-                if (pool_cfg_it == parent->cli->st_cli.pool_config.end())
-                {
-                    fprintf(stderr, "Pool %u does not exist\n", pool_id);
-                    exit(1);
-                }
-                inode = INODE_WITH_POOL(pool_id, inode);
-                auto & pool_cfg = pool_cfg_it->second;
-                uint64_t used_bytes = kv.value["raw_used"].uint64_value() / pool_cfg.pg_size;
-                if (pool_cfg.scheme != POOL_SCHEME_REPLICATED)
-                {
-                    used_bytes *= (pool_cfg.pg_size - pool_cfg.parity_chunks);
-                }
-                inode_used[inode] = used_bytes;
-            }
-            parent->ringloop->wakeup();
-        });
-    }
-
-    void choose_inverse_candidate()
-    {
-        uint64_t max_diff = 0;
-        for (auto cp: inverse_candidates)
-        {
-            inode_t child = cp.first;
-            uint64_t child_used = inode_used[child];
-            int rank = cp.second;
-            for (int i = chain_list.size()-rank; i < chain_list.size(); i++)
-            {
-                inode_t parent = chain_list[i];
-                uint64_t parent_used = inode_used[parent];
-                if (parent_used > child_used && (!max_diff || max_diff < (parent_used-child_used)))
-                {
-                    max_diff = (parent_used-child_used);
-                    inverse_parent = parent;
-                    inverse_child = child;
-                }
-            }
-        }
-    }
-
-    void rename_inverse_parent()
-    {
-        auto child_it = parent->cli->st_cli.inode_config.find(inverse_child);
-        if (child_it == parent->cli->st_cli.inode_config.end())
-        {
-            fprintf(stderr, "Inode %ld disappeared\n", inverse_child);
-            exit(1);
-        }
-        auto target_it = parent->cli->st_cli.inode_config.find(inverse_parent);
-        if (target_it == parent->cli->st_cli.inode_config.end())
-        {
-            fprintf(stderr, "Inode %ld disappeared\n", inverse_parent);
-            exit(1);
-        }
-        inode_config_t *child_cfg = &child_it->second;
-        inode_config_t *target_cfg = &target_it->second;
-        std::string child_name = child_cfg->name;
-        std::string target_name = target_cfg->name;
-        std::string child_cfg_key = base64_encode(
-            parent->cli->st_cli.etcd_prefix+
-            "/config/inode/"+std::to_string(INODE_POOL(inverse_child))+
-            "/"+std::to_string(INODE_NO_POOL(inverse_child))
-        );
-        std::string target_cfg_key = base64_encode(
-            parent->cli->st_cli.etcd_prefix+
-            "/config/inode/"+std::to_string(INODE_POOL(inverse_parent))+
-            "/"+std::to_string(INODE_NO_POOL(inverse_parent))
-        );
-        // Fill new configuration
-        inode_config_t new_cfg = *child_cfg;
-        new_cfg.num = target_cfg->num;
-        new_cfg.parent_id = new_parent;
-        json11::Json::array cmp = json11::Json::array {
-            json11::Json::object {
-                { "target", "MOD" },
-                { "key", child_cfg_key },
-                { "result", "LESS" },
-                { "mod_revision", child_cfg->mod_revision+1 },
-            },
-            json11::Json::object {
-                { "target", "MOD" },
-                { "key", target_cfg_key },
-                { "result", "LESS" },
-                { "mod_revision", target_cfg->mod_revision+1 },
-            },
-        };
-        json11::Json::array txn = json11::Json::array {
-            json11::Json::object {
-                { "request_delete_range", json11::Json::object {
-                    { "key", child_cfg_key },
-                } },
-            },
-            json11::Json::object {
-                { "request_put", json11::Json::object {
-                    { "key", target_cfg_key },
-                    { "value", base64_encode(json11::Json(parent->cli->st_cli.serialize_inode_cfg(&new_cfg)).dump()) },
-                } },
-            },
-            json11::Json::object {
-                { "request_put", json11::Json::object {
-                    { "key", base64_encode(parent->cli->st_cli.etcd_prefix+"/index/image/"+child_cfg->name) },
-                    { "value", base64_encode(json11::Json({
-                        { "id", INODE_NO_POOL(inverse_parent) },
-                        { "pool_id", (uint64_t)INODE_POOL(inverse_parent) },
-                    }).dump()) },
-                } },
-            },
-        };
-        // Reparent children of inverse_child
-        for (auto & cp: parent->cli->st_cli.inode_config)
-        {
-            if (cp.second.parent_id == child_cfg->num)
-            {
-                auto cp_cfg = cp.second;
-                cp_cfg.parent_id = inverse_parent;
-                auto cp_key = base64_encode(
-                    parent->cli->st_cli.etcd_prefix+
-                    "/config/inode/"+std::to_string(INODE_POOL(cp.second.num))+
-                    "/"+std::to_string(INODE_NO_POOL(cp.second.num))
-                );
-                cmp.push_back(json11::Json::object {
-                    { "target", "MOD" },
-                    { "key", cp_key },
-                    { "result", "LESS" },
-                    { "mod_revision", cp.second.mod_revision+1 },
-                });
-                txn.push_back(json11::Json::object {
-                    { "request_put", json11::Json::object {
-                        { "key", cp_key },
-                        { "value", base64_encode(json11::Json(parent->cli->st_cli.serialize_inode_cfg(&cp_cfg)).dump()) },
-                    } },
-                });
-            }
-        }
-        parent->waiting++;
-        parent->cli->st_cli.etcd_txn(json11::Json::object {
-            { "compare", cmp },
-            { "success", txn },
-        }, ETCD_SLOW_TIMEOUT, [this, target_name, child_name](std::string err, json11::Json res)
-        {
-            parent->waiting--;
-            if (err != "")
-            {
-                fprintf(stderr, "Error renaming %s to %s: %s\n", target_name.c_str(), child_name.c_str(), err.c_str());
-                exit(1);
-            }
-            if (!res["succeeded"].bool_value())
-            {
-                fprintf(
-                    stderr, "Parent (%s), child (%s), or one of its children"
-                    " configuration was modified during rename\n", target_name.c_str(), child_name.c_str()
-                );
-                exit(1);
-            }
-            printf("Layer %s renamed to %s\n", target_name.c_str(), child_name.c_str());
-            parent->ringloop->wakeup();
-        });
-    }
-
-    void delete_inode_config(inode_t cur)
-    {
-        auto cur_cfg_it = parent->cli->st_cli.inode_config.find(cur);
-        if (cur_cfg_it == parent->cli->st_cli.inode_config.end())
-        {
-            fprintf(stderr, "Inode 0x%lx disappeared\n", cur);
-            exit(1);
-        }
-        inode_config_t *cur_cfg = &cur_cfg_it->second;
-        std::string cur_name = cur_cfg->name;
-        std::string cur_cfg_key = base64_encode(
-            parent->cli->st_cli.etcd_prefix+
-            "/config/inode/"+std::to_string(INODE_POOL(cur))+
-            "/"+std::to_string(INODE_NO_POOL(cur))
-        );
-        parent->waiting++;
-        parent->cli->st_cli.etcd_txn(json11::Json::object {
-            { "compare", json11::Json::array {
-                json11::Json::object {
-                    { "target", "MOD" },
-                    { "key", cur_cfg_key },
-                    { "result", "LESS" },
-                    { "mod_revision", cur_cfg->mod_revision+1 },
-                },
-            } },
-            { "success", json11::Json::array {
-                json11::Json::object {
-                    { "request_delete_range", json11::Json::object {
-                        { "key", cur_cfg_key },
-                    } },
-                    { "request_delete_range", json11::Json::object {
-                        { "key", base64_encode(parent->cli->st_cli.etcd_prefix+"/index/image/"+cur_name) },
-                    } },
-                },
-            } },
-        }, ETCD_SLOW_TIMEOUT, [this, cur_name](std::string err, json11::Json res)
-        {
-            parent->waiting--;
-            if (err != "")
-            {
-                fprintf(stderr, "Error deleting %s: %s\n", cur_name.c_str(), err.c_str());
-                exit(1);
-            }
-            if (!res["succeeded"].bool_value())
-            {
-                fprintf(stderr, "Layer %s configuration was modified during deletion\n", cur_name.c_str());
-                exit(1);
-            }
-            printf("Layer %s deleted\n", cur_name.c_str());
-            parent->ringloop->wakeup();
-        });
-    }
-
-    void start_merge_child(inode_t child_inode, inode_t target_inode)
-    {
-        auto child_it = parent->cli->st_cli.inode_config.find(child_inode);
-        if (child_it == parent->cli->st_cli.inode_config.end())
-        {
-            fprintf(stderr, "Inode %ld disappeared\n", child_inode);
-            exit(1);
-        }
-        auto target_it = parent->cli->st_cli.inode_config.find(target_inode);
-        if (target_it == parent->cli->st_cli.inode_config.end())
-        {
-            fprintf(stderr, "Inode %ld disappeared\n", target_inode);
-            exit(1);
-        }
-        cb = parent->start_merge(json11::Json::object {
-            { "command", json11::Json::array{ "merge-data", from_name, child_it->second.name } },
-            { "target", target_it->second.name },
-            { "delete-source", false },
-            { "cas", use_cas },
-            { "fsync-interval", fsync_interval },
-        });
-    }
-
-    void start_delete_source(inode_t inode)
-    {
-        auto source = parent->cli->st_cli.inode_config.find(inode);
-        if (source == parent->cli->st_cli.inode_config.end())
-        {
-            fprintf(stderr, "Inode %ld disappeared\n", inode);
-            exit(1);
-        }
-        cb = parent->start_rm(json11::Json::object {
-            { "inode", inode },
-            { "pool", (uint64_t)INODE_POOL(inode) },
-            { "fsync-interval", fsync_interval },
-        });
-    }
-};
-
-std::function<bool(void)> cli_tool_t::start_snap_rm(json11::Json cfg)
-{
-    json11::Json::array cmd = cfg["command"].array_items();
-    auto snap_remover = new snap_remover_t();
-    snap_remover->parent = this;
-    snap_remover->from_name = cmd.size() > 1 ? cmd[1].string_value() : "";
-    snap_remover->to_name = cmd.size() > 2 ? cmd[2].string_value() : "";
-    if (snap_remover->from_name == "")
-    {
-        fprintf(stderr, "Layer to remove argument is missing\n");
-        exit(1);
-    }
-    if (snap_remover->to_name == "")
-    {
-        snap_remover->to_name = snap_remover->from_name;
-    }
-    snap_remover->fsync_interval = cfg["fsync-interval"].uint64_value();
-    if (!snap_remover->fsync_interval)
-        snap_remover->fsync_interval = 128;
-    if (!cfg["cas"].is_null())
-        snap_remover->use_cas = cfg["cas"].uint64_value() ? 2 : 0;
-    if (!cfg["writers_stopped"].is_null())
-        snap_remover->writers_stopped = true;
-    return [snap_remover]()
-    {
-        snap_remover->loop();
-        if (snap_remover->is_done())
-        {
-            delete snap_remover;
-            return true;
-        }
-        return false;
-    };
-}
--- a/src/cluster_client.cpp
+++ b/src/cluster_client.cpp
@@ -12,7 +12,7 @@
 #define CACHE_DIRTY 1
 #define CACHE_FLUSHING 2
 #define CACHE_REPEATING 3
-#define OP_FLUSH_BUFFER 0x02
+#define OP_FLUSH_BUFFER 2

 cluster_client_t::cluster_client_t(ring_loop_t *ringloop, timerfd_manager_t *tfd, json11::Json & config)
 {
@@ -31,7 +31,6 @@ cluster_client_t::cluster_client_t(ring_loop_t *ringloop, timerfd_manager_t *tfd
        {
            // peer_osd just connected
            continue_ops();
-            continue_lists();
        }
        else if (dirty_buffers.size())
        {
@@ -140,7 +139,7 @@ void cluster_client_t::calc_wait(cluster_op_t *op)
        if (!op->prev_wait && pgs_loaded)
            continue_sync(op);
    }
-    else /* if (op->opcode == OSD_OP_READ || op->opcode == OSD_OP_READ_BITMAP) */
+    else
    {
        for (auto prev = op->prev; prev; prev = prev->prev)
        {
@@ -148,7 +147,7 @@ void cluster_client_t::calc_wait(cluster_op_t *op)
            {
                op->prev_wait++;
            }
-            else if (prev->opcode == OSD_OP_WRITE || prev->opcode == OSD_OP_READ || prev->opcode == OSD_OP_READ_BITMAP)
+            else if (prev->opcode == OSD_OP_WRITE || prev->opcode == OSD_OP_READ)
            {
                // Flushes are always in the beginning
                break;
@@ -168,7 +167,7 @@ void cluster_client_t::inc_wait(uint64_t opcode, uint64_t flags, cluster_op_t *n
            auto n2 = next->next;
            if (next->opcode == OSD_OP_SYNC ||
                next->opcode == OSD_OP_WRITE && (flags & OP_FLUSH_BUFFER) && !(next->flags & OP_FLUSH_BUFFER) ||
-                (next->opcode == OSD_OP_READ || next->opcode == OSD_OP_READ_BITMAP) && (flags & OP_FLUSH_BUFFER))
+                next->opcode == OSD_OP_READ && (flags & OP_FLUSH_BUFFER))
            {
                next->prev_wait += inc;
                if (!next->prev_wait)
@@ -358,7 +357,7 @@ void cluster_client_t::on_change_hook(std::map<std::string, etcd_kv_t> & changes
            // And now they have to be resliced!
            for (auto op = op_queue_head; op; op = op->next)
            {
-                if ((op->opcode == OSD_OP_WRITE || op->opcode == OSD_OP_READ || op->opcode == OSD_OP_READ_BITMAP) &&
+                if ((op->opcode == OSD_OP_WRITE || op->opcode == OSD_OP_READ) &&
                    INODE_POOL(op->cur_inode) == pool_item.first)
                {
                    op->needs_reslice = true;
@@ -418,8 +417,7 @@ void cluster_client_t::on_ready(std::function<void(void)> fn)
 */
 void cluster_client_t::execute(cluster_op_t *op)
 {
-    if (op->opcode != OSD_OP_SYNC && op->opcode != OSD_OP_READ &&
-        op->opcode != OSD_OP_READ_BITMAP && op->opcode != OSD_OP_WRITE)
+    if (op->opcode != OSD_OP_SYNC && op->opcode != OSD_OP_READ && op->opcode != OSD_OP_WRITE)
    {
        op->retval = -EINVAL;
        std::function<void(cluster_op_t*)>(op->callback)(op);
@@ -559,7 +557,7 @@ void cluster_client_t::flush_buffer(const object_id & oid, cluster_buffer_t *wr)
 {
    wr->state = CACHE_REPEATING;
    cluster_op_t *op = new cluster_op_t;
-    op->flags = OSD_OP_IGNORE_READONLY|OP_FLUSH_BUFFER;
+    op->flags = OP_FLUSH_BUFFER;
    op->opcode = OSD_OP_WRITE;
    op->cur_inode = op->inode = oid.inode;
    op->offset = oid.stripe;
@@ -596,8 +594,7 @@ int cluster_client_t::continue_rw(cluster_op_t *op)
    else if (op->state == 3)
        goto resume_3;
 resume_0:
-    if ((op->opcode == OSD_OP_READ || op->opcode == OSD_OP_WRITE) && !op->len ||
-        op->offset % bs_bitmap_granularity || op->len % bs_bitmap_granularity)
+    if (!op->len || op->offset % bs_bitmap_granularity || op->len % bs_bitmap_granularity)
    {
        op->retval = -EINVAL;
        erase_op(op);
@@ -618,19 +615,16 @@ resume_0:
            return 0;
        }
    }
-    if (op->opcode == OSD_OP_WRITE || op->opcode == OSD_OP_DELETE)
+    if (op->opcode == OSD_OP_WRITE)
    {
-        if (!(op->flags & OSD_OP_IGNORE_READONLY))
+        auto ino_it = st_cli.inode_config.find(op->inode);
+        if (ino_it != st_cli.inode_config.end() && ino_it->second.readonly)
        {
-            auto ino_it = st_cli.inode_config.find(op->inode);
-            if (ino_it != st_cli.inode_config.end() && ino_it->second.readonly)
-            {
-                op->retval = -EINVAL;
-                erase_op(op);
-                return 1;
-            }
+            op->retval = -EINVAL;
+            erase_op(op);
+            return 1;
        }
-        if (op->opcode == OSD_OP_WRITE && !immediate_commit && !(op->flags & OP_FLUSH_BUFFER))
+        if (!immediate_commit && !(op->flags & OP_FLUSH_BUFFER))
        {
            copy_write(op, dirty_buffers);
        }
@@ -639,7 +633,7 @@ resume_1:
    // Slice the operation into parts
    slice_rw(op);
    op->needs_reslice = false;
-    if ((op->opcode == OSD_OP_WRITE || op->opcode == OSD_OP_DELETE) && op->version && op->parts.size() > 1)
+    if (op->opcode == OSD_OP_WRITE && op->version && op->parts.size() > 1)
    {
        // Atomic writes to multiple stripes are unsupported
        op->retval = -EINVAL;
@@ -799,13 +793,13 @@ void cluster_client_t::slice_rw(cluster_op_t *op)
    uint32_t pg_data_size = (pool_cfg.scheme == POOL_SCHEME_REPLICATED ? 1 : pool_cfg.pg_size-pool_cfg.parity_chunks);
    uint64_t pg_block_size = bs_block_size * pg_data_size;
    uint64_t first_stripe = (op->offset / pg_block_size) * pg_block_size;
-    uint64_t last_stripe = op->len > 0 ? ((op->offset + op->len - 1) / pg_block_size) * pg_block_size : first_stripe;
+    uint64_t last_stripe = ((op->offset + op->len + pg_block_size - 1) / pg_block_size - 1) * pg_block_size;
    op->retval = 0;
    op->parts.resize((last_stripe - first_stripe) / pg_block_size + 1);
-    if (op->opcode == OSD_OP_READ || op->opcode == OSD_OP_READ_BITMAP)
+    if (op->opcode == OSD_OP_READ)
    {
        // Allocate memory for the bitmap
-        unsigned object_bitmap_size = (((op->opcode == OSD_OP_READ_BITMAP ? pg_block_size : op->len) / bs_bitmap_granularity + 7) / 8);
+        unsigned object_bitmap_size = ((op->len / bs_bitmap_granularity + 7) / 8);
        object_bitmap_size = (object_bitmap_size < 8 ? 8 : object_bitmap_size);
        unsigned bitmap_mem = object_bitmap_size + (bs_bitmap_size * pg_data_size) * op->parts.size();
        if (op->bitmap_buf_size < bitmap_mem)
@@ -869,13 +863,13 @@ void cluster_client_t::slice_rw(cluster_op_t *op)
            if (end == begin)
                op->done_count++;
        }
-        else if (op->opcode != OSD_OP_READ_BITMAP && op->opcode != OSD_OP_DELETE)
+        else
        {
            add_iov(end-begin, false, op, iov_idx, iov_pos, op->parts[i].iov, NULL, 0);
        }
        op->parts[i].parent = op;
        op->parts[i].offset = begin;
-        op->parts[i].len = op->opcode == OSD_OP_READ_BITMAP || op->opcode == OSD_OP_DELETE ? 0 : (uint32_t)(end - begin);
+        op->parts[i].len = (uint32_t)(end - begin);
        op->parts[i].pg_num = pg_num;
        op->parts[i].osd_num = 0;
        op->parts[i].flags = 0;
@@ -889,7 +883,7 @@ bool cluster_client_t::affects_osd(uint64_t inode, uint64_t offset, uint64_t len
    uint32_t pg_data_size = (pool_cfg.scheme == POOL_SCHEME_REPLICATED ? 1 : pool_cfg.pg_size-pool_cfg.parity_chunks);
    uint64_t pg_block_size = bs_block_size * pg_data_size;
    uint64_t first_stripe = (offset / pg_block_size) * pg_block_size;
-    uint64_t last_stripe = len > 0 ? ((offset + len - 1) / pg_block_size) * pg_block_size : first_stripe;
+    uint64_t last_stripe = ((offset + len + pg_block_size - 1) / pg_block_size - 1) * pg_block_size;
    for (uint64_t stripe = first_stripe; stripe <= last_stripe; stripe += pg_block_size)
    {
        pg_num_t pg_num = (stripe/pool_cfg.pg_stripe_size) % pool_cfg.real_pg_count + 1; // like map_to_pg()
@@ -922,12 +916,9 @@ bool cluster_client_t::try_send(cluster_op_t *op, int i)
                pool_cfg.scheme == POOL_SCHEME_REPLICATED ? 1 : pool_cfg.pg_size-pool_cfg.parity_chunks
            );
            uint64_t meta_rev = 0;
-            if (op->opcode != OSD_OP_READ_BITMAP && op->opcode != OSD_OP_DELETE)
-            {
-                auto ino_it = st_cli.inode_config.find(op->inode);
-                if (ino_it != st_cli.inode_config.end())
-                    meta_rev = ino_it->second.mod_revision;
-            }
+            auto ino_it = st_cli.inode_config.find(op->inode);
+            if (ino_it != st_cli.inode_config.end())
+                meta_rev = ino_it->second.mod_revision;
            part->op = (osd_op_t){
                .op_type = OSD_OP_OUT,
                .peer_fd = peer_fd,
@@ -935,16 +926,16 @@ bool cluster_client_t::try_send(cluster_op_t *op, int i)
                    .header = {
                        .magic = SECONDARY_OSD_OP_MAGIC,
                        .id = op_id++,
-                        .opcode = op->opcode == OSD_OP_READ_BITMAP ? OSD_OP_READ : op->opcode,
+                        .opcode = op->opcode,
                    },
                    .inode = op->cur_inode,
                    .offset = part->offset,
                    .len = part->len,
                    .meta_revision = meta_rev,
-                    .version = op->opcode == OSD_OP_WRITE || op->opcode == OSD_OP_DELETE ? op->version : 0,
+                    .version = op->opcode == OSD_OP_WRITE ? op->version : 0,
                } },
-                .bitmap = (op->opcode == OSD_OP_READ || op->opcode == OSD_OP_READ_BITMAP ? op->part_bitmaps + pg_bitmap_size*i : NULL),
-                .bitmap_len = (unsigned)(op->opcode == OSD_OP_READ || op->opcode == OSD_OP_READ_BITMAP ? pg_bitmap_size : 0),
+                .bitmap = op->opcode == OSD_OP_WRITE ? NULL : op->part_bitmaps + pg_bitmap_size*i,
+                .bitmap_len = (unsigned)(op->opcode == OSD_OP_WRITE ? 0 : pg_bitmap_size),
                .callback = [this, part](osd_op_t *op_part)
                {
                    handle_op_part(part);
@@ -1126,7 +1117,7 @@ void cluster_client_t::handle_op_part(cluster_op_part_t *part)
        dirty_osds.insert(part->osd_num);
        part->flags |= PART_DONE;
        op->done_count++;
-        if (op->opcode == OSD_OP_READ || op->opcode == OSD_OP_READ_BITMAP)
+        if (op->opcode == OSD_OP_READ)
        {
            copy_part_bitmap(op, part);
            op->version = op->parts.size() == 1 ? part->op.reply.rw.version : 0;
@@ -1150,7 +1141,7 @@ void cluster_client_t::copy_part_bitmap(cluster_op_t *op, cluster_op_part_t *par
    );
    uint32_t object_offset = (part->op.req.rw.offset - op->offset) / bs_bitmap_granularity;
    uint32_t part_offset = (part->op.req.rw.offset % pg_block_size) / bs_bitmap_granularity;
-    uint32_t part_len = (op->opcode == OSD_OP_READ_BITMAP ? pg_block_size : part->op.req.rw.len) / bs_bitmap_granularity;
+    uint32_t part_len = part->op.req.rw.len / bs_bitmap_granularity;
    if (!(object_offset & 0x7) && !(part_offset & 0x7) && (part_len >= 8))
    {
        // Copy bytes
@@ -1170,8 +1161,3 @@ void cluster_client_t::copy_part_bitmap(cluster_op_t *op, cluster_op_part_t *par
        part_len--;
    }
 }
-
-uint64_t cluster_client_t::next_op_id()
-{
-    return op_id++;
-}
--- a/src/cluster_client.h
+++ b/src/cluster_client.h
@@ -10,11 +10,6 @@
 #define MAX_BLOCK_SIZE 128*1024*1024
 #define DEFAULT_CLIENT_MAX_DIRTY_BYTES 32*1024*1024
 #define DEFAULT_CLIENT_MAX_DIRTY_OPS 1024
-#define INODE_LIST_DONE 1
-#define INODE_LIST_HAS_UNSTABLE 2
-#define OSD_OP_READ_BITMAP OSD_OP_SEC_READ_BMP
-
-#define OSD_OP_IGNORE_READONLY 0x08

 struct cluster_op_t;

@@ -32,22 +27,19 @@ struct cluster_op_part_t

 struct cluster_op_t
 {
-    uint64_t opcode; // OSD_OP_READ, OSD_OP_WRITE, OSD_OP_SYNC, OSD_OP_DELETE, OSD_OP_READ_BITMAP
+    uint64_t opcode; // OSD_OP_READ, OSD_OP_WRITE, OSD_OP_SYNC
    uint64_t inode;
    uint64_t offset;
    uint64_t len;
    // for reads and writes within a single object (stripe),
    // reads can return current version and writes can use "CAS" semantics
    uint64_t version = 0;
-    // now only OSD_OP_IGNORE_READONLY is supported
-    uint64_t flags = 0;
    int retval;
    osd_op_buf_list_t iov;
-    // READ and READ_BITMAP return the bitmap here
-    void *bitmap_buf = NULL;
    std::function<void(cluster_op_t*)> callback;
    ~cluster_op_t();
 protected:
+    uint64_t flags = 0;
    int state = 0;
    uint64_t cur_inode; // for snapshot reads
    void *buf = NULL;
@@ -56,7 +48,7 @@ protected:
    bool up_wait = false;
    int inflight_count = 0, done_count = 0;
    std::vector<cluster_op_part_t> parts;
-    void *part_bitmaps = NULL;
+    void *bitmap_buf = NULL, *part_bitmaps = NULL;
    unsigned bitmap_buf_size = 0;
    cluster_op_t *prev = NULL, *next = NULL;
    int prev_wait = 0;
@@ -70,9 +62,6 @@ struct cluster_buffer_t
    int state;
 };

-struct inode_list_t;
-struct inode_list_osd_t;
-
 // FIXME: Split into public and private interfaces
 class cluster_client_t
 {
@@ -104,7 +93,6 @@ class cluster_client_t
    bool pgs_loaded = false;
    ring_consumer_t consumer;
    std::vector<std::function<void(void)>> on_ready_hooks;
-    std::vector<inode_list_t*> lists;
    int continuing_ops = 0;

 public:
@@ -120,14 +108,6 @@ public:

    static void copy_write(cluster_op_t *op, std::map<object_id, cluster_buffer_t> & dirty_buffers);
    void continue_ops(bool up_retry = false);
-    inode_list_t *list_inode_start(inode_t inode,
-        std::function<void(inode_list_t* lst, std::set<object_id>&& objects, pg_num_t pg_num, osd_num_t primary_osd, int status)> callback);
-    int list_pg_count(inode_list_t *lst);
-    void list_inode_next(inode_list_t *lst, int next_pgs);
-    inline uint32_t get_bs_bitmap_granularity() { return bs_bitmap_granularity; }
-    inline uint64_t get_bs_block_size() { return bs_block_size; }
-    uint64_t next_op_id();
-
 protected:
    bool affects_osd(uint64_t inode, uint64_t offset, uint64_t len, osd_num_t osd);
    void flush_buffer(const object_id & oid, cluster_buffer_t *wr);
@@ -145,7 +125,4 @@ protected:
    void erase_op(cluster_op_t *op);
    void calc_wait(cluster_op_t *op);
    void inc_wait(uint64_t opcode, uint64_t flags, cluster_op_t *next, int inc);
-    void continue_lists();
-    void continue_listing(inode_list_t *lst);
-    void send_list(inode_list_osd_t *cur_list);
 };
--- a/src/cluster_client_list.cpp
+++ b/src/cluster_client_list.cpp
@@ -1,285 +0,0 @@
-// Copyright (c) Vitaliy Filippov, 2019+
-// License: VNPL-1.1 or GNU GPL-2.0+ (see README.md for details)
-
-#include <algorithm>
-#include "pg_states.h"
-#include "cluster_client.h"
-
-struct inode_list_t;
-
-struct inode_list_pg_t;
-
-struct inode_list_osd_t
-{
-    inode_list_pg_t *pg = NULL;
-    osd_num_t osd_num = 0;
-    bool sent = false;
-};
-
-struct inode_list_pg_t
-{
-    inode_list_t *lst = NULL;
-    int pos = 0;
-    pg_num_t pg_num;
-    osd_num_t cur_primary;
-    bool has_unstable = false;
-    int sent = 0;
-    int done = 0;
-    std::vector<inode_list_osd_t> list_osds;
-    std::set<object_id> objects;
-};
-
-struct inode_list_t
-{
-    cluster_client_t *cli = NULL;
-    pool_id_t pool_id = 0;
-    inode_t inode = 0;
-    int done_pgs = 0;
-    int want = 0;
-    std::vector<inode_list_pg_t*> pgs;
-    std::function<void(inode_list_t* lst, std::set<object_id>&& objects, pg_num_t pg_num, osd_num_t primary_osd, int status)> callback;
-};
-
-inode_list_t* cluster_client_t::list_inode_start(inode_t inode,
-    std::function<void(inode_list_t* lst, std::set<object_id>&& objects, pg_num_t pg_num, osd_num_t primary_osd, int status)> callback)
-{
-    int skipped_pgs = 0;
-    pool_id_t pool_id = INODE_POOL(inode);
-    if (!pool_id || st_cli.pool_config.find(pool_id) == st_cli.pool_config.end())
-    {
-        if (log_level > 0)
-        {
-            fprintf(stderr, "Pool %u does not exist\n", pool_id);
-        }
-        return NULL;
-    }
-    inode_list_t *lst = new inode_list_t();
-    lst->cli = this;
-    lst->pool_id = pool_id;
-    lst->inode = inode;
-    lst->callback = callback;
-    auto pool_cfg = st_cli.pool_config[pool_id];
-    for (auto & pg_item: pool_cfg.pg_config)
-    {
-        auto & pg = pg_item.second;
-        if (pg.pause || !pg.cur_primary || !(pg.cur_state & PG_ACTIVE))
-        {
-            skipped_pgs++;
-            if (log_level > 0)
-            {
-                fprintf(stderr, "PG %u is inactive, skipping\n", pg_item.first);
-            }
-            continue;
-        }
-        inode_list_pg_t *r = new inode_list_pg_t();
-        r->lst = lst;
-        r->pg_num = pg_item.first;
-        r->cur_primary = pg.cur_primary;
-        if (pg.cur_state != PG_ACTIVE)
-        {
-            // Not clean
-            std::set<osd_num_t> all_peers;
-            for (osd_num_t pg_osd: pg.target_set)
-            {
-                if (pg_osd != 0)
-                {
-                    all_peers.insert(pg_osd);
-                }
-            }
-            for (osd_num_t pg_osd: pg.all_peers)
-            {
-                if (pg_osd != 0)
-                {
-                    all_peers.insert(pg_osd);
-                }
-            }
-            for (auto & hist_item: pg.target_history)
-            {
-                for (auto pg_osd: hist_item)
-                {
-                    if (pg_osd != 0)
-                    {
-                        all_peers.insert(pg_osd);
-                    }
-                }
-            }
-            for (osd_num_t peer_osd: all_peers)
-            {
-                r->list_osds.push_back((inode_list_osd_t){
-                    .pg = r,
-                    .osd_num = peer_osd,
-                    .sent = false,
-                });
-            }
-        }
-        else
-        {
-            // Clean
-            r->list_osds.push_back((inode_list_osd_t){
-                .pg = r,
-                .osd_num = pg.cur_primary,
-                .sent = false,
-            });
-        }
-        lst->pgs.push_back(r);
-    }
-    std::sort(lst->pgs.begin(), lst->pgs.end(), [](inode_list_pg_t *a, inode_list_pg_t *b)
-    {
-        return a->cur_primary < b->cur_primary ? true : false;
-    });
-    for (int i = 0; i < lst->pgs.size(); i++)
-    {
-        lst->pgs[i]->pos = i;
-    }
-    lists.push_back(lst);
-    return lst;
-}
-
-int cluster_client_t::list_pg_count(inode_list_t *lst)
-{
-    return lst->pgs.size();
-}
-
-void cluster_client_t::list_inode_next(inode_list_t *lst, int next_pgs)
-{
-    if (next_pgs >= 0)
-    {
-        lst->want += next_pgs;
-    }
-    continue_listing(lst);
-}
-
-void cluster_client_t::continue_listing(inode_list_t *lst)
-{
-    if (lst->done_pgs >= lst->pgs.size())
-    {
-        // All done
-        for (int i = 0; i < lists.size(); i++)
-        {
-            if (lists[i] == lst)
-            {
-                lists.erase(lists.begin()+i, lists.begin()+i+1);
-                break;
-            }
-        }
-        delete lst;
-        return;
-    }
-    if (lst->want <= 0)
-    {
-        return;
-    }
-    for (int i = 0; i < lst->pgs.size(); i++)
-    {
-        if (lst->pgs[i] && lst->pgs[i]->sent < lst->pgs[i]->list_osds.size())
-        {
-            for (int j = 0; j < lst->pgs[i]->list_osds.size(); j++)
-            {
-                send_list(&lst->pgs[i]->list_osds[j]);
-                if (lst->want <= 0)
-                {
-                    break;
-                }
-            }
-        }
-    }
-}
-
-void cluster_client_t::send_list(inode_list_osd_t *cur_list)
-{
-    if (cur_list->sent)
-    {
-        return;
-    }
-    if (msgr.osd_peer_fds.find(cur_list->osd_num) == msgr.osd_peer_fds.end())
-    {
-        // Initiate connection
-        msgr.connect_peer(cur_list->osd_num, st_cli.peer_states[cur_list->osd_num]);
-        return;
-    }
-    auto & pool_cfg = st_cli.pool_config[cur_list->pg->lst->pool_id];
-    osd_op_t *op = new osd_op_t();
-    op->op_type = OSD_OP_OUT;
-    op->peer_fd = msgr.osd_peer_fds[cur_list->osd_num];
-    op->req = (osd_any_op_t){
-        .sec_list = {
-            .header = {
-                .magic = SECONDARY_OSD_OP_MAGIC,
-                .id = op_id++,
-                .opcode = OSD_OP_SEC_LIST,
-            },
-            .list_pg = cur_list->pg->pg_num,
-            .pg_count = (pg_num_t)pool_cfg.real_pg_count,
-            .pg_stripe_size = pool_cfg.pg_stripe_size,
-            .min_inode = cur_list->pg->lst->inode,
-            .max_inode = cur_list->pg->lst->inode,
-        },
-    };
-    op->callback = [this, cur_list](osd_op_t *op)
-    {
-        if (op->reply.hdr.retval < 0)
-        {
-            fprintf(stderr, "Failed to get PG %u/%u object list from OSD %lu (retval=%ld), skipping\n",
-                cur_list->pg->lst->pool_id, cur_list->pg->pg_num, cur_list->osd_num, op->reply.hdr.retval);
-        }
-        else
-        {
-            if (op->reply.sec_list.stable_count < op->reply.hdr.retval)
-            {
-                // Unstable objects, if present, mean that someone still writes into the inode. Warn the user about it.
-                cur_list->pg->has_unstable = true;
-                fprintf(
-                    stderr, "[PG %u/%u] Inode still has %lu unstable object versions out of total %lu - is it still open?\n",
-                    cur_list->pg->lst->pool_id, cur_list->pg->pg_num, op->reply.hdr.retval - op->reply.sec_list.stable_count,
-                    op->reply.hdr.retval
-                );
-            }
-            if (log_level > 0)
-            {
-                fprintf(
-                    stderr, "[PG %u/%u] Got inode object list from OSD %lu: %ld object versions\n",
-                    cur_list->pg->lst->pool_id, cur_list->pg->pg_num, cur_list->osd_num, op->reply.hdr.retval
-                );
-            }
-            for (uint64_t i = 0; i < op->reply.hdr.retval; i++)
-            {
-                object_id oid = ((obj_ver_id*)op->buf)[i].oid;
-                oid.stripe = oid.stripe & ~STRIPE_MASK;
-                cur_list->pg->objects.insert(oid);
-            }
-        }
-        delete op;
-        auto lst = cur_list->pg->lst;
-        auto pg = cur_list->pg;
-        pg->done++;
-        if (pg->done >= pg->list_osds.size())
-        {
-            int status = 0;
-            lst->done_pgs++;
-            if (lst->done_pgs >= lst->pgs.size())
-            {
-                status |= INODE_LIST_DONE;
-            }
-            if (pg->has_unstable)
-            {
-                status |= INODE_LIST_HAS_UNSTABLE;
-            }
-            lst->callback(lst, std::move(pg->objects), pg->pg_num, pg->cur_primary, status);
-            lst->pgs[pg->pos] = NULL;
-            delete pg;
-        }
-        continue_listing(lst);
-    };
-    msgr.outbox_push(op);
-    cur_list->sent = true;
-    cur_list->pg->sent++;
-    cur_list->pg->lst->want--;
-}
-
-void cluster_client_t::continue_lists()
-{
-    for (auto lst: lists)
-    {
-        continue_listing(lst);
-    }
-}
--- a/src/etcd_state_client.cpp
+++ b/src/etcd_state_client.cpp
@@ -765,22 +765,3 @@ void etcd_state_client_t::close_watch(inode_watch_t* watch)
    }
    delete watch;
 }
-
-json11::Json::object & etcd_state_client_t::serialize_inode_cfg(inode_config_t *cfg)
-{
-    json11::Json::object new_cfg = json11::Json::object {
-        { "name", cfg->name },
-        { "size", cfg->size },
-    };
-    if (cfg->parent_id)
-    {
-        if (INODE_POOL(cfg->num) != INODE_POOL(cfg->parent_id))
-            new_cfg["parent_pool"] = (uint64_t)INODE_POOL(cfg->parent_id);
-        new_cfg["parent_id"] = (uint64_t)INODE_NO_POOL(cfg->parent_id);
-    }
-    if (cfg->readonly)
-    {
-        new_cfg["readonly"] = true;
-    }
-    return new_cfg;
-}
--- a/src/etcd_state_client.h
+++ b/src/etcd_state_client.h
@@ -99,7 +99,6 @@ public:
    std::function<void(pool_id_t, pg_num_t)> on_change_pg_history_hook;
    std::function<void(osd_num_t)> on_change_osd_state_hook;

-    json11::Json::object & serialize_inode_cfg(inode_config_t *cfg);
    etcd_kv_t parse_etcd_kv(const json11::Json & kv_json);
    void etcd_call(std::string api, json11::Json payload, int timeout, std::function<void(std::string, json11::Json)> callback);
    void etcd_txn(json11::Json txn, int timeout, std::function<void(std::string, json11::Json)> callback);
--- a/src/messenger.cpp
+++ b/src/messenger.cpp
@@ -117,7 +117,7 @@ osd_messenger_t::~osd_messenger_t()
    }
    while (clients.size() > 0)
    {
-        stop_client(clients.begin()->first, true, true);
+        stop_client(clients.begin()->first, true);
    }
 #ifdef WITH_RDMA
    if (rdma_context)
--- a/src/messenger.h
+++ b/src/messenger.h
@@ -156,7 +156,7 @@ public:
    void init();
    void parse_config(const json11::Json & config);
    void connect_peer(uint64_t osd_num, json11::Json peer_state);
-    void stop_client(int peer_fd, bool force = false, bool force_delete = false);
+    void stop_client(int peer_fd, bool force = false);
    void outbox_push(osd_op_t *cur_op);
    std::function<void(osd_op_t*)> exec_op;
    std::function<void(osd_num_t)> repeer_pgs;
--- a/src/mock/messenger.cpp
+++ b/src/mock/messenger.cpp
@@ -15,7 +15,7 @@ osd_messenger_t::~osd_messenger_t()
 {
    while (clients.size() > 0)
    {
-        stop_client(clients.begin()->first, true, true);
+        stop_client(clients.begin()->first, true);
    }
 }

--- a/src/msgr_rdma.cpp
+++ b/src/msgr_rdma.cpp
@@ -46,9 +46,6 @@ msgr_rdma_connection_t::~msgr_rdma_connection_t()
    ctx->used_max_cqe -= max_send+max_recv;
    if (qp)
        ibv_destroy_qp(qp);
-    if (recv_buffers.size())
-        for (auto b: recv_buffers)
-            free(b);
 }

 msgr_rdma_context_t *msgr_rdma_context_t::create(const char *ib_devname, uint8_t ib_port, uint8_t gid_index, uint32_t mtu)
@@ -58,7 +55,6 @@ msgr_rdma_context_t *msgr_rdma_context_t::create(const char *ib_devname, uint8_t
    msgr_rdma_context_t *ctx = new msgr_rdma_context_t();
    ctx->mtu = mtu;

-    srand48(time(NULL));
    dev_list = ibv_get_device_list(NULL);
    if (!dev_list)
    {
@@ -481,11 +477,7 @@ void osd_messenger_t::handle_rdma_events()
            if (!is_send)
            {
                cl->rdma_conn->cur_recv--;
-                if (!handle_read_buffer(cl, cl->rdma_conn->recv_buffers[0], wc[i].byte_len))
-                {
-                    // handle_read_buffer may stop the client
-                    continue;
-                }
+                handle_read_buffer(cl, cl->rdma_conn->recv_buffers[0], wc[i].byte_len);
                free(cl->rdma_conn->recv_buffers[0]);
                cl->rdma_conn->recv_buffers.erase(cl->rdma_conn->recv_buffers.begin(), cl->rdma_conn->recv_buffers.begin()+1);
                try_recv_rdma(cl);
--- a/src/msgr_stop.cpp
+++ b/src/msgr_stop.cpp
@@ -41,7 +41,7 @@ void osd_messenger_t::cancel_op(osd_op_t *op)
    }
 }

-void osd_messenger_t::stop_client(int peer_fd, bool force, bool force_delete)
+void osd_messenger_t::stop_client(int peer_fd, bool force)
 {
    assert(peer_fd != 0);
    auto it = clients.find(peer_fd);
@@ -136,7 +136,7 @@ void osd_messenger_t::stop_client(int peer_fd, bool force, bool force_delete)
        clients.erase(it);
    }
    cl->refs--;
-    if (cl->refs <= 0 || force_delete)
+    if (cl->refs <= 0)
    {
        delete cl;
    }
--- a/src/osd.cpp
+++ b/src/osd.cpp
@@ -7,8 +7,6 @@
 #include <netinet/tcp.h>
 #include <arpa/inet.h>

-#include "blockstore_impl.h"
-#include "osd_primary.h"
 #include "osd.h"
 #include "http_client.h"

@@ -367,7 +365,6 @@ void osd_t::print_stats()

 void osd_t::print_slow()
 {
-    bool has_slow = false;
    char alloc[1024];
    timespec now;
    clock_gettime(CLOCK_REALTIME, &now);
@@ -429,32 +426,9 @@ void osd_t::print_slow()
                {
                    bufprintf(" inode=%lx offset=%lx len=%x", op->req.rw.inode, op->req.rw.offset, op->req.rw.len);
                }
-                if (op->req.hdr.opcode == OSD_OP_SEC_READ || op->req.hdr.opcode == OSD_OP_SEC_WRITE ||
-                    op->req.hdr.opcode == OSD_OP_SEC_WRITE_STABLE || op->req.hdr.opcode == OSD_OP_SEC_DELETE ||
-                    op->req.hdr.opcode == OSD_OP_SEC_SYNC || op->req.hdr.opcode == OSD_OP_SEC_LIST ||
-                    op->req.hdr.opcode == OSD_OP_SEC_STABILIZE || op->req.hdr.opcode == OSD_OP_SEC_ROLLBACK ||
-                    op->req.hdr.opcode == OSD_OP_SEC_READ_BMP)
-                {
-                    bufprintf(" state=%d", PRIV(op->bs_op)->op_state);
-                    int wait_for = PRIV(op->bs_op)->wait_for;
-                    if (wait_for)
-                    {
-                        bufprintf(" wait=%d (detail=%lu)", wait_for, PRIV(op->bs_op)->wait_detail);
-                    }
-                }
-                else if (op->req.hdr.opcode == OSD_OP_READ || op->req.hdr.opcode == OSD_OP_WRITE ||
-                    op->req.hdr.opcode == OSD_OP_SYNC || op->req.hdr.opcode == OSD_OP_DELETE)
-                {
-                    bufprintf(" state=%d", !op->op_data ? -1 : op->op_data->st);
-                }
 #undef bufprintf
                printf("%s\n", alloc);
-                has_slow = true;
            }
        }
    }
-    if (has_slow)
-    {
-        bs->dump_diagnostics();
-    }
 }
--- a/src/osd_id.h
+++ b/src/osd_id.h
@@ -9,8 +9,6 @@
 #define POOL_ID_MAX 0x10000
 #define POOL_ID_BITS 16
 #define INODE_POOL(inode) (pool_id_t)((inode) >> (64 - POOL_ID_BITS))
-#define INODE_NO_POOL(inode) (inode_t)(inode & ((1l << (64-POOL_ID_BITS)) - 1))
-#define INODE_WITH_POOL(pool_id, inode) (((inode_t)(pool_id) << (64-POOL_ID_BITS)) | INODE_NO_POOL(inode))

 // Pool ID is 16 bits long
 typedef uint32_t pool_id_t;
--- a/src/osd_primary.cpp
+++ b/src/osd_primary.cpp
@@ -198,7 +198,7 @@ void osd_t::continue_primary_read(osd_op_t *cur_op)
        {
            // Fast happy-path
            cur_op->buf = alloc_read_buffer(op_data->stripes, op_data->pg_data_size, 0);
-            submit_primary_subops(SUBMIT_RMW_READ, op_data->target_ver, pg.cur_set.data(), cur_op);
+            submit_primary_subops(SUBMIT_READ, op_data->target_ver, pg.cur_set.data(), cur_op);
            op_data->st = 1;
        }
        else
@@ -215,7 +215,7 @@ void osd_t::continue_primary_read(osd_op_t *cur_op)
            op_data->scheme = pg.scheme;
            op_data->degraded = 1;
            cur_op->buf = alloc_read_buffer(op_data->stripes, pg.pg_size, 0);
-            submit_primary_subops(SUBMIT_RMW_READ, op_data->target_ver, cur_set, cur_op);
+            submit_primary_subops(SUBMIT_READ, op_data->target_ver, cur_set, cur_op);
            op_data->st = 1;
        }
    }
@@ -353,7 +353,6 @@ resume_3:
    if (cur_op->req.rw.version && op_data->fact_ver != (cur_op->req.rw.version-1))
    {
        cur_op->reply.hdr.retval = -EINTR;
-        cur_op->reply.rw.version = op_data->fact_ver;
        goto continue_others;
    }
    // Save version override for parallel reads
--- a/src/osd_primary_write.cpp
+++ b/src/osd_primary_write.cpp
@@ -100,7 +100,6 @@ resume_3:
    if (cur_op->req.rw.version && op_data->fact_ver != (cur_op->req.rw.version-1))
    {
        cur_op->reply.hdr.retval = -EINTR;
-        cur_op->reply.rw.version = op_data->fact_ver;
        goto continue_others;
    }
    if (op_data->scheme == POOL_SCHEME_REPLICATED)
@@ -261,7 +260,6 @@ resume_9:
        }
    }
    cur_op->reply.hdr.retval = cur_op->req.rw.len;
-    cur_op->reply.rw.version = op_data->fact_ver;
 continue_others:
    osd_op_t *next_op = NULL;
    auto next_it = pg.write_queue.find(op_data->oid);
--- a/src/qemu_driver.c
+++ b/src/qemu_driver.c
@@ -102,7 +102,7 @@ static void qemu_vitastor_unescape(char *src)
 }

 // vitastor[:key=value]*
-// vitastor[:etcd_host=127.0.0.1]:inode=1:pool=1[:rdma_gid_index=3]
+// vitastor[:(etcd|etcd_host|etcd_address)=127.0.0.1]:inode=1:pool=1[:rdma_gid_index=3]
 // vitastor:config_path=/etc/vitastor/vitastor.conf:image=testimg
 static void vitastor_parse_filename(const char *filename, QDict *options, Error **errp)
 {
@@ -199,8 +199,12 @@ static int vitastor_file_open(BlockDriverState *bs, QDict *options, int flags, E
    int64_t ret = 0;
    qemu_mutex_init(&client->mutex);
    client->config_path = g_strdup(qdict_get_try_str(options, "config_path"));
-    // FIXME: Rename to etcd_address
-    client->etcd_host = g_strdup(qdict_get_try_str(options, "etcd_host"));
+    if (qdict_get_try_str(options, "etcd_address"))
+        client->etcd_host = g_strdup(qdict_get_try_str(options, "etcd_address"));
+    else if (qdict_get_try_str(options, "etcd_host"))
+        client->etcd_host = g_strdup(qdict_get_try_str(options, "etcd_host"));
+    else if (qdict_get_try_str(options, "etcd"))
+        client->etcd_host = g_strdup(qdict_get_try_str(options, "etcd"));
    client->etcd_prefix = g_strdup(qdict_get_try_str(options, "etcd_prefix"));
    client->use_rdma = qdict_get_try_int(options, "use_rdma", -1);
    client->rdma_device = g_strdup(qdict_get_try_str(options, "rdma_device"));
@@ -267,6 +271,8 @@ static int vitastor_file_open(BlockDriverState *bs, QDict *options, int flags, E
    qdict_del(options, "rdma_device");
    qdict_del(options, "config_path");
    qdict_del(options, "etcd_host");
+    qdict_del(options, "etcd_address");
+    qdict_del(options, "etcd");
    qdict_del(options, "etcd_prefix");
    qdict_del(options, "image");
    qdict_del(options, "inode");
@@ -512,6 +518,8 @@ static const char *vitastor_strong_runtime_opts[] = {
    "inode",
    "pool",
    "config_path",
+    "etcd",
+    "etcd_address",
    "etcd_host",
    "etcd_prefix",

--- a/src/rm_inode.cpp
+++ b/src/rm_inode.cpp
@@ -0,0 +1,410 @@
+// Copyright (c) Vitaliy Filippov, 2019+
+// License: VNPL-1.1 (see README.md for details)
+
+/**
+ * Inode removal tool
+ * May be included into a bigger "command-line management interface" in the future
+ */
+
+#include <vector>
+#include <algorithm>
+
+#include "epoll_manager.h"
+#include "cluster_client.h"
+#include "pg_states.h"
+
+#define RM_LISTING 1
+#define RM_REMOVING 2
+#define RM_END 3
+
+const char *exe_name = NULL;
+
+struct rm_pg_t;
+
+struct rm_pg_osd_t
+{
+    rm_pg_t *pg = NULL;
+    osd_num_t osd_num;
+    bool sent = false;
+};
+
+struct rm_pg_t
+{
+    pg_num_t pg_num;
+    osd_num_t rm_osd_num;
+    std::vector<rm_pg_osd_t> list_osds;
+    int state = 0;
+    int to_list;
+    std::set<object_id> objects;
+    std::set<object_id>::iterator obj_pos;
+    uint64_t obj_count = 0, obj_done = 0, obj_prev_done = 0;
+    int in_flight = 0;
+};
+
+class rm_inode_t
+{
+protected:
+    uint64_t inode = 0;
+    pool_id_t pool_id = 0;
+    uint64_t iodepth = 0, parallel_osds = 0;
+
+    ring_loop_t *ringloop = NULL;
+    epoll_manager_t *epmgr = NULL;
+    cluster_client_t *cli = NULL;
+    ring_consumer_t consumer;
+
+    std::vector<rm_pg_t*> lists;
+    uint64_t total_count = 0, total_done = 0, total_prev_pct = 0;
+    uint64_t pgs_to_list = 0;
+    bool started = false;
+    bool progress = true;
+    bool list_first = false;
+    int log_level = 0;
+
+public:
+    static json11::Json::object parse_args(int narg, const char *args[])
+    {
+        json11::Json::object cfg;
+        cfg["progress"] = "1";
+        for (int i = 1; i < narg; i++)
+        {
+            if (!strcmp(args[i], "-h") || !strcmp(args[i], "--help"))
+            {
+                help();
+            }
+            else if (args[i][0] == '-' && args[i][1] == '-')
+            {
+                const char *opt = args[i]+2;
+                cfg[opt] = !strcmp(opt, "json") || !strcmp(opt, "wait-list") || i == narg-1 ? "1" : args[++i];
+            }
+        }
+        return cfg;
+    }
+
+    static void help()
+    {
+        printf(
+            "Vitastor inode removal tool\n"
+            "(c) Vitaliy Filippov, 2020 (VNPL-1.1)\n\n"
+            "USAGE:\n"
+            "  %s [--etcd_address <etcd_address>] --pool <pool> --inode <inode> [--wait-list]\n",
+            exe_name
+        );
+        exit(0);
+    }
+
+    void run(json11::Json cfg)
+    {
+        inode = cfg["inode"].uint64_value();
+        pool_id = cfg["pool"].uint64_value();
+        if (pool_id)
+            inode = (inode & ((1l << (64-POOL_ID_BITS)) - 1)) | (((uint64_t)pool_id) << (64-POOL_ID_BITS));
+        pool_id = INODE_POOL(inode);
+        if (!pool_id)
+        {
+            fprintf(stderr, "pool is missing");
+            exit(1);
+        }
+        iodepth = cfg["iodepth"].uint64_value();
+        if (!iodepth)
+            iodepth = 32;
+        parallel_osds = cfg["parallel_osds"].uint64_value();
+        if (!parallel_osds)
+            parallel_osds = 4;
+        log_level = cfg["log_level"].int64_value();
+        progress = cfg["progress"].uint64_value() ? true : false;
+        list_first = cfg["wait-list"].uint64_value() ? true : false;
+        // Create client
+        ringloop = new ring_loop_t(512);
+        epmgr = new epoll_manager_t(ringloop);
+        cli = new cluster_client_t(ringloop, epmgr->tfd, cfg);
+        cli->on_ready([this]() { start_delete(); });
+        // Initialize job
+        consumer.loop = [this]()
+        {
+            if (started)
+                continue_delete();
+            ringloop->submit();
+        };
+        ringloop->register_consumer(&consumer);
+        // Loop until it completes
+        while (1)
+        {
+            ringloop->loop();
+            ringloop->wait();
+        }
+    }
+
+    void start_delete()
+    {
+        if (cli->st_cli.pool_config.find(pool_id) == cli->st_cli.pool_config.end())
+        {
+            fprintf(stderr, "Pool %u does not exist\n", pool_id);
+            exit(1);
+        }
+        auto pool_cfg = cli->st_cli.pool_config[pool_id];
+        for (auto & pg_item: pool_cfg.pg_config)
+        {
+            auto & pg = pg_item.second;
+            if (pg.pause || !pg.cur_primary || !(pg.cur_state & PG_ACTIVE))
+            {
+                fprintf(stderr, "PG %u is inactive, skipping\n", pg_item.first);
+                continue;
+            }
+            rm_pg_t *r = new rm_pg_t();
+            r->pg_num = pg_item.first;
+            r->rm_osd_num = pg.cur_primary;
+            r->state = RM_LISTING;
+            if (pg.cur_state != PG_ACTIVE)
+            {
+                std::set<osd_num_t> all_peers;
+                for (osd_num_t pg_osd: pg.target_set)
+                {
+                    if (pg_osd != 0)
+                    {
+                        all_peers.insert(pg_osd);
+                    }
+                }
+                for (osd_num_t pg_osd: pg.all_peers)
+                {
+                    if (pg_osd != 0)
+                    {
+                        all_peers.insert(pg_osd);
+                    }
+                }
+                for (auto & hist_item: pg.target_history)
+                {
+                    for (auto pg_osd: hist_item)
+                    {
+                        if (pg_osd != 0)
+                        {
+                            all_peers.insert(pg_osd);
+                        }
+                    }
+                }
+                for (osd_num_t peer_osd: all_peers)
+                {
+                    r->list_osds.push_back((rm_pg_osd_t){ .pg = r, .osd_num = peer_osd, .sent = false });
+                }
+            }
+            else
+            {
+                r->list_osds.push_back((rm_pg_osd_t){ .pg = r, .osd_num = pg.cur_primary, .sent = false });
+            }
+            r->to_list = r->list_osds.size();
+            lists.push_back(r);
+        }
+        std::sort(lists.begin(), lists.end(), [](rm_pg_t *a, rm_pg_t *b)
+        {
+            return a->rm_osd_num < b->rm_osd_num ? true : false;
+        });
+        pgs_to_list = lists.size();
+        started = true;
+        continue_delete();
+    }
+
+    void send_list(rm_pg_osd_t *cur_list)
+    {
+        if (cur_list->sent)
+        {
+            return;
+        }
+        if (cli->msgr.osd_peer_fds.find(cur_list->osd_num) ==
+            cli->msgr.osd_peer_fds.end())
+        {
+            // Initiate connection
+            cli->msgr.connect_peer(cur_list->osd_num, cli->st_cli.peer_states[cur_list->osd_num]);
+            return;
+        }
+        osd_op_t *op = new osd_op_t();
+        op->op_type = OSD_OP_OUT;
+        op->peer_fd = cli->msgr.osd_peer_fds[cur_list->osd_num];
+        op->req = (osd_any_op_t){
+            .sec_list = {
+                .header = {
+                    .magic = SECONDARY_OSD_OP_MAGIC,
+                    .id = cli->msgr.next_subop_id++,
+                    .opcode = OSD_OP_SEC_LIST,
+                },
+                .list_pg = cur_list->pg->pg_num,
+                .pg_count = (pg_num_t)cli->st_cli.pool_config[pool_id].real_pg_count,
+                .pg_stripe_size = cli->st_cli.pool_config[pool_id].pg_stripe_size,
+                .min_inode = inode,
+                .max_inode = inode,
+            },
+        };
+        op->callback = [this, cur_list](osd_op_t *op)
+        {
+            cur_list->pg->to_list--;
+            if (op->reply.hdr.retval < 0)
+            {
+                fprintf(stderr, "Failed to get PG %u/%u object list from OSD %lu (retval=%ld), skipping\n",
+                    pool_id, cur_list->pg->pg_num, cur_list->osd_num, op->reply.hdr.retval);
+            }
+            else
+            {
+                if (op->reply.sec_list.stable_count < op->reply.hdr.retval)
+                {
+                    // Unstable objects, if present, mean that someone still writes into the inode. Warn the user about it.
+                    printf(
+                        "[PG %u/%u] Inode still has %lu unstable object versions - is it still open? Not a good idea to delete it.\n",
+                        pool_id, cur_list->pg->pg_num, op->reply.hdr.retval - op->reply.sec_list.stable_count
+                    );
+                }
+                if (log_level > 0)
+                {
+                    printf(
+                        "[PG %u/%u] Got inode object list from OSD %lu: %ld object versions\n",
+                        pool_id, cur_list->pg->pg_num, cur_list->osd_num, op->reply.hdr.retval
+                    );
+                }
+                for (uint64_t i = 0; i < op->reply.hdr.retval; i++)
+                {
+                    object_id oid = ((obj_ver_id*)op->buf)[i].oid;
+                    oid.stripe = oid.stripe & ~STRIPE_MASK;
+                    cur_list->pg->objects.insert(oid);
+                }
+            }
+            delete op;
+            if (cur_list->pg->to_list <= 0)
+            {
+                cur_list->pg->obj_done = cur_list->pg->obj_prev_done = 0;
+                cur_list->pg->obj_pos = cur_list->pg->objects.begin();
+                cur_list->pg->obj_count = cur_list->pg->objects.size();
+                total_count += cur_list->pg->obj_count;
+                total_prev_pct = 0;
+                cur_list->pg->state = RM_REMOVING;
+                pgs_to_list--;
+            }
+            continue_delete();
+        };
+        cli->msgr.outbox_push(op);
+        cur_list->sent = true;
+    }
+
+    void send_ops(rm_pg_t *cur_list)
+    {
+        if (cli->msgr.osd_peer_fds.find(cur_list->rm_osd_num) ==
+            cli->msgr.osd_peer_fds.end())
+        {
+            // Initiate connection
+            cli->msgr.connect_peer(cur_list->rm_osd_num, cli->st_cli.peer_states[cur_list->rm_osd_num]);
+            return;
+        }
+        while (cur_list->in_flight < iodepth && cur_list->obj_pos != cur_list->objects.end())
+        {
+            osd_op_t *op = new osd_op_t();
+            op->op_type = OSD_OP_OUT;
+            op->peer_fd = cli->msgr.osd_peer_fds[cur_list->rm_osd_num];
+            op->req = (osd_any_op_t){
+                .rw = {
+                    .header = {
+                        .magic = SECONDARY_OSD_OP_MAGIC,
+                        .id = cli->msgr.next_subop_id++,
+                        .opcode = OSD_OP_DELETE,
+                    },
+                    .inode = cur_list->obj_pos->inode,
+                    .offset = (cur_list->obj_pos->stripe & ~STRIPE_MASK),
+                    .len = 0,
+                },
+            };
+            op->callback = [this, cur_list](osd_op_t *op)
+            {
+                cur_list->in_flight--;
+                if (op->reply.hdr.retval < 0)
+                {
+                    fprintf(stderr, "Failed to remove object from PG %u (OSD %lu) (retval=%ld)\n",
+                        cur_list->pg_num, cur_list->rm_osd_num, op->reply.hdr.retval);
+                }
+                delete op;
+                cur_list->obj_done++;
+                total_done++;
+                continue_delete();
+            };
+            cli->msgr.outbox_push(op);
+            cur_list->obj_pos++;
+            cur_list->in_flight++;
+        }
+        if (!cur_list->in_flight && cur_list->obj_pos == cur_list->objects.end())
+        {
+            cur_list->obj_count = 0;
+            cur_list->obj_done = cur_list->obj_prev_done = 0;
+            cur_list->state = RM_END;
+        }
+    }
+
+    void continue_delete()
+    {
+        int par_osd = 0;
+        osd_num_t max_seen_osd = 0;
+        bool no_del = false;
+        if (list_first)
+        {
+            int i, n = 0;
+            for (i = 0; i < lists.size(); i++)
+            {
+                if (lists[i]->state == RM_LISTING)
+                {
+                    n++;
+                }
+            }
+            if (n > 0)
+            {
+                no_del = true;
+            }
+        }
+        for (int i = 0; i < lists.size(); i++)
+        {
+            if (lists[i]->state == RM_END)
+            {
+                delete lists[i];
+                lists.erase(lists.begin()+i, lists.begin()+i+1);
+                i--;
+            }
+            else if (lists[i]->rm_osd_num > max_seen_osd)
+            {
+                if (lists[i]->state == RM_LISTING)
+                {
+                    for (int j = 0; j < lists[i]->list_osds.size(); j++)
+                    {
+                        send_list(&lists[i]->list_osds[j]);
+                    }
+                }
+                else if (lists[i]->state == RM_REMOVING)
+                {
+                    if (no_del)
+                    {
+                        continue;
+                    }
+                    send_ops(lists[i]);
+                }
+                par_osd++;
+                max_seen_osd = lists[i]->rm_osd_num;
+                if (par_osd >= parallel_osds)
+                {
+                    break;
+                }
+            }
+        }
+        if (progress && total_count > 0 && total_done*1000/total_count != total_prev_pct)
+        {
+            printf("\rRemoved %lu/%lu objects, %lu more PGs to list...", total_done, total_count, pgs_to_list);
+            total_prev_pct = total_done*1000/total_count;
+        }
+        if (!lists.size())
+        {
+            printf("Done, inode %lu in pool %u removed\n", (inode & ((1l << (64-POOL_ID_BITS)) - 1)), pool_id);
+            exit(0);
+        }
+    }
+};
+
+int main(int narg, const char *args[])
+{
+    setvbuf(stdout, NULL, _IONBF, 0);
+    setvbuf(stderr, NULL, _IONBF, 0);
+    exe_name = args[0];
+    rm_inode_t *p = new rm_inode_t();
+    p->run(rm_inode_t::parse_args(narg, args));
+    return 0;
+}
--- a/src/vitastor_c.cpp
+++ b/src/vitastor_c.cpp
@@ -72,7 +72,7 @@ static void vitastor_c_write_handler(void *opaque)

 vitastor_c *vitastor_c_create_qemu(QEMUSetFDHandler *aio_set_fd_handler, void *aio_context,
    const char *config_path, const char *etcd_host, const char *etcd_prefix,
-    int use_rdma, const char *rdma_device, int rdma_port_num, int rdma_gid_index, int rdma_mtu, int log_level)
+    bool use_rdma, const char *rdma_device, int rdma_port_num, int rdma_gid_index, int rdma_mtu, int log_level)
 {
    json11::Json cfg_json = vitastor_c_common_config(
        config_path, etcd_host, etcd_prefix, use_rdma,
--- a/src/vitastor_c.h
+++ b/src/vitastor_c.h
@@ -28,7 +28,7 @@ typedef void QEMUSetFDHandler(void *ctx, int fd, int is_external, IOHandler *fd_

 vitastor_c *vitastor_c_create_qemu(QEMUSetFDHandler *aio_set_fd_handler, void *aio_context,
    const char *config_path, const char *etcd_host, const char *etcd_prefix,
-    int use_rdma, const char *rdma_device, int rdma_port_num, int rdma_gid_index, int rdma_mtu, int log_level);
+    bool use_rdma, const char *rdma_device, int rdma_port_num, int rdma_gid_index, int rdma_mtu, int log_level);
 vitastor_c *vitastor_c_create_uring(const char *config_path, const char *etcd_host, const char *etcd_prefix,
    int use_rdma, const char *rdma_device, int rdma_port_num, int rdma_gid_index, int rdma_mtu, int log_level);
 vitastor_c *vitastor_c_create_uring_json(const char **options, int options_len);
--- a/tests/run_3osds.sh
+++ b/tests/run_3osds.sh
@@ -3,7 +3,6 @@
 . `dirname $0`/common.sh

 OSD_SIZE=${OSD_SIZE:-1024}
-PG_COUNT=${PG_COUNT:-1}

 dd if=/dev/zero of=./testdata/test_osd1.bin bs=1024 count=1 seek=$((OSD_SIZE*1024-1))
 dd if=/dev/zero of=./testdata/test_osd2.bin bs=1024 count=1 seek=$((OSD_SIZE*1024-1))
@@ -26,16 +25,16 @@ if [ -n "$GLOBAL_CONF" ]; then
    $ETCDCTL put /vitastor/config/global "$GLOBAL_CONF"
 fi

-$ETCDCTL put /vitastor/config/pools '{"1":{"name":"testpool","scheme":"xor","pg_size":3,"pg_minsize":2,"parity_chunks":1,"pg_count":'$PG_COUNT',"failure_domain":"osd"}}'
+$ETCDCTL put /vitastor/config/pools '{"1":{"name":"testpool","scheme":"xor","pg_size":3,"pg_minsize":2,"parity_chunks":1,"pg_count":1,"failure_domain":"osd"}}'

 sleep 2

-if ! ($ETCDCTL get /vitastor/config/pgs --print-value-only | jq -s -e '(. | length) != 0 and ([ .[0].items["1"][] | select((.osd_set | sort) == ["1","2","3"]) ] | length) == '$PG_COUNT); then
-    format_error "FAILED: $PG_COUNT PG(s) NOT CONFIGURED"
+if ! ($ETCDCTL get /vitastor/config/pgs --print-value-only | jq -s -e '(. | length) != 0 and (.[0].items["1"]["1"].osd_set | sort) == ["1","2","3"]'); then
+    format_error "FAILED: 1 PG NOT CONFIGURED"
 fi

-if ! ($ETCDCTL get /vitastor/pg/state/1/ --prefix --print-value-only | jq -s -e '[ .[] | select(.state == ["active"]) ] | length == '$PG_COUNT); then
-    format_error "FAILED: $PG_COUNT PG(s) NOT UP"
+if ! ($ETCDCTL get /vitastor/pg/state/1/1 --print-value-only | jq -s -e '(. | length) != 0 and .[0].state == ["active"]'); then
+    format_error "FAILED: 1 PG NOT UP"
 fi

 if ! cmp build/src/block-vitastor.so /usr/lib/x86_64-linux-gnu/qemu/block-vitastor.so; then
--- a/tests/test_rm.sh
+++ b/tests/test_rm.sh
@@ -1,14 +0,0 @@
-#!/bin/bash -ex
-
-PG_COUNT=16
-. `dirname $0`/run_3osds.sh
-
-LD_PRELOAD=libasan.so.5 \
-    fio -thread -name=test -ioengine=build/src/libfio_vitastor.so -bs=4M -direct=1 -iodepth=1 \
-        -end_fsync=1 -fsync=1 -rw=write -etcd=$ETCD_URL -pool=1 -inode=1 -size=128M -cluster_log_level=10
-
-$ETCDCTL get --prefix '/vitastor/pg/state'
-
-build/src/vitastor-cli rm-data --etcd_address $ETCD_URL --pool 1 --inode 1
-
-format_green OK
--- a/tests/test_snapshot.sh
+++ b/tests/test_snapshot.sh
@@ -6,19 +6,18 @@

 $ETCDCTL put /vitastor/config/inode/1/2 '{"name":"testimg","size":'$((32*1024*1024))'}'

-LD_PRELOAD="libasan.so.5 build/src/libfio_vitastor.so" \
+LD_PRELOAD=libasan.so.5 \
    fio -thread -name=test -ioengine=build/src/libfio_vitastor.so -bs=4M -direct=1 -iodepth=1 -fsync=1 -rw=write \
        -etcd=$ETCD_URL -pool=1 -inode=2 -size=32M -cluster_log_level=10

 $ETCDCTL put /vitastor/config/inode/1/2 '{"name":"testimg@0","size":'$((32*1024*1024))'}'
 $ETCDCTL put /vitastor/config/inode/1/3 '{"parent_id":2,"name":"testimg","size":'$((32*1024*1024))'}'

-# Preload build/src/libfio_vitastor.so so libasan detects all symbols
-LD_PRELOAD="libasan.so.5 build/src/libfio_vitastor.so" \
+LD_PRELOAD=libasan.so.5 \
    fio -thread -name=test -ioengine=build/src/libfio_vitastor.so -bs=4k -direct=1 -iodepth=1 -fsync=32 -buffer_pattern=0xdeadface \
        -rw=randwrite -etcd=$ETCD_URL -image=testimg -number_ios=1024

-LD_PRELOAD="libasan.so.5 build/src/libfio_vitastor.so" \
+LD_PRELOAD=libasan.so.5 \
    fio -thread -name=test -ioengine=build/src/libfio_vitastor.so -bs=4M -direct=1 -iodepth=1 -rw=read -etcd=$ETCD_URL -pool=1 -inode=3 -size=32M

 qemu-img convert -S 4096 -p \
@@ -39,16 +38,4 @@ node mon/merge.js ./testdata/layer0.bin ./testdata/layer1.bin ./testdata/check.b

 cmp ./testdata/merged.bin ./testdata/check.bin

-# Test merge
-
-$ETCDCTL put /vitastor/config/inode/1/3 '{"parent_id":2,"name":"testimg","size":'$((32*1024*1024))'}'
-
-build/src/vitastor-cli rm --etcd_address $ETCD_URL testimg@0
-
-qemu-img convert -S 4096 -p \
-    -f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testimg" \
-    -O raw ./testdata/merged-by-tool.bin
-
-cmp ./testdata/merged.bin ./testdata/merged-by-tool.bin
-
 format_green OK
				`@@ -1 +0,0 @@`
				`deb http://vitastor.io/debian bullseye main`