Merge branch 'rel-1.4'

Release 1.4.8
- Do not use \r if output is not a terminal (should fix unexpected job output in proxmox)
- Fix rm/rm-data error return code, add --down-ok option to bypass the error
- Add EIO retry timeout and allow disabling these retries, rename up_wait_retry_interval to client_retry_interval
- Add ubuntu jammy build
- Wait for blockstore initialisation before starting OSD (prevent timeouts when init takes time)
- Fix a rare use-after-free in automatic sync after delete in blockstore
2024-02-29 09:59:01 +03:00 · 2024-02-29 09:58:34 +03:00 · 2024-02-29 00:21:17 +03:00 · 2024-02-29 00:20:10 +03:00 · 2024-02-28 13:10:02 +03:00
35 changed files with 217 additions and 125 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -2,6 +2,6 @@ cmake_minimum_required(VERSION 2.8.12)

 project(vitastor)

-set(VERSION "1.4.7")
+set(VERSION "1.4.8")

 add_subdirectory(src)
--- a/csi/Makefile
+++ b/csi/Makefile
@ -1,4 +1,4 @@
-VERSION ?= v1.4.7
+VERSION ?= v1.4.8

 all: build push

--- a/csi/deploy/004-csi-nodeplugin.yaml
+++ b/csi/deploy/004-csi-nodeplugin.yaml
@ -49,7 +49,7 @@ spec:
            capabilities:
              add: ["SYS_ADMIN"]
            allowPrivilegeEscalation: true
-          image: vitalif/vitastor-csi:v1.4.7
+          image: vitalif/vitastor-csi:v1.4.8
          args:
            - "--node=$(NODE_ID)"
            - "--endpoint=$(CSI_ENDPOINT)"
--- a/csi/deploy/007-csi-provisioner.yaml
+++ b/csi/deploy/007-csi-provisioner.yaml
@ -121,7 +121,7 @@ spec:
            privileged: true
            capabilities:
              add: ["SYS_ADMIN"]
-          image: vitalif/vitastor-csi:v1.4.7
+          image: vitalif/vitastor-csi:v1.4.8
          args:
            - "--node=$(NODE_ID)"
            - "--endpoint=$(CSI_ENDPOINT)"
--- a/csi/src/config.go
+++ b/csi/src/config.go
@ -5,7 +5,7 @@ package vitastor

 const (
    vitastorCSIDriverName    = "csi.vitastor.io"
-    vitastorCSIDriverVersion = "1.4.7"
+    vitastorCSIDriverVersion = "1.4.8"
 )

 // Config struct fills the parameters of request or user input
--- a/debian/changelog
+++ b/debian/changelog
@ -1,4 +1,4 @@
-vitastor (1.4.7-1) unstable; urgency=medium
+vitastor (1.4.8-1) unstable; urgency=medium

  * Bugfixes

--- a/debian/vitastor.Dockerfile
+++ b/debian/vitastor.Dockerfile
@ -37,8 +37,8 @@ RUN set -e -x; \
    mkdir -p /root/packages/vitastor-$REL; \
    rm -rf /root/packages/vitastor-$REL/*; \
    cd /root/packages/vitastor-$REL; \
-    cp -r /root/vitastor vitastor-1.4.7; \
-    cd vitastor-1.4.7; \
+    cp -r /root/vitastor vitastor-1.4.8; \
+    cd vitastor-1.4.8; \
    ln -s /root/fio-build/fio-*/ ./fio; \
    FIO=$(head -n1 fio/debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
    ls /usr/include/linux/raw.h || cp ./debian/raw.h /usr/include/linux/raw.h; \
@ -51,8 +51,8 @@ RUN set -e -x; \
    rm -rf a b; \
    echo "dep:fio=$FIO" > debian/fio_version; \
    cd /root/packages/vitastor-$REL; \
-    tar --sort=name --mtime='2020-01-01' --owner=0 --group=0 --exclude=debian -cJf vitastor_1.4.7.orig.tar.xz vitastor-1.4.7; \
-    cd vitastor-1.4.7; \
+    tar --sort=name --mtime='2020-01-01' --owner=0 --group=0 --exclude=debian -cJf vitastor_1.4.8.orig.tar.xz vitastor-1.4.8; \
+    cd vitastor-1.4.8; \
    V=$(head -n1 debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
    DEBFULLNAME="Vitaliy Filippov <vitalif@yourcmc.ru>" dch -D $REL -v "$V""$REL" "Rebuild for $REL"; \
    DEB_BUILD_OPTIONS=nocheck dpkg-buildpackage --jobs=auto -sa; \
--- a/docs/config/client.en.md
+++ b/docs/config/client.en.md
@ -9,6 +9,8 @@
 These parameters apply only to Vitastor clients (QEMU, fio, NBD and so on) and
 affect their interaction with the cluster.

+- [client_retry_interval](#client_retry_interval)
+- [client_eio_retry_interval](#client_eio_retry_interval)
 - [client_max_dirty_bytes](#client_max_dirty_bytes)
 - [client_max_dirty_ops](#client_max_dirty_ops)
 - [client_enable_writeback](#client_enable_writeback)
@ -19,6 +21,26 @@ affect their interaction with the cluster.
 - [nbd_max_devices](#nbd_max_devices)
 - [nbd_max_part](#nbd_max_part)

+## client_retry_interval
+
+- Type: milliseconds
+- Default: 50
+- Minimum: 10
+- Can be changed online: yes
+
+Retry time for I/O requests failed due to inactive PGs or network
+connectivity errors.
+
+## client_eio_retry_interval
+
+- Type: milliseconds
+- Default: 1000
+- Can be changed online: yes
+
+Retry time for I/O requests failed due to data corruption or unfinished
+EC object deletions (has_incomplete PG state). Setting it to 0 disables such
+retries: clients are not blocked and receive the EIO error code instead.
+
 ## client_max_dirty_bytes

 - Type: integer
--- a/docs/config/client.ru.md
+++ b/docs/config/client.ru.md
@ -9,6 +9,8 @@
 Данные параметры применяются только к клиентам Vitastor (QEMU, fio, NBD и т.п.) и
 затрагивают логику их работы с кластером.

+- [client_retry_interval](#client_retry_interval)
+- [client_eio_retry_interval](#client_eio_retry_interval)
 - [client_max_dirty_bytes](#client_max_dirty_bytes)
 - [client_max_dirty_ops](#client_max_dirty_ops)
 - [client_enable_writeback](#client_enable_writeback)
@ -19,6 +21,27 @@
 - [nbd_max_devices](#nbd_max_devices)
 - [nbd_max_part](#nbd_max_part)

+## client_retry_interval
+
+- Тип: миллисекунды
+- Значение по умолчанию: 50
+- Минимальное значение: 10
+- Можно менять на лету: да
+
+Время повтора запросов ввода-вывода, неудачных из-за неактивных PG или
+ошибок сети.
+
+## client_eio_retry_interval
+
+- Тип: миллисекунды
+- Значение по умолчанию: 1000
+- Можно менять на лету: да
+
+Время повтора запросов ввода-вывода, неудачных из-за повреждения данных
+или незавершённых удалений EC-объектов (состояния PG has_incomplete).
+0 отключает повторы таких запросов и клиенты не блокируются, а вместо
+этого просто получают код ошибки EIO.
+
 ## client_max_dirty_bytes

 - Тип: целое число
--- a/docs/config/network.en.md
+++ b/docs/config/network.en.md
@ -25,7 +25,6 @@ between clients, OSDs and etcd.
 - [peer_connect_timeout](#peer_connect_timeout)
 - [osd_idle_timeout](#osd_idle_timeout)
 - [osd_ping_timeout](#osd_ping_timeout)
- [up_wait_retry_interval](#up_wait_retry_interval)
 - [max_etcd_attempts](#max_etcd_attempts)
 - [etcd_quick_timeout](#etcd_quick_timeout)
 - [etcd_slow_timeout](#etcd_slow_timeout)
@ -212,17 +211,6 @@ Maximum time to wait for OSD keepalive responses. If an OSD doesn't respond
 within this time, the connection to it is dropped and a reconnection attempt
 is scheduled.

-## up_wait_retry_interval
-
- Type: milliseconds
- Default: 50
- Minimum: 10
- Can be changed online: yes
-
-OSDs respond to clients with a special error code when they receive I/O
-requests for a PG that's not synchronized and started. This parameter sets
-the time for the clients to wait before re-attempting such I/O requests.
-
 ## max_etcd_attempts

 - Type: integer
--- a/docs/config/network.ru.md
+++ b/docs/config/network.ru.md
@ -25,7 +25,6 @@
 - [peer_connect_timeout](#peer_connect_timeout)
 - [osd_idle_timeout](#osd_idle_timeout)
 - [osd_ping_timeout](#osd_ping_timeout)
- [up_wait_retry_interval](#up_wait_retry_interval)
 - [max_etcd_attempts](#max_etcd_attempts)
 - [etcd_quick_timeout](#etcd_quick_timeout)
 - [etcd_slow_timeout](#etcd_slow_timeout)
@ -221,19 +220,6 @@ OSD в любом случае согласовывают реальное зн
 Если OSD не отвечает за это время, соединение отключается и производится
 повторная попытка соединения.

-## up_wait_retry_interval
-
- Тип: миллисекунды
- Значение по умолчанию: 50
- Минимальное значение: 10
- Можно менять на лету: да
-
-Когда OSD получают от клиентов запросы ввода-вывода, относящиеся к не
-поднятым на данный момент на них PG, либо к PG в процессе синхронизации,
-они отвечают клиентам специальным кодом ошибки, означающим, что клиент
-должен некоторое время подождать перед повторением запроса. Именно это время
-ожидания задаёт данный параметр.
-
 ## max_etcd_attempts

 - Тип: целое число
--- a/docs/config/src/client.yml
+++ b/docs/config/src/client.yml
@ -1,3 +1,27 @@
+- name: client_retry_interval
+  type: ms
+  min: 10
+  default: 50
+  online: true
+  info: |
+    Retry time for I/O requests failed due to inactive PGs or network
+    connectivity errors.
+  info_ru: |
+    Время повтора запросов ввода-вывода, неудачных из-за неактивных PG или
+    ошибок сети.
+- name: client_eio_retry_interval
+  type: ms
+  default: 1000
+  online: true
+  info: |
+    Retry time for I/O requests failed due to data corruption or unfinished
+    EC object deletions (has_incomplete PG state). Setting it to 0 disables such
+    retries: clients are not blocked and receive the EIO error code instead.
+  info_ru: |
+    Время повтора запросов ввода-вывода, неудачных из-за повреждения данных
+    или незавершённых удалений EC-объектов (состояния PG has_incomplete).
+    0 отключает повторы таких запросов и клиенты не блокируются, а вместо
+    этого просто получают код ошибки EIO.
 - name: client_max_dirty_bytes
  type: int
  default: 33554432
--- a/docs/config/src/network.yml
+++ b/docs/config/src/network.yml
@ -243,21 +243,6 @@
    Максимальное время ожидания ответа на запрос проверки состояния соединения.
    Если OSD не отвечает за это время, соединение отключается и производится
    повторная попытка соединения.
- name: up_wait_retry_interval
-  type: ms
-  min: 10
-  default: 50
-  online: true
-  info: |
-    OSDs respond to clients with a special error code when they receive I/O
-    requests for a PG that's not synchronized and started. This parameter sets
-    the time for the clients to wait before re-attempting such I/O requests.
-  info_ru: |
-    Когда OSD получают от клиентов запросы ввода-вывода, относящиеся к не
-    поднятым на данный момент на них PG, либо к PG в процессе синхронизации,
-    они отвечают клиентам специальным кодом ошибки, означающим, что клиент
-    должен некоторое время подождать перед повторением запроса. Именно это время
-    ожидания задаёт данный параметр.
 - name: max_etcd_attempts
  type: int
  default: 5
--- a/docs/usage/cli.en.md
+++ b/docs/usage/cli.en.md
@ -135,19 +135,18 @@ See also about [how to export snapshots](qemu.en.md#exporting-snapshots).

 ## modify

-`vitastor-cli modify <name> [--rename <new-name>] [--resize <size>] [--readonly | --readwrite] [-f|--force]`
+`vitastor-cli modify <name> [--rename <new-name>] [--resize <size>] [--readonly | --readwrite] [-f|--force] [--down-ok]`

 Rename, resize image or change its readonly status. Images with children can't be made read-write.
 If the new size is smaller than the old size, extra data will be purged.
 You should resize file system in the image, if present, before shrinking it.

-```
-f|--force  Proceed with shrinking or setting readwrite flag even if the image has children.
-```
+| `-f|--force` | Proceed with shrinking or setting readwrite flag even if the image has children. |
+| `--down-ok`  | Proceed with shrinking even if some data will be left on unavailable OSDs.       |

 ## rm

-`vitastor-cli rm <from> [<to>] [--writers-stopped]`
+`vitastor-cli rm <from> [<to>] [--writers-stopped] [--down-ok]`

 Remove `<from>` or all layers between `<from>` and `<to>` (`<to>` must be a child of `<from>`),
 rebasing all their children accordingly. --writers-stopped allows merging to be a bit
@ -155,6 +154,10 @@ more effective in case of a single 'slim' read-write child and 'fat' removed par
 the child is merged into parent and parent is renamed to child in that case.
 In other cases parent layers are always merged into children.

+Other options:
+
+| `--down-ok` | Continue deletion/merging even if some data will be left on unavailable OSDs. |
+
 ## flatten

 `vitastor-cli flatten <layer>`
--- a/docs/usage/cli.ru.md
+++ b/docs/usage/cli.ru.md
@ -136,7 +136,7 @@ vitastor-cli snap-create [-p|--pool <id|name>] <image>@<snapshot>

 ## modify

-`vitastor-cli modify <name> [--rename <new-name>] [--resize <size>] [--readonly | --readwrite] [-f|--force]`
+`vitastor-cli modify <name> [--rename <new-name>] [--resize <size>] [--readonly | --readwrite] [-f|--force] [--down-ok]`

 Изменить размер, имя образа или флаг "только для чтения". Снимать флаг "только для чтения"
 и уменьшать размер образов, у которых есть дочерние клоны, без `--force` нельзя.
@ -144,13 +144,12 @@ vitastor-cli snap-create [-p|--pool <id|name>] <image>@<snapshot>
 Если новый размер меньше старого, "лишние" данные будут удалены, поэтому перед уменьшением
 образа сначала уменьшите файловую систему в нём.

-```
-f|--force  Разрешить уменьшение или перевод в чтение-запись образа, у которого есть клоны.
-```
+| `-f|--force` | Разрешить уменьшение или перевод в чтение-запись образа, у которого есть клоны.        |
+| `--down-ok`  | Разрешить уменьшение, даже если часть данных останется неудалённой на недоступных OSD. |

 ## rm

-`vitastor-cli rm <from> [<to>] [--writers-stopped]`
+`vitastor-cli rm <from> [<to>] [--writers-stopped] [--down-ok]`

 Удалить образ `<from>` или все слои от `<from>` до `<to>` (`<to>` должен быть дочерним
 образом `<from>`), одновременно меняя родительские образы их клонов (если таковые есть).
@ -162,6 +161,10 @@ vitastor-cli snap-create [-p|--pool <id|name>] <image>@<snapshot>

 В других случаях родительские слои вливаются в дочерние.

+Другие опции:
+
+| `--down-ok` | Продолжать удаление/слияние, даже если часть данных останется неудалённой на недоступных OSD. |
+
 ## flatten

 `vitastor-cli flatten <layer>`
--- a/mon/mon.js
+++ b/mon/mon.js
@ -86,13 +86,14 @@ const etcd_tree = {
            client_max_buffered_bytes: 33554432,
            client_max_buffered_ops: 1024,
            client_max_writeback_iodepth: 256,
+            client_retry_interval: 50, // ms. min: 10
+            client_eio_retry_interval: 1000, // ms
            // client and osd - configurable online
            log_level: 0,
            peer_connect_interval: 5, // seconds. min: 1
            peer_connect_timeout: 5, // seconds. min: 1
            osd_idle_timeout: 5, // seconds. min: 1
            osd_ping_timeout: 5, // seconds. min: 1
-            up_wait_retry_interval: 50, // ms. min: 10
            max_etcd_attempts: 5,
            etcd_quick_timeout: 1000, // ms
            etcd_slow_timeout: 5000, // ms
--- a/mon/package.json
+++ b/mon/package.json
@ -1,6 +1,6 @@
 {
  "name": "vitastor-mon",
-  "version": "1.4.7",
+  "version": "1.4.8",
  "description": "Vitastor SDS monitor service",
  "main": "mon-main.js",
  "scripts": {
--- a/patches/cinder-vitastor.py
+++ b/patches/cinder-vitastor.py
@ -50,7 +50,7 @@ from cinder.volume import configuration
 from cinder.volume import driver
 from cinder.volume import volume_utils

-VERSION = '1.4.7'
+VERSION = '1.4.8'

 LOG = logging.getLogger(__name__)

--- a/rpm/build-tarball.sh
+++ b/rpm/build-tarball.sh
@ -24,4 +24,4 @@ rm fio
 mv fio-copy fio
 FIO=`rpm -qi fio | perl -e 'while(<>) { /^Epoch[\s:]+(\S+)/ && print "$1:"; /^Version[\s:]+(\S+)/ && print $1; /^Release[\s:]+(\S+)/ && print "-$1"; }'`
 perl -i -pe 's/(Requires:\s*fio)([^\n]+)?/$1 = '$FIO'/' $VITASTOR/rpm/vitastor-el$EL.spec
-tar --transform 's#^#vitastor-1.4.7/#' --exclude 'rpm/*.rpm' -czf $VITASTOR/../vitastor-1.4.7$(rpm --eval '%dist').tar.gz *
+tar --transform 's#^#vitastor-1.4.8/#' --exclude 'rpm/*.rpm' -czf $VITASTOR/../vitastor-1.4.8$(rpm --eval '%dist').tar.gz *
--- a/rpm/vitastor-el7.Dockerfile
+++ b/rpm/vitastor-el7.Dockerfile
@ -36,7 +36,7 @@ ADD . /root/vitastor
 RUN set -e; \
    cd /root/vitastor/rpm; \
    sh build-tarball.sh; \
-    cp /root/vitastor-1.4.7.el7.tar.gz ~/rpmbuild/SOURCES; \
+    cp /root/vitastor-1.4.8.el7.tar.gz ~/rpmbuild/SOURCES; \
    cp vitastor-el7.spec ~/rpmbuild/SPECS/vitastor.spec; \
    cd ~/rpmbuild/SPECS/; \
    rpmbuild -ba vitastor.spec; \
--- a/rpm/vitastor-el7.spec
+++ b/rpm/vitastor-el7.spec
@ -1,11 +1,11 @@
 Name:           vitastor
-Version:        1.4.7
+Version:        1.4.8
 Release:        1%{?dist}
 Summary:        Vitastor, a fast software-defined clustered block storage

 License:        Vitastor Network Public License 1.1
 URL:            https://vitastor.io/
-Source0:        vitastor-1.4.7.el7.tar.gz
+Source0:        vitastor-1.4.8.el7.tar.gz

 BuildRequires:  liburing-devel >= 0.6
 BuildRequires:  gperftools-devel
--- a/rpm/vitastor-el8.Dockerfile
+++ b/rpm/vitastor-el8.Dockerfile
@ -35,7 +35,7 @@ ADD . /root/vitastor
 RUN set -e; \
    cd /root/vitastor/rpm; \
    sh build-tarball.sh; \
-    cp /root/vitastor-1.4.7.el8.tar.gz ~/rpmbuild/SOURCES; \
+    cp /root/vitastor-1.4.8.el8.tar.gz ~/rpmbuild/SOURCES; \
    cp vitastor-el8.spec ~/rpmbuild/SPECS/vitastor.spec; \
    cd ~/rpmbuild/SPECS/; \
    rpmbuild -ba vitastor.spec; \
--- a/rpm/vitastor-el8.spec
+++ b/rpm/vitastor-el8.spec
@ -1,11 +1,11 @@
 Name:           vitastor
-Version:        1.4.7
+Version:        1.4.8
 Release:        1%{?dist}
 Summary:        Vitastor, a fast software-defined clustered block storage

 License:        Vitastor Network Public License 1.1
 URL:            https://vitastor.io/
-Source0:        vitastor-1.4.7.el8.tar.gz
+Source0:        vitastor-1.4.8.el8.tar.gz

 BuildRequires:  liburing-devel >= 0.6
 BuildRequires:  gperftools-devel
--- a/rpm/vitastor-el9.Dockerfile
+++ b/rpm/vitastor-el9.Dockerfile
@ -18,7 +18,7 @@ ADD . /root/vitastor
 RUN set -e; \
    cd /root/vitastor/rpm; \
    sh build-tarball.sh; \
-    cp /root/vitastor-1.4.7.el9.tar.gz ~/rpmbuild/SOURCES; \
+    cp /root/vitastor-1.4.8.el9.tar.gz ~/rpmbuild/SOURCES; \
    cp vitastor-el9.spec ~/rpmbuild/SPECS/vitastor.spec; \
    cd ~/rpmbuild/SPECS/; \
    rpmbuild -ba vitastor.spec; \
--- a/rpm/vitastor-el9.spec
+++ b/rpm/vitastor-el9.spec
@ -1,11 +1,11 @@
 Name:           vitastor
-Version:        1.4.7
+Version:        1.4.8
 Release:        1%{?dist}
 Summary:        Vitastor, a fast software-defined clustered block storage

 License:        Vitastor Network Public License 1.1
 URL:            https://vitastor.io/
-Source0:        vitastor-1.4.7.el9.tar.gz
+Source0:        vitastor-1.4.8.el9.tar.gz

 BuildRequires:  liburing-devel >= 0.6
 BuildRequires:  gperftools-devel
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@ -16,7 +16,7 @@ if("${CMAKE_INSTALL_PREFIX}" MATCHES "^/usr/local/?$")
 	set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}")
 endif()

-add_definitions(-DVERSION="1.4.7")
+add_definitions(-DVERSION="1.4.8")
 add_definitions(-D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -Wall -Wno-sign-compare -Wno-comment -Wno-parentheses -Wno-pointer-arith -fdiagnostics-color=always -fno-omit-frame-pointer -I ${CMAKE_SOURCE_DIR}/src)
 add_link_options(-fno-omit-frame-pointer)
 if (${WITH_ASAN})
--- a/src/cli.cpp
+++ b/src/cli.cpp
@ -46,18 +46,21 @@ static const char* help_text =
    "vitastor-cli snap-create [-p|--pool <id|name>] <image>@<snapshot>\n"
    "  Create a snapshot of image <name>. May be used live if only a single writer is active.\n"
    "\n"
-    "vitastor-cli modify <name> [--rename <new-name>] [--resize <size>] [--readonly | --readwrite] [-f|--force]\n"
+    "vitastor-cli modify <name> [--rename <new-name>] [--resize <size>] [--readonly | --readwrite] [-f|--force] [--down-ok]\n"
    "  Rename, resize image or change its readonly status. Images with children can't be made read-write.\n"
    "  If the new size is smaller than the old size, extra data will be purged.\n"
    "  You should resize file system in the image, if present, before shrinking it.\n"
    "  -f|--force  Proceed with shrinking or setting readwrite flag even if the image has children.\n"
+    "  --down-ok   Proceed with shrinking even if some data will be left on unavailable OSDs.\n"
    "\n"
-    "vitastor-cli rm <from> [<to>] [--writers-stopped]\n"
+    "vitastor-cli rm <from> [<to>] [--writers-stopped] [--down-ok]\n"
    "  Remove <from> or all layers between <from> and <to> (<to> must be a child of <from>),\n"
    "  rebasing all their children accordingly. --writers-stopped allows merging to be a bit\n"
    "  more effective in case of a single 'slim' read-write child and 'fat' removed parent:\n"
    "  the child is merged into parent and parent is renamed to child in that case.\n"
    "  In other cases parent layers are always merged into children.\n"
+    "  Other options:\n"
+    "  --down-ok  Continue deletion/merging even if some data will be left on unavailable OSDs.\n"
    "\n"
    "vitastor-cli flatten <layer>\n"
    "  Flatten a layer, i.e. merge data and detach it from parents.\n"
@ -170,7 +173,7 @@ static const char* help_text =
    "  --parallel_osds M   Work with M osds in parallel when possible (default 4)\n"
    "  --progress 1|0      Report progress (default 1)\n"
    "  --cas 1|0           Use CAS writes for flatten, merge, rm (default is decide automatically)\n"
-    "  --no-color          Disable colored output\n"
+    "  --color 1|0         Enable/disable colored output and CR symbols (default 1 if stdout is a terminal)\n"
    "  --json              JSON output\n"
 ;

@ -221,6 +224,7 @@ static json11::Json::object parse_args(int narg, const char *args[])
                !strcmp(opt, "readonly") || !strcmp(opt, "readwrite") ||
                !strcmp(opt, "force") || !strcmp(opt, "reverse") ||
                !strcmp(opt, "allow-data-loss") || !strcmp(opt, "allow_data_loss") ||
+                !strcmp(opt, "down-ok") || !strcmp(opt, "down_ok") ||
                !strcmp(opt, "dry-run") || !strcmp(opt, "dry_run") ||
                !strcmp(opt, "help") || !strcmp(opt, "all") ||
                !strcmp(opt, "writers-stopped") || !strcmp(opt, "writers_stopped"))
--- a/src/cli_common.cpp
+++ b/src/cli_common.cpp
@ -1,6 +1,7 @@
 // Copyright (c) Vitaliy Filippov, 2019+
 // License: VNPL-1.1 (see README.md for details)

+#include <unistd.h>
 #include "str_util.h"
 #include "cluster_client.h"
 #include "cli.h"
@ -113,7 +114,12 @@ void cli_tool_t::parse_config(json11::Json::object & cfg)
        else
            kv_it++;
    }
+    if (cfg.find("no_color") != cfg.end())
        color = !cfg["no_color"].bool_value();
+    else if (cfg.find("color") != cfg.end())
+        color = cfg["color"].bool_value();
+    else
+        color = isatty(1);
    json_output = cfg["json"].bool_value();
    iodepth = cfg["iodepth"].uint64_value();
    if (!iodepth)
--- a/src/cli_merge.cpp
+++ b/src/cli_merge.cpp
@ -275,7 +275,9 @@ struct snap_merger_t
                processed++;
                if (parent->progress && !(processed % 128))
                {
-                    printf("\rFiltering target blocks: %ju/%ju", processed, to_process);
+                    fprintf(stderr, parent->color
+                        ? "\rFiltering target blocks: %ju/%ju"
+                        : "Filtering target blocks: %ju/%ju\n", processed, to_process);
                }
            }
            if (in_flight > 0 || oit != merge_offsets.end())
@ -285,7 +287,9 @@ struct snap_merger_t
            }
            if (parent->progress)
            {
-                printf("\r%ju full blocks of target filtered out\n", to_process-merge_offsets.size());
+                fprintf(stderr, parent->color
+                    ? "\r%ju full blocks of target filtered out\n"
+                    : "%ju full blocks of target filtered out\n", to_process-merge_offsets.size());
            }
        }
        state = 3;
@ -320,7 +324,9 @@ struct snap_merger_t
            processed++;
            if (parent->progress && !(processed % 128))
            {
-                printf("\rOverwriting blocks: %ju/%ju", processed, to_process);
+                fprintf(stderr, parent->color
+                    ? "\rOverwriting blocks: %ju/%ju"
+                    : "Overwriting blocks: %ju/%ju\n", processed, to_process);
            }
        }
        if (in_flight == 0 && rwo_error.size())
@ -339,7 +345,9 @@ struct snap_merger_t
        }
        if (parent->progress)
        {
-            printf("\rOverwriting blocks: %ju/%ju\n", to_process, to_process);
+            fprintf(stderr, parent->color
+                ? "\rOverwriting blocks: %ju/%ju\n"
+                : "Overwriting blocks: %ju/%ju\n", to_process, to_process);
        }
        // Done
        result = (cli_result_t){ .text = "Done, layers from "+from_name+" to "+to_name+" merged into "+target_name, .data = json11::Json::object {
--- a/src/cli_modify.cpp
+++ b/src/cli_modify.cpp
@ -15,6 +15,7 @@ struct image_changer_t
    uint64_t new_size = 0;
    bool force_size = false, inc_size = false;
    bool set_readonly = false, set_readwrite = false, force = false;
+    bool down_ok = false;
    // interval between fsyncs
    int fsync_interval = 128;

@ -108,6 +109,7 @@ struct image_changer_t
                    { "pool", (uint64_t)INODE_POOL(inode_num) },
                    { "fsync-interval", fsync_interval },
                    { "min-offset", ((new_size+4095)/4096)*4096 },
+                    { "down-ok", down_ok },
                });
 resume_1:
                while (!cb(result))
@ -252,6 +254,7 @@ std::function<bool(cli_result_t &)> cli_tool_t::start_modify(json11::Json cfg)
    changer->fsync_interval = cfg["fsync_interval"].uint64_value();
    if (!changer->fsync_interval)
        changer->fsync_interval = 128;
+    changer->down_ok = cfg["down_ok"].bool_value();
    // FIXME Check that the image doesn't have children when shrinking
    return [changer](cli_result_t & result)
    {
--- a/src/cli_rm.cpp
+++ b/src/cli_rm.cpp
@ -53,6 +53,8 @@ struct snap_remover_t
    int use_cas = 1;
    // interval between fsyncs
    int fsync_interval = 128;
+    // ignore deletion errors
+    bool down_ok = false;

    std::map<inode_t,int> sources;
    std::map<inode_t,uint64_t> inode_used;
@ -680,6 +682,7 @@ resume_100:
            { "inode", inode },
            { "pool", (uint64_t)INODE_POOL(inode) },
            { "fsync-interval", fsync_interval },
+            { "down-ok", down_ok },
        });
    }
 };
@ -691,6 +694,7 @@ std::function<bool(cli_result_t &)> cli_tool_t::start_rm(json11::Json cfg)
    snap_remover->from_name = cfg["from"].string_value();
    snap_remover->to_name = cfg["to"].string_value();
    snap_remover->fsync_interval = cfg["fsync_interval"].uint64_value();
+    snap_remover->down_ok = cfg["down_ok"].bool_value();
    if (!snap_remover->fsync_interval)
        snap_remover->fsync_interval = 128;
    if (!cfg["cas"].is_null())
--- a/src/cli_rm_data.cpp
+++ b/src/cli_rm_data.cpp
@ -25,6 +25,7 @@ struct rm_inode_t
    uint64_t inode = 0;
    pool_id_t pool_id = 0;
    uint64_t min_offset = 0;
+    bool down_ok = false;

    cli_tool_t *parent = NULL;
    inode_list_t *lister = NULL;
@ -212,7 +213,9 @@ struct rm_inode_t
        }
        if (parent->progress && total_count > 0 && total_done*1000/total_count != total_prev_pct)
        {
-            fprintf(stderr, "\rRemoved %ju/%ju objects, %ju more PGs to list...", total_done, total_count, pgs_to_list);
+            fprintf(stderr, parent->color
+                ? "\rRemoved %ju/%ju objects, %ju more PGs to list..."
+                : "Removed %ju/%ju objects, %ju more PGs to list...\n", total_done, total_count, pgs_to_list);
            total_prev_pct = total_done*1000/total_count;
        }
        if (lists_done && !lists.size())
@ -221,7 +224,8 @@ struct rm_inode_t
            {
                fprintf(stderr, "\n");
            }
-            if (parent->progress && (total_done < total_count || inactive_osds.size() > 0 || error_count > 0))
+            bool is_error = (total_done < total_count || inactive_osds.size() > 0 || error_count > 0);
+            if (parent->progress && is_error)
            {
                fprintf(
                    stderr, "Warning: Pool:%u,ID:%ju inode data may not have been fully removed.\n"
@ -230,8 +234,8 @@ struct rm_inode_t
                );
            }
            result = (cli_result_t){
-                .err = error_count > 0 ? EIO : 0,
-                .text = error_count > 0 ? "Some blocks were not removed" : (
+                .err = is_error && !down_ok ? EIO : 0,
+                .text = is_error ? "Some blocks were not removed" : (
                    "Done, inode "+std::to_string(INODE_NO_POOL(inode))+" from pool "+
                    std::to_string(pool_id)+" removed"),
                .data = json11::Json::object {
@ -280,6 +284,7 @@ std::function<bool(cli_result_t &)> cli_tool_t::start_rm_data(json11::Json cfg)
    {
        remover->inode = (remover->inode & (((uint64_t)1 << (64-POOL_ID_BITS)) - 1)) | (((uint64_t)remover->pool_id) << (64-POOL_ID_BITS));
    }
+    remover->down_ok = cfg["down_ok"].bool_value();
    remover->pool_id = INODE_POOL(remover->inode);
    remover->min_offset = cfg["min_offset"].uint64_value();
    return [remover](cli_result_t & result)
--- a/src/cluster_client.cpp
+++ b/src/cluster_client.cpp
@ -265,7 +265,7 @@ void cluster_client_t::erase_op(cluster_op_t *op)
    }
 }

-void cluster_client_t::continue_ops(bool up_retry)
+void cluster_client_t::continue_ops(int time_passed)
 {
    if (!pgs_loaded)
    {
@ -277,22 +277,27 @@ void cluster_client_t::continue_ops(bool up_retry)
        // Attempt to reenter the function
        return;
    }
+    int reset_duration = 0;
 restart:
    continuing_ops = 1;
    for (auto op = op_queue_head; op; )
    {
        cluster_op_t *next_op = op->next;
-        if (!op->up_wait || up_retry)
+        if (op->retry_after && time_passed)
        {
-            op->up_wait = false;
-            if (!op->prev_wait)
+            op->retry_after = op->retry_after > time_passed ? op->retry_after-time_passed : 0;
+            if (op->retry_after && (!reset_duration || op->retry_after < reset_duration))
+            {
+                reset_duration = op->retry_after;
+            }
+        }
+        if (!op->retry_after && !op->prev_wait)
        {
            if (op->opcode == OSD_OP_SYNC)
                continue_sync(op);
            else
                continue_rw(op);
        }
-        }
        op = next_op;
        if (continuing_ops == 2)
        {
@ -300,6 +305,27 @@ restart:
        }
    }
    continuing_ops = 0;
+    reset_retry_timer(reset_duration);
+}
+
+void cluster_client_t::reset_retry_timer(int new_duration)
+{
+    if (retry_timeout_duration && retry_timeout_duration <= new_duration || !new_duration)
+    {
+        return;
+    }
+    if (retry_timeout_id)
+    {
+        tfd->clear_timer(retry_timeout_id);
+    }
+    retry_timeout_duration = new_duration;
+    retry_timeout_id = tfd->set_timer(retry_timeout_duration, false, [this](int)
+    {
+        int time_passed = retry_timeout_duration;
+        retry_timeout_id = 0;
+        retry_timeout_duration = 0;
+        continue_ops(time_passed);
+    });
 }

 void cluster_client_t::on_load_config_hook(json11::Json::object & etcd_global_config)
@ -349,15 +375,25 @@ void cluster_client_t::on_load_config_hook(json11::Json::object & etcd_global_co
    {
        client_max_writeback_iodepth = DEFAULT_CLIENT_MAX_WRITEBACK_IODEPTH;
    }
-    // up_wait_retry_interval
-    up_wait_retry_interval = config["up_wait_retry_interval"].uint64_value();
-    if (!up_wait_retry_interval)
+    // client_retry_interval
+    client_retry_interval = config["client_retry_interval"].uint64_value();
+    if (!client_retry_interval)
    {
-        up_wait_retry_interval = 50;
+        client_retry_interval = 50;
    }
-    else if (up_wait_retry_interval < 10)
+    else if (client_retry_interval < 10)
    {
-        up_wait_retry_interval = 10;
+        client_retry_interval = 10;
+    }
+    // client_eio_retry_interval
+    client_eio_retry_interval = 1000;
+    if (!config["client_eio_retry_interval"].is_null())
+    {
+        client_eio_retry_interval = config["client_eio_retry_interval"].uint64_value();
+        if (client_eio_retry_interval && client_eio_retry_interval < 10)
+        {
+            client_eio_retry_interval = 10;
+        }
    }
    // log_level
    log_level = config["log_level"].uint64_value();
@ -716,15 +752,8 @@ resume_1:
                // We'll need to retry again
                if (op->parts[i].flags & PART_RETRY)
                {
-                    op->up_wait = true;
-                    if (!retry_timeout_id)
-                    {
-                        retry_timeout_id = tfd->set_timer(up_wait_retry_interval, false, [this](int)
-                        {
-                            retry_timeout_id = 0;
-                            continue_ops(true);
-                        });
-                    }
+                    op->retry_after = client_retry_interval;
+                    reset_retry_timer(client_retry_interval);
                }
                op->state = 1;
            }
@ -780,10 +809,9 @@ resume_2:
        return 1;
    }
    else if (op->retval != 0 && !(op->flags & OP_FLUSH_BUFFER) &&
-        op->retval != -EPIPE && op->retval != -EIO && op->retval != -ENOSPC)
+        op->retval != -EPIPE && (op->retval != -EIO || !client_eio_retry_interval) && op->retval != -ENOSPC)
    {
        // Fatal error (neither -EPIPE, -ENOSPC, nor -EIO with EIO retries enabled)
-        // FIXME: Add a parameter to allow to not wait for EIOs (incomplete or corrupted objects) to heal
        erase_op(op);
        return 1;
    }
@ -1171,16 +1199,12 @@ void cluster_client_t::handle_op_part(cluster_op_part_t *part)
        // All next things like timer, continue_sync/rw and stop_client may affect the operation again
        // So do all these things after modifying operation state, otherwise we may hit reenterability bugs
        // FIXME postpone such things to set_immediate here to avoid bugs
-        // Mark op->up_wait = true to retry operation after a short pause (not immediately)
-        op->up_wait = true;
-        if (!retry_timeout_id)
+        // Set op->retry_after to retry operation after a short pause (not immediately)
+        if (!op->retry_after)
        {
-            retry_timeout_id = tfd->set_timer(up_wait_retry_interval, false, [this](int)
-            {
-                retry_timeout_id = 0;
-                continue_ops(true);
-            });
+            op->retry_after = op->retval == -EIO ? client_eio_retry_interval : client_retry_interval;
        }
+        reset_retry_timer(op->retry_after);
        if (op->inflight_count == 0)
        {
            if (op->opcode == OSD_OP_SYNC)
--- a/src/cluster_client.h
+++ b/src/cluster_client.h
@ -59,7 +59,7 @@ protected:
    void *buf = NULL;
    cluster_op_t *orig_op = NULL;
    bool needs_reslice = false;
-    bool up_wait = false;
+    int retry_after = 0;
    int inflight_count = 0, done_count = 0;
    std::vector<cluster_op_part_t> parts;
    void *part_bitmaps = NULL;
@ -92,9 +92,11 @@ class cluster_client_t
    uint64_t client_max_writeback_iodepth = 0;

    int log_level = 0;
-    int up_wait_retry_interval = 500; // ms
+    int client_retry_interval = 50; // ms
+    int client_eio_retry_interval = 1000; // ms

    int retry_timeout_id = 0;
+    int retry_timeout_duration = 0;
    std::vector<cluster_op_t*> offline_ops;
    cluster_op_t *op_queue_head = NULL, *op_queue_tail = NULL;
    writeback_cache_t *wb = NULL;
@ -131,7 +133,7 @@ public:

    bool get_immediate_commit(uint64_t inode);

-    void continue_ops(bool up_retry = false);
+    void continue_ops(int time_passed = 0);
    inode_list_t *list_inode_start(inode_t inode,
        std::function<void(inode_list_t* lst, std::set<object_id>&& objects, pg_num_t pg_num, osd_num_t primary_osd, int status)> callback);
    int list_pg_count(inode_list_t *lst);
@ -152,6 +154,7 @@ protected:
    int continue_rw(cluster_op_t *op);
    bool check_rw(cluster_op_t *op);
    void slice_rw(cluster_op_t *op);
+    void reset_retry_timer(int new_duration);
    bool try_send(cluster_op_t *op, int i);
    int continue_sync(cluster_op_t *op);
    void send_sync(cluster_op_t *op, cluster_op_part_t *part);
--- a/src/vitastor.pc.in
+++ b/src/vitastor.pc.in
@ -6,7 +6,7 @@ includedir=${prefix}/@CMAKE_INSTALL_INCLUDEDIR@

 Name: Vitastor
 Description: Vitastor client library
-Version: 1.4.7
+Version: 1.4.8
 Libs: -L${libdir} -lvitastor_client
 Cflags: -I${includedir}
Author	SHA1	Message	Date
Vitaliy Filippov	4cf6dceed7	Merge branch 'rel-1.4' Test / test_minsize_1 (push) Has been cancelled Details Test / test_move_reappear (push) Has been cancelled Details Test / test_rm (push) Has been cancelled Details Test / test_snapshot_chain (push) Has been cancelled Details Test / test_snapshot_chain_ec (push) Has been cancelled Details Test / test_snapshot_down (push) Has been cancelled Details Test / test_snapshot_down_ec (push) Has been cancelled Details Test / test_splitbrain (push) Has been cancelled Details Test / test_rebalance_verify (push) Has been cancelled Details Test / test_rebalance_verify_imm (push) Has been cancelled Details Test / test_rebalance_verify_ec (push) Has been cancelled Details Test / test_rebalance_verify_ec_imm (push) Has been cancelled Details Test / test_switch_primary (push) Has been cancelled Details Test / test_write (push) Has been cancelled Details Test / test_write_xor (push) Has been cancelled Details Test / test_write_no_same (push) Has been cancelled Details Test / test_heal_pg_size_2 (push) Has been cancelled Details Test / test_heal_ec (push) Has been cancelled Details Test / test_heal_csum_32k_dmj (push) Has been cancelled Details Test / test_heal_csum_32k_dj (push) Has been cancelled Details Test / test_heal_csum_32k (push) Has been cancelled Details Test / test_heal_csum_4k_dmj (push) Has been cancelled Details Test / test_heal_csum_4k_dj (push) Has been cancelled Details Test / test_heal_csum_4k (push) Has been cancelled Details Test / test_scrub (push) Has been cancelled Details Test / test_scrub_zero_osd_2 (push) Has been cancelled Details Test / test_scrub_xor (push) Has been cancelled Details Test / test_scrub_pg_size_3 (push) Has been cancelled Details Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Has been cancelled Details Test / test_scrub_ec (push) Has been cancelled Details	2024-02-29 09:59:01 +03:00
Vitaliy Filippov	38b8963330	Release 1.4.8 Test / test_rm (push) Successful in 19s Details Test / test_move_reappear (push) Successful in 26s Details Test / test_interrupted_rebalance_ec_imm (push) Successful in 1m40s Details Test / test_snapshot_down (push) Successful in 31s Details Test / test_snapshot_down_ec (push) Successful in 34s Details Test / test_splitbrain (push) Successful in 27s Details Test / test_snapshot_chain (push) Successful in 2m18s Details Test / test_snapshot_chain_ec (push) Successful in 2m59s Details Test / test_rebalance_verify_imm (push) Successful in 5m32s Details Test / test_rebalance_verify (push) Successful in 6m11s Details Test / test_switch_primary (push) Successful in 41s Details Test / test_write (push) Successful in 45s Details Test / test_write_no_same (push) Successful in 23s Details Test / test_rebalance_verify_ec_imm (push) Successful in 5m2s Details Test / test_write_xor (push) Successful in 55s Details Test / test_rebalance_verify_ec (push) Successful in 6m22s Details Test / test_heal_pg_size_2 (push) Successful in 5m41s Details Test / test_heal_csum_32k_dmj (push) Successful in 5m59s Details Test / test_heal_csum_32k_dj (push) Successful in 7m19s Details Test / test_heal_csum_32k (push) Successful in 7m17s Details Test / test_heal_csum_4k_dmj (push) Successful in 7m14s Details Test / test_scrub (push) Successful in 1m12s Details Test / test_heal_ec (push) Successful in 9m2s Details Test / test_scrub_xor (push) Successful in 56s Details Test / test_scrub_zero_osd_2 (push) Successful in 1m8s Details Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 2m1s Details Test / test_heal_csum_4k_dj (push) Successful in 4m45s Details Test / test_scrub_pg_size_3 (push) Successful in 2m31s Details Test / test_heal_csum_4k (push) Successful in 4m54s Details Test / test_scrub_ec (push) Successful in 46s Details - Do not use \r if output is not a terminal (should fix unexpected job output in proxmox) - Fix 
rm/rm-data error return code, add --down-ok option to bypass the error - Add EIO retry timeout and allow to disable these retries, rename up_wait_retry_interval to client_retry_interval - Add ubuntu jammy build - Wait for blockstore initialisation before starting OSD (prevent timeouts when init takes time) - Fix a rare use-after-free in automatic sync after delete in blockstore	2024-02-29 09:58:34 +03:00
Vitaliy Filippov	77167e2920	Do not use \r if output is not a terminal	2024-02-29 00:21:17 +03:00
Vitaliy Filippov	5af23672d0	Fix rm/rm-data error return code, add --down-ok option to bypass the error	2024-02-29 00:20:10 +03:00
Vitaliy Filippov	6bf1f539a6	Add EIO retry timeout and allow to disable these retries, rename up_wait_retry_interval to client_retry_interval	2024-02-28 13:10:02 +03:00