diff --git a/CMakeLists.txt b/CMakeLists.txt index 7ac97ec5..ffd021ef 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,6 +2,6 @@ cmake_minimum_required(VERSION 2.8.12) project(vitastor) -set(VERSION "1.4.7") +set(VERSION "1.4.8") add_subdirectory(src) diff --git a/csi/Makefile b/csi/Makefile index d657a801..34b9fef2 100644 --- a/csi/Makefile +++ b/csi/Makefile @@ -1,4 +1,4 @@ -VERSION ?= v1.4.7 +VERSION ?= v1.4.8 all: build push diff --git a/csi/deploy/004-csi-nodeplugin.yaml b/csi/deploy/004-csi-nodeplugin.yaml index eb0c254f..76ff1fa8 100644 --- a/csi/deploy/004-csi-nodeplugin.yaml +++ b/csi/deploy/004-csi-nodeplugin.yaml @@ -49,7 +49,7 @@ spec: capabilities: add: ["SYS_ADMIN"] allowPrivilegeEscalation: true - image: vitalif/vitastor-csi:v1.4.7 + image: vitalif/vitastor-csi:v1.4.8 args: - "--node=$(NODE_ID)" - "--endpoint=$(CSI_ENDPOINT)" diff --git a/csi/deploy/007-csi-provisioner.yaml b/csi/deploy/007-csi-provisioner.yaml index 1aece9c8..a6170334 100644 --- a/csi/deploy/007-csi-provisioner.yaml +++ b/csi/deploy/007-csi-provisioner.yaml @@ -121,7 +121,7 @@ spec: privileged: true capabilities: add: ["SYS_ADMIN"] - image: vitalif/vitastor-csi:v1.4.7 + image: vitalif/vitastor-csi:v1.4.8 args: - "--node=$(NODE_ID)" - "--endpoint=$(CSI_ENDPOINT)" diff --git a/csi/src/config.go b/csi/src/config.go index b9e1c701..dce70885 100644 --- a/csi/src/config.go +++ b/csi/src/config.go @@ -5,7 +5,7 @@ package vitastor const ( vitastorCSIDriverName = "csi.vitastor.io" - vitastorCSIDriverVersion = "1.4.7" + vitastorCSIDriverVersion = "1.4.8" ) // Config struct fills the parameters of request or user input diff --git a/debian/changelog b/debian/changelog index f8ad6f8b..129c5d62 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,4 +1,4 @@ -vitastor (1.4.7-1) unstable; urgency=medium +vitastor (1.4.8-1) unstable; urgency=medium * Bugfixes diff --git a/debian/vitastor.Dockerfile b/debian/vitastor.Dockerfile index a736a50d..da258a6e 100644 --- a/debian/vitastor.Dockerfile +++ b/debian/vitastor.Dockerfile @@ -37,8 +37,8 @@ RUN set -e -x; \ mkdir -p /root/packages/vitastor-$REL; \ rm -rf /root/packages/vitastor-$REL/*; \ cd /root/packages/vitastor-$REL; \ - cp -r /root/vitastor vitastor-1.4.7; \ - cd vitastor-1.4.7; \ + cp -r /root/vitastor vitastor-1.4.8; \ + cd vitastor-1.4.8; \ ln -s /root/fio-build/fio-*/ ./fio; \ FIO=$(head -n1 fio/debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \ ls /usr/include/linux/raw.h || cp ./debian/raw.h /usr/include/linux/raw.h; \ @@ -51,8 +51,8 @@ RUN set -e -x; \ rm -rf a b; \ echo "dep:fio=$FIO" > debian/fio_version; \ cd /root/packages/vitastor-$REL; \ - tar --sort=name --mtime='2020-01-01' --owner=0 --group=0 --exclude=debian -cJf vitastor_1.4.7.orig.tar.xz vitastor-1.4.7; \ - cd vitastor-1.4.7; \ + tar --sort=name --mtime='2020-01-01' --owner=0 --group=0 --exclude=debian -cJf vitastor_1.4.8.orig.tar.xz vitastor-1.4.8; \ + cd vitastor-1.4.8; \ V=$(head -n1 debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \ DEBFULLNAME="Vitaliy Filippov " dch -D $REL -v "$V""$REL" "Rebuild for $REL"; \ DEB_BUILD_OPTIONS=nocheck dpkg-buildpackage --jobs=auto -sa; \ diff --git a/docs/config/client.en.md b/docs/config/client.en.md index 0cd2420e..149f48ba 100644 --- a/docs/config/client.en.md +++ b/docs/config/client.en.md @@ -9,6 +9,8 @@ These parameters apply only to Vitastor clients (QEMU, fio, NBD and so on) and affect their interaction with the cluster. +- [client_retry_interval](#client_retry_interval) +- [client_eio_retry_interval](#client_eio_retry_interval) - [client_max_dirty_bytes](#client_max_dirty_bytes) - [client_max_dirty_ops](#client_max_dirty_ops) - [client_enable_writeback](#client_enable_writeback) @@ -19,6 +21,26 @@ affect their interaction with the cluster. - [nbd_max_devices](#nbd_max_devices) - [nbd_max_part](#nbd_max_part) +## client_retry_interval + +- Type: milliseconds +- Default: 50 +- Minimum: 10 +- Can be changed online: yes + +Retry time for I/O requests failed due to inactive PGs or network +connectivity errors. + +## client_eio_retry_interval + +- Type: milliseconds +- Default: 1000 +- Can be changed online: yes + +Retry time for I/O requests failed due to data corruption or unfinished +EC object deletions (has_incomplete PG state). 0 disables such retries +and clients are not blocked and just get EIO error code instead. + ## client_max_dirty_bytes - Type: integer diff --git a/docs/config/client.ru.md b/docs/config/client.ru.md index bc96e537..e714fe31 100644 --- a/docs/config/client.ru.md +++ b/docs/config/client.ru.md @@ -9,6 +9,8 @@ Данные параметры применяются только к клиентам Vitastor (QEMU, fio, NBD и т.п.) и затрагивают логику их работы с кластером. +- [client_retry_interval](#client_retry_interval) +- [client_eio_retry_interval](#client_eio_retry_interval) - [client_max_dirty_bytes](#client_max_dirty_bytes) - [client_max_dirty_ops](#client_max_dirty_ops) - [client_enable_writeback](#client_enable_writeback) @@ -19,6 +21,27 @@ - [nbd_max_devices](#nbd_max_devices) - [nbd_max_part](#nbd_max_part) +## client_retry_interval + +- Тип: миллисекунды +- Значение по умолчанию: 50 +- Минимальное значение: 10 +- Можно менять на лету: да + +Время повтора запросов ввода-вывода, неудачных из-за неактивных PG или +ошибок сети. + +## client_eio_retry_interval + +- Тип: миллисекунды +- Значение по умолчанию: 1000 +- Можно менять на лету: да + +Время повтора запросов ввода-вывода, неудачных из-за повреждения данных +или незавершённых удалений EC-объектов (состояния PG has_incomplete). +0 отключает повторы таких запросов и клиенты не блокируются, а вместо +этого просто получают код ошибки EIO. + ## client_max_dirty_bytes - Тип: целое число diff --git a/docs/config/network.en.md b/docs/config/network.en.md index a28464ee..85ffd1a5 100644 --- a/docs/config/network.en.md +++ b/docs/config/network.en.md @@ -25,7 +25,6 @@ between clients, OSDs and etcd. - [peer_connect_timeout](#peer_connect_timeout) - [osd_idle_timeout](#osd_idle_timeout) - [osd_ping_timeout](#osd_ping_timeout) -- [up_wait_retry_interval](#up_wait_retry_interval) - [max_etcd_attempts](#max_etcd_attempts) - [etcd_quick_timeout](#etcd_quick_timeout) - [etcd_slow_timeout](#etcd_slow_timeout) @@ -212,17 +211,6 @@ Maximum time to wait for OSD keepalive responses. If an OSD doesn't respond within this time, the connection to it is dropped and a reconnection attempt is scheduled. -## up_wait_retry_interval - -- Type: milliseconds -- Default: 50 -- Minimum: 10 -- Can be changed online: yes - -OSDs respond to clients with a special error code when they receive I/O -requests for a PG that's not synchronized and started. This parameter sets -the time for the clients to wait before re-attempting such I/O requests. - ## max_etcd_attempts - Type: integer diff --git a/docs/config/network.ru.md b/docs/config/network.ru.md index 1d3ceaa0..f97d7c9f 100644 --- a/docs/config/network.ru.md +++ b/docs/config/network.ru.md @@ -25,7 +25,6 @@ - [peer_connect_timeout](#peer_connect_timeout) - [osd_idle_timeout](#osd_idle_timeout) - [osd_ping_timeout](#osd_ping_timeout) -- [up_wait_retry_interval](#up_wait_retry_interval) - [max_etcd_attempts](#max_etcd_attempts) - [etcd_quick_timeout](#etcd_quick_timeout) - [etcd_slow_timeout](#etcd_slow_timeout) @@ -221,19 +220,6 @@ OSD в любом случае согласовывают реальное зн Если OSD не отвечает за это время, соединение отключается и производится повторная попытка соединения. -## up_wait_retry_interval - -- Тип: миллисекунды -- Значение по умолчанию: 50 -- Минимальное значение: 10 -- Можно менять на лету: да - -Когда OSD получают от клиентов запросы ввода-вывода, относящиеся к не -поднятым на данный момент на них PG, либо к PG в процессе синхронизации, -они отвечают клиентам специальным кодом ошибки, означающим, что клиент -должен некоторое время подождать перед повторением запроса. Именно это время -ожидания задаёт данный параметр. - ## max_etcd_attempts - Тип: целое число diff --git a/docs/config/src/client.yml b/docs/config/src/client.yml index 3bebd783..7ca579cb 100644 --- a/docs/config/src/client.yml +++ b/docs/config/src/client.yml @@ -1,3 +1,27 @@ +- name: client_retry_interval + type: ms + min: 10 + default: 50 + online: true + info: | + Retry time for I/O requests failed due to inactive PGs or network + connectivity errors. + info_ru: | + Время повтора запросов ввода-вывода, неудачных из-за неактивных PG или + ошибок сети. +- name: client_eio_retry_interval + type: ms + default: 1000 + online: true + info: | + Retry time for I/O requests failed due to data corruption or unfinished + EC object deletions (has_incomplete PG state). 0 disables such retries + and clients are not blocked and just get EIO error code instead. + info_ru: | + Время повтора запросов ввода-вывода, неудачных из-за повреждения данных + или незавершённых удалений EC-объектов (состояния PG has_incomplete). + 0 отключает повторы таких запросов и клиенты не блокируются, а вместо + этого просто получают код ошибки EIO. - name: client_max_dirty_bytes type: int default: 33554432 diff --git a/docs/config/src/network.yml b/docs/config/src/network.yml index 5bd2c808..ea0c7438 100644 --- a/docs/config/src/network.yml +++ b/docs/config/src/network.yml @@ -243,21 +243,6 @@ Максимальное время ожидания ответа на запрос проверки состояния соединения. Если OSD не отвечает за это время, соединение отключается и производится повторная попытка соединения. -- name: up_wait_retry_interval - type: ms - min: 10 - default: 50 - online: true - info: | - OSDs respond to clients with a special error code when they receive I/O - requests for a PG that's not synchronized and started. This parameter sets - the time for the clients to wait before re-attempting such I/O requests. - info_ru: | - Когда OSD получают от клиентов запросы ввода-вывода, относящиеся к не - поднятым на данный момент на них PG, либо к PG в процессе синхронизации, - они отвечают клиентам специальным кодом ошибки, означающим, что клиент - должен некоторое время подождать перед повторением запроса. Именно это время - ожидания задаёт данный параметр. - name: max_etcd_attempts type: int default: 5 diff --git a/docs/usage/cli.en.md b/docs/usage/cli.en.md index c7084409..128e29aa 100644 --- a/docs/usage/cli.en.md +++ b/docs/usage/cli.en.md @@ -135,19 +135,18 @@ See also about [how to export snapshots](qemu.en.md#exporting-snapshots). ## modify -`vitastor-cli modify [--rename ] [--resize ] [--readonly | --readwrite] [-f|--force]` +`vitastor-cli modify [--rename ] [--resize ] [--readonly | --readwrite] [-f|--force] [--down-ok]` Rename, resize image or change its readonly status. Images with children can't be made read-write. If the new size is smaller than the old size, extra data will be purged. You should resize file system in the image, if present, before shrinking it. -``` --f|--force Proceed with shrinking or setting readwrite flag even if the image has children. -``` +| `-f|--force` | Proceed with shrinking or setting readwrite flag even if the image has children. | +| `--down-ok` | Proceed with shrinking even if some data will be left on unavailable OSDs. | ## rm -`vitastor-cli rm [] [--writers-stopped]` +`vitastor-cli rm [] [--writers-stopped] [--down-ok]` Remove `` or all layers between `` and `` (`` must be a child of ``), rebasing all their children accordingly. --writers-stopped allows merging to be a bit @@ -155,6 +154,10 @@ more effective in case of a single 'slim' read-write child and 'fat' removed par the child is merged into parent and parent is renamed to child in that case. In other cases parent layers are always merged into children. +Other options: + +| `--down-ok` | Continue deletion/merging even if some data will be left on unavailable OSDs. | + ## flatten `vitastor-cli flatten ` diff --git a/docs/usage/cli.ru.md b/docs/usage/cli.ru.md index 933c7c8c..71bc22b2 100644 --- a/docs/usage/cli.ru.md +++ b/docs/usage/cli.ru.md @@ -136,7 +136,7 @@ vitastor-cli snap-create [-p|--pool ] @ ## modify -`vitastor-cli modify [--rename ] [--resize ] [--readonly | --readwrite] [-f|--force]` +`vitastor-cli modify [--rename ] [--resize ] [--readonly | --readwrite] [-f|--force] [--down-ok]` Изменить размер, имя образа или флаг "только для чтения". Снимать флаг "только для чтения" и уменьшать размер образов, у которых есть дочерние клоны, без `--force` нельзя. @@ -144,13 +144,12 @@ vitastor-cli snap-create [-p|--pool ] @ Если новый размер меньше старого, "лишние" данные будут удалены, поэтому перед уменьшением образа сначала уменьшите файловую систему в нём. -``` --f|--force Разрешить уменьшение или перевод в чтение-запись образа, у которого есть клоны. -``` +| -f|--force | Разрешить уменьшение или перевод в чтение-запись образа, у которого есть клоны. | +| --down-ok | Разрешить уменьшение, даже если часть данных останется неудалённой на недоступных OSD. | ## rm -`vitastor-cli rm [] [--writers-stopped]` +`vitastor-cli rm [] [--writers-stopped] [--down-ok]` Удалить образ `` или все слои от `` до `` (`` должен быть дочерним образом ``), одновременно меняя родительские образы их клонов (если таковые есть). @@ -162,6 +161,10 @@ vitastor-cli snap-create [-p|--pool ] @ В других случаях родительские слои вливаются в дочерние. +Другие опции: + +| `--down-ok` | Продолжать удаление/слияние, даже если часть данных останется неудалённой на недоступных OSD. | + ## flatten `vitastor-cli flatten ` diff --git a/mon/mon.js b/mon/mon.js index 6e8864c9..060d5c4a 100644 --- a/mon/mon.js +++ b/mon/mon.js @@ -86,13 +86,14 @@ const etcd_tree = { client_max_buffered_bytes: 33554432, client_max_buffered_ops: 1024, client_max_writeback_iodepth: 256, + client_retry_interval: 50, // ms. min: 10 + client_eio_retry_interval: 1000, // ms // client and osd - configurable online log_level: 0, peer_connect_interval: 5, // seconds. min: 1 peer_connect_timeout: 5, // seconds. min: 1 osd_idle_timeout: 5, // seconds. min: 1 osd_ping_timeout: 5, // seconds. min: 1 - up_wait_retry_interval: 50, // ms. min: 10 max_etcd_attempts: 5, etcd_quick_timeout: 1000, // ms etcd_slow_timeout: 5000, // ms diff --git a/mon/package.json b/mon/package.json index 92b762dc..be74b6cc 100644 --- a/mon/package.json +++ b/mon/package.json @@ -1,6 +1,6 @@ { "name": "vitastor-mon", - "version": "1.4.7", + "version": "1.4.8", "description": "Vitastor SDS monitor service", "main": "mon-main.js", "scripts": { diff --git a/patches/cinder-vitastor.py b/patches/cinder-vitastor.py index be5e9c49..bfd2349b 100644 --- a/patches/cinder-vitastor.py +++ b/patches/cinder-vitastor.py @@ -50,7 +50,7 @@ from cinder.volume import configuration from cinder.volume import driver from cinder.volume import volume_utils -VERSION = '1.4.7' +VERSION = '1.4.8' LOG = logging.getLogger(__name__) diff --git a/rpm/build-tarball.sh b/rpm/build-tarball.sh index 4b171ccb..e0fd5d70 100755 --- a/rpm/build-tarball.sh +++ b/rpm/build-tarball.sh @@ -24,4 +24,4 @@ rm fio mv fio-copy fio FIO=`rpm -qi fio | perl -e 'while(<>) { /^Epoch[\s:]+(\S+)/ && print "$1:"; /^Version[\s:]+(\S+)/ && print $1; /^Release[\s:]+(\S+)/ && print "-$1"; }'` perl -i -pe 's/(Requires:\s*fio)([^\n]+)?/$1 = '$FIO'/' $VITASTOR/rpm/vitastor-el$EL.spec -tar --transform 's#^#vitastor-1.4.7/#' --exclude 'rpm/*.rpm' -czf $VITASTOR/../vitastor-1.4.7$(rpm --eval '%dist').tar.gz * +tar --transform 's#^#vitastor-1.4.8/#' --exclude 'rpm/*.rpm' -czf $VITASTOR/../vitastor-1.4.8$(rpm --eval '%dist').tar.gz * diff --git a/rpm/vitastor-el7.Dockerfile b/rpm/vitastor-el7.Dockerfile index 9d1c67f5..04d44531 100644 --- a/rpm/vitastor-el7.Dockerfile +++ b/rpm/vitastor-el7.Dockerfile @@ -36,7 +36,7 @@ ADD . /root/vitastor RUN set -e; \ cd /root/vitastor/rpm; \ sh build-tarball.sh; \ - cp /root/vitastor-1.4.7.el7.tar.gz ~/rpmbuild/SOURCES; \ + cp /root/vitastor-1.4.8.el7.tar.gz ~/rpmbuild/SOURCES; \ cp vitastor-el7.spec ~/rpmbuild/SPECS/vitastor.spec; \ cd ~/rpmbuild/SPECS/; \ rpmbuild -ba vitastor.spec; \ diff --git a/rpm/vitastor-el7.spec b/rpm/vitastor-el7.spec index 6bcb7fdc..ed3b4286 100644 --- a/rpm/vitastor-el7.spec +++ b/rpm/vitastor-el7.spec @@ -1,11 +1,11 @@ Name: vitastor -Version: 1.4.7 +Version: 1.4.8 Release: 1%{?dist} Summary: Vitastor, a fast software-defined clustered block storage License: Vitastor Network Public License 1.1 URL: https://vitastor.io/ -Source0: vitastor-1.4.7.el7.tar.gz +Source0: vitastor-1.4.8.el7.tar.gz BuildRequires: liburing-devel >= 0.6 BuildRequires: gperftools-devel diff --git a/rpm/vitastor-el8.Dockerfile b/rpm/vitastor-el8.Dockerfile index b18dd6b1..e16c50ce 100644 --- a/rpm/vitastor-el8.Dockerfile +++ b/rpm/vitastor-el8.Dockerfile @@ -35,7 +35,7 @@ ADD . /root/vitastor RUN set -e; \ cd /root/vitastor/rpm; \ sh build-tarball.sh; \ - cp /root/vitastor-1.4.7.el8.tar.gz ~/rpmbuild/SOURCES; \ + cp /root/vitastor-1.4.8.el8.tar.gz ~/rpmbuild/SOURCES; \ cp vitastor-el8.spec ~/rpmbuild/SPECS/vitastor.spec; \ cd ~/rpmbuild/SPECS/; \ rpmbuild -ba vitastor.spec; \ diff --git a/rpm/vitastor-el8.spec b/rpm/vitastor-el8.spec index fc76a740..29e5c440 100644 --- a/rpm/vitastor-el8.spec +++ b/rpm/vitastor-el8.spec @@ -1,11 +1,11 @@ Name: vitastor -Version: 1.4.7 +Version: 1.4.8 Release: 1%{?dist} Summary: Vitastor, a fast software-defined clustered block storage License: Vitastor Network Public License 1.1 URL: https://vitastor.io/ -Source0: vitastor-1.4.7.el8.tar.gz +Source0: vitastor-1.4.8.el8.tar.gz BuildRequires: liburing-devel >= 0.6 BuildRequires: gperftools-devel diff --git a/rpm/vitastor-el9.Dockerfile b/rpm/vitastor-el9.Dockerfile index a0dcb83c..db36d54c 100644 --- a/rpm/vitastor-el9.Dockerfile +++ b/rpm/vitastor-el9.Dockerfile @@ -18,7 +18,7 @@ ADD . /root/vitastor RUN set -e; \ cd /root/vitastor/rpm; \ sh build-tarball.sh; \ - cp /root/vitastor-1.4.7.el9.tar.gz ~/rpmbuild/SOURCES; \ + cp /root/vitastor-1.4.8.el9.tar.gz ~/rpmbuild/SOURCES; \ cp vitastor-el9.spec ~/rpmbuild/SPECS/vitastor.spec; \ cd ~/rpmbuild/SPECS/; \ rpmbuild -ba vitastor.spec; \ diff --git a/rpm/vitastor-el9.spec b/rpm/vitastor-el9.spec index 5a2c8766..65f71b51 100644 --- a/rpm/vitastor-el9.spec +++ b/rpm/vitastor-el9.spec @@ -1,11 +1,11 @@ Name: vitastor -Version: 1.4.7 +Version: 1.4.8 Release: 1%{?dist} Summary: Vitastor, a fast software-defined clustered block storage License: Vitastor Network Public License 1.1 URL: https://vitastor.io/ -Source0: vitastor-1.4.7.el9.tar.gz +Source0: vitastor-1.4.8.el9.tar.gz BuildRequires: liburing-devel >= 0.6 BuildRequires: gperftools-devel diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 0e2a7ba9..81536932 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -16,7 +16,7 @@ if("${CMAKE_INSTALL_PREFIX}" MATCHES "^/usr/local/?$") set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}") endif() -add_definitions(-DVERSION="1.4.7") +add_definitions(-DVERSION="1.4.8") add_definitions(-D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -Wall -Wno-sign-compare -Wno-comment -Wno-parentheses -Wno-pointer-arith -fdiagnostics-color=always -fno-omit-frame-pointer -I ${CMAKE_SOURCE_DIR}/src) add_link_options(-fno-omit-frame-pointer) if (${WITH_ASAN}) diff --git a/src/cli.cpp b/src/cli.cpp index 55d790ee..703412ba 100644 --- a/src/cli.cpp +++ b/src/cli.cpp @@ -46,18 +46,21 @@ static const char* help_text = "vitastor-cli snap-create [-p|--pool ] @\n" " Create a snapshot of image . May be used live if only a single writer is active.\n" "\n" - "vitastor-cli modify [--rename ] [--resize ] [--readonly | --readwrite] [-f|--force]\n" + "vitastor-cli modify [--rename ] [--resize ] [--readonly | --readwrite] [-f|--force] [--down-ok]\n" " Rename, resize image or change its readonly status. Images with children can't be made read-write.\n" " If the new size is smaller than the old size, extra data will be purged.\n" " You should resize file system in the image, if present, before shrinking it.\n" " -f|--force Proceed with shrinking or setting readwrite flag even if the image has children.\n" + " --down-ok Proceed with shrinking even if some data will be left on unavailable OSDs.\n" "\n" - "vitastor-cli rm [] [--writers-stopped]\n" + "vitastor-cli rm [] [--writers-stopped] [--down-ok]\n" " Remove or all layers between and ( must be a child of ),\n" " rebasing all their children accordingly. --writers-stopped allows merging to be a bit\n" " more effective in case of a single 'slim' read-write child and 'fat' removed parent:\n" " the child is merged into parent and parent is renamed to child in that case.\n" " In other cases parent layers are always merged into children.\n" + " Other options:\n" + " --down-ok Continue deletion/merging even if some data will be left on unavailable OSDs.\n" "\n" "vitastor-cli flatten \n" " Flatten a layer, i.e. merge data and detach it from parents.\n" @@ -170,7 +173,7 @@ static const char* help_text = " --parallel_osds M Work with M osds in parallel when possible (default 4)\n" " --progress 1|0 Report progress (default 1)\n" " --cas 1|0 Use CAS writes for flatten, merge, rm (default is decide automatically)\n" - " --no-color Disable colored output\n" + " --color 1|0 Enable/disable colored output and CR symbols (default 1 if stdout is a terminal)\n" " --json JSON output\n" ; @@ -221,6 +224,7 @@ static json11::Json::object parse_args(int narg, const char *args[]) !strcmp(opt, "readonly") || !strcmp(opt, "readwrite") || !strcmp(opt, "force") || !strcmp(opt, "reverse") || !strcmp(opt, "allow-data-loss") || !strcmp(opt, "allow_data_loss") || + !strcmp(opt, "down-ok") || !strcmp(opt, "down_ok") || !strcmp(opt, "dry-run") || !strcmp(opt, "dry_run") || !strcmp(opt, "help") || !strcmp(opt, "all") || !strcmp(opt, "writers-stopped") || !strcmp(opt, "writers_stopped")) diff --git a/src/cli_common.cpp b/src/cli_common.cpp index e5b9b9b1..71b465ac 100644 --- a/src/cli_common.cpp +++ b/src/cli_common.cpp @@ -1,6 +1,7 @@ // Copyright (c) Vitaliy Filippov, 2019+ // License: VNPL-1.1 (see README.md for details) +#include #include "str_util.h" #include "cluster_client.h" #include "cli.h" @@ -113,7 +114,12 @@ void cli_tool_t::parse_config(json11::Json::object & cfg) else kv_it++; } - color = !cfg["no_color"].bool_value(); + if (cfg.find("no_color") != cfg.end()) + color = !cfg["no_color"].bool_value(); + else if (cfg.find("color") != cfg.end()) + color = cfg["color"].bool_value(); + else + color = isatty(1); json_output = cfg["json"].bool_value(); iodepth = cfg["iodepth"].uint64_value(); if (!iodepth) diff --git a/src/cli_merge.cpp b/src/cli_merge.cpp index 97969f0c..fd980985 100644 --- a/src/cli_merge.cpp +++ b/src/cli_merge.cpp @@ -275,7 +275,9 @@ struct snap_merger_t processed++; if (parent->progress && !(processed % 128)) { - printf("\rFiltering target blocks: %ju/%ju", processed, to_process); + fprintf(stderr, parent->color + ? "\rFiltering target blocks: %ju/%ju" + : "Filtering target blocks: %ju/%ju\n", processed, to_process); } } if (in_flight > 0 || oit != merge_offsets.end()) @@ -285,7 +287,9 @@ struct snap_merger_t } if (parent->progress) { - printf("\r%ju full blocks of target filtered out\n", to_process-merge_offsets.size()); + fprintf(stderr, parent->color + ? "\r%ju full blocks of target filtered out\n" + : "%ju full blocks of target filtered out\n", to_process-merge_offsets.size()); } } state = 3; @@ -320,7 +324,9 @@ struct snap_merger_t processed++; if (parent->progress && !(processed % 128)) { - printf("\rOverwriting blocks: %ju/%ju", processed, to_process); + fprintf(stderr, parent->color + ? "\rOverwriting blocks: %ju/%ju" + : "Overwriting blocks: %ju/%ju\n", processed, to_process); } } if (in_flight == 0 && rwo_error.size()) @@ -339,7 +345,9 @@ struct snap_merger_t } if (parent->progress) { - printf("\rOverwriting blocks: %ju/%ju\n", to_process, to_process); + fprintf(stderr, parent->color + ? "\rOverwriting blocks: %ju/%ju\n" + : "Overwriting blocks: %ju/%ju\n", to_process, to_process); } // Done result = (cli_result_t){ .text = "Done, layers from "+from_name+" to "+to_name+" merged into "+target_name, .data = json11::Json::object { diff --git a/src/cli_modify.cpp b/src/cli_modify.cpp index 0f8d3b8e..eed8aa4f 100644 --- a/src/cli_modify.cpp +++ b/src/cli_modify.cpp @@ -15,6 +15,7 @@ struct image_changer_t uint64_t new_size = 0; bool force_size = false, inc_size = false; bool set_readonly = false, set_readwrite = false, force = false; + bool down_ok = false; // interval between fsyncs int fsync_interval = 128; @@ -108,6 +109,7 @@ struct image_changer_t { "pool", (uint64_t)INODE_POOL(inode_num) }, { "fsync-interval", fsync_interval }, { "min-offset", ((new_size+4095)/4096)*4096 }, + { "down-ok", down_ok }, }); resume_1: while (!cb(result)) @@ -252,6 +254,7 @@ std::function cli_tool_t::start_modify(json11::Json cfg) changer->fsync_interval = cfg["fsync_interval"].uint64_value(); if (!changer->fsync_interval) changer->fsync_interval = 128; + changer->down_ok = cfg["down_ok"].bool_value(); // FIXME Check that the image doesn't have children when shrinking return [changer](cli_result_t & result) { diff --git a/src/cli_rm.cpp b/src/cli_rm.cpp index 0ea9185c..277f8aea 100644 --- a/src/cli_rm.cpp +++ b/src/cli_rm.cpp @@ -53,6 +53,8 @@ struct snap_remover_t int use_cas = 1; // interval between fsyncs int fsync_interval = 128; + // ignore deletion errors + bool down_ok = false; std::map sources; std::map inode_used; @@ -680,6 +682,7 @@ resume_100: { "inode", inode }, { "pool", (uint64_t)INODE_POOL(inode) }, { "fsync-interval", fsync_interval }, + { "down-ok", down_ok }, }); } }; @@ -691,6 +694,7 @@ std::function cli_tool_t::start_rm(json11::Json cfg) snap_remover->from_name = cfg["from"].string_value(); snap_remover->to_name = cfg["to"].string_value(); snap_remover->fsync_interval = cfg["fsync_interval"].uint64_value(); + snap_remover->down_ok = cfg["down_ok"].bool_value(); if (!snap_remover->fsync_interval) snap_remover->fsync_interval = 128; if (!cfg["cas"].is_null()) diff --git a/src/cli_rm_data.cpp b/src/cli_rm_data.cpp index d35694d7..e88c6baf 100644 --- a/src/cli_rm_data.cpp +++ b/src/cli_rm_data.cpp @@ -25,6 +25,7 @@ struct rm_inode_t uint64_t inode = 0; pool_id_t pool_id = 0; uint64_t min_offset = 0; + bool down_ok = false; cli_tool_t *parent = NULL; inode_list_t *lister = NULL; @@ -212,7 +213,9 @@ struct rm_inode_t } if (parent->progress && total_count > 0 && total_done*1000/total_count != total_prev_pct) { - fprintf(stderr, "\rRemoved %ju/%ju objects, %ju more PGs to list...", total_done, total_count, pgs_to_list); + fprintf(stderr, parent->color + ? "\rRemoved %ju/%ju objects, %ju more PGs to list..." + : "Removed %ju/%ju objects, %ju more PGs to list...\n", total_done, total_count, pgs_to_list); total_prev_pct = total_done*1000/total_count; } if (lists_done && !lists.size()) @@ -221,17 +224,18 @@ struct rm_inode_t { fprintf(stderr, "\n"); } - if (parent->progress && (total_done < total_count || inactive_osds.size() > 0 || error_count > 0)) + bool is_error = (total_done < total_count || inactive_osds.size() > 0 || error_count > 0); + if (parent->progress && is_error) { fprintf( stderr, "Warning: Pool:%u,ID:%ju inode data may not have been fully removed.\n" - " Use `vitastor-cli rm-data --pool %u --inode %ju` if you encounter it in listings.\n", + "Use `vitastor-cli rm-data --pool %u --inode %ju` if you encounter it in listings.\n", pool_id, INODE_NO_POOL(inode), pool_id, INODE_NO_POOL(inode) ); } result = (cli_result_t){ - .err = error_count > 0 ? EIO : 0, - .text = error_count > 0 ? "Some blocks were not removed" : ( + .err = is_error && !down_ok ? EIO : 0, + .text = is_error ? "Some blocks were not removed" : ( "Done, inode "+std::to_string(INODE_NO_POOL(inode))+" from pool "+ std::to_string(pool_id)+" removed"), .data = json11::Json::object { @@ -280,6 +284,7 @@ std::function cli_tool_t::start_rm_data(json11::Json cfg) { remover->inode = (remover->inode & (((uint64_t)1 << (64-POOL_ID_BITS)) - 1)) | (((uint64_t)remover->pool_id) << (64-POOL_ID_BITS)); } + remover->down_ok = cfg["down_ok"].bool_value(); remover->pool_id = INODE_POOL(remover->inode); remover->min_offset = cfg["min_offset"].uint64_value(); return [remover](cli_result_t & result) diff --git a/src/cluster_client.cpp b/src/cluster_client.cpp index 1a72874b..8e679f4a 100644 --- a/src/cluster_client.cpp +++ b/src/cluster_client.cpp @@ -265,7 +265,7 @@ void cluster_client_t::erase_op(cluster_op_t *op) } } -void cluster_client_t::continue_ops(bool up_retry) +void cluster_client_t::continue_ops(int time_passed) { if (!pgs_loaded) { @@ -277,22 +277,27 @@ void cluster_client_t::continue_ops(bool up_retry) // Attempt to reenter the function return; } + int reset_duration = 0; restart: continuing_ops = 1; for (auto op = op_queue_head; op; ) { cluster_op_t *next_op = op->next; - if (!op->up_wait || up_retry) + if (op->retry_after && time_passed) { - op->up_wait = false; - if (!op->prev_wait) + op->retry_after = op->retry_after > time_passed ? op->retry_after-time_passed : 0; + if (op->retry_after && (!reset_duration || op->retry_after < reset_duration)) { - if (op->opcode == OSD_OP_SYNC) - continue_sync(op); - else - continue_rw(op); + reset_duration = op->retry_after; } } + if (!op->retry_after && !op->prev_wait) + { + if (op->opcode == OSD_OP_SYNC) + continue_sync(op); + else + continue_rw(op); + } op = next_op; if (continuing_ops == 2) { @@ -300,6 +305,27 @@ restart: } } continuing_ops = 0; + reset_retry_timer(reset_duration); +} + +void cluster_client_t::reset_retry_timer(int new_duration) +{ + if (retry_timeout_duration && retry_timeout_duration <= new_duration || !new_duration) + { + return; + } + if (retry_timeout_id) + { + tfd->clear_timer(retry_timeout_id); + } + retry_timeout_duration = new_duration; + retry_timeout_id = tfd->set_timer(retry_timeout_duration, false, [this](int) + { + int time_passed = retry_timeout_duration; + retry_timeout_id = 0; + retry_timeout_duration = 0; + continue_ops(time_passed); + }); } void cluster_client_t::on_load_config_hook(json11::Json::object & etcd_global_config) @@ -349,15 +375,25 @@ void cluster_client_t::on_load_config_hook(json11::Json::object & etcd_global_co { client_max_writeback_iodepth = DEFAULT_CLIENT_MAX_WRITEBACK_IODEPTH; } - // up_wait_retry_interval - up_wait_retry_interval = config["up_wait_retry_interval"].uint64_value(); - if (!up_wait_retry_interval) + // client_retry_interval + client_retry_interval = config["client_retry_interval"].uint64_value(); + if (!client_retry_interval) { - up_wait_retry_interval = 50; + client_retry_interval = 50; } - else if (up_wait_retry_interval < 10) + else if (client_retry_interval < 10) { - up_wait_retry_interval = 10; + client_retry_interval = 10; + } + // client_eio_retry_interval + client_eio_retry_interval = 1000; + if (!config["client_eio_retry_interval"].is_null()) + { + client_eio_retry_interval = config["client_eio_retry_interval"].uint64_value(); + if (client_eio_retry_interval && client_eio_retry_interval < 10) + { + client_eio_retry_interval = 10; + } } // log_level log_level = config["log_level"].uint64_value(); @@ -716,15 +752,8 @@ resume_1: // We'll need to retry again if (op->parts[i].flags & PART_RETRY) { - op->up_wait = true; - if (!retry_timeout_id) - { - retry_timeout_id = tfd->set_timer(up_wait_retry_interval, false, [this](int) - { - retry_timeout_id = 0; - continue_ops(true); - }); - } + op->retry_after = client_retry_interval; + reset_retry_timer(client_retry_interval); } op->state = 1; } @@ -780,10 +809,9 @@ resume_2: return 1; } else if (op->retval != 0 && !(op->flags & OP_FLUSH_BUFFER) && - op->retval != -EPIPE && op->retval != -EIO && op->retval != -ENOSPC) + op->retval != -EPIPE && (op->retval != -EIO || !client_eio_retry_interval) && op->retval != -ENOSPC) { // Fatal error (neither -EPIPE, -EIO nor -ENOSPC) - // FIXME: Add a parameter to allow to not wait for EIOs (incomplete or corrupted objects) to heal erase_op(op); return 1; } @@ -1171,16 +1199,12 @@ void cluster_client_t::handle_op_part(cluster_op_part_t *part) // All next things like timer, continue_sync/rw and stop_client may affect the operation again // So do all these things after modifying operation state, otherwise we may hit reenterability bugs // FIXME postpone such things to set_immediate here to avoid bugs - // Mark op->up_wait = true to retry operation after a short pause (not immediately) - op->up_wait = true; - if (!retry_timeout_id) + // Set op->retry_after to retry operation after a short pause (not immediately) + if (!op->retry_after) { - retry_timeout_id = tfd->set_timer(up_wait_retry_interval, false, [this](int) - { - retry_timeout_id = 0; - continue_ops(true); - }); + op->retry_after = op->retval == -EIO ? client_eio_retry_interval : client_retry_interval; } + reset_retry_timer(op->retry_after); if (op->inflight_count == 0) { if (op->opcode == OSD_OP_SYNC) diff --git a/src/cluster_client.h b/src/cluster_client.h index 140f7a32..89ca4bfc 100644 --- a/src/cluster_client.h +++ b/src/cluster_client.h @@ -59,7 +59,7 @@ protected: void *buf = NULL; cluster_op_t *orig_op = NULL; bool needs_reslice = false; - bool up_wait = false; + int retry_after = 0; int inflight_count = 0, done_count = 0; std::vector parts; void *part_bitmaps = NULL; @@ -92,9 +92,11 @@ class cluster_client_t uint64_t client_max_writeback_iodepth = 0; int log_level = 0; - int up_wait_retry_interval = 500; // ms + int client_retry_interval = 50; // ms + int client_eio_retry_interval = 1000; // ms int retry_timeout_id = 0; + int retry_timeout_duration = 0; std::vector offline_ops; cluster_op_t *op_queue_head = NULL, *op_queue_tail = NULL; writeback_cache_t *wb = NULL; @@ -131,7 +133,7 @@ public: bool get_immediate_commit(uint64_t inode); - void continue_ops(bool up_retry = false); + void continue_ops(int time_passed = 0); inode_list_t *list_inode_start(inode_t inode, std::function&& objects, pg_num_t pg_num, osd_num_t primary_osd, int status)> callback); int list_pg_count(inode_list_t *lst); @@ -152,6 +154,7 @@ protected: int continue_rw(cluster_op_t *op); bool check_rw(cluster_op_t *op); void slice_rw(cluster_op_t *op); + void reset_retry_timer(int new_duration); bool try_send(cluster_op_t *op, int i); int continue_sync(cluster_op_t *op); void send_sync(cluster_op_t *op, cluster_op_part_t *part); diff --git a/src/vitastor.pc.in b/src/vitastor.pc.in index e54406a6..15741448 100644 --- a/src/vitastor.pc.in +++ b/src/vitastor.pc.in @@ -6,7 +6,7 @@ includedir=${prefix}/@CMAKE_INSTALL_INCLUDEDIR@ Name: Vitastor Description: Vitastor client library -Version: 1.4.7 +Version: 1.4.8 Libs: -L${libdir} -lvitastor_client Cflags: -I${includedir}