Compare commits

...

5 Commits

Author SHA1 Message Date
Vitaliy Filippov 4cf6dceed7 Merge branch 'rel-1.4'
Test / test_minsize_1 (push) Has been cancelled Details
Test / test_move_reappear (push) Has been cancelled Details
Test / test_rm (push) Has been cancelled Details
Test / test_snapshot_chain (push) Has been cancelled Details
Test / test_snapshot_chain_ec (push) Has been cancelled Details
Test / test_snapshot_down (push) Has been cancelled Details
Test / test_snapshot_down_ec (push) Has been cancelled Details
Test / test_splitbrain (push) Has been cancelled Details
Test / test_rebalance_verify (push) Has been cancelled Details
Test / test_rebalance_verify_imm (push) Has been cancelled Details
Test / test_rebalance_verify_ec (push) Has been cancelled Details
Test / test_rebalance_verify_ec_imm (push) Has been cancelled Details
Test / test_switch_primary (push) Has been cancelled Details
Test / test_write (push) Has been cancelled Details
Test / test_write_xor (push) Has been cancelled Details
Test / test_write_no_same (push) Has been cancelled Details
Test / test_heal_pg_size_2 (push) Has been cancelled Details
Test / test_heal_ec (push) Has been cancelled Details
Test / test_heal_csum_32k_dmj (push) Has been cancelled Details
Test / test_heal_csum_32k_dj (push) Has been cancelled Details
Test / test_heal_csum_32k (push) Has been cancelled Details
Test / test_heal_csum_4k_dmj (push) Has been cancelled Details
Test / test_heal_csum_4k_dj (push) Has been cancelled Details
Test / test_heal_csum_4k (push) Has been cancelled Details
Test / test_scrub (push) Has been cancelled Details
Test / test_scrub_zero_osd_2 (push) Has been cancelled Details
Test / test_scrub_xor (push) Has been cancelled Details
Test / test_scrub_pg_size_3 (push) Has been cancelled Details
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Has been cancelled Details
Test / test_scrub_ec (push) Has been cancelled Details
2024-02-29 09:59:01 +03:00
Vitaliy Filippov 38b8963330 Release 1.4.8
Test / test_rm (push) Successful in 19s Details
Test / test_move_reappear (push) Successful in 26s Details
Test / test_interrupted_rebalance_ec_imm (push) Successful in 1m40s Details
Test / test_snapshot_down (push) Successful in 31s Details
Test / test_snapshot_down_ec (push) Successful in 34s Details
Test / test_splitbrain (push) Successful in 27s Details
Test / test_snapshot_chain (push) Successful in 2m18s Details
Test / test_snapshot_chain_ec (push) Successful in 2m59s Details
Test / test_rebalance_verify_imm (push) Successful in 5m32s Details
Test / test_rebalance_verify (push) Successful in 6m11s Details
Test / test_switch_primary (push) Successful in 41s Details
Test / test_write (push) Successful in 45s Details
Test / test_write_no_same (push) Successful in 23s Details
Test / test_rebalance_verify_ec_imm (push) Successful in 5m2s Details
Test / test_write_xor (push) Successful in 55s Details
Test / test_rebalance_verify_ec (push) Successful in 6m22s Details
Test / test_heal_pg_size_2 (push) Successful in 5m41s Details
Test / test_heal_csum_32k_dmj (push) Successful in 5m59s Details
Test / test_heal_csum_32k_dj (push) Successful in 7m19s Details
Test / test_heal_csum_32k (push) Successful in 7m17s Details
Test / test_heal_csum_4k_dmj (push) Successful in 7m14s Details
Test / test_scrub (push) Successful in 1m12s Details
Test / test_heal_ec (push) Successful in 9m2s Details
Test / test_scrub_xor (push) Successful in 56s Details
Test / test_scrub_zero_osd_2 (push) Successful in 1m8s Details
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 2m1s Details
Test / test_heal_csum_4k_dj (push) Successful in 4m45s Details
Test / test_scrub_pg_size_3 (push) Successful in 2m31s Details
Test / test_heal_csum_4k (push) Successful in 4m54s Details
Test / test_scrub_ec (push) Successful in 46s Details
- Do not use \r if output is not a terminal (should fix unexpected job output in proxmox)
- Fix rm/rm-data error return code, add --down-ok option to bypass the error
- Add EIO retry timeout and allow to disable these retries, rename up_wait_retry_interval to client_retry_interval
- Add ubuntu jammy build
- Wait for blockstore initialisation before starting OSD (prevent timeouts when init takes time)
- Fix a rare use-after-free in automatic sync after delete in blockstore
2024-02-29 09:58:34 +03:00
Vitaliy Filippov 77167e2920 Do not use \r if output is not a terminal 2024-02-29 00:21:17 +03:00
Vitaliy Filippov 5af23672d0 Fix rm/rm-data error return code, add --down-ok option to bypass the error 2024-02-29 00:20:10 +03:00
Vitaliy Filippov 6bf1f539a6 Add EIO retry timeout and allow to disable these retries, rename up_wait_retry_interval to client_retry_interval 2024-02-28 13:10:02 +03:00
35 changed files with 217 additions and 125 deletions

View File

@ -2,6 +2,6 @@ cmake_minimum_required(VERSION 2.8.12)
project(vitastor) project(vitastor)
set(VERSION "1.4.7") set(VERSION "1.4.8")
add_subdirectory(src) add_subdirectory(src)

View File

@ -1,4 +1,4 @@
VERSION ?= v1.4.7 VERSION ?= v1.4.8
all: build push all: build push

View File

@ -49,7 +49,7 @@ spec:
capabilities: capabilities:
add: ["SYS_ADMIN"] add: ["SYS_ADMIN"]
allowPrivilegeEscalation: true allowPrivilegeEscalation: true
image: vitalif/vitastor-csi:v1.4.7 image: vitalif/vitastor-csi:v1.4.8
args: args:
- "--node=$(NODE_ID)" - "--node=$(NODE_ID)"
- "--endpoint=$(CSI_ENDPOINT)" - "--endpoint=$(CSI_ENDPOINT)"

View File

@ -121,7 +121,7 @@ spec:
privileged: true privileged: true
capabilities: capabilities:
add: ["SYS_ADMIN"] add: ["SYS_ADMIN"]
image: vitalif/vitastor-csi:v1.4.7 image: vitalif/vitastor-csi:v1.4.8
args: args:
- "--node=$(NODE_ID)" - "--node=$(NODE_ID)"
- "--endpoint=$(CSI_ENDPOINT)" - "--endpoint=$(CSI_ENDPOINT)"

View File

@ -5,7 +5,7 @@ package vitastor
const ( const (
vitastorCSIDriverName = "csi.vitastor.io" vitastorCSIDriverName = "csi.vitastor.io"
vitastorCSIDriverVersion = "1.4.7" vitastorCSIDriverVersion = "1.4.8"
) )
// Config struct fills the parameters of request or user input // Config struct fills the parameters of request or user input

2
debian/changelog vendored
View File

@ -1,4 +1,4 @@
vitastor (1.4.7-1) unstable; urgency=medium vitastor (1.4.8-1) unstable; urgency=medium
* Bugfixes * Bugfixes

View File

@ -37,8 +37,8 @@ RUN set -e -x; \
mkdir -p /root/packages/vitastor-$REL; \ mkdir -p /root/packages/vitastor-$REL; \
rm -rf /root/packages/vitastor-$REL/*; \ rm -rf /root/packages/vitastor-$REL/*; \
cd /root/packages/vitastor-$REL; \ cd /root/packages/vitastor-$REL; \
cp -r /root/vitastor vitastor-1.4.7; \ cp -r /root/vitastor vitastor-1.4.8; \
cd vitastor-1.4.7; \ cd vitastor-1.4.8; \
ln -s /root/fio-build/fio-*/ ./fio; \ ln -s /root/fio-build/fio-*/ ./fio; \
FIO=$(head -n1 fio/debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \ FIO=$(head -n1 fio/debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
ls /usr/include/linux/raw.h || cp ./debian/raw.h /usr/include/linux/raw.h; \ ls /usr/include/linux/raw.h || cp ./debian/raw.h /usr/include/linux/raw.h; \
@ -51,8 +51,8 @@ RUN set -e -x; \
rm -rf a b; \ rm -rf a b; \
echo "dep:fio=$FIO" > debian/fio_version; \ echo "dep:fio=$FIO" > debian/fio_version; \
cd /root/packages/vitastor-$REL; \ cd /root/packages/vitastor-$REL; \
tar --sort=name --mtime='2020-01-01' --owner=0 --group=0 --exclude=debian -cJf vitastor_1.4.7.orig.tar.xz vitastor-1.4.7; \ tar --sort=name --mtime='2020-01-01' --owner=0 --group=0 --exclude=debian -cJf vitastor_1.4.8.orig.tar.xz vitastor-1.4.8; \
cd vitastor-1.4.7; \ cd vitastor-1.4.8; \
V=$(head -n1 debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \ V=$(head -n1 debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
DEBFULLNAME="Vitaliy Filippov <vitalif@yourcmc.ru>" dch -D $REL -v "$V""$REL" "Rebuild for $REL"; \ DEBFULLNAME="Vitaliy Filippov <vitalif@yourcmc.ru>" dch -D $REL -v "$V""$REL" "Rebuild for $REL"; \
DEB_BUILD_OPTIONS=nocheck dpkg-buildpackage --jobs=auto -sa; \ DEB_BUILD_OPTIONS=nocheck dpkg-buildpackage --jobs=auto -sa; \

View File

@ -9,6 +9,8 @@
These parameters apply only to Vitastor clients (QEMU, fio, NBD and so on) and These parameters apply only to Vitastor clients (QEMU, fio, NBD and so on) and
affect their interaction with the cluster. affect their interaction with the cluster.
- [client_retry_interval](#client_retry_interval)
- [client_eio_retry_interval](#client_eio_retry_interval)
- [client_max_dirty_bytes](#client_max_dirty_bytes) - [client_max_dirty_bytes](#client_max_dirty_bytes)
- [client_max_dirty_ops](#client_max_dirty_ops) - [client_max_dirty_ops](#client_max_dirty_ops)
- [client_enable_writeback](#client_enable_writeback) - [client_enable_writeback](#client_enable_writeback)
@ -19,6 +21,26 @@ affect their interaction with the cluster.
- [nbd_max_devices](#nbd_max_devices) - [nbd_max_devices](#nbd_max_devices)
- [nbd_max_part](#nbd_max_part) - [nbd_max_part](#nbd_max_part)
## client_retry_interval
- Type: milliseconds
- Default: 50
- Minimum: 10
- Can be changed online: yes
Retry interval for I/O requests that failed due to inactive PGs or network
connectivity errors.
## client_eio_retry_interval
- Type: milliseconds
- Default: 1000
- Can be changed online: yes
Retry interval for I/O requests that failed due to data corruption or unfinished
EC object deletions (has_incomplete PG state). Setting it to 0 disables such
retries: clients are not blocked and receive the EIO error code instead.
## client_max_dirty_bytes ## client_max_dirty_bytes
- Type: integer - Type: integer

View File

@ -9,6 +9,8 @@
Данные параметры применяются только к клиентам Vitastor (QEMU, fio, NBD и т.п.) и Данные параметры применяются только к клиентам Vitastor (QEMU, fio, NBD и т.п.) и
затрагивают логику их работы с кластером. затрагивают логику их работы с кластером.
- [client_retry_interval](#client_retry_interval)
- [client_eio_retry_interval](#client_eio_retry_interval)
- [client_max_dirty_bytes](#client_max_dirty_bytes) - [client_max_dirty_bytes](#client_max_dirty_bytes)
- [client_max_dirty_ops](#client_max_dirty_ops) - [client_max_dirty_ops](#client_max_dirty_ops)
- [client_enable_writeback](#client_enable_writeback) - [client_enable_writeback](#client_enable_writeback)
@ -19,6 +21,27 @@
- [nbd_max_devices](#nbd_max_devices) - [nbd_max_devices](#nbd_max_devices)
- [nbd_max_part](#nbd_max_part) - [nbd_max_part](#nbd_max_part)
## client_retry_interval
- Тип: миллисекунды
- Значение по умолчанию: 50
- Минимальное значение: 10
- Можно менять на лету: да
Время повтора запросов ввода-вывода, неудачных из-за неактивных PG или
ошибок сети.
## client_eio_retry_interval
- Тип: миллисекунды
- Значение по умолчанию: 1000
- Можно менять на лету: да
Время повтора запросов ввода-вывода, неудачных из-за повреждения данных
или незавершённых удалений EC-объектов (состояния PG has_incomplete).
0 отключает повторы таких запросов и клиенты не блокируются, а вместо
этого просто получают код ошибки EIO.
## client_max_dirty_bytes ## client_max_dirty_bytes
- Тип: целое число - Тип: целое число

View File

@ -25,7 +25,6 @@ between clients, OSDs and etcd.
- [peer_connect_timeout](#peer_connect_timeout) - [peer_connect_timeout](#peer_connect_timeout)
- [osd_idle_timeout](#osd_idle_timeout) - [osd_idle_timeout](#osd_idle_timeout)
- [osd_ping_timeout](#osd_ping_timeout) - [osd_ping_timeout](#osd_ping_timeout)
- [up_wait_retry_interval](#up_wait_retry_interval)
- [max_etcd_attempts](#max_etcd_attempts) - [max_etcd_attempts](#max_etcd_attempts)
- [etcd_quick_timeout](#etcd_quick_timeout) - [etcd_quick_timeout](#etcd_quick_timeout)
- [etcd_slow_timeout](#etcd_slow_timeout) - [etcd_slow_timeout](#etcd_slow_timeout)
@ -212,17 +211,6 @@ Maximum time to wait for OSD keepalive responses. If an OSD doesn't respond
within this time, the connection to it is dropped and a reconnection attempt within this time, the connection to it is dropped and a reconnection attempt
is scheduled. is scheduled.
## up_wait_retry_interval
- Type: milliseconds
- Default: 50
- Minimum: 10
- Can be changed online: yes
OSDs respond to clients with a special error code when they receive I/O
requests for a PG that's not synchronized and started. This parameter sets
the time for the clients to wait before re-attempting such I/O requests.
## max_etcd_attempts ## max_etcd_attempts
- Type: integer - Type: integer

View File

@ -25,7 +25,6 @@
- [peer_connect_timeout](#peer_connect_timeout) - [peer_connect_timeout](#peer_connect_timeout)
- [osd_idle_timeout](#osd_idle_timeout) - [osd_idle_timeout](#osd_idle_timeout)
- [osd_ping_timeout](#osd_ping_timeout) - [osd_ping_timeout](#osd_ping_timeout)
- [up_wait_retry_interval](#up_wait_retry_interval)
- [max_etcd_attempts](#max_etcd_attempts) - [max_etcd_attempts](#max_etcd_attempts)
- [etcd_quick_timeout](#etcd_quick_timeout) - [etcd_quick_timeout](#etcd_quick_timeout)
- [etcd_slow_timeout](#etcd_slow_timeout) - [etcd_slow_timeout](#etcd_slow_timeout)
@ -221,19 +220,6 @@ OSD в любом случае согласовывают реальное зн
Если OSD не отвечает за это время, соединение отключается и производится Если OSD не отвечает за это время, соединение отключается и производится
повторная попытка соединения. повторная попытка соединения.
## up_wait_retry_interval
- Тип: миллисекунды
- Значение по умолчанию: 50
- Минимальное значение: 10
- Можно менять на лету: да
Когда OSD получают от клиентов запросы ввода-вывода, относящиеся к не
поднятым на данный момент на них PG, либо к PG в процессе синхронизации,
они отвечают клиентам специальным кодом ошибки, означающим, что клиент
должен некоторое время подождать перед повторением запроса. Именно это время
ожидания задаёт данный параметр.
## max_etcd_attempts ## max_etcd_attempts
- Тип: целое число - Тип: целое число

View File

@ -1,3 +1,27 @@
- name: client_retry_interval
type: ms
min: 10
default: 50
online: true
info: |
Retry interval for I/O requests that failed due to inactive PGs or network
connectivity errors.
info_ru: |
Время повтора запросов ввода-вывода, неудачных из-за неактивных PG или
ошибок сети.
- name: client_eio_retry_interval
type: ms
default: 1000
online: true
info: |
Retry interval for I/O requests that failed due to data corruption or unfinished
EC object deletions (has_incomplete PG state). Setting it to 0 disables such
retries: clients are not blocked and receive the EIO error code instead.
info_ru: |
Время повтора запросов ввода-вывода, неудачных из-за повреждения данных
или незавершённых удалений EC-объектов (состояния PG has_incomplete).
0 отключает повторы таких запросов и клиенты не блокируются, а вместо
этого просто получают код ошибки EIO.
- name: client_max_dirty_bytes - name: client_max_dirty_bytes
type: int type: int
default: 33554432 default: 33554432

View File

@ -243,21 +243,6 @@
Максимальное время ожидания ответа на запрос проверки состояния соединения. Максимальное время ожидания ответа на запрос проверки состояния соединения.
Если OSD не отвечает за это время, соединение отключается и производится Если OSD не отвечает за это время, соединение отключается и производится
повторная попытка соединения. повторная попытка соединения.
- name: up_wait_retry_interval
type: ms
min: 10
default: 50
online: true
info: |
OSDs respond to clients with a special error code when they receive I/O
requests for a PG that's not synchronized and started. This parameter sets
the time for the clients to wait before re-attempting such I/O requests.
info_ru: |
Когда OSD получают от клиентов запросы ввода-вывода, относящиеся к не
поднятым на данный момент на них PG, либо к PG в процессе синхронизации,
они отвечают клиентам специальным кодом ошибки, означающим, что клиент
должен некоторое время подождать перед повторением запроса. Именно это время
ожидания задаёт данный параметр.
- name: max_etcd_attempts - name: max_etcd_attempts
type: int type: int
default: 5 default: 5

View File

@ -135,19 +135,18 @@ See also about [how to export snapshots](qemu.en.md#exporting-snapshots).
## modify ## modify
`vitastor-cli modify <name> [--rename <new-name>] [--resize <size>] [--readonly | --readwrite] [-f|--force]` `vitastor-cli modify <name> [--rename <new-name>] [--resize <size>] [--readonly | --readwrite] [-f|--force] [--down-ok]`
Rename, resize image or change its readonly status. Images with children can't be made read-write. Rename, resize image or change its readonly status. Images with children can't be made read-write.
If the new size is smaller than the old size, extra data will be purged. If the new size is smaller than the old size, extra data will be purged.
You should resize file system in the image, if present, before shrinking it. You should resize file system in the image, if present, before shrinking it.
``` | `-f|--force` | Proceed with shrinking or setting readwrite flag even if the image has children. |
-f|--force Proceed with shrinking or setting readwrite flag even if the image has children. | `--down-ok` | Proceed with shrinking even if some data will be left on unavailable OSDs. |
```
## rm ## rm
`vitastor-cli rm <from> [<to>] [--writers-stopped]` `vitastor-cli rm <from> [<to>] [--writers-stopped] [--down-ok]`
Remove `<from>` or all layers between `<from>` and `<to>` (`<to>` must be a child of `<from>`), Remove `<from>` or all layers between `<from>` and `<to>` (`<to>` must be a child of `<from>`),
rebasing all their children accordingly. --writers-stopped allows merging to be a bit rebasing all their children accordingly. --writers-stopped allows merging to be a bit
@ -155,6 +154,10 @@ more effective in case of a single 'slim' read-write child and 'fat' removed par
the child is merged into parent and parent is renamed to child in that case. the child is merged into parent and parent is renamed to child in that case.
In other cases parent layers are always merged into children. In other cases parent layers are always merged into children.
Other options:
| `--down-ok` | Continue deletion/merging even if some data will be left on unavailable OSDs. |
## flatten ## flatten
`vitastor-cli flatten <layer>` `vitastor-cli flatten <layer>`

View File

@ -136,7 +136,7 @@ vitastor-cli snap-create [-p|--pool <id|name>] <image>@<snapshot>
## modify ## modify
`vitastor-cli modify <name> [--rename <new-name>] [--resize <size>] [--readonly | --readwrite] [-f|--force]` `vitastor-cli modify <name> [--rename <new-name>] [--resize <size>] [--readonly | --readwrite] [-f|--force] [--down-ok]`
Изменить размер, имя образа или флаг "только для чтения". Снимать флаг "только для чтения" Изменить размер, имя образа или флаг "только для чтения". Снимать флаг "только для чтения"
и уменьшать размер образов, у которых есть дочерние клоны, без `--force` нельзя. и уменьшать размер образов, у которых есть дочерние клоны, без `--force` нельзя.
@ -144,13 +144,12 @@ vitastor-cli snap-create [-p|--pool <id|name>] <image>@<snapshot>
Если новый размер меньше старого, "лишние" данные будут удалены, поэтому перед уменьшением Если новый размер меньше старого, "лишние" данные будут удалены, поэтому перед уменьшением
образа сначала уменьшите файловую систему в нём. образа сначала уменьшите файловую систему в нём.
``` | -f|--force | Разрешить уменьшение или перевод в чтение-запись образа, у которого есть клоны. |
-f|--force Разрешить уменьшение или перевод в чтение-запись образа, у которого есть клоны. | --down-ok | Разрешить уменьшение, даже если часть данных останется неудалённой на недоступных OSD. |
```
## rm ## rm
`vitastor-cli rm <from> [<to>] [--writers-stopped]` `vitastor-cli rm <from> [<to>] [--writers-stopped] [--down-ok]`
Удалить образ `<from>` или все слои от `<from>` до `<to>` (`<to>` должен быть дочерним Удалить образ `<from>` или все слои от `<from>` до `<to>` (`<to>` должен быть дочерним
образом `<from>`), одновременно меняя родительские образы их клонов (если таковые есть). образом `<from>`), одновременно меняя родительские образы их клонов (если таковые есть).
@ -162,6 +161,10 @@ vitastor-cli snap-create [-p|--pool <id|name>] <image>@<snapshot>
В других случаях родительские слои вливаются в дочерние. В других случаях родительские слои вливаются в дочерние.
Другие опции:
| `--down-ok` | Продолжать удаление/слияние, даже если часть данных останется неудалённой на недоступных OSD. |
## flatten ## flatten
`vitastor-cli flatten <layer>` `vitastor-cli flatten <layer>`

View File

@ -86,13 +86,14 @@ const etcd_tree = {
client_max_buffered_bytes: 33554432, client_max_buffered_bytes: 33554432,
client_max_buffered_ops: 1024, client_max_buffered_ops: 1024,
client_max_writeback_iodepth: 256, client_max_writeback_iodepth: 256,
client_retry_interval: 50, // ms. min: 10
client_eio_retry_interval: 1000, // ms
// client and osd - configurable online // client and osd - configurable online
log_level: 0, log_level: 0,
peer_connect_interval: 5, // seconds. min: 1 peer_connect_interval: 5, // seconds. min: 1
peer_connect_timeout: 5, // seconds. min: 1 peer_connect_timeout: 5, // seconds. min: 1
osd_idle_timeout: 5, // seconds. min: 1 osd_idle_timeout: 5, // seconds. min: 1
osd_ping_timeout: 5, // seconds. min: 1 osd_ping_timeout: 5, // seconds. min: 1
up_wait_retry_interval: 50, // ms. min: 10
max_etcd_attempts: 5, max_etcd_attempts: 5,
etcd_quick_timeout: 1000, // ms etcd_quick_timeout: 1000, // ms
etcd_slow_timeout: 5000, // ms etcd_slow_timeout: 5000, // ms

View File

@ -1,6 +1,6 @@
{ {
"name": "vitastor-mon", "name": "vitastor-mon",
"version": "1.4.7", "version": "1.4.8",
"description": "Vitastor SDS monitor service", "description": "Vitastor SDS monitor service",
"main": "mon-main.js", "main": "mon-main.js",
"scripts": { "scripts": {

View File

@ -50,7 +50,7 @@ from cinder.volume import configuration
from cinder.volume import driver from cinder.volume import driver
from cinder.volume import volume_utils from cinder.volume import volume_utils
VERSION = '1.4.7' VERSION = '1.4.8'
LOG = logging.getLogger(__name__) LOG = logging.getLogger(__name__)

View File

@ -24,4 +24,4 @@ rm fio
mv fio-copy fio mv fio-copy fio
FIO=`rpm -qi fio | perl -e 'while(<>) { /^Epoch[\s:]+(\S+)/ && print "$1:"; /^Version[\s:]+(\S+)/ && print $1; /^Release[\s:]+(\S+)/ && print "-$1"; }'` FIO=`rpm -qi fio | perl -e 'while(<>) { /^Epoch[\s:]+(\S+)/ && print "$1:"; /^Version[\s:]+(\S+)/ && print $1; /^Release[\s:]+(\S+)/ && print "-$1"; }'`
perl -i -pe 's/(Requires:\s*fio)([^\n]+)?/$1 = '$FIO'/' $VITASTOR/rpm/vitastor-el$EL.spec perl -i -pe 's/(Requires:\s*fio)([^\n]+)?/$1 = '$FIO'/' $VITASTOR/rpm/vitastor-el$EL.spec
tar --transform 's#^#vitastor-1.4.7/#' --exclude 'rpm/*.rpm' -czf $VITASTOR/../vitastor-1.4.7$(rpm --eval '%dist').tar.gz * tar --transform 's#^#vitastor-1.4.8/#' --exclude 'rpm/*.rpm' -czf $VITASTOR/../vitastor-1.4.8$(rpm --eval '%dist').tar.gz *

View File

@ -36,7 +36,7 @@ ADD . /root/vitastor
RUN set -e; \ RUN set -e; \
cd /root/vitastor/rpm; \ cd /root/vitastor/rpm; \
sh build-tarball.sh; \ sh build-tarball.sh; \
cp /root/vitastor-1.4.7.el7.tar.gz ~/rpmbuild/SOURCES; \ cp /root/vitastor-1.4.8.el7.tar.gz ~/rpmbuild/SOURCES; \
cp vitastor-el7.spec ~/rpmbuild/SPECS/vitastor.spec; \ cp vitastor-el7.spec ~/rpmbuild/SPECS/vitastor.spec; \
cd ~/rpmbuild/SPECS/; \ cd ~/rpmbuild/SPECS/; \
rpmbuild -ba vitastor.spec; \ rpmbuild -ba vitastor.spec; \

View File

@ -1,11 +1,11 @@
Name: vitastor Name: vitastor
Version: 1.4.7 Version: 1.4.8
Release: 1%{?dist} Release: 1%{?dist}
Summary: Vitastor, a fast software-defined clustered block storage Summary: Vitastor, a fast software-defined clustered block storage
License: Vitastor Network Public License 1.1 License: Vitastor Network Public License 1.1
URL: https://vitastor.io/ URL: https://vitastor.io/
Source0: vitastor-1.4.7.el7.tar.gz Source0: vitastor-1.4.8.el7.tar.gz
BuildRequires: liburing-devel >= 0.6 BuildRequires: liburing-devel >= 0.6
BuildRequires: gperftools-devel BuildRequires: gperftools-devel

View File

@ -35,7 +35,7 @@ ADD . /root/vitastor
RUN set -e; \ RUN set -e; \
cd /root/vitastor/rpm; \ cd /root/vitastor/rpm; \
sh build-tarball.sh; \ sh build-tarball.sh; \
cp /root/vitastor-1.4.7.el8.tar.gz ~/rpmbuild/SOURCES; \ cp /root/vitastor-1.4.8.el8.tar.gz ~/rpmbuild/SOURCES; \
cp vitastor-el8.spec ~/rpmbuild/SPECS/vitastor.spec; \ cp vitastor-el8.spec ~/rpmbuild/SPECS/vitastor.spec; \
cd ~/rpmbuild/SPECS/; \ cd ~/rpmbuild/SPECS/; \
rpmbuild -ba vitastor.spec; \ rpmbuild -ba vitastor.spec; \

View File

@ -1,11 +1,11 @@
Name: vitastor Name: vitastor
Version: 1.4.7 Version: 1.4.8
Release: 1%{?dist} Release: 1%{?dist}
Summary: Vitastor, a fast software-defined clustered block storage Summary: Vitastor, a fast software-defined clustered block storage
License: Vitastor Network Public License 1.1 License: Vitastor Network Public License 1.1
URL: https://vitastor.io/ URL: https://vitastor.io/
Source0: vitastor-1.4.7.el8.tar.gz Source0: vitastor-1.4.8.el8.tar.gz
BuildRequires: liburing-devel >= 0.6 BuildRequires: liburing-devel >= 0.6
BuildRequires: gperftools-devel BuildRequires: gperftools-devel

View File

@ -18,7 +18,7 @@ ADD . /root/vitastor
RUN set -e; \ RUN set -e; \
cd /root/vitastor/rpm; \ cd /root/vitastor/rpm; \
sh build-tarball.sh; \ sh build-tarball.sh; \
cp /root/vitastor-1.4.7.el9.tar.gz ~/rpmbuild/SOURCES; \ cp /root/vitastor-1.4.8.el9.tar.gz ~/rpmbuild/SOURCES; \
cp vitastor-el9.spec ~/rpmbuild/SPECS/vitastor.spec; \ cp vitastor-el9.spec ~/rpmbuild/SPECS/vitastor.spec; \
cd ~/rpmbuild/SPECS/; \ cd ~/rpmbuild/SPECS/; \
rpmbuild -ba vitastor.spec; \ rpmbuild -ba vitastor.spec; \

View File

@ -1,11 +1,11 @@
Name: vitastor Name: vitastor
Version: 1.4.7 Version: 1.4.8
Release: 1%{?dist} Release: 1%{?dist}
Summary: Vitastor, a fast software-defined clustered block storage Summary: Vitastor, a fast software-defined clustered block storage
License: Vitastor Network Public License 1.1 License: Vitastor Network Public License 1.1
URL: https://vitastor.io/ URL: https://vitastor.io/
Source0: vitastor-1.4.7.el9.tar.gz Source0: vitastor-1.4.8.el9.tar.gz
BuildRequires: liburing-devel >= 0.6 BuildRequires: liburing-devel >= 0.6
BuildRequires: gperftools-devel BuildRequires: gperftools-devel

View File

@ -16,7 +16,7 @@ if("${CMAKE_INSTALL_PREFIX}" MATCHES "^/usr/local/?$")
set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}") set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}")
endif() endif()
add_definitions(-DVERSION="1.4.7") add_definitions(-DVERSION="1.4.8")
add_definitions(-D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -Wall -Wno-sign-compare -Wno-comment -Wno-parentheses -Wno-pointer-arith -fdiagnostics-color=always -fno-omit-frame-pointer -I ${CMAKE_SOURCE_DIR}/src) add_definitions(-D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -Wall -Wno-sign-compare -Wno-comment -Wno-parentheses -Wno-pointer-arith -fdiagnostics-color=always -fno-omit-frame-pointer -I ${CMAKE_SOURCE_DIR}/src)
add_link_options(-fno-omit-frame-pointer) add_link_options(-fno-omit-frame-pointer)
if (${WITH_ASAN}) if (${WITH_ASAN})

View File

@ -46,18 +46,21 @@ static const char* help_text =
"vitastor-cli snap-create [-p|--pool <id|name>] <image>@<snapshot>\n" "vitastor-cli snap-create [-p|--pool <id|name>] <image>@<snapshot>\n"
" Create a snapshot of image <name>. May be used live if only a single writer is active.\n" " Create a snapshot of image <name>. May be used live if only a single writer is active.\n"
"\n" "\n"
"vitastor-cli modify <name> [--rename <new-name>] [--resize <size>] [--readonly | --readwrite] [-f|--force]\n" "vitastor-cli modify <name> [--rename <new-name>] [--resize <size>] [--readonly | --readwrite] [-f|--force] [--down-ok]\n"
" Rename, resize image or change its readonly status. Images with children can't be made read-write.\n" " Rename, resize image or change its readonly status. Images with children can't be made read-write.\n"
" If the new size is smaller than the old size, extra data will be purged.\n" " If the new size is smaller than the old size, extra data will be purged.\n"
" You should resize file system in the image, if present, before shrinking it.\n" " You should resize file system in the image, if present, before shrinking it.\n"
" -f|--force Proceed with shrinking or setting readwrite flag even if the image has children.\n" " -f|--force Proceed with shrinking or setting readwrite flag even if the image has children.\n"
" --down-ok Proceed with shrinking even if some data will be left on unavailable OSDs.\n"
"\n" "\n"
"vitastor-cli rm <from> [<to>] [--writers-stopped]\n" "vitastor-cli rm <from> [<to>] [--writers-stopped] [--down-ok]\n"
" Remove <from> or all layers between <from> and <to> (<to> must be a child of <from>),\n" " Remove <from> or all layers between <from> and <to> (<to> must be a child of <from>),\n"
" rebasing all their children accordingly. --writers-stopped allows merging to be a bit\n" " rebasing all their children accordingly. --writers-stopped allows merging to be a bit\n"
" more effective in case of a single 'slim' read-write child and 'fat' removed parent:\n" " more effective in case of a single 'slim' read-write child and 'fat' removed parent:\n"
" the child is merged into parent and parent is renamed to child in that case.\n" " the child is merged into parent and parent is renamed to child in that case.\n"
" In other cases parent layers are always merged into children.\n" " In other cases parent layers are always merged into children.\n"
" Other options:\n"
" --down-ok Continue deletion/merging even if some data will be left on unavailable OSDs.\n"
"\n" "\n"
"vitastor-cli flatten <layer>\n" "vitastor-cli flatten <layer>\n"
" Flatten a layer, i.e. merge data and detach it from parents.\n" " Flatten a layer, i.e. merge data and detach it from parents.\n"
@ -170,7 +173,7 @@ static const char* help_text =
" --parallel_osds M Work with M osds in parallel when possible (default 4)\n" " --parallel_osds M Work with M osds in parallel when possible (default 4)\n"
" --progress 1|0 Report progress (default 1)\n" " --progress 1|0 Report progress (default 1)\n"
" --cas 1|0 Use CAS writes for flatten, merge, rm (default is decide automatically)\n" " --cas 1|0 Use CAS writes for flatten, merge, rm (default is decide automatically)\n"
" --no-color Disable colored output\n" " --color 1|0 Enable/disable colored output and CR symbols (default 1 if stdout is a terminal)\n"
" --json JSON output\n" " --json JSON output\n"
; ;
@ -221,6 +224,7 @@ static json11::Json::object parse_args(int narg, const char *args[])
!strcmp(opt, "readonly") || !strcmp(opt, "readwrite") || !strcmp(opt, "readonly") || !strcmp(opt, "readwrite") ||
!strcmp(opt, "force") || !strcmp(opt, "reverse") || !strcmp(opt, "force") || !strcmp(opt, "reverse") ||
!strcmp(opt, "allow-data-loss") || !strcmp(opt, "allow_data_loss") || !strcmp(opt, "allow-data-loss") || !strcmp(opt, "allow_data_loss") ||
!strcmp(opt, "down-ok") || !strcmp(opt, "down_ok") ||
!strcmp(opt, "dry-run") || !strcmp(opt, "dry_run") || !strcmp(opt, "dry-run") || !strcmp(opt, "dry_run") ||
!strcmp(opt, "help") || !strcmp(opt, "all") || !strcmp(opt, "help") || !strcmp(opt, "all") ||
!strcmp(opt, "writers-stopped") || !strcmp(opt, "writers_stopped")) !strcmp(opt, "writers-stopped") || !strcmp(opt, "writers_stopped"))

View File

@ -1,6 +1,7 @@
// Copyright (c) Vitaliy Filippov, 2019+ // Copyright (c) Vitaliy Filippov, 2019+
// License: VNPL-1.1 (see README.md for details) // License: VNPL-1.1 (see README.md for details)
#include <unistd.h>
#include "str_util.h" #include "str_util.h"
#include "cluster_client.h" #include "cluster_client.h"
#include "cli.h" #include "cli.h"
@ -113,7 +114,12 @@ void cli_tool_t::parse_config(json11::Json::object & cfg)
else else
kv_it++; kv_it++;
} }
color = !cfg["no_color"].bool_value(); if (cfg.find("no_color") != cfg.end())
color = !cfg["no_color"].bool_value();
else if (cfg.find("color") != cfg.end())
color = cfg["color"].bool_value();
else
color = isatty(1);
json_output = cfg["json"].bool_value(); json_output = cfg["json"].bool_value();
iodepth = cfg["iodepth"].uint64_value(); iodepth = cfg["iodepth"].uint64_value();
if (!iodepth) if (!iodepth)

View File

@ -275,7 +275,9 @@ struct snap_merger_t
processed++; processed++;
if (parent->progress && !(processed % 128)) if (parent->progress && !(processed % 128))
{ {
printf("\rFiltering target blocks: %ju/%ju", processed, to_process); fprintf(stderr, parent->color
? "\rFiltering target blocks: %ju/%ju"
: "Filtering target blocks: %ju/%ju\n", processed, to_process);
} }
} }
if (in_flight > 0 || oit != merge_offsets.end()) if (in_flight > 0 || oit != merge_offsets.end())
@ -285,7 +287,9 @@ struct snap_merger_t
} }
if (parent->progress) if (parent->progress)
{ {
printf("\r%ju full blocks of target filtered out\n", to_process-merge_offsets.size()); fprintf(stderr, parent->color
? "\r%ju full blocks of target filtered out\n"
: "%ju full blocks of target filtered out\n", to_process-merge_offsets.size());
} }
} }
state = 3; state = 3;
@ -320,7 +324,9 @@ struct snap_merger_t
processed++; processed++;
if (parent->progress && !(processed % 128)) if (parent->progress && !(processed % 128))
{ {
printf("\rOverwriting blocks: %ju/%ju", processed, to_process); fprintf(stderr, parent->color
? "\rOverwriting blocks: %ju/%ju"
: "Overwriting blocks: %ju/%ju\n", processed, to_process);
} }
} }
if (in_flight == 0 && rwo_error.size()) if (in_flight == 0 && rwo_error.size())
@ -339,7 +345,9 @@ struct snap_merger_t
} }
if (parent->progress) if (parent->progress)
{ {
printf("\rOverwriting blocks: %ju/%ju\n", to_process, to_process); fprintf(stderr, parent->color
? "\rOverwriting blocks: %ju/%ju\n"
: "Overwriting blocks: %ju/%ju\n", to_process, to_process);
} }
// Done // Done
result = (cli_result_t){ .text = "Done, layers from "+from_name+" to "+to_name+" merged into "+target_name, .data = json11::Json::object { result = (cli_result_t){ .text = "Done, layers from "+from_name+" to "+to_name+" merged into "+target_name, .data = json11::Json::object {

View File

@ -15,6 +15,7 @@ struct image_changer_t
uint64_t new_size = 0; uint64_t new_size = 0;
bool force_size = false, inc_size = false; bool force_size = false, inc_size = false;
bool set_readonly = false, set_readwrite = false, force = false; bool set_readonly = false, set_readwrite = false, force = false;
bool down_ok = false;
// interval between fsyncs // interval between fsyncs
int fsync_interval = 128; int fsync_interval = 128;
@ -108,6 +109,7 @@ struct image_changer_t
{ "pool", (uint64_t)INODE_POOL(inode_num) }, { "pool", (uint64_t)INODE_POOL(inode_num) },
{ "fsync-interval", fsync_interval }, { "fsync-interval", fsync_interval },
{ "min-offset", ((new_size+4095)/4096)*4096 }, { "min-offset", ((new_size+4095)/4096)*4096 },
{ "down-ok", down_ok },
}); });
resume_1: resume_1:
while (!cb(result)) while (!cb(result))
@ -252,6 +254,7 @@ std::function<bool(cli_result_t &)> cli_tool_t::start_modify(json11::Json cfg)
changer->fsync_interval = cfg["fsync_interval"].uint64_value(); changer->fsync_interval = cfg["fsync_interval"].uint64_value();
if (!changer->fsync_interval) if (!changer->fsync_interval)
changer->fsync_interval = 128; changer->fsync_interval = 128;
changer->down_ok = cfg["down_ok"].bool_value();
// FIXME Check that the image doesn't have children when shrinking // FIXME Check that the image doesn't have children when shrinking
return [changer](cli_result_t & result) return [changer](cli_result_t & result)
{ {

View File

@ -53,6 +53,8 @@ struct snap_remover_t
int use_cas = 1; int use_cas = 1;
// interval between fsyncs // interval between fsyncs
int fsync_interval = 128; int fsync_interval = 128;
// ignore deletion errors
bool down_ok = false;
std::map<inode_t,int> sources; std::map<inode_t,int> sources;
std::map<inode_t,uint64_t> inode_used; std::map<inode_t,uint64_t> inode_used;
@ -680,6 +682,7 @@ resume_100:
{ "inode", inode }, { "inode", inode },
{ "pool", (uint64_t)INODE_POOL(inode) }, { "pool", (uint64_t)INODE_POOL(inode) },
{ "fsync-interval", fsync_interval }, { "fsync-interval", fsync_interval },
{ "down-ok", down_ok },
}); });
} }
}; };
@ -691,6 +694,7 @@ std::function<bool(cli_result_t &)> cli_tool_t::start_rm(json11::Json cfg)
snap_remover->from_name = cfg["from"].string_value(); snap_remover->from_name = cfg["from"].string_value();
snap_remover->to_name = cfg["to"].string_value(); snap_remover->to_name = cfg["to"].string_value();
snap_remover->fsync_interval = cfg["fsync_interval"].uint64_value(); snap_remover->fsync_interval = cfg["fsync_interval"].uint64_value();
snap_remover->down_ok = cfg["down_ok"].bool_value();
if (!snap_remover->fsync_interval) if (!snap_remover->fsync_interval)
snap_remover->fsync_interval = 128; snap_remover->fsync_interval = 128;
if (!cfg["cas"].is_null()) if (!cfg["cas"].is_null())

View File

@ -25,6 +25,7 @@ struct rm_inode_t
uint64_t inode = 0; uint64_t inode = 0;
pool_id_t pool_id = 0; pool_id_t pool_id = 0;
uint64_t min_offset = 0; uint64_t min_offset = 0;
bool down_ok = false;
cli_tool_t *parent = NULL; cli_tool_t *parent = NULL;
inode_list_t *lister = NULL; inode_list_t *lister = NULL;
@ -212,7 +213,9 @@ struct rm_inode_t
} }
if (parent->progress && total_count > 0 && total_done*1000/total_count != total_prev_pct) if (parent->progress && total_count > 0 && total_done*1000/total_count != total_prev_pct)
{ {
fprintf(stderr, "\rRemoved %ju/%ju objects, %ju more PGs to list...", total_done, total_count, pgs_to_list); fprintf(stderr, parent->color
? "\rRemoved %ju/%ju objects, %ju more PGs to list..."
: "Removed %ju/%ju objects, %ju more PGs to list...\n", total_done, total_count, pgs_to_list);
total_prev_pct = total_done*1000/total_count; total_prev_pct = total_done*1000/total_count;
} }
if (lists_done && !lists.size()) if (lists_done && !lists.size())
@ -221,17 +224,18 @@ struct rm_inode_t
{ {
fprintf(stderr, "\n"); fprintf(stderr, "\n");
} }
if (parent->progress && (total_done < total_count || inactive_osds.size() > 0 || error_count > 0)) bool is_error = (total_done < total_count || inactive_osds.size() > 0 || error_count > 0);
if (parent->progress && is_error)
{ {
fprintf( fprintf(
stderr, "Warning: Pool:%u,ID:%ju inode data may not have been fully removed.\n" stderr, "Warning: Pool:%u,ID:%ju inode data may not have been fully removed.\n"
" Use `vitastor-cli rm-data --pool %u --inode %ju` if you encounter it in listings.\n", "Use `vitastor-cli rm-data --pool %u --inode %ju` if you encounter it in listings.\n",
pool_id, INODE_NO_POOL(inode), pool_id, INODE_NO_POOL(inode) pool_id, INODE_NO_POOL(inode), pool_id, INODE_NO_POOL(inode)
); );
} }
result = (cli_result_t){ result = (cli_result_t){
.err = error_count > 0 ? EIO : 0, .err = is_error && !down_ok ? EIO : 0,
.text = error_count > 0 ? "Some blocks were not removed" : ( .text = is_error ? "Some blocks were not removed" : (
"Done, inode "+std::to_string(INODE_NO_POOL(inode))+" from pool "+ "Done, inode "+std::to_string(INODE_NO_POOL(inode))+" from pool "+
std::to_string(pool_id)+" removed"), std::to_string(pool_id)+" removed"),
.data = json11::Json::object { .data = json11::Json::object {
@ -280,6 +284,7 @@ std::function<bool(cli_result_t &)> cli_tool_t::start_rm_data(json11::Json cfg)
{ {
remover->inode = (remover->inode & (((uint64_t)1 << (64-POOL_ID_BITS)) - 1)) | (((uint64_t)remover->pool_id) << (64-POOL_ID_BITS)); remover->inode = (remover->inode & (((uint64_t)1 << (64-POOL_ID_BITS)) - 1)) | (((uint64_t)remover->pool_id) << (64-POOL_ID_BITS));
} }
remover->down_ok = cfg["down_ok"].bool_value();
remover->pool_id = INODE_POOL(remover->inode); remover->pool_id = INODE_POOL(remover->inode);
remover->min_offset = cfg["min_offset"].uint64_value(); remover->min_offset = cfg["min_offset"].uint64_value();
return [remover](cli_result_t & result) return [remover](cli_result_t & result)

View File

@ -265,7 +265,7 @@ void cluster_client_t::erase_op(cluster_op_t *op)
} }
} }
void cluster_client_t::continue_ops(bool up_retry) void cluster_client_t::continue_ops(int time_passed)
{ {
if (!pgs_loaded) if (!pgs_loaded)
{ {
@ -277,22 +277,27 @@ void cluster_client_t::continue_ops(bool up_retry)
// Attempt to reenter the function // Attempt to reenter the function
return; return;
} }
int reset_duration = 0;
restart: restart:
continuing_ops = 1; continuing_ops = 1;
for (auto op = op_queue_head; op; ) for (auto op = op_queue_head; op; )
{ {
cluster_op_t *next_op = op->next; cluster_op_t *next_op = op->next;
if (!op->up_wait || up_retry) if (op->retry_after && time_passed)
{ {
op->up_wait = false; op->retry_after = op->retry_after > time_passed ? op->retry_after-time_passed : 0;
if (!op->prev_wait) if (op->retry_after && (!reset_duration || op->retry_after < reset_duration))
{ {
if (op->opcode == OSD_OP_SYNC) reset_duration = op->retry_after;
continue_sync(op);
else
continue_rw(op);
} }
} }
if (!op->retry_after && !op->prev_wait)
{
if (op->opcode == OSD_OP_SYNC)
continue_sync(op);
else
continue_rw(op);
}
op = next_op; op = next_op;
if (continuing_ops == 2) if (continuing_ops == 2)
{ {
@ -300,6 +305,27 @@ restart:
} }
} }
continuing_ops = 0; continuing_ops = 0;
reset_retry_timer(reset_duration);
}
// Arm (or re-arm) the retry timer so that it fires after <new_duration>.
// Does nothing when <new_duration> is 0, or when the timer is already armed
// with an equal or shorter duration.
// NOTE(review): <new_duration> is presumably in milliseconds, like client_retry_interval - confirm
void cluster_client_t::reset_retry_timer(int new_duration)
{
// && binds tighter than ||, so this reads as:
// (timer already armed with duration <= new_duration) || (no delay requested).
// retry_timeout_duration is the value the timer was armed with, not the time remaining
if (retry_timeout_duration && retry_timeout_duration <= new_duration || !new_duration)
{
return;
}
// A timer armed with a longer duration is dropped and replaced by the shorter one
if (retry_timeout_id)
{
tfd->clear_timer(retry_timeout_id);
}
retry_timeout_duration = new_duration;
// NOTE(review): the `false` argument presumably means "do not repeat" - verify against set_timer()
retry_timeout_id = tfd->set_timer(retry_timeout_duration, false, [this](int)
{
// Save the duration and clear the timer state BEFORE calling continue_ops(),
// because continue_ops() calls reset_retry_timer() again at its end
int time_passed = retry_timeout_duration;
retry_timeout_id = 0;
retry_timeout_duration = 0;
continue_ops(time_passed);
});
} }
void cluster_client_t::on_load_config_hook(json11::Json::object & etcd_global_config) void cluster_client_t::on_load_config_hook(json11::Json::object & etcd_global_config)
@ -349,15 +375,25 @@ void cluster_client_t::on_load_config_hook(json11::Json::object & etcd_global_co
{ {
client_max_writeback_iodepth = DEFAULT_CLIENT_MAX_WRITEBACK_IODEPTH; client_max_writeback_iodepth = DEFAULT_CLIENT_MAX_WRITEBACK_IODEPTH;
} }
// up_wait_retry_interval // client_retry_interval
up_wait_retry_interval = config["up_wait_retry_interval"].uint64_value(); client_retry_interval = config["client_retry_interval"].uint64_value();
if (!up_wait_retry_interval) if (!client_retry_interval)
{ {
up_wait_retry_interval = 50; client_retry_interval = 50;
} }
else if (up_wait_retry_interval < 10) else if (client_retry_interval < 10)
{ {
up_wait_retry_interval = 10; client_retry_interval = 10;
}
// client_eio_retry_interval
client_eio_retry_interval = 1000;
if (!config["client_eio_retry_interval"].is_null())
{
client_eio_retry_interval = config["client_eio_retry_interval"].uint64_value();
if (client_eio_retry_interval && client_eio_retry_interval < 10)
{
client_eio_retry_interval = 10;
}
} }
// log_level // log_level
log_level = config["log_level"].uint64_value(); log_level = config["log_level"].uint64_value();
@ -716,15 +752,8 @@ resume_1:
// We'll need to retry again // We'll need to retry again
if (op->parts[i].flags & PART_RETRY) if (op->parts[i].flags & PART_RETRY)
{ {
op->up_wait = true; op->retry_after = client_retry_interval;
if (!retry_timeout_id) reset_retry_timer(client_retry_interval);
{
retry_timeout_id = tfd->set_timer(up_wait_retry_interval, false, [this](int)
{
retry_timeout_id = 0;
continue_ops(true);
});
}
} }
op->state = 1; op->state = 1;
} }
@ -780,10 +809,9 @@ resume_2:
return 1; return 1;
} }
else if (op->retval != 0 && !(op->flags & OP_FLUSH_BUFFER) && else if (op->retval != 0 && !(op->flags & OP_FLUSH_BUFFER) &&
op->retval != -EPIPE && op->retval != -EIO && op->retval != -ENOSPC) op->retval != -EPIPE && (op->retval != -EIO || !client_eio_retry_interval) && op->retval != -ENOSPC)
{ {
// Fatal error (neither -EPIPE, -EIO nor -ENOSPC) // Fatal error (neither -EPIPE, -EIO nor -ENOSPC)
// FIXME: Add a parameter to allow to not wait for EIOs (incomplete or corrupted objects) to heal
erase_op(op); erase_op(op);
return 1; return 1;
} }
@ -1171,16 +1199,12 @@ void cluster_client_t::handle_op_part(cluster_op_part_t *part)
// All next things like timer, continue_sync/rw and stop_client may affect the operation again // All next things like timer, continue_sync/rw and stop_client may affect the operation again
// So do all these things after modifying operation state, otherwise we may hit reenterability bugs // So do all these things after modifying operation state, otherwise we may hit reenterability bugs
// FIXME postpone such things to set_immediate here to avoid bugs // FIXME postpone such things to set_immediate here to avoid bugs
// Mark op->up_wait = true to retry operation after a short pause (not immediately) // Set op->retry_after to retry operation after a short pause (not immediately)
op->up_wait = true; if (!op->retry_after)
if (!retry_timeout_id)
{ {
retry_timeout_id = tfd->set_timer(up_wait_retry_interval, false, [this](int) op->retry_after = op->retval == -EIO ? client_eio_retry_interval : client_retry_interval;
{
retry_timeout_id = 0;
continue_ops(true);
});
} }
reset_retry_timer(op->retry_after);
if (op->inflight_count == 0) if (op->inflight_count == 0)
{ {
if (op->opcode == OSD_OP_SYNC) if (op->opcode == OSD_OP_SYNC)

View File

@ -59,7 +59,7 @@ protected:
void *buf = NULL; void *buf = NULL;
cluster_op_t *orig_op = NULL; cluster_op_t *orig_op = NULL;
bool needs_reslice = false; bool needs_reslice = false;
bool up_wait = false; int retry_after = 0;
int inflight_count = 0, done_count = 0; int inflight_count = 0, done_count = 0;
std::vector<cluster_op_part_t> parts; std::vector<cluster_op_part_t> parts;
void *part_bitmaps = NULL; void *part_bitmaps = NULL;
@ -92,9 +92,11 @@ class cluster_client_t
uint64_t client_max_writeback_iodepth = 0; uint64_t client_max_writeback_iodepth = 0;
int log_level = 0; int log_level = 0;
int up_wait_retry_interval = 500; // ms int client_retry_interval = 50; // ms
int client_eio_retry_interval = 1000; // ms
int retry_timeout_id = 0; int retry_timeout_id = 0;
int retry_timeout_duration = 0;
std::vector<cluster_op_t*> offline_ops; std::vector<cluster_op_t*> offline_ops;
cluster_op_t *op_queue_head = NULL, *op_queue_tail = NULL; cluster_op_t *op_queue_head = NULL, *op_queue_tail = NULL;
writeback_cache_t *wb = NULL; writeback_cache_t *wb = NULL;
@ -131,7 +133,7 @@ public:
bool get_immediate_commit(uint64_t inode); bool get_immediate_commit(uint64_t inode);
void continue_ops(bool up_retry = false); void continue_ops(int time_passed = 0);
inode_list_t *list_inode_start(inode_t inode, inode_list_t *list_inode_start(inode_t inode,
std::function<void(inode_list_t* lst, std::set<object_id>&& objects, pg_num_t pg_num, osd_num_t primary_osd, int status)> callback); std::function<void(inode_list_t* lst, std::set<object_id>&& objects, pg_num_t pg_num, osd_num_t primary_osd, int status)> callback);
int list_pg_count(inode_list_t *lst); int list_pg_count(inode_list_t *lst);
@ -152,6 +154,7 @@ protected:
int continue_rw(cluster_op_t *op); int continue_rw(cluster_op_t *op);
bool check_rw(cluster_op_t *op); bool check_rw(cluster_op_t *op);
void slice_rw(cluster_op_t *op); void slice_rw(cluster_op_t *op);
void reset_retry_timer(int new_duration);
bool try_send(cluster_op_t *op, int i); bool try_send(cluster_op_t *op, int i);
int continue_sync(cluster_op_t *op); int continue_sync(cluster_op_t *op);
void send_sync(cluster_op_t *op, cluster_op_part_t *part); void send_sync(cluster_op_t *op, cluster_op_part_t *part);

View File

@ -6,7 +6,7 @@ includedir=${prefix}/@CMAKE_INSTALL_INCLUDEDIR@
Name: Vitastor Name: Vitastor
Description: Vitastor client library Description: Vitastor client library
Version: 1.4.7 Version: 1.4.8
Libs: -L${libdir} -lvitastor_client Libs: -L${libdir} -lvitastor_client
Cflags: -I${includedir} Cflags: -I${includedir}