Compare commits

..

No commits in common. "4cf6dceed730fa628cf3cdf1c2ec053a7f82ddea" and "4eab26f968a457263480e2e57cfc00828645c750" have entirely different histories.

35 changed files with 125 additions and 217 deletions

View File

@ -2,6 +2,6 @@ cmake_minimum_required(VERSION 2.8.12)
project(vitastor)
set(VERSION "1.4.8")
set(VERSION "1.4.7")
add_subdirectory(src)

View File

@ -1,4 +1,4 @@
VERSION ?= v1.4.8
VERSION ?= v1.4.7
all: build push

View File

@ -49,7 +49,7 @@ spec:
capabilities:
add: ["SYS_ADMIN"]
allowPrivilegeEscalation: true
image: vitalif/vitastor-csi:v1.4.8
image: vitalif/vitastor-csi:v1.4.7
args:
- "--node=$(NODE_ID)"
- "--endpoint=$(CSI_ENDPOINT)"

View File

@ -121,7 +121,7 @@ spec:
privileged: true
capabilities:
add: ["SYS_ADMIN"]
image: vitalif/vitastor-csi:v1.4.8
image: vitalif/vitastor-csi:v1.4.7
args:
- "--node=$(NODE_ID)"
- "--endpoint=$(CSI_ENDPOINT)"

View File

@ -5,7 +5,7 @@ package vitastor
const (
vitastorCSIDriverName = "csi.vitastor.io"
vitastorCSIDriverVersion = "1.4.8"
vitastorCSIDriverVersion = "1.4.7"
)
// Config struct fills the parameters of request or user input

2
debian/changelog vendored
View File

@ -1,4 +1,4 @@
vitastor (1.4.8-1) unstable; urgency=medium
vitastor (1.4.7-1) unstable; urgency=medium
* Bugfixes

View File

@ -37,8 +37,8 @@ RUN set -e -x; \
mkdir -p /root/packages/vitastor-$REL; \
rm -rf /root/packages/vitastor-$REL/*; \
cd /root/packages/vitastor-$REL; \
cp -r /root/vitastor vitastor-1.4.8; \
cd vitastor-1.4.8; \
cp -r /root/vitastor vitastor-1.4.7; \
cd vitastor-1.4.7; \
ln -s /root/fio-build/fio-*/ ./fio; \
FIO=$(head -n1 fio/debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
ls /usr/include/linux/raw.h || cp ./debian/raw.h /usr/include/linux/raw.h; \
@ -51,8 +51,8 @@ RUN set -e -x; \
rm -rf a b; \
echo "dep:fio=$FIO" > debian/fio_version; \
cd /root/packages/vitastor-$REL; \
tar --sort=name --mtime='2020-01-01' --owner=0 --group=0 --exclude=debian -cJf vitastor_1.4.8.orig.tar.xz vitastor-1.4.8; \
cd vitastor-1.4.8; \
tar --sort=name --mtime='2020-01-01' --owner=0 --group=0 --exclude=debian -cJf vitastor_1.4.7.orig.tar.xz vitastor-1.4.7; \
cd vitastor-1.4.7; \
V=$(head -n1 debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
DEBFULLNAME="Vitaliy Filippov <vitalif@yourcmc.ru>" dch -D $REL -v "$V""$REL" "Rebuild for $REL"; \
DEB_BUILD_OPTIONS=nocheck dpkg-buildpackage --jobs=auto -sa; \

View File

@ -9,8 +9,6 @@
These parameters apply only to Vitastor clients (QEMU, fio, NBD and so on) and
affect their interaction with the cluster.
- [client_retry_interval](#client_retry_interval)
- [client_eio_retry_interval](#client_eio_retry_interval)
- [client_max_dirty_bytes](#client_max_dirty_bytes)
- [client_max_dirty_ops](#client_max_dirty_ops)
- [client_enable_writeback](#client_enable_writeback)
@ -21,26 +19,6 @@ affect their interaction with the cluster.
- [nbd_max_devices](#nbd_max_devices)
- [nbd_max_part](#nbd_max_part)
## client_retry_interval
- Type: milliseconds
- Default: 50
- Minimum: 10
- Can be changed online: yes
Retry time for I/O requests failed due to inactive PGs or network
connectivity errors.
## client_eio_retry_interval
- Type: milliseconds
- Default: 1000
- Can be changed online: yes
Retry time for I/O requests failed due to data corruption or unfinished
EC object deletions (has_incomplete PG state). 0 disables such retries
and clients are not blocked and just get EIO error code instead.
## client_max_dirty_bytes
- Type: integer

View File

@ -9,8 +9,6 @@
Данные параметры применяются только к клиентам Vitastor (QEMU, fio, NBD и т.п.) и
затрагивают логику их работы с кластером.
- [client_retry_interval](#client_retry_interval)
- [client_eio_retry_interval](#client_eio_retry_interval)
- [client_max_dirty_bytes](#client_max_dirty_bytes)
- [client_max_dirty_ops](#client_max_dirty_ops)
- [client_enable_writeback](#client_enable_writeback)
@ -21,27 +19,6 @@
- [nbd_max_devices](#nbd_max_devices)
- [nbd_max_part](#nbd_max_part)
## client_retry_interval
- Тип: миллисекунды
- Значение по умолчанию: 50
- Минимальное значение: 10
- Можно менять на лету: да
Время повтора запросов ввода-вывода, неудачных из-за неактивных PG или
ошибок сети.
## client_eio_retry_interval
- Тип: миллисекунды
- Значение по умолчанию: 1000
- Можно менять на лету: да
Время повтора запросов ввода-вывода, неудачных из-за повреждения данных
или незавершённых удалений EC-объектов (состояния PG has_incomplete).
0 отключает повторы таких запросов и клиенты не блокируются, а вместо
этого просто получают код ошибки EIO.
## client_max_dirty_bytes
- Тип: целое число

View File

@ -25,6 +25,7 @@ between clients, OSDs and etcd.
- [peer_connect_timeout](#peer_connect_timeout)
- [osd_idle_timeout](#osd_idle_timeout)
- [osd_ping_timeout](#osd_ping_timeout)
- [up_wait_retry_interval](#up_wait_retry_interval)
- [max_etcd_attempts](#max_etcd_attempts)
- [etcd_quick_timeout](#etcd_quick_timeout)
- [etcd_slow_timeout](#etcd_slow_timeout)
@ -211,6 +212,17 @@ Maximum time to wait for OSD keepalive responses. If an OSD doesn't respond
within this time, the connection to it is dropped and a reconnection attempt
is scheduled.
## up_wait_retry_interval
- Type: milliseconds
- Default: 50
- Minimum: 10
- Can be changed online: yes
OSDs respond to clients with a special error code when they receive I/O
requests for a PG that's not synchronized and started. This parameter sets
the time for the clients to wait before re-attempting such I/O requests.
## max_etcd_attempts
- Type: integer

View File

@ -25,6 +25,7 @@
- [peer_connect_timeout](#peer_connect_timeout)
- [osd_idle_timeout](#osd_idle_timeout)
- [osd_ping_timeout](#osd_ping_timeout)
- [up_wait_retry_interval](#up_wait_retry_interval)
- [max_etcd_attempts](#max_etcd_attempts)
- [etcd_quick_timeout](#etcd_quick_timeout)
- [etcd_slow_timeout](#etcd_slow_timeout)
@ -220,6 +221,19 @@ OSD в любом случае согласовывают реальное зн
Если OSD не отвечает за это время, соединение отключается и производится
повторная попытка соединения.
## up_wait_retry_interval
- Тип: миллисекунды
- Значение по умолчанию: 50
- Минимальное значение: 10
- Можно менять на лету: да
Когда OSD получают от клиентов запросы ввода-вывода, относящиеся к не
поднятым на данный момент на них PG, либо к PG в процессе синхронизации,
они отвечают клиентам специальным кодом ошибки, означающим, что клиент
должен некоторое время подождать перед повторением запроса. Именно это время
ожидания задаёт данный параметр.
## max_etcd_attempts
- Тип: целое число

View File

@ -1,27 +1,3 @@
- name: client_retry_interval
type: ms
min: 10
default: 50
online: true
info: |
Retry time for I/O requests failed due to inactive PGs or network
connectivity errors.
info_ru: |
Время повтора запросов ввода-вывода, неудачных из-за неактивных PG или
ошибок сети.
- name: client_eio_retry_interval
type: ms
default: 1000
online: true
info: |
Retry time for I/O requests failed due to data corruption or unfinished
EC object deletions (has_incomplete PG state). 0 disables such retries
and clients are not blocked and just get EIO error code instead.
info_ru: |
Время повтора запросов ввода-вывода, неудачных из-за повреждения данных
или незавершённых удалений EC-объектов (состояния PG has_incomplete).
0 отключает повторы таких запросов и клиенты не блокируются, а вместо
этого просто получают код ошибки EIO.
- name: client_max_dirty_bytes
type: int
default: 33554432

View File

@ -243,6 +243,21 @@
Максимальное время ожидания ответа на запрос проверки состояния соединения.
Если OSD не отвечает за это время, соединение отключается и производится
повторная попытка соединения.
- name: up_wait_retry_interval
type: ms
min: 10
default: 50
online: true
info: |
OSDs respond to clients with a special error code when they receive I/O
requests for a PG that's not synchronized and started. This parameter sets
the time for the clients to wait before re-attempting such I/O requests.
info_ru: |
Когда OSD получают от клиентов запросы ввода-вывода, относящиеся к не
поднятым на данный момент на них PG, либо к PG в процессе синхронизации,
они отвечают клиентам специальным кодом ошибки, означающим, что клиент
должен некоторое время подождать перед повторением запроса. Именно это время
ожидания задаёт данный параметр.
- name: max_etcd_attempts
type: int
default: 5

View File

@ -135,18 +135,19 @@ See also about [how to export snapshots](qemu.en.md#exporting-snapshots).
## modify
`vitastor-cli modify <name> [--rename <new-name>] [--resize <size>] [--readonly | --readwrite] [-f|--force] [--down-ok]`
`vitastor-cli modify <name> [--rename <new-name>] [--resize <size>] [--readonly | --readwrite] [-f|--force]`
Rename, resize image or change its readonly status. Images with children can't be made read-write.
If the new size is smaller than the old size, extra data will be purged.
You should resize file system in the image, if present, before shrinking it.
| `-f|--force` | Proceed with shrinking or setting readwrite flag even if the image has children. |
| `--down-ok` | Proceed with shrinking even if some data will be left on unavailable OSDs. |
```
-f|--force Proceed with shrinking or setting readwrite flag even if the image has children.
```
## rm
`vitastor-cli rm <from> [<to>] [--writers-stopped] [--down-ok]`
`vitastor-cli rm <from> [<to>] [--writers-stopped]`
Remove `<from>` or all layers between `<from>` and `<to>` (`<to>` must be a child of `<from>`),
rebasing all their children accordingly. --writers-stopped allows merging to be a bit
@ -154,10 +155,6 @@ more effective in case of a single 'slim' read-write child and 'fat' removed par
the child is merged into parent and parent is renamed to child in that case.
In other cases parent layers are always merged into children.
Other options:
| `--down-ok` | Continue deletion/merging even if some data will be left on unavailable OSDs. |
## flatten
`vitastor-cli flatten <layer>`

View File

@ -136,7 +136,7 @@ vitastor-cli snap-create [-p|--pool <id|name>] <image>@<snapshot>
## modify
`vitastor-cli modify <name> [--rename <new-name>] [--resize <size>] [--readonly | --readwrite] [-f|--force] [--down-ok]`
`vitastor-cli modify <name> [--rename <new-name>] [--resize <size>] [--readonly | --readwrite] [-f|--force]`
Изменить размер, имя образа или флаг "только для чтения". Снимать флаг "только для чтения"
и уменьшать размер образов, у которых есть дочерние клоны, без `--force` нельзя.
@ -144,12 +144,13 @@ vitastor-cli snap-create [-p|--pool <id|name>] <image>@<snapshot>
Если новый размер меньше старого, "лишние" данные будут удалены, поэтому перед уменьшением
образа сначала уменьшите файловую систему в нём.
| -f|--force | Разрешить уменьшение или перевод в чтение-запись образа, у которого есть клоны. |
| --down-ok | Разрешить уменьшение, даже если часть данных останется неудалённой на недоступных OSD. |
```
-f|--force Разрешить уменьшение или перевод в чтение-запись образа, у которого есть клоны.
```
## rm
`vitastor-cli rm <from> [<to>] [--writers-stopped] [--down-ok]`
`vitastor-cli rm <from> [<to>] [--writers-stopped]`
Удалить образ `<from>` или все слои от `<from>` до `<to>` (`<to>` должен быть дочерним
образом `<from>`), одновременно меняя родительские образы их клонов (если таковые есть).
@ -161,10 +162,6 @@ vitastor-cli snap-create [-p|--pool <id|name>] <image>@<snapshot>
В других случаях родительские слои вливаются в дочерние.
Другие опции:
| `--down-ok` | Продолжать удаление/слияние, даже если часть данных останется неудалённой на недоступных OSD. |
## flatten
`vitastor-cli flatten <layer>`

View File

@ -86,14 +86,13 @@ const etcd_tree = {
client_max_buffered_bytes: 33554432,
client_max_buffered_ops: 1024,
client_max_writeback_iodepth: 256,
client_retry_interval: 50, // ms. min: 10
client_eio_retry_interval: 1000, // ms
// client and osd - configurable online
log_level: 0,
peer_connect_interval: 5, // seconds. min: 1
peer_connect_timeout: 5, // seconds. min: 1
osd_idle_timeout: 5, // seconds. min: 1
osd_ping_timeout: 5, // seconds. min: 1
up_wait_retry_interval: 50, // ms. min: 10
max_etcd_attempts: 5,
etcd_quick_timeout: 1000, // ms
etcd_slow_timeout: 5000, // ms

View File

@ -1,6 +1,6 @@
{
"name": "vitastor-mon",
"version": "1.4.8",
"version": "1.4.7",
"description": "Vitastor SDS monitor service",
"main": "mon-main.js",
"scripts": {

View File

@ -50,7 +50,7 @@ from cinder.volume import configuration
from cinder.volume import driver
from cinder.volume import volume_utils
VERSION = '1.4.8'
VERSION = '1.4.7'
LOG = logging.getLogger(__name__)

View File

@ -24,4 +24,4 @@ rm fio
mv fio-copy fio
FIO=`rpm -qi fio | perl -e 'while(<>) { /^Epoch[\s:]+(\S+)/ && print "$1:"; /^Version[\s:]+(\S+)/ && print $1; /^Release[\s:]+(\S+)/ && print "-$1"; }'`
perl -i -pe 's/(Requires:\s*fio)([^\n]+)?/$1 = '$FIO'/' $VITASTOR/rpm/vitastor-el$EL.spec
tar --transform 's#^#vitastor-1.4.8/#' --exclude 'rpm/*.rpm' -czf $VITASTOR/../vitastor-1.4.8$(rpm --eval '%dist').tar.gz *
tar --transform 's#^#vitastor-1.4.7/#' --exclude 'rpm/*.rpm' -czf $VITASTOR/../vitastor-1.4.7$(rpm --eval '%dist').tar.gz *

View File

@ -36,7 +36,7 @@ ADD . /root/vitastor
RUN set -e; \
cd /root/vitastor/rpm; \
sh build-tarball.sh; \
cp /root/vitastor-1.4.8.el7.tar.gz ~/rpmbuild/SOURCES; \
cp /root/vitastor-1.4.7.el7.tar.gz ~/rpmbuild/SOURCES; \
cp vitastor-el7.spec ~/rpmbuild/SPECS/vitastor.spec; \
cd ~/rpmbuild/SPECS/; \
rpmbuild -ba vitastor.spec; \

View File

@ -1,11 +1,11 @@
Name: vitastor
Version: 1.4.8
Version: 1.4.7
Release: 1%{?dist}
Summary: Vitastor, a fast software-defined clustered block storage
License: Vitastor Network Public License 1.1
URL: https://vitastor.io/
Source0: vitastor-1.4.8.el7.tar.gz
Source0: vitastor-1.4.7.el7.tar.gz
BuildRequires: liburing-devel >= 0.6
BuildRequires: gperftools-devel

View File

@ -35,7 +35,7 @@ ADD . /root/vitastor
RUN set -e; \
cd /root/vitastor/rpm; \
sh build-tarball.sh; \
cp /root/vitastor-1.4.8.el8.tar.gz ~/rpmbuild/SOURCES; \
cp /root/vitastor-1.4.7.el8.tar.gz ~/rpmbuild/SOURCES; \
cp vitastor-el8.spec ~/rpmbuild/SPECS/vitastor.spec; \
cd ~/rpmbuild/SPECS/; \
rpmbuild -ba vitastor.spec; \

View File

@ -1,11 +1,11 @@
Name: vitastor
Version: 1.4.8
Version: 1.4.7
Release: 1%{?dist}
Summary: Vitastor, a fast software-defined clustered block storage
License: Vitastor Network Public License 1.1
URL: https://vitastor.io/
Source0: vitastor-1.4.8.el8.tar.gz
Source0: vitastor-1.4.7.el8.tar.gz
BuildRequires: liburing-devel >= 0.6
BuildRequires: gperftools-devel

View File

@ -18,7 +18,7 @@ ADD . /root/vitastor
RUN set -e; \
cd /root/vitastor/rpm; \
sh build-tarball.sh; \
cp /root/vitastor-1.4.8.el9.tar.gz ~/rpmbuild/SOURCES; \
cp /root/vitastor-1.4.7.el9.tar.gz ~/rpmbuild/SOURCES; \
cp vitastor-el9.spec ~/rpmbuild/SPECS/vitastor.spec; \
cd ~/rpmbuild/SPECS/; \
rpmbuild -ba vitastor.spec; \

View File

@ -1,11 +1,11 @@
Name: vitastor
Version: 1.4.8
Version: 1.4.7
Release: 1%{?dist}
Summary: Vitastor, a fast software-defined clustered block storage
License: Vitastor Network Public License 1.1
URL: https://vitastor.io/
Source0: vitastor-1.4.8.el9.tar.gz
Source0: vitastor-1.4.7.el9.tar.gz
BuildRequires: liburing-devel >= 0.6
BuildRequires: gperftools-devel

View File

@ -16,7 +16,7 @@ if("${CMAKE_INSTALL_PREFIX}" MATCHES "^/usr/local/?$")
set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}")
endif()
add_definitions(-DVERSION="1.4.8")
add_definitions(-DVERSION="1.4.7")
add_definitions(-D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -Wall -Wno-sign-compare -Wno-comment -Wno-parentheses -Wno-pointer-arith -fdiagnostics-color=always -fno-omit-frame-pointer -I ${CMAKE_SOURCE_DIR}/src)
add_link_options(-fno-omit-frame-pointer)
if (${WITH_ASAN})

View File

@ -46,21 +46,18 @@ static const char* help_text =
"vitastor-cli snap-create [-p|--pool <id|name>] <image>@<snapshot>\n"
" Create a snapshot of image <name>. May be used live if only a single writer is active.\n"
"\n"
"vitastor-cli modify <name> [--rename <new-name>] [--resize <size>] [--readonly | --readwrite] [-f|--force] [--down-ok]\n"
"vitastor-cli modify <name> [--rename <new-name>] [--resize <size>] [--readonly | --readwrite] [-f|--force]\n"
" Rename, resize image or change its readonly status. Images with children can't be made read-write.\n"
" If the new size is smaller than the old size, extra data will be purged.\n"
" You should resize file system in the image, if present, before shrinking it.\n"
" -f|--force Proceed with shrinking or setting readwrite flag even if the image has children.\n"
" --down-ok Proceed with shrinking even if some data will be left on unavailable OSDs.\n"
"\n"
"vitastor-cli rm <from> [<to>] [--writers-stopped] [--down-ok]\n"
"vitastor-cli rm <from> [<to>] [--writers-stopped]\n"
" Remove <from> or all layers between <from> and <to> (<to> must be a child of <from>),\n"
" rebasing all their children accordingly. --writers-stopped allows merging to be a bit\n"
" more effective in case of a single 'slim' read-write child and 'fat' removed parent:\n"
" the child is merged into parent and parent is renamed to child in that case.\n"
" In other cases parent layers are always merged into children.\n"
" Other options:\n"
" --down-ok Continue deletion/merging even if some data will be left on unavailable OSDs.\n"
"\n"
"vitastor-cli flatten <layer>\n"
" Flatten a layer, i.e. merge data and detach it from parents.\n"
@ -173,7 +170,7 @@ static const char* help_text =
" --parallel_osds M Work with M osds in parallel when possible (default 4)\n"
" --progress 1|0 Report progress (default 1)\n"
" --cas 1|0 Use CAS writes for flatten, merge, rm (default is decide automatically)\n"
" --color 1|0 Enable/disable colored output and CR symbols (default 1 if stdout is a terminal)\n"
" --no-color Disable colored output\n"
" --json JSON output\n"
;
@ -224,7 +221,6 @@ static json11::Json::object parse_args(int narg, const char *args[])
!strcmp(opt, "readonly") || !strcmp(opt, "readwrite") ||
!strcmp(opt, "force") || !strcmp(opt, "reverse") ||
!strcmp(opt, "allow-data-loss") || !strcmp(opt, "allow_data_loss") ||
!strcmp(opt, "down-ok") || !strcmp(opt, "down_ok") ||
!strcmp(opt, "dry-run") || !strcmp(opt, "dry_run") ||
!strcmp(opt, "help") || !strcmp(opt, "all") ||
!strcmp(opt, "writers-stopped") || !strcmp(opt, "writers_stopped"))

View File

@ -1,7 +1,6 @@
// Copyright (c) Vitaliy Filippov, 2019+
// License: VNPL-1.1 (see README.md for details)
#include <unistd.h>
#include "str_util.h"
#include "cluster_client.h"
#include "cli.h"
@ -114,12 +113,7 @@ void cli_tool_t::parse_config(json11::Json::object & cfg)
else
kv_it++;
}
if (cfg.find("no_color") != cfg.end())
color = !cfg["no_color"].bool_value();
else if (cfg.find("color") != cfg.end())
color = cfg["color"].bool_value();
else
color = isatty(1);
color = !cfg["no_color"].bool_value();
json_output = cfg["json"].bool_value();
iodepth = cfg["iodepth"].uint64_value();
if (!iodepth)

View File

@ -275,9 +275,7 @@ struct snap_merger_t
processed++;
if (parent->progress && !(processed % 128))
{
fprintf(stderr, parent->color
? "\rFiltering target blocks: %ju/%ju"
: "Filtering target blocks: %ju/%ju\n", processed, to_process);
printf("\rFiltering target blocks: %ju/%ju", processed, to_process);
}
}
if (in_flight > 0 || oit != merge_offsets.end())
@ -287,9 +285,7 @@ struct snap_merger_t
}
if (parent->progress)
{
fprintf(stderr, parent->color
? "\r%ju full blocks of target filtered out\n"
: "%ju full blocks of target filtered out\n", to_process-merge_offsets.size());
printf("\r%ju full blocks of target filtered out\n", to_process-merge_offsets.size());
}
}
state = 3;
@ -324,9 +320,7 @@ struct snap_merger_t
processed++;
if (parent->progress && !(processed % 128))
{
fprintf(stderr, parent->color
? "\rOverwriting blocks: %ju/%ju"
: "Overwriting blocks: %ju/%ju\n", processed, to_process);
printf("\rOverwriting blocks: %ju/%ju", processed, to_process);
}
}
if (in_flight == 0 && rwo_error.size())
@ -345,9 +339,7 @@ struct snap_merger_t
}
if (parent->progress)
{
fprintf(stderr, parent->color
? "\rOverwriting blocks: %ju/%ju\n"
: "Overwriting blocks: %ju/%ju\n", to_process, to_process);
printf("\rOverwriting blocks: %ju/%ju\n", to_process, to_process);
}
// Done
result = (cli_result_t){ .text = "Done, layers from "+from_name+" to "+to_name+" merged into "+target_name, .data = json11::Json::object {

View File

@ -15,7 +15,6 @@ struct image_changer_t
uint64_t new_size = 0;
bool force_size = false, inc_size = false;
bool set_readonly = false, set_readwrite = false, force = false;
bool down_ok = false;
// interval between fsyncs
int fsync_interval = 128;
@ -109,7 +108,6 @@ struct image_changer_t
{ "pool", (uint64_t)INODE_POOL(inode_num) },
{ "fsync-interval", fsync_interval },
{ "min-offset", ((new_size+4095)/4096)*4096 },
{ "down-ok", down_ok },
});
resume_1:
while (!cb(result))
@ -254,7 +252,6 @@ std::function<bool(cli_result_t &)> cli_tool_t::start_modify(json11::Json cfg)
changer->fsync_interval = cfg["fsync_interval"].uint64_value();
if (!changer->fsync_interval)
changer->fsync_interval = 128;
changer->down_ok = cfg["down_ok"].bool_value();
// FIXME Check that the image doesn't have children when shrinking
return [changer](cli_result_t & result)
{

View File

@ -53,8 +53,6 @@ struct snap_remover_t
int use_cas = 1;
// interval between fsyncs
int fsync_interval = 128;
// ignore deletion errors
bool down_ok = false;
std::map<inode_t,int> sources;
std::map<inode_t,uint64_t> inode_used;
@ -682,7 +680,6 @@ resume_100:
{ "inode", inode },
{ "pool", (uint64_t)INODE_POOL(inode) },
{ "fsync-interval", fsync_interval },
{ "down-ok", down_ok },
});
}
};
@ -694,7 +691,6 @@ std::function<bool(cli_result_t &)> cli_tool_t::start_rm(json11::Json cfg)
snap_remover->from_name = cfg["from"].string_value();
snap_remover->to_name = cfg["to"].string_value();
snap_remover->fsync_interval = cfg["fsync_interval"].uint64_value();
snap_remover->down_ok = cfg["down_ok"].bool_value();
if (!snap_remover->fsync_interval)
snap_remover->fsync_interval = 128;
if (!cfg["cas"].is_null())

View File

@ -25,7 +25,6 @@ struct rm_inode_t
uint64_t inode = 0;
pool_id_t pool_id = 0;
uint64_t min_offset = 0;
bool down_ok = false;
cli_tool_t *parent = NULL;
inode_list_t *lister = NULL;
@ -213,9 +212,7 @@ struct rm_inode_t
}
if (parent->progress && total_count > 0 && total_done*1000/total_count != total_prev_pct)
{
fprintf(stderr, parent->color
? "\rRemoved %ju/%ju objects, %ju more PGs to list..."
: "Removed %ju/%ju objects, %ju more PGs to list...\n", total_done, total_count, pgs_to_list);
fprintf(stderr, "\rRemoved %ju/%ju objects, %ju more PGs to list...", total_done, total_count, pgs_to_list);
total_prev_pct = total_done*1000/total_count;
}
if (lists_done && !lists.size())
@ -224,18 +221,17 @@ struct rm_inode_t
{
fprintf(stderr, "\n");
}
bool is_error = (total_done < total_count || inactive_osds.size() > 0 || error_count > 0);
if (parent->progress && is_error)
if (parent->progress && (total_done < total_count || inactive_osds.size() > 0 || error_count > 0))
{
fprintf(
stderr, "Warning: Pool:%u,ID:%ju inode data may not have been fully removed.\n"
"Use `vitastor-cli rm-data --pool %u --inode %ju` if you encounter it in listings.\n",
" Use `vitastor-cli rm-data --pool %u --inode %ju` if you encounter it in listings.\n",
pool_id, INODE_NO_POOL(inode), pool_id, INODE_NO_POOL(inode)
);
}
result = (cli_result_t){
.err = is_error && !down_ok ? EIO : 0,
.text = is_error ? "Some blocks were not removed" : (
.err = error_count > 0 ? EIO : 0,
.text = error_count > 0 ? "Some blocks were not removed" : (
"Done, inode "+std::to_string(INODE_NO_POOL(inode))+" from pool "+
std::to_string(pool_id)+" removed"),
.data = json11::Json::object {
@ -284,7 +280,6 @@ std::function<bool(cli_result_t &)> cli_tool_t::start_rm_data(json11::Json cfg)
{
remover->inode = (remover->inode & (((uint64_t)1 << (64-POOL_ID_BITS)) - 1)) | (((uint64_t)remover->pool_id) << (64-POOL_ID_BITS));
}
remover->down_ok = cfg["down_ok"].bool_value();
remover->pool_id = INODE_POOL(remover->inode);
remover->min_offset = cfg["min_offset"].uint64_value();
return [remover](cli_result_t & result)

View File

@ -265,7 +265,7 @@ void cluster_client_t::erase_op(cluster_op_t *op)
}
}
void cluster_client_t::continue_ops(int time_passed)
void cluster_client_t::continue_ops(bool up_retry)
{
if (!pgs_loaded)
{
@ -277,27 +277,22 @@ void cluster_client_t::continue_ops(int time_passed)
// Attempt to reenter the function
return;
}
int reset_duration = 0;
restart:
continuing_ops = 1;
for (auto op = op_queue_head; op; )
{
cluster_op_t *next_op = op->next;
if (op->retry_after && time_passed)
if (!op->up_wait || up_retry)
{
op->retry_after = op->retry_after > time_passed ? op->retry_after-time_passed : 0;
if (op->retry_after && (!reset_duration || op->retry_after < reset_duration))
op->up_wait = false;
if (!op->prev_wait)
{
reset_duration = op->retry_after;
if (op->opcode == OSD_OP_SYNC)
continue_sync(op);
else
continue_rw(op);
}
}
if (!op->retry_after && !op->prev_wait)
{
if (op->opcode == OSD_OP_SYNC)
continue_sync(op);
else
continue_rw(op);
}
op = next_op;
if (continuing_ops == 2)
{
@ -305,27 +300,6 @@ restart:
}
}
continuing_ops = 0;
reset_retry_timer(reset_duration);
}
void cluster_client_t::reset_retry_timer(int new_duration)
{
if (retry_timeout_duration && retry_timeout_duration <= new_duration || !new_duration)
{
return;
}
if (retry_timeout_id)
{
tfd->clear_timer(retry_timeout_id);
}
retry_timeout_duration = new_duration;
retry_timeout_id = tfd->set_timer(retry_timeout_duration, false, [this](int)
{
int time_passed = retry_timeout_duration;
retry_timeout_id = 0;
retry_timeout_duration = 0;
continue_ops(time_passed);
});
}
void cluster_client_t::on_load_config_hook(json11::Json::object & etcd_global_config)
@ -375,25 +349,15 @@ void cluster_client_t::on_load_config_hook(json11::Json::object & etcd_global_co
{
client_max_writeback_iodepth = DEFAULT_CLIENT_MAX_WRITEBACK_IODEPTH;
}
// client_retry_interval
client_retry_interval = config["client_retry_interval"].uint64_value();
if (!client_retry_interval)
// up_wait_retry_interval
up_wait_retry_interval = config["up_wait_retry_interval"].uint64_value();
if (!up_wait_retry_interval)
{
client_retry_interval = 50;
up_wait_retry_interval = 50;
}
else if (client_retry_interval < 10)
else if (up_wait_retry_interval < 10)
{
client_retry_interval = 10;
}
// client_eio_retry_interval
client_eio_retry_interval = 1000;
if (!config["client_eio_retry_interval"].is_null())
{
client_eio_retry_interval = config["client_eio_retry_interval"].uint64_value();
if (client_eio_retry_interval && client_eio_retry_interval < 10)
{
client_eio_retry_interval = 10;
}
up_wait_retry_interval = 10;
}
// log_level
log_level = config["log_level"].uint64_value();
@ -752,8 +716,15 @@ resume_1:
// We'll need to retry again
if (op->parts[i].flags & PART_RETRY)
{
op->retry_after = client_retry_interval;
reset_retry_timer(client_retry_interval);
op->up_wait = true;
if (!retry_timeout_id)
{
retry_timeout_id = tfd->set_timer(up_wait_retry_interval, false, [this](int)
{
retry_timeout_id = 0;
continue_ops(true);
});
}
}
op->state = 1;
}
@ -809,9 +780,10 @@ resume_2:
return 1;
}
else if (op->retval != 0 && !(op->flags & OP_FLUSH_BUFFER) &&
op->retval != -EPIPE && (op->retval != -EIO || !client_eio_retry_interval) && op->retval != -ENOSPC)
op->retval != -EPIPE && op->retval != -EIO && op->retval != -ENOSPC)
{
// Fatal error (neither -EPIPE, -EIO nor -ENOSPC)
// FIXME: Add a parameter to allow to not wait for EIOs (incomplete or corrupted objects) to heal
erase_op(op);
return 1;
}
@ -1199,12 +1171,16 @@ void cluster_client_t::handle_op_part(cluster_op_part_t *part)
// All next things like timer, continue_sync/rw and stop_client may affect the operation again
// So do all these things after modifying operation state, otherwise we may hit reenterability bugs
// FIXME postpone such things to set_immediate here to avoid bugs
// Set op->retry_after to retry operation after a short pause (not immediately)
if (!op->retry_after)
// Mark op->up_wait = true to retry operation after a short pause (not immediately)
op->up_wait = true;
if (!retry_timeout_id)
{
op->retry_after = op->retval == -EIO ? client_eio_retry_interval : client_retry_interval;
retry_timeout_id = tfd->set_timer(up_wait_retry_interval, false, [this](int)
{
retry_timeout_id = 0;
continue_ops(true);
});
}
reset_retry_timer(op->retry_after);
if (op->inflight_count == 0)
{
if (op->opcode == OSD_OP_SYNC)

View File

@ -59,7 +59,7 @@ protected:
void *buf = NULL;
cluster_op_t *orig_op = NULL;
bool needs_reslice = false;
int retry_after = 0;
bool up_wait = false;
int inflight_count = 0, done_count = 0;
std::vector<cluster_op_part_t> parts;
void *part_bitmaps = NULL;
@ -92,11 +92,9 @@ class cluster_client_t
uint64_t client_max_writeback_iodepth = 0;
int log_level = 0;
int client_retry_interval = 50; // ms
int client_eio_retry_interval = 1000; // ms
int up_wait_retry_interval = 500; // ms
int retry_timeout_id = 0;
int retry_timeout_duration = 0;
std::vector<cluster_op_t*> offline_ops;
cluster_op_t *op_queue_head = NULL, *op_queue_tail = NULL;
writeback_cache_t *wb = NULL;
@ -133,7 +131,7 @@ public:
bool get_immediate_commit(uint64_t inode);
void continue_ops(int time_passed = 0);
void continue_ops(bool up_retry = false);
inode_list_t *list_inode_start(inode_t inode,
std::function<void(inode_list_t* lst, std::set<object_id>&& objects, pg_num_t pg_num, osd_num_t primary_osd, int status)> callback);
int list_pg_count(inode_list_t *lst);
@ -154,7 +152,6 @@ protected:
int continue_rw(cluster_op_t *op);
bool check_rw(cluster_op_t *op);
void slice_rw(cluster_op_t *op);
void reset_retry_timer(int new_duration);
bool try_send(cluster_op_t *op, int i);
int continue_sync(cluster_op_t *op);
void send_sync(cluster_op_t *op, cluster_op_part_t *part);

View File

@ -6,7 +6,7 @@ includedir=${prefix}/@CMAKE_INSTALL_INCLUDEDIR@
Name: Vitastor
Description: Vitastor client library
Version: 1.4.8
Version: 1.4.7
Libs: -L${libdir} -lvitastor_client
Cflags: -I${includedir}