From 6bf1f539a6b38e8a596f0fcdbf3c8452fddad8b8 Mon Sep 17 00:00:00 2001 From: Vitaliy Filippov Date: Wed, 28 Feb 2024 00:51:13 +0300 Subject: [PATCH 1/4] Add EIO retry timeout and allow to disable these retries, rename up_wait_retry_interval to client_retry_interval --- docs/config/client.en.md | 22 +++++++++ docs/config/client.ru.md | 23 ++++++++++ docs/config/network.en.md | 12 ----- docs/config/network.ru.md | 14 ------ docs/config/src/client.yml | 24 ++++++++++ docs/config/src/network.yml | 15 ------- mon/mon.js | 3 +- src/cluster_client.cpp | 90 +++++++++++++++++++++++-------------- src/cluster_client.h | 9 ++-- 9 files changed, 134 insertions(+), 78 deletions(-) diff --git a/docs/config/client.en.md b/docs/config/client.en.md index 0cd2420e..149f48ba 100644 --- a/docs/config/client.en.md +++ b/docs/config/client.en.md @@ -9,6 +9,8 @@ These parameters apply only to Vitastor clients (QEMU, fio, NBD and so on) and affect their interaction with the cluster. +- [client_retry_interval](#client_retry_interval) +- [client_eio_retry_interval](#client_eio_retry_interval) - [client_max_dirty_bytes](#client_max_dirty_bytes) - [client_max_dirty_ops](#client_max_dirty_ops) - [client_enable_writeback](#client_enable_writeback) @@ -19,6 +21,26 @@ affect their interaction with the cluster. - [nbd_max_devices](#nbd_max_devices) - [nbd_max_part](#nbd_max_part) +## client_retry_interval + +- Type: milliseconds +- Default: 50 +- Minimum: 10 +- Can be changed online: yes + +Retry time for I/O requests failed due to inactive PGs or network +connectivity errors. + +## client_eio_retry_interval + +- Type: milliseconds +- Default: 1000 +- Can be changed online: yes + +Retry time for I/O requests failed due to data corruption or unfinished +EC object deletions (has_incomplete PG state). 0 disables such retries +and clients are not blocked and just get EIO error code instead. + ## client_max_dirty_bytes - Type: integer diff --git a/docs/config/client.ru.md b/docs/config/client.ru.md index bc96e537..e714fe31 100644 --- a/docs/config/client.ru.md +++ b/docs/config/client.ru.md @@ -9,6 +9,8 @@ Данные параметры применяются только к клиентам Vitastor (QEMU, fio, NBD и т.п.) и затрагивают логику их работы с кластером. +- [client_retry_interval](#client_retry_interval) +- [client_eio_retry_interval](#client_eio_retry_interval) - [client_max_dirty_bytes](#client_max_dirty_bytes) - [client_max_dirty_ops](#client_max_dirty_ops) - [client_enable_writeback](#client_enable_writeback) @@ -19,6 +21,27 @@ - [nbd_max_devices](#nbd_max_devices) - [nbd_max_part](#nbd_max_part) +## client_retry_interval + +- Тип: миллисекунды +- Значение по умолчанию: 50 +- Минимальное значение: 10 +- Можно менять на лету: да + +Время повтора запросов ввода-вывода, неудачных из-за неактивных PG или +ошибок сети. + +## client_eio_retry_interval + +- Тип: миллисекунды +- Значение по умолчанию: 1000 +- Можно менять на лету: да + +Время повтора запросов ввода-вывода, неудачных из-за повреждения данных +или незавершённых удалений EC-объектов (состояния PG has_incomplete). +0 отключает повторы таких запросов и клиенты не блокируются, а вместо +этого просто получают код ошибки EIO. + ## client_max_dirty_bytes - Тип: целое число diff --git a/docs/config/network.en.md b/docs/config/network.en.md index a28464ee..85ffd1a5 100644 --- a/docs/config/network.en.md +++ b/docs/config/network.en.md @@ -25,7 +25,6 @@ between clients, OSDs and etcd. - [peer_connect_timeout](#peer_connect_timeout) - [osd_idle_timeout](#osd_idle_timeout) - [osd_ping_timeout](#osd_ping_timeout) -- [up_wait_retry_interval](#up_wait_retry_interval) - [max_etcd_attempts](#max_etcd_attempts) - [etcd_quick_timeout](#etcd_quick_timeout) - [etcd_slow_timeout](#etcd_slow_timeout) @@ -212,17 +211,6 @@ Maximum time to wait for OSD keepalive responses. If an OSD doesn't respond within this time, the connection to it is dropped and a reconnection attempt is scheduled. -## up_wait_retry_interval - -- Type: milliseconds -- Default: 50 -- Minimum: 10 -- Can be changed online: yes - -OSDs respond to clients with a special error code when they receive I/O -requests for a PG that's not synchronized and started. This parameter sets -the time for the clients to wait before re-attempting such I/O requests. - ## max_etcd_attempts - Type: integer diff --git a/docs/config/network.ru.md b/docs/config/network.ru.md index 1d3ceaa0..f97d7c9f 100644 --- a/docs/config/network.ru.md +++ b/docs/config/network.ru.md @@ -25,7 +25,6 @@ - [peer_connect_timeout](#peer_connect_timeout) - [osd_idle_timeout](#osd_idle_timeout) - [osd_ping_timeout](#osd_ping_timeout) -- [up_wait_retry_interval](#up_wait_retry_interval) - [max_etcd_attempts](#max_etcd_attempts) - [etcd_quick_timeout](#etcd_quick_timeout) - [etcd_slow_timeout](#etcd_slow_timeout) @@ -221,19 +220,6 @@ OSD в любом случае согласовывают реальное зн Если OSD не отвечает за это время, соединение отключается и производится повторная попытка соединения. -## up_wait_retry_interval - -- Тип: миллисекунды -- Значение по умолчанию: 50 -- Минимальное значение: 10 -- Можно менять на лету: да - -Когда OSD получают от клиентов запросы ввода-вывода, относящиеся к не -поднятым на данный момент на них PG, либо к PG в процессе синхронизации, -они отвечают клиентам специальным кодом ошибки, означающим, что клиент -должен некоторое время подождать перед повторением запроса. Именно это время -ожидания задаёт данный параметр. - ## max_etcd_attempts - Тип: целое число diff --git a/docs/config/src/client.yml b/docs/config/src/client.yml index 3bebd783..7ca579cb 100644 --- a/docs/config/src/client.yml +++ b/docs/config/src/client.yml @@ -1,3 +1,27 @@ +- name: client_retry_interval + type: ms + min: 10 + default: 50 + online: true + info: | + Retry time for I/O requests failed due to inactive PGs or network + connectivity errors. + info_ru: | + Время повтора запросов ввода-вывода, неудачных из-за неактивных PG или + ошибок сети. +- name: client_eio_retry_interval + type: ms + default: 1000 + online: true + info: | + Retry time for I/O requests failed due to data corruption or unfinished + EC object deletions (has_incomplete PG state). 0 disables such retries + and clients are not blocked and just get EIO error code instead. + info_ru: | + Время повтора запросов ввода-вывода, неудачных из-за повреждения данных + или незавершённых удалений EC-объектов (состояния PG has_incomplete). + 0 отключает повторы таких запросов и клиенты не блокируются, а вместо + этого просто получают код ошибки EIO. - name: client_max_dirty_bytes type: int default: 33554432 diff --git a/docs/config/src/network.yml b/docs/config/src/network.yml index 5bd2c808..ea0c7438 100644 --- a/docs/config/src/network.yml +++ b/docs/config/src/network.yml @@ -243,21 +243,6 @@ Максимальное время ожидания ответа на запрос проверки состояния соединения. Если OSD не отвечает за это время, соединение отключается и производится повторная попытка соединения. -- name: up_wait_retry_interval - type: ms - min: 10 - default: 50 - online: true - info: | - OSDs respond to clients with a special error code when they receive I/O - requests for a PG that's not synchronized and started. This parameter sets - the time for the clients to wait before re-attempting such I/O requests. - info_ru: | - Когда OSD получают от клиентов запросы ввода-вывода, относящиеся к не - поднятым на данный момент на них PG, либо к PG в процессе синхронизации, - они отвечают клиентам специальным кодом ошибки, означающим, что клиент - должен некоторое время подождать перед повторением запроса. Именно это время - ожидания задаёт данный параметр. - name: max_etcd_attempts type: int default: 5 diff --git a/mon/mon.js b/mon/mon.js index 6e8864c9..060d5c4a 100644 --- a/mon/mon.js +++ b/mon/mon.js @@ -86,13 +86,14 @@ const etcd_tree = { client_max_buffered_bytes: 33554432, client_max_buffered_ops: 1024, client_max_writeback_iodepth: 256, + client_retry_interval: 50, // ms. min: 10 + client_eio_retry_interval: 1000, // ms // client and osd - configurable online log_level: 0, peer_connect_interval: 5, // seconds. min: 1 peer_connect_timeout: 5, // seconds. min: 1 osd_idle_timeout: 5, // seconds. min: 1 osd_ping_timeout: 5, // seconds. min: 1 - up_wait_retry_interval: 50, // ms. min: 10 max_etcd_attempts: 5, etcd_quick_timeout: 1000, // ms etcd_slow_timeout: 5000, // ms diff --git a/src/cluster_client.cpp b/src/cluster_client.cpp index 1a72874b..8e679f4a 100644 --- a/src/cluster_client.cpp +++ b/src/cluster_client.cpp @@ -265,7 +265,7 @@ void cluster_client_t::erase_op(cluster_op_t *op) } } -void cluster_client_t::continue_ops(bool up_retry) +void cluster_client_t::continue_ops(int time_passed) { if (!pgs_loaded) { @@ -277,22 +277,27 @@ void cluster_client_t::continue_ops(bool up_retry) // Attempt to reenter the function return; } + int reset_duration = 0; restart: continuing_ops = 1; for (auto op = op_queue_head; op; ) { cluster_op_t *next_op = op->next; - if (!op->up_wait || up_retry) + if (op->retry_after && time_passed) { - op->up_wait = false; - if (!op->prev_wait) + op->retry_after = op->retry_after > time_passed ? op->retry_after-time_passed : 0; + if (op->retry_after && (!reset_duration || op->retry_after < reset_duration)) { - if (op->opcode == OSD_OP_SYNC) - continue_sync(op); - else - continue_rw(op); + reset_duration = op->retry_after; } } + if (!op->retry_after && !op->prev_wait) + { + if (op->opcode == OSD_OP_SYNC) + continue_sync(op); + else + continue_rw(op); + } op = next_op; if (continuing_ops == 2) { @@ -300,6 +305,27 @@ restart: } } continuing_ops = 0; + reset_retry_timer(reset_duration); +} + +void cluster_client_t::reset_retry_timer(int new_duration) +{ + if (retry_timeout_duration && retry_timeout_duration <= new_duration || !new_duration) + { + return; + } + if (retry_timeout_id) + { + tfd->clear_timer(retry_timeout_id); + } + retry_timeout_duration = new_duration; + retry_timeout_id = tfd->set_timer(retry_timeout_duration, false, [this](int) + { + int time_passed = retry_timeout_duration; + retry_timeout_id = 0; + retry_timeout_duration = 0; + continue_ops(time_passed); + }); } void cluster_client_t::on_load_config_hook(json11::Json::object & etcd_global_config) @@ -349,15 +375,25 @@ void cluster_client_t::on_load_config_hook(json11::Json::object & etcd_global_co { client_max_writeback_iodepth = DEFAULT_CLIENT_MAX_WRITEBACK_IODEPTH; } - // up_wait_retry_interval - up_wait_retry_interval = config["up_wait_retry_interval"].uint64_value(); - if (!up_wait_retry_interval) + // client_retry_interval + client_retry_interval = config["client_retry_interval"].uint64_value(); + if (!client_retry_interval) { - up_wait_retry_interval = 50; + client_retry_interval = 50; } - else if (up_wait_retry_interval < 10) + else if (client_retry_interval < 10) { - up_wait_retry_interval = 10; + client_retry_interval = 10; + } + // client_eio_retry_interval + client_eio_retry_interval = 1000; + if (!config["client_eio_retry_interval"].is_null()) + { + client_eio_retry_interval = config["client_eio_retry_interval"].uint64_value(); + if (client_eio_retry_interval && client_eio_retry_interval < 10) + { + client_eio_retry_interval = 10; + } } // log_level log_level = config["log_level"].uint64_value(); @@ -716,15 +752,8 @@ resume_1: // We'll need to retry again if (op->parts[i].flags & PART_RETRY) { - op->up_wait = true; - if (!retry_timeout_id) - { - retry_timeout_id = tfd->set_timer(up_wait_retry_interval, false, [this](int) - { - retry_timeout_id = 0; - continue_ops(true); - }); - } + op->retry_after = client_retry_interval; + reset_retry_timer(client_retry_interval); } op->state = 1; } @@ -780,10 +809,9 @@ resume_2: return 1; } else if (op->retval != 0 && !(op->flags & OP_FLUSH_BUFFER) && - op->retval != -EPIPE && op->retval != -EIO && op->retval != -ENOSPC) + op->retval != -EPIPE && (op->retval != -EIO || !client_eio_retry_interval) && op->retval != -ENOSPC) { // Fatal error (neither -EPIPE, -EIO nor -ENOSPC) - // FIXME: Add a parameter to allow to not wait for EIOs (incomplete or corrupted objects) to heal erase_op(op); return 1; } @@ -1171,16 +1199,12 @@ void cluster_client_t::handle_op_part(cluster_op_part_t *part) // All next things like timer, continue_sync/rw and stop_client may affect the operation again // So do all these things after modifying operation state, otherwise we may hit reenterability bugs // FIXME postpone such things to set_immediate here to avoid bugs - // Mark op->up_wait = true to retry operation after a short pause (not immediately) - op->up_wait = true; - if (!retry_timeout_id) + // Set op->retry_after to retry operation after a short pause (not immediately) + if (!op->retry_after) { - retry_timeout_id = tfd->set_timer(up_wait_retry_interval, false, [this](int) - { - retry_timeout_id = 0; - continue_ops(true); - }); + op->retry_after = op->retval == -EIO ? client_eio_retry_interval : client_retry_interval; } + reset_retry_timer(op->retry_after); if (op->inflight_count == 0) { if (op->opcode == OSD_OP_SYNC) diff --git a/src/cluster_client.h b/src/cluster_client.h index 140f7a32..89ca4bfc 100644 --- a/src/cluster_client.h +++ b/src/cluster_client.h @@ -59,7 +59,7 @@ protected: void *buf = NULL; cluster_op_t *orig_op = NULL; bool needs_reslice = false; - bool up_wait = false; + int retry_after = 0; int inflight_count = 0, done_count = 0; std::vector parts; void *part_bitmaps = NULL; @@ -92,9 +92,11 @@ class cluster_client_t uint64_t client_max_writeback_iodepth = 0; int log_level = 0; - int up_wait_retry_interval = 500; // ms + int client_retry_interval = 50; // ms + int client_eio_retry_interval = 1000; // ms int retry_timeout_id = 0; + int retry_timeout_duration = 0; std::vector offline_ops; cluster_op_t *op_queue_head = NULL, *op_queue_tail = NULL; writeback_cache_t *wb = NULL; @@ -131,7 +133,7 @@ public: bool get_immediate_commit(uint64_t inode); - void continue_ops(bool up_retry = false); + void continue_ops(int time_passed = 0); inode_list_t *list_inode_start(inode_t inode, std::function&& objects, pg_num_t pg_num, osd_num_t primary_osd, int status)> callback); int list_pg_count(inode_list_t *lst); @@ -152,6 +154,7 @@ protected: int continue_rw(cluster_op_t *op); bool check_rw(cluster_op_t *op); void slice_rw(cluster_op_t *op); + void reset_retry_timer(int new_duration); bool try_send(cluster_op_t *op, int i); int continue_sync(cluster_op_t *op); void send_sync(cluster_op_t *op, cluster_op_part_t *part); From 5af23672d0f98d2c9b974437b3517a014a672e42 Mon Sep 17 00:00:00 2001 From: Vitaliy Filippov Date: Wed, 28 Feb 2024 21:08:25 +0300 Subject: [PATCH 2/4] Fix rm/rm-data error return code, add --down-ok option to bypass the error --- docs/usage/cli.en.md | 13 ++++++++----- docs/usage/cli.ru.md | 13 ++++++++----- src/cli.cpp | 8 ++++++-- src/cli_modify.cpp | 3 +++ src/cli_rm.cpp | 4 ++++ src/cli_rm_data.cpp | 11 +++++++---- 6 files changed, 36 insertions(+), 16 deletions(-) diff --git a/docs/usage/cli.en.md b/docs/usage/cli.en.md index b8adcb5b..cfc65117 100644 --- a/docs/usage/cli.en.md +++ b/docs/usage/cli.en.md @@ -131,19 +131,18 @@ See also about [how to export snapshots](qemu.en.md#exporting-snapshots). ## modify -`vitastor-cli modify [--rename ] [--resize ] [--readonly | --readwrite] [-f|--force]` +`vitastor-cli modify [--rename ] [--resize ] [--readonly | --readwrite] [-f|--force] [--down-ok]` Rename, resize image or change its readonly status. Images with children can't be made read-write. If the new size is smaller than the old size, extra data will be purged. You should resize file system in the image, if present, before shrinking it. -``` --f|--force Proceed with shrinking or setting readwrite flag even if the image has children. -``` +| `-f|--force` | Proceed with shrinking or setting readwrite flag even if the image has children. | +| `--down-ok` | Proceed with shrinking even if some data will be left on unavailable OSDs. | ## rm -`vitastor-cli rm [] [--writers-stopped]` +`vitastor-cli rm [] [--writers-stopped] [--down-ok]` Remove `` or all layers between `` and `` (`` must be a child of ``), rebasing all their children accordingly. --writers-stopped allows merging to be a bit @@ -151,6 +150,10 @@ more effective in case of a single 'slim' read-write child and 'fat' removed par the child is merged into parent and parent is renamed to child in that case. In other cases parent layers are always merged into children. +Other options: + +| `--down-ok` | Continue deletion/merging even if some data will be left on unavailable OSDs. | + ## flatten `vitastor-cli flatten ` diff --git a/docs/usage/cli.ru.md b/docs/usage/cli.ru.md index 6d6b8a1c..4cab75ed 100644 --- a/docs/usage/cli.ru.md +++ b/docs/usage/cli.ru.md @@ -132,7 +132,7 @@ vitastor-cli snap-create [-p|--pool ] @ ## modify -`vitastor-cli modify [--rename ] [--resize ] [--readonly | --readwrite] [-f|--force]` +`vitastor-cli modify [--rename ] [--resize ] [--readonly | --readwrite] [-f|--force] [--down-ok]` Изменить размер, имя образа или флаг "только для чтения". Снимать флаг "только для чтения" и уменьшать размер образов, у которых есть дочерние клоны, без `--force` нельзя. @@ -140,13 +140,12 @@ vitastor-cli snap-create [-p|--pool ] @ Если новый размер меньше старого, "лишние" данные будут удалены, поэтому перед уменьшением образа сначала уменьшите файловую систему в нём. -``` --f|--force Разрешить уменьшение или перевод в чтение-запись образа, у которого есть клоны. -``` +| -f|--force | Разрешить уменьшение или перевод в чтение-запись образа, у которого есть клоны. | +| --down-ok | Разрешить уменьшение, даже если часть данных останется неудалённой на недоступных OSD. | ## rm -`vitastor-cli rm [] [--writers-stopped]` +`vitastor-cli rm [] [--writers-stopped] [--down-ok]` Удалить образ `` или все слои от `` до `` (`` должен быть дочерним образом ``), одновременно меняя родительские образы их клонов (если таковые есть). @@ -158,6 +157,10 @@ vitastor-cli snap-create [-p|--pool ] @ В других случаях родительские слои вливаются в дочерние. +Другие опции: + +| `--down-ok` | Продолжать удаление/слияние, даже если часть данных останется неудалённой на недоступных OSD. | + ## flatten `vitastor-cli flatten ` diff --git a/src/cli.cpp b/src/cli.cpp index 301c7d8b..717e2398 100644 --- a/src/cli.cpp +++ b/src/cli.cpp @@ -46,18 +46,21 @@ static const char* help_text = "vitastor-cli snap-create [-p|--pool ] @\n" " Create a snapshot of image . May be used live if only a single writer is active.\n" "\n" - "vitastor-cli modify [--rename ] [--resize ] [--readonly | --readwrite] [-f|--force]\n" + "vitastor-cli modify [--rename ] [--resize ] [--readonly | --readwrite] [-f|--force] [--down-ok]\n" " Rename, resize image or change its readonly status. Images with children can't be made read-write.\n" " If the new size is smaller than the old size, extra data will be purged.\n" " You should resize file system in the image, if present, before shrinking it.\n" " -f|--force Proceed with shrinking or setting readwrite flag even if the image has children.\n" + " --down-ok Proceed with shrinking even if some data will be left on unavailable OSDs.\n" "\n" - "vitastor-cli rm [] [--writers-stopped]\n" + "vitastor-cli rm [] [--writers-stopped] [--down-ok]\n" " Remove or all layers between and ( must be a child of ),\n" " rebasing all their children accordingly. --writers-stopped allows merging to be a bit\n" " more effective in case of a single 'slim' read-write child and 'fat' removed parent:\n" " the child is merged into parent and parent is renamed to child in that case.\n" " In other cases parent layers are always merged into children.\n" + " Other options:\n" + " --down-ok Continue deletion/merging even if some data will be left on unavailable OSDs.\n" "\n" "vitastor-cli flatten \n" " Flatten a layer, i.e. merge data and detach it from parents.\n" @@ -171,6 +174,7 @@ static json11::Json::object parse_args(int narg, const char *args[]) !strcmp(opt, "readonly") || !strcmp(opt, "readwrite") || !strcmp(opt, "force") || !strcmp(opt, "reverse") || !strcmp(opt, "allow-data-loss") || !strcmp(opt, "allow_data_loss") || + !strcmp(opt, "down-ok") || !strcmp(opt, "down_ok") || !strcmp(opt, "dry-run") || !strcmp(opt, "dry_run") || !strcmp(opt, "help") || !strcmp(opt, "all") || (!strcmp(opt, "writers-stopped") || !strcmp(opt, "writers_stopped")) && strcmp("1", args[i+1]) != 0 diff --git a/src/cli_modify.cpp b/src/cli_modify.cpp index bda949da..0280ca2b 100644 --- a/src/cli_modify.cpp +++ b/src/cli_modify.cpp @@ -15,6 +15,7 @@ struct image_changer_t uint64_t new_size = 0; bool force_size = false, inc_size = false; bool set_readonly = false, set_readwrite = false, force = false; + bool down_ok = false; // interval between fsyncs int fsync_interval = 128; @@ -105,6 +106,7 @@ struct image_changer_t { "pool", (uint64_t)INODE_POOL(inode_num) }, { "fsync-interval", fsync_interval }, { "min-offset", ((new_size+4095)/4096)*4096 }, + { "down-ok", down_ok }, }); resume_1: while (!cb(result)) @@ -240,6 +242,7 @@ std::function cli_tool_t::start_modify(json11::Json cfg) changer->fsync_interval = cfg["fsync_interval"].uint64_value(); if (!changer->fsync_interval) changer->fsync_interval = 128; + changer->down_ok = cfg["down_ok"].bool_value(); // FIXME Check that the image doesn't have children when shrinking return [changer](cli_result_t & result) { diff --git a/src/cli_rm.cpp b/src/cli_rm.cpp index 683078ec..5c727320 100644 --- a/src/cli_rm.cpp +++ b/src/cli_rm.cpp @@ -53,6 +53,8 @@ struct snap_remover_t int use_cas = 1; // interval between fsyncs int fsync_interval = 128; + // ignore deletion errors + bool down_ok = false; std::map sources; std::map inode_used; @@ -679,6 +681,7 @@ resume_100: { "inode", inode }, { "pool", (uint64_t)INODE_POOL(inode) }, { "fsync-interval", fsync_interval }, + { "down-ok", down_ok }, }); } }; @@ -690,6 +693,7 @@ std::function cli_tool_t::start_rm(json11::Json cfg) snap_remover->from_name = cfg["from"].string_value(); snap_remover->to_name = cfg["to"].string_value(); snap_remover->fsync_interval = cfg["fsync_interval"].uint64_value(); + snap_remover->down_ok = cfg["down_ok"].bool_value(); if (!snap_remover->fsync_interval) snap_remover->fsync_interval = 128; if (!cfg["cas"].is_null()) diff --git a/src/cli_rm_data.cpp b/src/cli_rm_data.cpp index d35694d7..b060a5a5 100644 --- a/src/cli_rm_data.cpp +++ b/src/cli_rm_data.cpp @@ -25,6 +25,7 @@ struct rm_inode_t uint64_t inode = 0; pool_id_t pool_id = 0; uint64_t min_offset = 0; + bool down_ok = false; cli_tool_t *parent = NULL; inode_list_t *lister = NULL; @@ -221,17 +222,18 @@ struct rm_inode_t { fprintf(stderr, "\n"); } - if (parent->progress && (total_done < total_count || inactive_osds.size() > 0 || error_count > 0)) + bool is_error = (total_done < total_count || inactive_osds.size() > 0 || error_count > 0); + if (parent->progress && is_error) { fprintf( stderr, "Warning: Pool:%u,ID:%ju inode data may not have been fully removed.\n" - " Use `vitastor-cli rm-data --pool %u --inode %ju` if you encounter it in listings.\n", + "Use `vitastor-cli rm-data --pool %u --inode %ju` if you encounter it in listings.\n", pool_id, INODE_NO_POOL(inode), pool_id, INODE_NO_POOL(inode) ); } result = (cli_result_t){ - .err = error_count > 0 ? EIO : 0, - .text = error_count > 0 ? "Some blocks were not removed" : ( + .err = is_error && !down_ok ? EIO : 0, + .text = is_error ? "Some blocks were not removed" : ( "Done, inode "+std::to_string(INODE_NO_POOL(inode))+" from pool "+ std::to_string(pool_id)+" removed"), .data = json11::Json::object { @@ -280,6 +282,7 @@ std::function cli_tool_t::start_rm_data(json11::Json cfg) { remover->inode = (remover->inode & (((uint64_t)1 << (64-POOL_ID_BITS)) - 1)) | (((uint64_t)remover->pool_id) << (64-POOL_ID_BITS)); } + remover->down_ok = cfg["down_ok"].bool_value(); remover->pool_id = INODE_POOL(remover->inode); remover->min_offset = cfg["min_offset"].uint64_value(); return [remover](cli_result_t & result) From 77167e292006421eb681ab97b57665ab09526e72 Mon Sep 17 00:00:00 2001 From: Vitaliy Filippov Date: Wed, 28 Feb 2024 21:22:19 +0300 Subject: [PATCH 3/4] Do not use \r if output is not a terminal --- src/cli.cpp | 2 +- src/cli_common.cpp | 8 +++++++- src/cli_merge.cpp | 16 ++++++++++++---- src/cli_rm_data.cpp | 4 +++- 4 files changed, 23 insertions(+), 7 deletions(-) diff --git a/src/cli.cpp b/src/cli.cpp index 717e2398..45ea6fce 100644 --- a/src/cli.cpp +++ b/src/cli.cpp @@ -125,7 +125,7 @@ static const char* help_text = " --parallel_osds M Work with M osds in parallel when possible (default 4)\n" " --progress 1|0 Report progress (default 1)\n" " --cas 1|0 Use CAS writes for flatten, merge, rm (default is decide automatically)\n" - " --no-color Disable colored output\n" + " --color 1|0 Enable/disable colored output and CR symbols (default 1 if stdout is a terminal)\n" " --json JSON output\n" ; diff --git a/src/cli_common.cpp b/src/cli_common.cpp index e5b9b9b1..71b465ac 100644 --- a/src/cli_common.cpp +++ b/src/cli_common.cpp @@ -1,6 +1,7 @@ // Copyright (c) Vitaliy Filippov, 2019+ // License: VNPL-1.1 (see README.md for details) +#include #include "str_util.h" #include "cluster_client.h" #include "cli.h" @@ -113,7 +114,12 @@ void cli_tool_t::parse_config(json11::Json::object & cfg) else kv_it++; } - color = !cfg["no_color"].bool_value(); + if (cfg.find("no_color") != cfg.end()) + color = !cfg["no_color"].bool_value(); + else if (cfg.find("color") != cfg.end()) + color = cfg["color"].bool_value(); + else + color = isatty(1); json_output = cfg["json"].bool_value(); iodepth = cfg["iodepth"].uint64_value(); if (!iodepth) diff --git a/src/cli_merge.cpp b/src/cli_merge.cpp index 697c372d..0a24bf54 100644 --- a/src/cli_merge.cpp +++ b/src/cli_merge.cpp @@ -275,7 +275,9 @@ struct snap_merger_t processed++; if (parent->progress && !(processed % 128)) { - printf("\rFiltering target blocks: %ju/%ju", processed, to_process); + fprintf(stderr, parent->color + ? "\rFiltering target blocks: %ju/%ju" + : "Filtering target blocks: %ju/%ju\n", processed, to_process); } } if (in_flight > 0 || oit != merge_offsets.end()) @@ -285,7 +287,9 @@ struct snap_merger_t } if (parent->progress) { - printf("\r%ju full blocks of target filtered out\n", to_process-merge_offsets.size()); + fprintf(stderr, parent->color + ? "\r%ju full blocks of target filtered out\n" + : "%ju full blocks of target filtered out\n", to_process-merge_offsets.size()); } } state = 3; @@ -320,7 +324,9 @@ struct snap_merger_t processed++; if (parent->progress && !(processed % 128)) { - printf("\rOverwriting blocks: %ju/%ju", processed, to_process); + fprintf(stderr, parent->color + ? "\rOverwriting blocks: %ju/%ju" + : "Overwriting blocks: %ju/%ju\n", processed, to_process); } } if (in_flight == 0 && rwo_error.size()) @@ -339,7 +345,9 @@ struct snap_merger_t } if (parent->progress) { - printf("\rOverwriting blocks: %ju/%ju\n", to_process, to_process); + fprintf(stderr, parent->color + ? "\rOverwriting blocks: %ju/%ju\n" + : "Overwriting blocks: %ju/%ju\n", to_process, to_process); } // Done result = (cli_result_t){ .text = "Done, layers from "+from_name+" to "+to_name+" merged into "+target_name }; diff --git a/src/cli_rm_data.cpp b/src/cli_rm_data.cpp index b060a5a5..e88c6baf 100644 --- a/src/cli_rm_data.cpp +++ b/src/cli_rm_data.cpp @@ -213,7 +213,9 @@ struct rm_inode_t } if (parent->progress && total_count > 0 && total_done*1000/total_count != total_prev_pct) { - fprintf(stderr, "\rRemoved %ju/%ju objects, %ju more PGs to list...", total_done, total_count, pgs_to_list); + fprintf(stderr, parent->color + ? "\rRemoved %ju/%ju objects, %ju more PGs to list..." + : "Removed %ju/%ju objects, %ju more PGs to list...\n", total_done, total_count, pgs_to_list); total_prev_pct = total_done*1000/total_count; } if (lists_done && !lists.size()) From 38b8963330612d44f1fe80a5f227924affc0c9e2 Mon Sep 17 00:00:00 2001 From: Vitaliy Filippov Date: Thu, 29 Feb 2024 09:58:34 +0300 Subject: [PATCH 4/4] Release 1.4.8 - Do not use \r if output is not a terminal (should fix unexpected job output in proxmox) - Fix rm/rm-data error return code, add --down-ok option to bypass the error - Add EIO retry timeout and allow to disable these retries, rename up_wait_retry_interval to client_retry_interval - Add ubuntu jammy build - Wait for blockstore initialisation before starting OSD (prevent timeouts when init takes time) - Fix a rare use-after-free in automatic sync after delete in blockstore --- CMakeLists.txt | 2 +- csi/Makefile | 2 +- csi/deploy/004-csi-nodeplugin.yaml | 2 +- csi/deploy/007-csi-provisioner.yaml | 2 +- csi/src/config.go | 2 +- debian/changelog | 2 +- debian/vitastor.Dockerfile | 8 ++++---- mon/package.json | 2 +- patches/cinder-vitastor.py | 2 +- rpm/build-tarball.sh | 2 +- rpm/vitastor-el7.Dockerfile | 2 +- rpm/vitastor-el7.spec | 4 ++-- rpm/vitastor-el8.Dockerfile | 2 +- rpm/vitastor-el8.spec | 4 ++-- rpm/vitastor-el9.Dockerfile | 2 +- rpm/vitastor-el9.spec | 4 ++-- src/CMakeLists.txt | 2 +- src/vitastor.pc.in | 2 +- 18 files changed, 24 insertions(+), 24 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7ac97ec5..ffd021ef 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,6 +2,6 @@ cmake_minimum_required(VERSION 2.8.12) project(vitastor) -set(VERSION "1.4.7") +set(VERSION "1.4.8") add_subdirectory(src) diff --git a/csi/Makefile b/csi/Makefile index d657a801..34b9fef2 100644 --- a/csi/Makefile +++ b/csi/Makefile @@ -1,4 +1,4 @@ -VERSION ?= v1.4.7 +VERSION ?= v1.4.8 all: build push diff --git a/csi/deploy/004-csi-nodeplugin.yaml b/csi/deploy/004-csi-nodeplugin.yaml index eb0c254f..76ff1fa8 100644 --- a/csi/deploy/004-csi-nodeplugin.yaml +++ b/csi/deploy/004-csi-nodeplugin.yaml @@ -49,7 +49,7 @@ spec: capabilities: add: ["SYS_ADMIN"] allowPrivilegeEscalation: true - image: vitalif/vitastor-csi:v1.4.7 + image: vitalif/vitastor-csi:v1.4.8 args: - "--node=$(NODE_ID)" - "--endpoint=$(CSI_ENDPOINT)" diff --git a/csi/deploy/007-csi-provisioner.yaml b/csi/deploy/007-csi-provisioner.yaml index 1aece9c8..a6170334 100644 --- a/csi/deploy/007-csi-provisioner.yaml +++ b/csi/deploy/007-csi-provisioner.yaml @@ -121,7 +121,7 @@ spec: privileged: true capabilities: add: ["SYS_ADMIN"] - image: vitalif/vitastor-csi:v1.4.7 + image: vitalif/vitastor-csi:v1.4.8 args: - "--node=$(NODE_ID)" - "--endpoint=$(CSI_ENDPOINT)" diff --git a/csi/src/config.go b/csi/src/config.go index b9e1c701..dce70885 100644 --- a/csi/src/config.go +++ b/csi/src/config.go @@ -5,7 +5,7 @@ package vitastor const ( vitastorCSIDriverName = "csi.vitastor.io" - vitastorCSIDriverVersion = "1.4.7" + vitastorCSIDriverVersion = "1.4.8" ) // Config struct fills the parameters of request or user input diff --git a/debian/changelog b/debian/changelog index f8ad6f8b..129c5d62 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,4 +1,4 @@ -vitastor (1.4.7-1) unstable; urgency=medium +vitastor (1.4.8-1) unstable; urgency=medium * Bugfixes diff --git a/debian/vitastor.Dockerfile b/debian/vitastor.Dockerfile index a736a50d..da258a6e 100644 --- a/debian/vitastor.Dockerfile +++ b/debian/vitastor.Dockerfile @@ -37,8 +37,8 @@ RUN set -e -x; \ mkdir -p /root/packages/vitastor-$REL; \ rm -rf /root/packages/vitastor-$REL/*; \ cd /root/packages/vitastor-$REL; \ - cp -r /root/vitastor vitastor-1.4.7; \ - cd vitastor-1.4.7; \ + cp -r /root/vitastor vitastor-1.4.8; \ + cd vitastor-1.4.8; \ ln -s /root/fio-build/fio-*/ ./fio; \ FIO=$(head -n1 fio/debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \ ls /usr/include/linux/raw.h || cp ./debian/raw.h /usr/include/linux/raw.h; \ @@ -51,8 +51,8 @@ RUN set -e -x; \ rm -rf a b; \ echo "dep:fio=$FIO" > debian/fio_version; \ cd /root/packages/vitastor-$REL; \ - tar --sort=name --mtime='2020-01-01' --owner=0 --group=0 --exclude=debian -cJf vitastor_1.4.7.orig.tar.xz vitastor-1.4.7; \ - cd vitastor-1.4.7; \ + tar --sort=name --mtime='2020-01-01' --owner=0 --group=0 --exclude=debian -cJf vitastor_1.4.8.orig.tar.xz vitastor-1.4.8; \ + cd vitastor-1.4.8; \ V=$(head -n1 debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \ DEBFULLNAME="Vitaliy Filippov " dch -D $REL -v "$V""$REL" "Rebuild for $REL"; \ DEB_BUILD_OPTIONS=nocheck dpkg-buildpackage --jobs=auto -sa; \ diff --git a/mon/package.json b/mon/package.json index 92b762dc..be74b6cc 100644 --- a/mon/package.json +++ b/mon/package.json @@ -1,6 +1,6 @@ { "name": "vitastor-mon", - "version": "1.4.7", + "version": "1.4.8", "description": "Vitastor SDS monitor service", "main": "mon-main.js", "scripts": { diff --git a/patches/cinder-vitastor.py b/patches/cinder-vitastor.py index be5e9c49..bfd2349b 100644 --- a/patches/cinder-vitastor.py +++ b/patches/cinder-vitastor.py @@ -50,7 +50,7 @@ from cinder.volume import configuration from cinder.volume import driver from cinder.volume import volume_utils -VERSION = '1.4.7' +VERSION = '1.4.8' LOG = logging.getLogger(__name__) diff --git a/rpm/build-tarball.sh b/rpm/build-tarball.sh index 4b171ccb..e0fd5d70 100755 --- a/rpm/build-tarball.sh +++ b/rpm/build-tarball.sh @@ -24,4 +24,4 @@ rm fio mv fio-copy fio FIO=`rpm -qi fio | perl -e 'while(<>) { /^Epoch[\s:]+(\S+)/ && print "$1:"; /^Version[\s:]+(\S+)/ && print $1; /^Release[\s:]+(\S+)/ && print "-$1"; }'` perl -i -pe 's/(Requires:\s*fio)([^\n]+)?/$1 = '$FIO'/' $VITASTOR/rpm/vitastor-el$EL.spec -tar --transform 's#^#vitastor-1.4.7/#' --exclude 'rpm/*.rpm' -czf $VITASTOR/../vitastor-1.4.7$(rpm --eval '%dist').tar.gz * +tar --transform 's#^#vitastor-1.4.8/#' --exclude 'rpm/*.rpm' -czf $VITASTOR/../vitastor-1.4.8$(rpm --eval '%dist').tar.gz * diff --git a/rpm/vitastor-el7.Dockerfile b/rpm/vitastor-el7.Dockerfile index 9d1c67f5..04d44531 100644 --- a/rpm/vitastor-el7.Dockerfile +++ b/rpm/vitastor-el7.Dockerfile @@ -36,7 +36,7 @@ ADD . /root/vitastor RUN set -e; \ cd /root/vitastor/rpm; \ sh build-tarball.sh; \ - cp /root/vitastor-1.4.7.el7.tar.gz ~/rpmbuild/SOURCES; \ + cp /root/vitastor-1.4.8.el7.tar.gz ~/rpmbuild/SOURCES; \ cp vitastor-el7.spec ~/rpmbuild/SPECS/vitastor.spec; \ cd ~/rpmbuild/SPECS/; \ rpmbuild -ba vitastor.spec; \ diff --git a/rpm/vitastor-el7.spec b/rpm/vitastor-el7.spec index 6bcb7fdc..ed3b4286 100644 --- a/rpm/vitastor-el7.spec +++ b/rpm/vitastor-el7.spec @@ -1,11 +1,11 @@ Name: vitastor -Version: 1.4.7 +Version: 1.4.8 Release: 1%{?dist} Summary: Vitastor, a fast software-defined clustered block storage License: Vitastor Network Public License 1.1 URL: https://vitastor.io/ -Source0: vitastor-1.4.7.el7.tar.gz +Source0: vitastor-1.4.8.el7.tar.gz BuildRequires: liburing-devel >= 0.6 BuildRequires: gperftools-devel diff --git a/rpm/vitastor-el8.Dockerfile b/rpm/vitastor-el8.Dockerfile index b18dd6b1..e16c50ce 100644 --- a/rpm/vitastor-el8.Dockerfile +++ b/rpm/vitastor-el8.Dockerfile @@ -35,7 +35,7 @@ ADD . /root/vitastor RUN set -e; \ cd /root/vitastor/rpm; \ sh build-tarball.sh; \ - cp /root/vitastor-1.4.7.el8.tar.gz ~/rpmbuild/SOURCES; \ + cp /root/vitastor-1.4.8.el8.tar.gz ~/rpmbuild/SOURCES; \ cp vitastor-el8.spec ~/rpmbuild/SPECS/vitastor.spec; \ cd ~/rpmbuild/SPECS/; \ rpmbuild -ba vitastor.spec; \ diff --git a/rpm/vitastor-el8.spec b/rpm/vitastor-el8.spec index fc76a740..29e5c440 100644 --- a/rpm/vitastor-el8.spec +++ b/rpm/vitastor-el8.spec @@ -1,11 +1,11 @@ Name: vitastor -Version: 1.4.7 +Version: 1.4.8 Release: 1%{?dist} Summary: Vitastor, a fast software-defined clustered block storage License: Vitastor Network Public License 1.1 URL: https://vitastor.io/ -Source0: vitastor-1.4.7.el8.tar.gz +Source0: vitastor-1.4.8.el8.tar.gz BuildRequires: liburing-devel >= 0.6 BuildRequires: gperftools-devel diff --git a/rpm/vitastor-el9.Dockerfile b/rpm/vitastor-el9.Dockerfile index a0dcb83c..db36d54c 100644 --- a/rpm/vitastor-el9.Dockerfile +++ b/rpm/vitastor-el9.Dockerfile @@ -18,7 +18,7 @@ ADD . /root/vitastor RUN set -e; \ cd /root/vitastor/rpm; \ sh build-tarball.sh; \ - cp /root/vitastor-1.4.7.el9.tar.gz ~/rpmbuild/SOURCES; \ + cp /root/vitastor-1.4.8.el9.tar.gz ~/rpmbuild/SOURCES; \ cp vitastor-el9.spec ~/rpmbuild/SPECS/vitastor.spec; \ cd ~/rpmbuild/SPECS/; \ rpmbuild -ba vitastor.spec; \ diff --git a/rpm/vitastor-el9.spec b/rpm/vitastor-el9.spec index 5a2c8766..65f71b51 100644 --- a/rpm/vitastor-el9.spec +++ b/rpm/vitastor-el9.spec @@ -1,11 +1,11 @@ Name: vitastor -Version: 1.4.7 +Version: 1.4.8 Release: 1%{?dist} Summary: Vitastor, a fast software-defined clustered block storage License: Vitastor Network Public License 1.1 URL: https://vitastor.io/ -Source0: vitastor-1.4.7.el9.tar.gz +Source0: vitastor-1.4.8.el9.tar.gz BuildRequires: liburing-devel >= 0.6 BuildRequires: gperftools-devel diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 66a060cd..0836ae4c 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -16,7 +16,7 @@ if("${CMAKE_INSTALL_PREFIX}" MATCHES "^/usr/local/?$") set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}") endif() -add_definitions(-DVERSION="1.4.7") +add_definitions(-DVERSION="1.4.8") add_definitions(-D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -Wall -Wno-sign-compare -Wno-comment -Wno-parentheses -Wno-pointer-arith -fdiagnostics-color=always -fno-omit-frame-pointer -I ${CMAKE_SOURCE_DIR}/src) add_link_options(-fno-omit-frame-pointer) if (${WITH_ASAN}) diff --git a/src/vitastor.pc.in b/src/vitastor.pc.in index e54406a6..15741448 100644 --- a/src/vitastor.pc.in +++ b/src/vitastor.pc.in @@ -6,7 +6,7 @@ includedir=${prefix}/@CMAKE_INSTALL_INCLUDEDIR@ Name: Vitastor Description: Vitastor client library -Version: 1.4.7 +Version: 1.4.8 Libs: -L${libdir} -lvitastor_client Cflags: -I${includedir}