Warn about full and almost full OSDs in status
Test / buildenv (push) Successful in 10s
Details
Test / build (push) Successful in 12s
Details
Test / test_cas (push) Successful in 10s
Details
Test / make_test (push) Successful in 36s
Details
Test / test_change_pg_count (push) Successful in 38s
Details
Test / test_change_pg_size (push) Successful in 8s
Details
Test / test_change_pg_count_ec (push) Successful in 33s
Details
Test / test_create_nomaxid (push) Successful in 7s
Details
Test / test_etcd_fail (push) Successful in 58s
Details
Test / test_add_osd (push) Successful in 2m42s
Details
Test / test_interrupted_rebalance (push) Successful in 2m38s
Details
Test / test_failure_domain (push) Successful in 9s
Details
Test / test_interrupted_rebalance_imm (push) Successful in 2m49s
Details
Test / test_interrupted_rebalance_ec (push) Successful in 2m19s
Details
Test / test_snapshot (push) Successful in 40s
Details
Test / test_minsize_1 (push) Successful in 15s
Details
Test / test_snapshot_ec (push) Successful in 38s
Details
Test / test_rm (push) Successful in 15s
Details
Test / test_move_reappear (push) Successful in 22s
Details
Test / test_interrupted_rebalance_ec_imm (push) Successful in 2m9s
Details
Test / test_snapshot_down (push) Successful in 24s
Details
Test / test_snapshot_down_ec (push) Successful in 27s
Details
Test / test_splitbrain (push) Successful in 18s
Details
Test / test_snapshot_chain (push) Successful in 2m23s
Details
Test / test_snapshot_chain_ec (push) Successful in 2m53s
Details
Test / test_rebalance_verify_imm (push) Successful in 3m21s
Details
Test / test_rebalance_verify (push) Successful in 3m46s
Details
Test / test_switch_primary (push) Successful in 33s
Details
Test / test_write (push) Successful in 54s
Details
Test / test_write_xor (push) Successful in 48s
Details
Test / test_write_no_same (push) Successful in 14s
Details
Test / test_rebalance_verify_ec (push) Successful in 4m38s
Details
Test / test_rebalance_verify_ec_imm (push) Successful in 4m22s
Details
Test / test_heal_pg_size_2 (push) Successful in 3m34s
Details
Test / test_heal_ec (push) Successful in 3m38s
Details
Test / test_heal_csum_32k_dmj (push) Successful in 5m44s
Details
Test / test_heal_csum_32k_dj (push) Successful in 5m51s
Details
Test / test_heal_csum_32k (push) Successful in 6m45s
Details
Test / test_heal_csum_4k_dmj (push) Successful in 6m34s
Details
Test / test_enospc (push) Successful in 1m47s
Details
Test / test_enospc_xor (push) Successful in 2m41s
Details
Test / test_enospc_imm (push) Successful in 1m31s
Details
Test / test_heal_csum_4k_dj (push) Successful in 6m39s
Details
Test / test_heal_csum_4k (push) Successful in 6m15s
Details
Test / test_scrub_zero_osd_2 (push) Successful in 32s
Details
Test / test_scrub (push) Successful in 35s
Details
Test / test_scrub_xor (push) Successful in 26s
Details
Test / test_enospc_imm_xor (push) Successful in 1m13s
Details
Test / test_nfs (push) Successful in 24s
Details
Test / test_scrub_ec (push) Successful in 33s
Details
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 34s
Details
Test / test_scrub_pg_size_3 (push) Successful in 42s
Details
Test / buildenv (push) Successful in 10s
Details
Test / build (push) Successful in 12s
Details
Test / test_cas (push) Successful in 10s
Details
Test / make_test (push) Successful in 36s
Details
Test / test_change_pg_count (push) Successful in 38s
Details
Test / test_change_pg_size (push) Successful in 8s
Details
Test / test_change_pg_count_ec (push) Successful in 33s
Details
Test / test_create_nomaxid (push) Successful in 7s
Details
Test / test_etcd_fail (push) Successful in 58s
Details
Test / test_add_osd (push) Successful in 2m42s
Details
Test / test_interrupted_rebalance (push) Successful in 2m38s
Details
Test / test_failure_domain (push) Successful in 9s
Details
Test / test_interrupted_rebalance_imm (push) Successful in 2m49s
Details
Test / test_interrupted_rebalance_ec (push) Successful in 2m19s
Details
Test / test_snapshot (push) Successful in 40s
Details
Test / test_minsize_1 (push) Successful in 15s
Details
Test / test_snapshot_ec (push) Successful in 38s
Details
Test / test_rm (push) Successful in 15s
Details
Test / test_move_reappear (push) Successful in 22s
Details
Test / test_interrupted_rebalance_ec_imm (push) Successful in 2m9s
Details
Test / test_snapshot_down (push) Successful in 24s
Details
Test / test_snapshot_down_ec (push) Successful in 27s
Details
Test / test_splitbrain (push) Successful in 18s
Details
Test / test_snapshot_chain (push) Successful in 2m23s
Details
Test / test_snapshot_chain_ec (push) Successful in 2m53s
Details
Test / test_rebalance_verify_imm (push) Successful in 3m21s
Details
Test / test_rebalance_verify (push) Successful in 3m46s
Details
Test / test_switch_primary (push) Successful in 33s
Details
Test / test_write (push) Successful in 54s
Details
Test / test_write_xor (push) Successful in 48s
Details
Test / test_write_no_same (push) Successful in 14s
Details
Test / test_rebalance_verify_ec (push) Successful in 4m38s
Details
Test / test_rebalance_verify_ec_imm (push) Successful in 4m22s
Details
Test / test_heal_pg_size_2 (push) Successful in 3m34s
Details
Test / test_heal_ec (push) Successful in 3m38s
Details
Test / test_heal_csum_32k_dmj (push) Successful in 5m44s
Details
Test / test_heal_csum_32k_dj (push) Successful in 5m51s
Details
Test / test_heal_csum_32k (push) Successful in 6m45s
Details
Test / test_heal_csum_4k_dmj (push) Successful in 6m34s
Details
Test / test_enospc (push) Successful in 1m47s
Details
Test / test_enospc_xor (push) Successful in 2m41s
Details
Test / test_enospc_imm (push) Successful in 1m31s
Details
Test / test_heal_csum_4k_dj (push) Successful in 6m39s
Details
Test / test_heal_csum_4k (push) Successful in 6m15s
Details
Test / test_scrub_zero_osd_2 (push) Successful in 32s
Details
Test / test_scrub (push) Successful in 35s
Details
Test / test_scrub_xor (push) Successful in 26s
Details
Test / test_enospc_imm_xor (push) Successful in 1m13s
Details
Test / test_nfs (push) Successful in 24s
Details
Test / test_scrub_ec (push) Successful in 33s
Details
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 34s
Details
Test / test_scrub_pg_size_3 (push) Successful in 42s
Details
parent
2b863fb715
commit
6366972fe8
|
@ -21,6 +21,7 @@ affect their interaction with the cluster.
|
|||
- [nbd_timeout](#nbd_timeout)
|
||||
- [nbd_max_devices](#nbd_max_devices)
|
||||
- [nbd_max_part](#nbd_max_part)
|
||||
- [osd_nearfull_ratio](#osd_nearfull_ratio)
|
||||
|
||||
## client_retry_interval
|
||||
|
||||
|
@ -167,3 +168,18 @@ Maximum number of NBD devices in the system. This value is passed as
|
|||
Maximum number of partitions per NBD device. This value is passed as
|
||||
`max_part` parameter for the nbd kernel module when vitastor-nbd autoloads it.
|
||||
Note that (nbds_max)*(1+max_part) usually can't exceed 256.
|
||||
|
||||
## osd_nearfull_ratio
|
||||
|
||||
- Type: number
|
||||
- Default: 0.95
|
||||
- Can be changed online: yes
|
||||
|
||||
Ratio of used space on an OSD at which it is treated as "almost full" in vitastor-cli status output.
|
||||
|
||||
Remember that some client writes may hang or complete with an error if even
|
||||
just one OSD becomes 100 % full!
|
||||
|
||||
However, unlike in Ceph, 100 % full Vitastor OSDs don't crash (in Ceph they're
|
||||
unable to start at all), so you'll be able to recover from "out of space" errors
|
||||
without destroying and recreating OSDs.
|
||||
|
|
|
@ -21,6 +21,7 @@
|
|||
- [nbd_timeout](#nbd_timeout)
|
||||
- [nbd_max_devices](#nbd_max_devices)
|
||||
- [nbd_max_part](#nbd_max_part)
|
||||
- [osd_nearfull_ratio](#osd_nearfull_ratio)
|
||||
|
||||
## client_retry_interval
|
||||
|
||||
|
@ -168,3 +169,20 @@
|
|||
Максимальное число разделов на одном NBD-устройстве. Данное значение передаётся
|
||||
модулю ядра nbd как параметр `max_part`, когда его загружает vitastor-nbd.
|
||||
Имейте в виду, что (nbds_max)*(1+max_part) обычно не может превышать 256.
|
||||
|
||||
## osd_nearfull_ratio
|
||||
|
||||
- Тип: число
|
||||
- Значение по умолчанию: 0.95
|
||||
- Можно менять на лету: да
|
||||
|
||||
Доля занятого места на OSD, начиная с которой он считается "почти заполненным" в
|
||||
выводе vitastor-cli status.
|
||||
|
||||
Помните, что часть клиентских запросов может зависнуть или завершиться с ошибкой,
|
||||
если на 100 % заполнится хотя бы 1 OSD!
|
||||
|
||||
Однако, в отличие от Ceph, заполненные на 100 % OSD Vitastor не падают (в Ceph
|
||||
заполненные на 100 % OSD вообще не могут стартовать), так что вы сможете
|
||||
восстановить работу кластера после ошибок отсутствия свободного места
|
||||
без уничтожения и пересоздания OSD.
|
||||
|
|
|
@ -200,3 +200,27 @@
|
|||
Максимальное число разделов на одном NBD-устройстве. Данное значение передаётся
|
||||
модулю ядра nbd как параметр `max_part`, когда его загружает vitastor-nbd.
|
||||
Имейте в виду, что (nbds_max)*(1+max_part) обычно не может превышать 256.
|
||||
- name: osd_nearfull_ratio
|
||||
type: float
|
||||
default: 0.95
|
||||
online: true
|
||||
info: |
|
||||
Ratio of used space on an OSD at which it is treated as "almost full" in vitastor-cli status output.
|
||||
|
||||
Remember that some client writes may hang or complete with an error if even
|
||||
just one OSD becomes 100 % full!
|
||||
|
||||
However, unlike in Ceph, 100 % full Vitastor OSDs don't crash (in Ceph they're
|
||||
unable to start at all), so you'll be able to recover from "out of space" errors
|
||||
without destroying and recreating OSDs.
|
||||
info_ru: |
|
||||
Доля занятого места на OSD, начиная с которой он считается "почти заполненным" в
|
||||
выводе vitastor-cli status.
|
||||
|
||||
Помните, что часть клиентских запросов может зависнуть или завершиться с ошибкой,
|
||||
если на 100 % заполнится хотя бы 1 OSD!
|
||||
|
||||
Однако, в отличие от Ceph, заполненные на 100 % OSD Vitastor не падают (в Ceph
|
||||
заполненные на 100 % OSD вообще не могут стартовать), так что вы сможете
|
||||
восстановить работу кластера после ошибок отсутствия свободного места
|
||||
без уничтожения и пересоздания OSD.
|
||||
|
|
|
@ -92,6 +92,7 @@ const etcd_tree = {
|
|||
client_retry_interval: 50, // ms. min: 10
|
||||
client_eio_retry_interval: 1000, // ms
|
||||
client_retry_enospc: true,
|
||||
osd_nearfull_ratio: 0.95,
|
||||
// client and osd - configurable online
|
||||
log_level: 0,
|
||||
peer_connect_interval: 5, // seconds. min: 1
|
||||
|
|
|
@ -110,6 +110,12 @@ resume_2:
|
|||
}
|
||||
}
|
||||
int mon_count = 0;
|
||||
int osds_full = 0, osds_nearfull = 0;
|
||||
double osd_nearfull_ratio = parent->cli->config["osd_nearfull_ratio"].number_value();
|
||||
if (!osd_nearfull_ratio)
|
||||
{
|
||||
osd_nearfull_ratio = 0.95;
|
||||
}
|
||||
std::string mon_master;
|
||||
for (int i = 0; i < mon_members.size(); i++)
|
||||
{
|
||||
|
@ -139,8 +145,18 @@ resume_2:
|
|||
continue;
|
||||
}
|
||||
osd_count++;
|
||||
total_raw += kv.value["size"].uint64_value();
|
||||
free_raw += kv.value["free"].uint64_value();
|
||||
auto osd_size = kv.value["size"].uint64_value();
|
||||
auto osd_free = kv.value["free"].uint64_value();
|
||||
total_raw += osd_size;
|
||||
free_raw += osd_free;
|
||||
if (!osd_free)
|
||||
{
|
||||
osds_full++;
|
||||
}
|
||||
else if (osd_free < (uint64_t)(osd_size*(1-osd_nearfull_ratio)))
|
||||
{
|
||||
osds_nearfull++;
|
||||
}
|
||||
auto peer_it = parent->cli->st_cli.peer_states.find(stat_osd_num);
|
||||
if (peer_it != parent->cli->st_cli.peer_states.end())
|
||||
{
|
||||
|
@ -281,11 +297,27 @@ resume_2:
|
|||
else if (no_scrub)
|
||||
recovery_io += " scrub: "+str_repeat(" ", io_indent+1)+"disabled\n";
|
||||
}
|
||||
std::string warning_str;
|
||||
if (osds_full)
|
||||
{
|
||||
warning_str += " "+std::to_string(osds_full)+
|
||||
(osds_full > 1 ? " osds are full\n" : " osd is full\n");
|
||||
}
|
||||
if (osds_nearfull)
|
||||
{
|
||||
warning_str += " "+std::to_string(osds_nearfull)+
|
||||
(osds_nearfull > 1 ? " osds are almost full\n" : " osd is almost full\n");
|
||||
}
|
||||
if (warning_str != "")
|
||||
{
|
||||
warning_str = "\n warning:\n"+warning_str;
|
||||
}
|
||||
printf(
|
||||
" cluster:\n"
|
||||
" etcd: %d / %zd up, %s database size\n"
|
||||
" mon: %d up%s\n"
|
||||
" osd: %d / %d up\n"
|
||||
"%s"
|
||||
" \n"
|
||||
" data:\n"
|
||||
" raw: %s used, %s / %s available%s\n"
|
||||
|
@ -298,7 +330,7 @@ resume_2:
|
|||
"%s",
|
||||
etcd_alive, etcd_states.size(), format_size(etcd_db_size).c_str(),
|
||||
mon_count, mon_master == "" ? "" : (", master "+mon_master).c_str(),
|
||||
osd_up, osd_count,
|
||||
osd_up, osd_count, warning_str.c_str(),
|
||||
format_size(total_raw-free_raw).c_str(),
|
||||
format_size(free_raw-free_down_raw).c_str(),
|
||||
format_size(total_raw-down_raw).c_str(),
|
||||
|
|
Loading…
Reference in New Issue