Compare commits
3 Commits
9fe779a691
...
3629dbc54d
Author | SHA1 | Date |
---|---|---|
Vitaliy Filippov | 3629dbc54d | |
Vitaliy Filippov | 29284bef40 | |
Vitaliy Filippov | 6a924d6066 |
|
@ -15,6 +15,7 @@ These parameters only apply to Monitors.
|
||||||
- [mon_stats_timeout](#mon_stats_timeout)
|
- [mon_stats_timeout](#mon_stats_timeout)
|
||||||
- [osd_out_time](#osd_out_time)
|
- [osd_out_time](#osd_out_time)
|
||||||
- [placement_levels](#placement_levels)
|
- [placement_levels](#placement_levels)
|
||||||
|
- [use_old_pg_combinator](#use_old_pg_combinator)
|
||||||
|
|
||||||
## etcd_mon_ttl
|
## etcd_mon_ttl
|
||||||
|
|
||||||
|
@ -77,3 +78,11 @@ values. Smaller priority means higher level in tree. For example,
|
||||||
levels are always predefined and can't be removed. If one of them is not
|
levels are always predefined and can't be removed. If one of them is not
|
||||||
present in the configuration, then it is defined with the default priority
|
present in the configuration, then it is defined with the default priority
|
||||||
(100 for "host", 101 for "osd").
|
(100 for "host", 101 for "osd").
|
||||||
|
|
||||||
|
## use_old_pg_combinator
|
||||||
|
|
||||||
|
- Type: boolean
|
||||||
|
- Default: false
|
||||||
|
|
||||||
|
Use the old PG combination generator which doesn't support [level_placement](pool.en.md#level_placement)
|
||||||
|
and [raw_placement](pool.en.md#raw_placement) for pools which don't use these features.
|
||||||
|
|
|
@ -15,6 +15,7 @@
|
||||||
- [mon_stats_timeout](#mon_stats_timeout)
|
- [mon_stats_timeout](#mon_stats_timeout)
|
||||||
- [osd_out_time](#osd_out_time)
|
- [osd_out_time](#osd_out_time)
|
||||||
- [placement_levels](#placement_levels)
|
- [placement_levels](#placement_levels)
|
||||||
|
- [use_old_pg_combinator](#use_old_pg_combinator)
|
||||||
|
|
||||||
## etcd_mon_ttl
|
## etcd_mon_ttl
|
||||||
|
|
||||||
|
@ -78,3 +79,11 @@ OSD перед обновлением агрегированной статис
|
||||||
"host" и "osd" являются предопределёнными и не могут быть удалены. Если
|
"host" и "osd" являются предопределёнными и не могут быть удалены. Если
|
||||||
один из них отсутствует в конфигурации, он доопределяется с приоритетом по
|
один из них отсутствует в конфигурации, он доопределяется с приоритетом по
|
||||||
умолчанию (100 для уровня "host", 101 для "osd").
|
умолчанию (100 для уровня "host", 101 для "osd").
|
||||||
|
|
||||||
|
## use_old_pg_combinator
|
||||||
|
|
||||||
|
- Тип: булево (да/нет)
|
||||||
|
- Значение по умолчанию: false
|
||||||
|
|
||||||
|
Использовать старый генератор комбинаций PG, не поддерживающий [level_placement](pool.ru.md#level_placement)
|
||||||
|
и [raw_placement](pool.ru.md#raw_placement) для пулов, которые не используют данные функции.
|
||||||
|
|
|
@ -32,6 +32,8 @@ Parameters:
|
||||||
- [pg_minsize](#pg_minsize)
|
- [pg_minsize](#pg_minsize)
|
||||||
- [pg_count](#pg_count)
|
- [pg_count](#pg_count)
|
||||||
- [failure_domain](#failure_domain)
|
- [failure_domain](#failure_domain)
|
||||||
|
- [level_placement](#level_placement)
|
||||||
|
- [raw_placement](#raw_placement)
|
||||||
- [max_osd_combinations](#max_osd_combinations)
|
- [max_osd_combinations](#max_osd_combinations)
|
||||||
- [block_size](#block_size)
|
- [block_size](#block_size)
|
||||||
- [bitmap_granularity](#bitmap_granularity)
|
- [bitmap_granularity](#bitmap_granularity)
|
||||||
|
@ -209,6 +211,69 @@ never put on OSDs in the same failure domain (for example, on the same host).
|
||||||
So failure domain specifies the unit which failure you are protecting yourself
|
So failure domain specifies the unit which failure you are protecting yourself
|
||||||
from.
|
from.
|
||||||
|
|
||||||
|
## level_placement
|
||||||
|
|
||||||
|
- Type: string
|
||||||
|
|
||||||
|
Additional failure domain rules, applied in conjunction with failure_domain.
|
||||||
|
Must be specified in the following form:
|
||||||
|
|
||||||
|
`<placement level>=<sequence of characters>, <level2>=<sequence2>, ...`
|
||||||
|
|
||||||
|
Sequence should be exactly [pg_size](#pg_size) characters long. Each character
|
||||||
|
corresponds to an OSD in the PG of this pool. Equal characters mean that
|
||||||
|
corresponding items of the PG should be placed into the same placement tree
|
||||||
|
item at this level. Different characters mean that items should be placed into
|
||||||
|
different items.
|
||||||
|
|
||||||
|
For example, if you want an EC 4+2 pool and you want every 2 chunks to be stored
|
||||||
|
in its own datacenter and you also want each chunk to be stored on a different
|
||||||
|
host, you should set `level_placement` to `dc=112233 host=123456`.
|
||||||
|
|
||||||
|
Or you can set `level_placement` to `dc=112233` and leave `failure_domain` empty,
|
||||||
|
because `host` is the default `failure_domain` and it will be applied anyway.
|
||||||
|
|
||||||
|
Without this rule, it may happen that 3 chunks will be stored on OSDs in the
|
||||||
|
same datacenter, and the data will become inaccessible if that datacenter goes
|
||||||
|
down in this case.
|
||||||
|
|
||||||
|
Of course, you should group your hosts into datacenters before applying the rule
|
||||||
|
by setting [placement_levels](monitor.en.md#placement_levels) to something like
|
||||||
|
`{"dc":90,"host":100,"osd":110}` and add DCs to [node_placement](#placement-tree),
|
||||||
|
like `{"dc1":{"level":"dc"},"host1":{"parent":"dc1"},...}`.
|
||||||
|
|
||||||
|
## raw_placement
|
||||||
|
|
||||||
|
- Type: string
|
||||||
|
|
||||||
|
Raw PG placement rules, specified in the form of a DSL (domain-specific language).
|
||||||
|
Use only if you really know what you're doing :)
|
||||||
|
|
||||||
|
DSL specification:
|
||||||
|
|
||||||
|
```
|
||||||
|
dsl := item | item ("\n" | ",") items
|
||||||
|
item := "any" | rules
|
||||||
|
rules := rule | rule rules
|
||||||
|
rule := level operator arg
|
||||||
|
level := /\w+/
|
||||||
|
operator := "!=" | "=" | ">" | "?="
|
||||||
|
arg := value | "(" values ")"
|
||||||
|
values := value | value "," values
|
||||||
|
value := item_ref | constant_id
|
||||||
|
item_ref := /\d+/
|
||||||
|
constant_id := /"([^"]+)"/
|
||||||
|
```
|
||||||
|
|
||||||
|
"?=" operator means "preferred". I.e. `dc ?= "meow"` means "prefer datacenter meow
|
||||||
|
for this chunk, but put into another dc if it's unavailable".
|
||||||
|
|
||||||
|
Examples:
|
||||||
|
|
||||||
|
- Simple 3 replicas with failure_domain=host: `any, host!=1, host!=(1,2)`
|
||||||
|
- EC 4+2 in 3 DC: `any, dc=1 host!=1, dc!=1, dc=3 host!=3, dc!=(1,3), dc=5 host!=5`
|
||||||
|
- 1 replica in fixed DC + 2 in random DCs: `dc?=meow, dc!=1, dc!=(1,2)`
|
||||||
|
|
||||||
## max_osd_combinations
|
## max_osd_combinations
|
||||||
|
|
||||||
- Type: integer
|
- Type: integer
|
||||||
|
|
|
@ -31,6 +31,8 @@
|
||||||
- [pg_minsize](#pg_minsize)
|
- [pg_minsize](#pg_minsize)
|
||||||
- [pg_count](#pg_count)
|
- [pg_count](#pg_count)
|
||||||
- [failure_domain](#failure_domain)
|
- [failure_domain](#failure_domain)
|
||||||
|
- [level_placement](#level_placement)
|
||||||
|
- [raw_placement](#raw_placement)
|
||||||
- [max_osd_combinations](#max_osd_combinations)
|
- [max_osd_combinations](#max_osd_combinations)
|
||||||
- [block_size](#block_size)
|
- [block_size](#block_size)
|
||||||
- [bitmap_granularity](#bitmap_granularity)
|
- [bitmap_granularity](#bitmap_granularity)
|
||||||
|
@ -161,7 +163,7 @@ OSD, PG деактивируется на чтение и запись. Иным
|
||||||
Для примера, разница между pg_minsize 2 и 1 в реплицированном пуле с 3 копиями
|
Для примера, разница между pg_minsize 2 и 1 в реплицированном пуле с 3 копиями
|
||||||
данных (pg_size=3), проявляется следующим образом:
|
данных (pg_size=3), проявляется следующим образом:
|
||||||
- Если 2 сервера отключаются при pg_minsize=2, пул становится неактивным и
|
- Если 2 сервера отключаются при pg_minsize=2, пул становится неактивным и
|
||||||
остаётся неактивным в течение [osd_out_time](monitor.en.md#osd_out_time)
|
остаётся неактивным в течение [osd_out_time](monitor.ru.md#osd_out_time)
|
||||||
(10 минут), после чего монитор назначает другие OSD/серверы на замену, пул
|
(10 минут), после чего монитор назначает другие OSD/серверы на замену, пул
|
||||||
поднимается и начинает восстанавливать недостающие копии данных. Соответственно,
|
поднимается и начинает восстанавливать недостающие копии данных. Соответственно,
|
||||||
если OSD на замену нет - то есть, если у вас всего 3 сервера с OSD и 2 из них
|
если OSD на замену нет - то есть, если у вас всего 3 сервера с OSD и 2 из них
|
||||||
|
@ -169,7 +171,7 @@ OSD, PG деактивируется на чтение и запись. Иным
|
||||||
или не добавите хотя бы 1 сервер (или не переключите failure_domain на "osd").
|
или не добавите хотя бы 1 сервер (или не переключите failure_domain на "osd").
|
||||||
- Если 2 сервера отключаются при pg_minsize=1, ввод-вывод лишь приостанавливается
|
- Если 2 сервера отключаются при pg_minsize=1, ввод-вывод лишь приостанавливается
|
||||||
на короткое время, до тех пор, пока монитор не поймёт, что OSD отключены
|
на короткое время, до тех пор, пока монитор не поймёт, что OSD отключены
|
||||||
(что занимает 5-10 секунд при стандартном [etcd_report_interval](osd.en.md#etcd_report_interval)).
|
(что занимает 5-10 секунд при стандартном [etcd_report_interval](osd.ru.md#etcd_report_interval)).
|
||||||
После этого ввод-вывод восстанавливается, но новые данные временно пишутся
|
После этого ввод-вывод восстанавливается, но новые данные временно пишутся
|
||||||
всего в 1 копии. Когда же проходит osd_out_time, монитор точно так же назначает
|
всего в 1 копии. Когда же проходит osd_out_time, монитор точно так же назначает
|
||||||
другие OSD на замену выбывшим и пул начинает восстанавливать копии данных.
|
другие OSD на замену выбывшим и пул начинает восстанавливать копии данных.
|
||||||
|
@ -211,6 +213,71 @@ PG в Vitastor эферемерны, то есть вы можете менят
|
||||||
Иными словами, домен отказа - это то, от отказа чего вы защищаете себя избыточным
|
Иными словами, домен отказа - это то, от отказа чего вы защищаете себя избыточным
|
||||||
хранением.
|
хранением.
|
||||||
|
|
||||||
|
## level_placement
|
||||||
|
|
||||||
|
- Тип: строка
|
||||||
|
|
||||||
|
Правила дополнительных доменов отказа, применяемые вместе с failure_domain.
|
||||||
|
Должны задаваться в следующем виде:
|
||||||
|
|
||||||
|
`<уровень>=<последовательность символов>, <уровень2>=<последовательность2>, ...`
|
||||||
|
|
||||||
|
Каждая `<последовательность>` должна состоять ровно из [pg_size](#pg_size) символов.
|
||||||
|
Каждый символ соответствует одному OSD (размещению одной части PG) этого пула.
|
||||||
|
Одинаковые символы означают, что соответствующие части размещаются в один и тот же
|
||||||
|
узел дерева OSD на заданном `<уровне>`. Разные символы означают, что части
|
||||||
|
размещаются в разные узлы.
|
||||||
|
|
||||||
|
Например, если вы хотите сделать пул EC 4+2 и хотите поместить каждые 2 части
|
||||||
|
данных в свой датацентр, и также вы хотите, чтобы каждая часть размещалась на
|
||||||
|
другом хосте, то вы должны задать `level_placement` равным `dc=112233 host=123456`.
|
||||||
|
|
||||||
|
Либо вы просто можете задать `level_placement` равным `dc=112233` и оставить
|
||||||
|
`failure_domain` пустым, т.к. `host` это его значение по умолчанию и оно также
|
||||||
|
применится автоматически.
|
||||||
|
|
||||||
|
Без этого правила может получиться так, что в одном из датацентров окажется
|
||||||
|
3 части данных одной PG и данные окажутся недоступными при временном отключении
|
||||||
|
этого датацентра.
|
||||||
|
|
||||||
|
Естественно, перед установкой правила вам нужно сгруппировать ваши хосты в
|
||||||
|
датацентры, установив [placement_levels](monitor.ru.md#placement_levels) во что-то
|
||||||
|
типа `{"dc":90,"host":100,"osd":110}` и добавив датацентры в [node_placement](#дерево-размещения),
|
||||||
|
примерно так: `{"dc1":{"level":"dc"},"host1":{"parent":"dc1"},...}`.
|
||||||
|
|
||||||
|
## raw_placement
|
||||||
|
|
||||||
|
- Тип: строка
|
||||||
|
|
||||||
|
Низкоуровневые правила генерации PG в форме DSL (доменно-специфичного языка).
|
||||||
|
Используйте, только если действительно знаете, зачем вам это надо :)
|
||||||
|
|
||||||
|
Спецификация DSL:
|
||||||
|
|
||||||
|
```
|
||||||
|
dsl := item | item ("\n" | ",") items
|
||||||
|
item := "any" | rules
|
||||||
|
rules := rule | rule rules
|
||||||
|
rule := level operator arg
|
||||||
|
level := /\w+/
|
||||||
|
operator := "!=" | "=" | ">" | "?="
|
||||||
|
arg := value | "(" values ")"
|
||||||
|
values := value | value "," values
|
||||||
|
value := item_ref | constant_id
|
||||||
|
item_ref := /\d+/
|
||||||
|
constant_id := /"([^"]+)"/
|
||||||
|
```
|
||||||
|
|
||||||
|
Оператор "?=" означает "предпочитаемый". Т.е. `dc ?= "meow"` означает "предпочитать
|
||||||
|
датацентр meow для этой части данных, но разместить её в другом датацентре, если
|
||||||
|
meow недоступен".
|
||||||
|
|
||||||
|
Примеры:
|
||||||
|
|
||||||
|
- Простые 3 реплики с failure_domain=host: `any, host!=1, host!=(1,2)`
|
||||||
|
- EC 4+2 в 3 датацентрах: `any, dc=1 host!=1, dc!=1, dc=3 host!=3, dc!=(1,3), dc=5 host!=5`
|
||||||
|
- 1 копия в фиксированном ДЦ + 2 в других ДЦ: `dc?=meow, dc!=1, dc!=(1,2)`
|
||||||
|
|
||||||
## max_osd_combinations
|
## max_osd_combinations
|
||||||
|
|
||||||
- Тип: целое число
|
- Тип: целое число
|
||||||
|
|
|
@ -63,3 +63,12 @@
|
||||||
"host" и "osd" являются предопределёнными и не могут быть удалены. Если
|
"host" и "osd" являются предопределёнными и не могут быть удалены. Если
|
||||||
один из них отсутствует в конфигурации, он доопределяется с приоритетом по
|
один из них отсутствует в конфигурации, он доопределяется с приоритетом по
|
||||||
умолчанию (100 для уровня "host", 101 для "osd").
|
умолчанию (100 для уровня "host", 101 для "osd").
|
||||||
|
- name: use_old_pg_combinator
|
||||||
|
type: bool
|
||||||
|
default: false
|
||||||
|
info: |
|
||||||
|
Use the old PG combination generator which doesn't support [level_placement](pool.en.md#level_placement)
|
||||||
|
and [raw_placement](pool.en.md#raw_placement) for pools which don't use these features.
|
||||||
|
info_ru: |
|
||||||
|
Использовать старый генератор комбинаций PG, не поддерживающий [level_placement](pool.ru.md#level_placement)
|
||||||
|
и [raw_placement](pool.ru.md#raw_placement) для пулов, которые не используют данные функции.
|
||||||
|
|
|
@ -269,6 +269,8 @@ Optional parameters:
|
||||||
| `--block_size 128k` | Put pool only on OSDs with this data block size |
|
| `--block_size 128k` | Put pool only on OSDs with this data block size |
|
||||||
| `--bitmap_granularity 4k` | Put pool only on OSDs with this logical sector size |
|
| `--bitmap_granularity 4k` | Put pool only on OSDs with this logical sector size |
|
||||||
| `--immediate_commit none` | Put pool only on OSDs with this or larger immediate_commit (none < small < all) |
|
| `--immediate_commit none` | Put pool only on OSDs with this or larger immediate_commit (none < small < all) |
|
||||||
|
| `--level_placement <rules>` | Use additional failure domain rules (example: "dc=112233") |
|
||||||
|
| `--raw_placement <rules>` | Specify raw PG generation rules ([details](../config/pool.en.md#raw_placement)) |
|
||||||
| `--primary_affinity_tags tags` | Prefer to put primary copies on OSDs with all specified tags |
|
| `--primary_affinity_tags tags` | Prefer to put primary copies on OSDs with all specified tags |
|
||||||
| `--scrub_interval <time>` | Enable regular scrubbing for this pool. Format: number + unit s/m/h/d/M/y |
|
| `--scrub_interval <time>` | Enable regular scrubbing for this pool. Format: number + unit s/m/h/d/M/y |
|
||||||
| `--used_for_fs <name>` | Mark pool as used for VitastorFS with metadata in image <name> |
|
| `--used_for_fs <name>` | Mark pool as used for VitastorFS with metadata in image <name> |
|
||||||
|
|
|
@ -286,6 +286,8 @@ vitastor-cli snap-create [-p|--pool <id|name>] <image>@<snapshot>
|
||||||
| `--block_size 128k` | ...только OSD с данным размером блока |
|
| `--block_size 128k` | ...только OSD с данным размером блока |
|
||||||
| `--bitmap_granularity 4k` | ...только OSD с данным размером логического сектора |
|
| `--bitmap_granularity 4k` | ...только OSD с данным размером логического сектора |
|
||||||
| `--immediate_commit none` | ...только OSD с этим или большим immediate_commit (none < small < all) |
|
| `--immediate_commit none` | ...только OSD с этим или большим immediate_commit (none < small < all) |
|
||||||
|
| `--level_placement <rules>` | Задать правила дополнительных доменов отказа (пример: "dc=112233") |
|
||||||
|
| `--raw_placement <rules>` | Задать низкоуровневые правила генерации PG ([детали](../config/pool.ru.md#raw_placement)) |
|
||||||
| `--primary_affinity_tags tags` | Предпочитать OSD со всеми данными тегами для роли первичных |
|
| `--primary_affinity_tags tags` | Предпочитать OSD со всеми данными тегами для роли первичных |
|
||||||
| `--scrub_interval <time>` | Включить скрабы с заданным интервалом времени (число + единица s/m/h/d/M/y) |
|
| `--scrub_interval <time>` | Включить скрабы с заданным интервалом времени (число + единица s/m/h/d/M/y) |
|
||||||
| `--pg_stripe_size <number>` | Увеличить блок группировки объектов по PG |
|
| `--pg_stripe_size <number>` | Увеличить блок группировки объектов по PG |
|
||||||
|
|
|
@ -0,0 +1,408 @@
|
||||||
|
const { select_murmur3 } = require('./murmur3.js');
|
||||||
|
|
||||||
|
const NO_OSD = 'Z';
|
||||||
|
|
||||||
|
// Bundles an indexed placement tree with a fixed rule set so that PG
// combinations can be generated and validated through one object.
class RuleCombinator
{
    // osd_tree: collection of tree nodes; only entries with a truthy `id` are indexed
    // rules: placement rules, one rule list per PG position
    // max_combinations: handed to random_custom_combinations() as its `count` argument
    // ordered: handed to random_custom_combinations() as its `ordered` argument
    constructor(osd_tree, rules, max_combinations, ordered)
    {
        const nodes = Object.values(osd_tree).filter(node => node.id);
        this.osd_tree = index_tree(nodes);
        this.rules = rules;
        this.max_combinations = max_combinations;
        this.ordered = ordered;
    }

    // Generate combinations for the stored tree and rules
    random_combinations()
    {
        const { osd_tree, rules, max_combinations, ordered } = this;
        return random_custom_combinations(osd_tree, rules, max_combinations, ordered);
    }

    // Return the subset of `pgs` accepted by check_custom_combinations() for the stored tree and rules
    check_combinations(pgs)
    {
        const { osd_tree, rules } = this;
        return check_custom_combinations(osd_tree, rules, pgs);
    }
}
|
||||||
|
|
||||||
|
// Convert the alternative "level-index" format to positional rules.
//
// level_index = { [level: string]: string | string[] }
//   Each value is a sequence with one element per PG position. Positions that
//   carry equal elements get an '=' rule referring to the first position with
//   that element, positions with a new element get a '!=' rule listing the
//   first positions of all elements seen before it.
// level_sequence = optional, levels from upper to lower, i.e. [ 'dc', 'host' ].
//   When given, '!=' references of a lower level that are already implied by
//   a difference at an upper level are removed. Levels listed here but absent
//   from level_index are ignored during pruning.
//
// Example: level_index = { dc: "112233", host: "ABCDEF" }
// Returns: [ level, operator, value ][][] - one rule list per PG position,
//   references are 1-based position numbers.
function parse_level_indexes(level_index, level_sequence)
{
    const rules = [];
    for (const level in level_index)
    {
        const idx = level_index[level];
        while (rules.length < idx.length)
        {
            rules.push([]);
        }
        // Element => 1-based position where it was first seen (always truthy)
        const seen = {};
        for (let i = 0; i < idx.length; i++)
        {
            if (!seen[idx[i]])
            {
                const other = Object.values(seen);
                if (other.length)
                {
                    rules[i].push([ level, '!=', other ]);
                }
                seen[idx[i]] = i+1;
            }
            else
            {
                rules[i].push([ level, '=', seen[idx[i]] ]);
            }
        }
    }
    if (level_sequence)
    {
        // Prune useless rules for the sake of prettiness
        // For simplicity, call "upper" level DC and "lower" level host
        const level_prio = {};
        level_sequence.forEach((level, prio) => { level_prio[level] = prio; });
        for (let upper_i = 0; upper_i < level_sequence.length-1; upper_i++)
        {
            const upper_idx = level_index[level_sequence[upper_i]];
            if (!upper_idx)
            {
                // This level has no sequence in level_index, so it implies nothing about lower levels
                continue;
            }
            for (let i = 0; i < rules.length; i++)
            {
                // Positions whose DC differs from the DC of position i:
                // if upper_level[x] is different from upper_level[y]
                // then lower_level[x] is also different from lower_level[y]
                const noteq = {};
                for (let k = 0; k < upper_idx.length; k++)
                {
                    if (upper_idx[k] != upper_idx[i])
                    {
                        noteq[k+1] = true;
                    }
                }
                for (let j = 0; j < rules[i].length; j++)
                {
                    const rule = rules[i][j];
                    if (level_prio[rule[0]] != null && level_prio[rule[0]] > upper_i && rule[1] == '!=')
                    {
                        rule[2] = rule[2].filter(other_host => !noteq[other_host]);
                        if (!rule[2].length)
                        {
                            // Nothing left to compare with - drop the rule and re-check index j
                            rules[i].splice(j--, 1);
                        }
                    }
                }
            }
        }
    }
    return rules;
}
|
||||||
|
|
||||||
|
// Parse rules in DSL format
// dsl := item | item ("\n" | ",") items
// item := "any" | rules
// rules := rule | rule rules
// rule := level operator arg
// level := /\w+/
// operator := "!=" | "=" | ">" | "?="
// arg := value | "(" values ")"
// values := value | value "," values
// value := item_ref | constant_id
// item_ref := /\d+/
// constant_id := /"([^"]+)"/
//
// Unquoted word tokens that are not all-digit are also accepted as constant IDs.
// item_ref is a 1-based number of an EARLIER item; 0, the number of the current
// item and numbers of later items are rejected.
// Characters not matched by the tokenizer (for example spaces) are ignored.
//
// Output: [ level, operator, value ][][]
//   value = number | { id: string } | (number | { id: string })[]
// Throws Error on any syntax or reference error.
function parse_pg_dsl(text)
{
    const operators = [ '!=', '=', '>', '?=' ];
    // Each token is [ text, offset in the input ]
    const tokens = [ ...text.matchAll(/\w+|!=|\?=|[>=\(\),\n]|"([^\"]+)"/g) ].map(t => [ t[0], t.index ]);
    const positions = [ [] ];
    let rules = positions[0];
    // Turn an all-digit token into a validated reference to an earlier item.
    // positions.length is the 1-based number of the item being parsed.
    const parse_ref = (token) =>
    {
        const n = 0|token[0];
        if (!n)
        {
            throw new Error('Level reference cannot be 0 (refs count from 1) at '+token[1]);
        }
        if (n >= positions.length)
        {
            // An item referring to itself has nothing selected to compare with yet
            throw new Error((n == positions.length ? 'Self-references' : 'Forward references')+' are forbidden at '+token[1]);
        }
        return n;
    };
    // Strip the surrounding double quotes of a constant_id token
    const unquote = (str) => str.slice(1, -1);
    for (let i = 0; i < tokens.length; )
    {
        const tok = tokens[i][0];
        if (tok === '\n' || tok === ',')
        {
            // Item separator - start collecting rules for the next position
            rules = [];
            positions.push(rules);
            i++;
            continue;
        }
        const ends_item = i == tokens.length-1 || tokens[i+1][0] === ',' || tokens[i+1][0] === '\n';
        if (!rules.length && tok === 'any' && ends_item)
        {
            // A lone "any" means "no restrictions" - the item keeps an empty rule list
            i++;
            continue;
        }
        if (!/^\w/.exec(tok))
        {
            throw new Error('Unexpected '+tok+' at '+tokens[i][1]+' (level name expected)');
        }
        if (i > tokens.length-3)
        {
            throw new Error('Unexpected EOF (operator and value expected)');
        }
        if (!operators.includes(tokens[i+1][0]))
        {
            // Only the four operators from the grammar are allowed here
            throw new Error('Unexpected '+tokens[i+1][0]+' at '+tokens[i+1][1]+' (operator expected)');
        }
        if (!/^[\w"(]/.exec(tokens[i+2][0])) // "
        {
            throw new Error('Unexpected '+tokens[i+2][0]+' at '+tokens[i+2][1]+' (id, round brace, number or node ID expected)');
        }
        const operator = tokens[i+1][0];
        const first = tokens[i+2];
        i += 3;
        let value;
        if (first[0][0] == '"')
        {
            value = { id: unquote(first[0]) };
        }
        else if (first[0] === '(')
        {
            value = [];
            while (true)
            {
                if (i > tokens.length-1)
                {
                    throw new Error('Unexpected EOF (expected list and a closing round brace)');
                }
                const item = tokens[i++];
                if (item[0] === ')')
                {
                    break;
                }
                if (item[0] === ',')
                {
                    continue;
                }
                if (item[0][0] === '"')
                {
                    value.push({ id: unquote(item[0]) });
                }
                else if (/^\d+$/.exec(item[0]))
                {
                    value.push(parse_ref(item));
                }
                else if (!/^\w/.exec(item[0]))
                {
                    throw new Error('Unexpected '+item[0]+' at '+item[1]+' (number or node ID expected)');
                }
                else
                {
                    value.push({ id: item[0] });
                }
            }
        }
        else if (/^\d+$/.exec(first[0]))
        {
            value = parse_ref(first);
        }
        else
        {
            value = { id: first[0] };
        }
        rules.push([ tok, operator, value ]);
    }
    return positions;
}
|
||||||
|
|
||||||
|
// Generate a set of PGs (OSD combinations) satisfying positional placement rules.
//
// osd_tree = index_tree() output
// rules = [ level, operator, value ][][] - one rule list per PG position
// level = string
// operator = '=' | '!=' | '>' | '?='
// value = number (1-based reference to an earlier position) | { id } | array of these
// count = number of pseudo-random selection rounds
// ordered = when falsy, OSDs inside every PG are sorted with the default Array sort
//
// examples:
// 1) simple 3 replicas with failure_domain=host:
//    [ [], [ [ 'host', '!=', 1 ] ], [ [ 'host', '!=', [ 1, 2 ] ] ] ]
//    in DSL form: any, host!=1, host!=(1,2)
// 2) EC 4+2 in 3 DC:
//    [ [], [ [ 'dc', '=', 1 ], [ 'host', '!=', 1 ] ],
//      [ [ 'dc', '!=', 1 ] ], [ [ 'dc', '=', 3 ], [ 'host', '!=', 3 ] ],
//      [ [ 'dc', '!=', [ 1, 3 ] ] ], [ [ 'dc', '=', 5 ], [ 'host', '!=', 5 ] ] ]
//    in DSL form: any, dc=1 host!=1, dc!=1, dc=3 host!=3, dc!=(1,3), dc=5 host!=5
// 3) 1 replica in fixed DC + 2 in random DCs:
//    [ [ [ 'dc', '=', { id: 'meow' } ] ], [ [ 'dc', '!=', 1 ] ], [ [ 'dc', '!=', [ 1, 2 ] ] ] ]
//    in DSL form: dc=meow, dc!=1, dc!=(1,2)
// 4) 2 replicas in each DC (almost the same as (2)):
//    DSL: any, dc=1 host!=1, dc!=1, dc=3 host!=3
// Alternative simpler way to specify rules would be: [ DC: 112233 HOST: 123456 ]
//
// Returns { ['pg_'+ids joined by '_']: id[] }; positions for which nothing could
// be selected hold NO_OSD. Only PGs with the maximum reached number of selected
// OSDs are kept.
function random_custom_combinations(osd_tree, rules, count, ordered)
{
    const result = {};
    const first = filter_tree_by_rules(osd_tree, rules[0], []);
    let max_size = 0;
    // Placeholder for a position where no node could be selected
    const unplaced = () => ({ levels: {}, id: null });
    // Register a finished selection in the result and track the best size seen so far
    const remember = (selected) =>
    {
        const size = selected.filter(s => s.id !== null).length;
        if (max_size < size)
        {
            max_size = size;
        }
        const pg = selected.map(s => s.id === null ? NO_OSD : (0|s.id));
        if (!ordered)
        {
            pg.sort();
        }
        result['pg_'+pg.join('_')] = pg;
    };
    // All combinations for the first item (usually "any") to try to include each OSD at least once
    for (const f of first)
    {
        const selected = [ f ];
        for (let pos = 1; pos < rules.length; pos++)
        {
            const candidates = filter_tree_by_rules(osd_tree, rules[pos], selected);
            const idx = select_murmur3(candidates.length, i => 'p:'+f.id+':'+candidates[i].id);
            selected.push(idx == null ? unplaced() : candidates[idx]);
        }
        remember(selected);
    }
    // Pseudo-random selection
    for (let n = 0; n < count; n++)
    {
        const selected = [];
        for (const item_rules of rules)
        {
            // The candidate list for an empty selection is the precomputed one
            const candidates = selected.length ? filter_tree_by_rules(osd_tree, item_rules, selected) : first;
            const idx = select_murmur3(candidates.length, i => n+':'+candidates[i].id);
            selected.push(idx == null ? unplaced() : candidates[idx]);
        }
        remember(selected);
    }
    // Exclude PGs with less successful selections than maximum
    for (const key of Object.keys(result))
    {
        const placed = result[key].filter(s => s !== NO_OSD).length;
        if (placed < max_size)
        {
            delete result[key];
        }
    }
    return result;
}
|
||||||
|
|
||||||
|
// Return leaf nodes of the placement tree that satisfy all <rules> given the
// nodes already chosen for earlier PG positions.
//
// osd_tree = node index with the root list in osd_tree[''].children; every node
//   has `levels` ({ [level]: id }) and optionally `children`
// rules = [ level, operator, value ][] for ONE position
//   value = number (1-based index into <selected>) | { id } | array of these
//   operators: '='  - node's level id must equal every value
//              '!=' - node's level id must differ from every value
//              '>'  - node's level id must be greater than every value
//              '?=' - like '=', but if nothing matches, all other nodes that
//                     have this level are used instead
// selected = nodes chosen for previous positions; missing (null/undefined)
//   entries are treated as having no level ids
//
// The tree itself is never modified; the returned array is always a new one.
function filter_tree_by_rules(osd_tree, rules, selected)
{
    // Copy the root list: it is expanded in place below and must not leak into the shared tree
    let cur = [ ...osd_tree[''].children ];
    for (const rule of rules)
    {
        const [ level, operator, arg ] = rule;
        // Resolve references to the level ids of already selected nodes
        const val = (arg instanceof Array ? arg : [ arg ]).map((v) =>
        {
            if (v instanceof Object)
            {
                return v.id;
            }
            const ref = selected[v-1];
            return ref ? ref.levels[level] : undefined;
        });
        const preferred = [], other = [];
        for (let i = 0; i < cur.length; i++)
        {
            const item = cur[i];
            const level_id = item.levels[level];
            if (level_id)
            {
                if (operator == '>' && !val.some(v => level_id <= v) ||
                    (operator == '=' || operator == '?=') && !val.some(v => level_id != v) ||
                    operator == '!=' && !val.some(v => level_id == v))
                {
                    // Include
                    preferred.push(item);
                }
                else if (operator == '?=')
                {
                    // Non-preferred: only used when no preferred node exists
                    other.push(item);
                }
            }
            else if (item.children)
            {
                // The node is above the requested level - descend, children are checked next
                cur.splice(i+1, 0, ...item.children);
            }
        }
        cur = preferred.length ? preferred : other;
    }
    // Get leaf items
    for (let i = 0; i < cur.length; i++)
    {
        if (cur[i].children)
        {
            // Descend: replace the node with its children and re-check the same index
            cur.splice(i, 1, ...cur[i].children);
            i--;
        }
    }
    return cur;
}
|
||||||
|
|
||||||
|
// Convert from
// node_list = { id: string|number, level: string, size?: number, parent?: string|number }[]
// to
// node_tree = { [node_id]: { id, level, size?, parent?, children?: child_node[], levels: { [level]: id, ... } } }
//
// Nodes are copied (any incoming `children` property is dropped) and linked to
// their parents; nodes without a known parent are attached to the root entry
// stored under the '' key. Every node's `levels` ends up holding its own level
// id plus the level ids of all its ancestors.
// NOTE: levels are propagated by expanding the root's `children` array in place,
// so after the call node_tree[''].children lists only nodes without children.
function index_tree(node_list)
{
    const tree = { '': { children: [], levels: {} } };
    // Pass 1: copy nodes into the index
    node_list.forEach((node) =>
    {
        const { children, ...own } = node;
        tree[node.id] = { ...own, levels: {} };
    });
    // Pass 2: attach every node to its parent, or to the root when the parent is unknown
    node_list.forEach((node) =>
    {
        const has_parent = node.parent && tree[node.parent];
        const parent = tree[has_parent ? node.parent : ''];
        if (!parent.children)
        {
            parent.children = [];
        }
        parent.children.push(tree[node.id]);
    });
    // Pass 3: walk from the root downwards, handing accumulated level ids to children
    const queue = tree[''].children;
    let pos = 0;
    while (pos < queue.length)
    {
        const node = queue[pos];
        node.levels[node.level] = node.id;
        if (!node.children)
        {
            pos++;
            continue;
        }
        for (const child of node.children)
        {
            child.levels = { ...node.levels, ...child.levels };
        }
        // Replace the node with its children; they are visited next at the same position
        queue.splice(pos, 1, ...node.children);
    }
    return tree;
}
|
||||||
|
|
||||||
|
// Filter a list of PGs, keeping only those that satisfy the placement rules.
//
// osd_tree = index_tree output
// rules = parse_pg_dsl output (one rule list per PG position)
// pgs = id[][] - each PG is a list of node ids, one per position; ids that are
//   not present in osd_tree (for example NO_OSD) count as "nothing selected"
//
// A position is valid when its node is among the candidates allowed by the
// rules for that position, or when nothing is selected and the rules allow no
// candidate at all.
// Returns the PGs (same array objects) that passed the check, in input order.
function check_custom_combinations(osd_tree, rules, pgs)
{
    const res = [];
    skip_pg: for (const pg of pgs)
    {
        // Unknown ids become placeholders without level ids, so that rules
        // referring to them can still be evaluated
        const selected = pg.map(id => osd_tree[id] || { levels: {}, id: null });
        for (let i = 0; i < rules.length; i++)
        {
            const filtered = filter_tree_by_rules(osd_tree, rules[i], selected);
            const chosen = selected[i];
            if (!chosen || chosen.id === null)
            {
                // An empty position is only acceptable if nothing could be placed there
                if (filtered.length)
                {
                    continue skip_pg;
                }
            }
            else if (!filtered.some(ok => ok.id === chosen.id))
            {
                continue skip_pg;
            }
        }
        res.push(pg);
    }
    return res;
}
|
||||||
|
|
||||||
|
module.exports = {
|
||||||
|
RuleCombinator,
|
||||||
|
NO_OSD,
|
||||||
|
|
||||||
|
index_tree,
|
||||||
|
parse_level_indexes,
|
||||||
|
parse_pg_dsl,
|
||||||
|
random_custom_combinations,
|
||||||
|
check_custom_combinations,
|
||||||
|
};
|
|
@ -50,15 +50,15 @@ async function lp_solve(text)
|
||||||
return { score, vars };
|
return { score, vars };
|
||||||
}
|
}
|
||||||
|
|
||||||
async function optimize_initial({ osd_tree, pg_count, pg_size = 3, pg_minsize = 2, max_combinations = 10000, parity_space = 1, ordered = false })
|
// osd_weights = { [id]: weight }
|
||||||
|
async function optimize_initial({ osd_weights, combinator, pg_count, pg_size = 3, pg_minsize = 2, parity_space = 1, ordered = false })
|
||||||
{
|
{
|
||||||
if (!pg_count || !osd_tree)
|
if (!pg_count || !osd_weights)
|
||||||
{
|
{
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
const all_weights = Object.assign({}, ...Object.values(osd_tree));
|
const total_weight = Object.values(osd_weights).reduce((a, c) => Number(a) + Number(c), 0);
|
||||||
const total_weight = Object.values(all_weights).reduce((a, c) => Number(a) + Number(c), 0);
|
const all_pgs = Object.values(make_cyclic(combinator.random_combinations(), parity_space));
|
||||||
const all_pgs = Object.values(random_combinations(osd_tree, pg_size, max_combinations, parity_space > 1));
|
|
||||||
const pg_per_osd = {};
|
const pg_per_osd = {};
|
||||||
for (const pg of all_pgs)
|
for (const pg of all_pgs)
|
||||||
{
|
{
|
||||||
|
@ -69,15 +69,15 @@ async function optimize_initial({ osd_tree, pg_count, pg_size = 3, pg_minsize =
|
||||||
pg_per_osd[osd].push((i >= pg_minsize ? parity_space+'*' : '')+"pg_"+pg.join("_"));
|
pg_per_osd[osd].push((i >= pg_minsize ? parity_space+'*' : '')+"pg_"+pg.join("_"));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
const pg_effsize = Math.min(pg_minsize, Object.keys(osd_tree).length)
|
let pg_effsize = all_pgs.reduce((a, c) => Math.max(a, c.filter(e => e != NO_OSD).length), 0);
|
||||||
+ Math.max(0, Math.min(pg_size, Object.keys(osd_tree).length) - pg_minsize) * parity_space;
|
pg_effsize = Math.min(pg_minsize, pg_effsize) + Math.max(0, Math.min(pg_size, pg_effsize) - pg_minsize) * parity_space;
|
||||||
let lp = '';
|
let lp = '';
|
||||||
lp += "max: "+all_pgs.map(pg => 'pg_'+pg.join('_')).join(' + ')+";\n";
|
lp += "max: "+all_pgs.map(pg => 'pg_'+pg.join('_')).join(' + ')+";\n";
|
||||||
for (const osd in pg_per_osd)
|
for (const osd in pg_per_osd)
|
||||||
{
|
{
|
||||||
if (osd !== NO_OSD)
|
if (osd !== NO_OSD)
|
||||||
{
|
{
|
||||||
let osd_pg_count = all_weights[osd]/total_weight*pg_effsize*pg_count;
|
let osd_pg_count = osd_weights[osd]/total_weight*pg_effsize*pg_count;
|
||||||
lp += pg_per_osd[osd].join(' + ')+' <= '+osd_pg_count+';\n';
|
lp += pg_per_osd[osd].join(' + ')+' <= '+osd_pg_count+';\n';
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -93,7 +93,7 @@ async function optimize_initial({ osd_tree, pg_count, pg_size = 3, pg_minsize =
|
||||||
throw new Error('Problem is infeasible or unbounded - is it a bug?');
|
throw new Error('Problem is infeasible or unbounded - is it a bug?');
|
||||||
}
|
}
|
||||||
const int_pgs = make_int_pgs(lp_result.vars, pg_count, ordered);
|
const int_pgs = make_int_pgs(lp_result.vars, pg_count, ordered);
|
||||||
const eff = pg_list_space_efficiency(int_pgs, all_weights, pg_minsize, parity_space);
|
const eff = pg_list_space_efficiency(int_pgs, osd_weights, pg_minsize, parity_space);
|
||||||
const res = {
|
const res = {
|
||||||
score: lp_result.score,
|
score: lp_result.score,
|
||||||
weights: lp_result.vars,
|
weights: lp_result.vars,
|
||||||
|
@ -104,6 +104,22 @@ async function optimize_initial({ osd_tree, pg_count, pg_size = 3, pg_minsize =
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Add all cyclic rotations of every PG to the combination hash when parity_space > 1.
// pgs = { ['pg_'+ids.join('_')]: ids[] }
// The hash is modified in place and also returned; with parity_space <= 1 it is returned unchanged.
function make_cyclic(pgs, parity_space)
{
    if (parity_space > 1)
    {
        // Iterate over a snapshot of the values: the previous `for (const pg in pgs)` walked
        // the key strings (and read the non-existent `.size`), so no rotation was ever added.
        // The snapshot also keeps the loop from visiting the rotations inserted below.
        for (const pg of Object.values(pgs))
        {
            for (let i = 1; i < pg.length; i++)
            {
                const cyclic = [ ...pg.slice(i), ...pg.slice(0, i) ];
                pgs['pg_'+cyclic.join('_')] = cyclic;
            }
        }
    }
    return pgs;
}
|
||||||
|
|
||||||
function shuffle(array)
|
function shuffle(array)
|
||||||
{
|
{
|
||||||
for (let i = array.length - 1, j, x; i > 0; i--)
|
for (let i = array.length - 1, j, x; i > 0; i--)
|
||||||
|
@ -216,47 +232,17 @@ function calc_intersect_weights(old_pg_size, pg_size, pg_count, prev_weights, al
|
||||||
return move_weights;
|
return move_weights;
|
||||||
}
|
}
|
||||||
|
|
||||||
function add_valid_previous(osd_tree, prev_weights, all_pgs)
|
|
||||||
{
|
|
||||||
// Add previous combinations that are still valid
|
|
||||||
const hosts = Object.keys(osd_tree).sort();
|
|
||||||
const host_per_osd = {};
|
|
||||||
for (const host in osd_tree)
|
|
||||||
{
|
|
||||||
for (const osd in osd_tree[host])
|
|
||||||
{
|
|
||||||
host_per_osd[osd] = host;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
skip_pg: for (const pg_name in prev_weights)
|
|
||||||
{
|
|
||||||
const seen_hosts = {};
|
|
||||||
const pg = pg_name.substr(3).split(/_/);
|
|
||||||
for (const osd of pg)
|
|
||||||
{
|
|
||||||
if (!host_per_osd[osd] || seen_hosts[host_per_osd[osd]])
|
|
||||||
{
|
|
||||||
continue skip_pg;
|
|
||||||
}
|
|
||||||
seen_hosts[host_per_osd[osd]] = true;
|
|
||||||
}
|
|
||||||
if (!all_pgs[pg_name])
|
|
||||||
{
|
|
||||||
all_pgs[pg_name] = pg;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Try to minimize data movement
|
// Try to minimize data movement
|
||||||
async function optimize_change({ prev_pgs: prev_int_pgs, osd_tree, pg_size = 3, pg_minsize = 2, max_combinations = 10000, parity_space = 1, ordered = false })
|
async function optimize_change({ prev_pgs: prev_int_pgs, osd_weights, combinator, pg_size = 3, pg_minsize = 2, parity_space = 1, ordered = false })
|
||||||
{
|
{
|
||||||
if (!osd_tree)
|
if (!osd_weights)
|
||||||
{
|
{
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
// FIXME: use parity_chunks with parity_space instead of pg_minsize
|
// FIXME: use parity_chunks with parity_space instead of pg_minsize
|
||||||
const pg_effsize = Math.min(pg_minsize, Object.keys(osd_tree).length)
|
let all_pgs = make_cyclic(combinator.random_combinations(), parity_space);
|
||||||
+ Math.max(0, Math.min(pg_size, Object.keys(osd_tree).length) - pg_minsize) * parity_space;
|
let pg_effsize = Object.values(all_pgs).reduce((a, c) => Math.max(a, c.filter(e => e != NO_OSD).length), 0);
|
||||||
|
pg_effsize = Math.min(pg_minsize, pg_effsize) + Math.max(0, Math.min(pg_size, pg_effsize) - pg_minsize) * parity_space;
|
||||||
const pg_count = prev_int_pgs.length;
|
const pg_count = prev_int_pgs.length;
|
||||||
const prev_weights = {};
|
const prev_weights = {};
|
||||||
const prev_pg_per_osd = {};
|
const prev_pg_per_osd = {};
|
||||||
|
@ -273,10 +259,13 @@ async function optimize_change({ prev_pgs: prev_int_pgs, osd_tree, pg_size = 3,
|
||||||
}
|
}
|
||||||
const old_pg_size = prev_int_pgs[0].length;
|
const old_pg_size = prev_int_pgs[0].length;
|
||||||
// Get all combinations
|
// Get all combinations
|
||||||
let all_pgs = random_combinations(osd_tree, pg_size, max_combinations, parity_space > 1);
|
|
||||||
if (old_pg_size == pg_size)
|
if (old_pg_size == pg_size)
|
||||||
{
|
{
|
||||||
add_valid_previous(osd_tree, prev_weights, all_pgs);
|
const still_valid = combinator.check_combinations(Object.keys(prev_weights).map(pg_name => pg_name.substr(3).split('_')));
|
||||||
|
for (const pg of still_valid)
|
||||||
|
{
|
||||||
|
all_pgs['pg_'+pg.join('_')] = pg;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
all_pgs = Object.values(all_pgs);
|
all_pgs = Object.values(all_pgs);
|
||||||
const pg_per_osd = {};
|
const pg_per_osd = {};
|
||||||
|
@ -295,8 +284,7 @@ async function optimize_change({ prev_pgs: prev_int_pgs, osd_tree, pg_size = 3,
|
||||||
// Calculate total weight - old PG weights
|
// Calculate total weight - old PG weights
|
||||||
const all_pg_names = all_pgs.map(pg => 'pg_'+pg.join('_'));
|
const all_pg_names = all_pgs.map(pg => 'pg_'+pg.join('_'));
|
||||||
const all_pgs_hash = all_pg_names.reduce((a, c) => { a[c] = true; return a; }, {});
|
const all_pgs_hash = all_pg_names.reduce((a, c) => { a[c] = true; return a; }, {});
|
||||||
const all_weights = Object.assign({}, ...Object.values(osd_tree));
|
const total_weight = Object.values(osd_weights).reduce((a, c) => Number(a) + Number(c), 0);
|
||||||
const total_weight = Object.values(all_weights).reduce((a, c) => Number(a) + Number(c), 0);
|
|
||||||
// Generate the LP problem
|
// Generate the LP problem
|
||||||
let lp = '';
|
let lp = '';
|
||||||
lp += 'max: '+all_pg_names.map(pg_name => (
|
lp += 'max: '+all_pg_names.map(pg_name => (
|
||||||
|
@ -311,7 +299,7 @@ async function optimize_change({ prev_pgs: prev_int_pgs, osd_tree, pg_size = 3,
|
||||||
)).join(' + ');
|
)).join(' + ');
|
||||||
const rm_osd_pg_count = (prev_pg_per_osd[osd]||[])
|
const rm_osd_pg_count = (prev_pg_per_osd[osd]||[])
|
||||||
.reduce((a, [ old_pg_name, space ]) => (a + (all_pgs_hash[old_pg_name] ? space : 0)), 0);
|
.reduce((a, [ old_pg_name, space ]) => (a + (all_pgs_hash[old_pg_name] ? space : 0)), 0);
|
||||||
const osd_pg_count = all_weights[osd]*pg_effsize/total_weight*pg_count - rm_osd_pg_count;
|
const osd_pg_count = osd_weights[osd]*pg_effsize/total_weight*pg_count - rm_osd_pg_count;
|
||||||
lp += osd_sum + ' <= ' + osd_pg_count + ';\n';
|
lp += osd_sum + ' <= ' + osd_pg_count + ';\n';
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -421,7 +409,7 @@ async function optimize_change({ prev_pgs: prev_int_pgs, osd_tree, pg_size = 3,
|
||||||
int_pgs: new_pgs,
|
int_pgs: new_pgs,
|
||||||
differs,
|
differs,
|
||||||
osd_differs,
|
osd_differs,
|
||||||
space: pg_effsize * pg_list_space_efficiency(new_pgs, all_weights, pg_minsize, parity_space),
|
space: pg_effsize * pg_list_space_efficiency(new_pgs, osd_weights, pg_minsize, parity_space),
|
||||||
total_space: total_weight,
|
total_space: total_weight,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
@ -502,198 +490,6 @@ function put_aligned_pgs(aligned_pgs, int_pgs, prev_int_pgs, keygen)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Convert multi-level osd_tree = { level: number|string, id?: string, size?: number, children?: osd_tree }[]
|
|
||||||
// levels = { string: number }
|
|
||||||
// to a two-level osd_tree suitable for all_combinations()
|
|
||||||
function flatten_tree(osd_tree, levels, failure_domain_level, osd_level, domains = {}, i = { i: 1 })
|
|
||||||
{
|
|
||||||
osd_level = levels[osd_level] || osd_level;
|
|
||||||
failure_domain_level = levels[failure_domain_level] || failure_domain_level;
|
|
||||||
for (const node of osd_tree)
|
|
||||||
{
|
|
||||||
if ((levels[node.level] || node.level) < failure_domain_level)
|
|
||||||
{
|
|
||||||
flatten_tree(node.children||[], levels, failure_domain_level, osd_level, domains, i);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
domains['dom'+(i.i++)] = extract_osds([ node ], levels, osd_level);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return domains;
|
|
||||||
}
|
|
||||||
|
|
||||||
function extract_osds(osd_tree, levels, osd_level, osds = {})
|
|
||||||
{
|
|
||||||
for (const node of osd_tree)
|
|
||||||
{
|
|
||||||
if ((levels[node.level] || node.level) >= osd_level)
|
|
||||||
{
|
|
||||||
osds[node.id] = node.size;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
extract_osds(node.children||[], levels, osd_level, osds);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return osds;
|
|
||||||
}
|
|
||||||
|
|
||||||
// ordered = don't treat (x,y) and (y,x) as equal
|
|
||||||
function random_combinations(osd_tree, pg_size, count, ordered)
|
|
||||||
{
|
|
||||||
let seed = 0x5f020e43;
|
|
||||||
let rng = () =>
|
|
||||||
{
|
|
||||||
seed ^= seed << 13;
|
|
||||||
seed ^= seed >> 17;
|
|
||||||
seed ^= seed << 5;
|
|
||||||
return seed + 2147483648;
|
|
||||||
};
|
|
||||||
const osds = Object.keys(osd_tree).reduce((a, c) => { a[c] = Object.keys(osd_tree[c]).sort(); return a; }, {});
|
|
||||||
const hosts = Object.keys(osd_tree).sort().filter(h => osds[h].length > 0);
|
|
||||||
const r = {};
|
|
||||||
// Generate random combinations including each OSD at least once
|
|
||||||
for (let h = 0; h < hosts.length; h++)
|
|
||||||
{
|
|
||||||
for (let o = 0; o < osds[hosts[h]].length; o++)
|
|
||||||
{
|
|
||||||
const pg = [ osds[hosts[h]][o] ];
|
|
||||||
const cur_hosts = [ ...hosts ];
|
|
||||||
cur_hosts.splice(h, 1);
|
|
||||||
for (let i = 1; i < pg_size && i < hosts.length; i++)
|
|
||||||
{
|
|
||||||
const next_host = rng() % cur_hosts.length;
|
|
||||||
const next_osd = rng() % osds[cur_hosts[next_host]].length;
|
|
||||||
pg.push(osds[cur_hosts[next_host]][next_osd]);
|
|
||||||
cur_hosts.splice(next_host, 1);
|
|
||||||
}
|
|
||||||
const cyclic_pgs = [ pg ];
|
|
||||||
if (ordered)
|
|
||||||
{
|
|
||||||
for (let i = 1; i < pg.size; i++)
|
|
||||||
{
|
|
||||||
cyclic_pgs.push([ ...pg.slice(i), ...pg.slice(0, i) ]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for (const pg of cyclic_pgs)
|
|
||||||
{
|
|
||||||
while (pg.length < pg_size)
|
|
||||||
{
|
|
||||||
pg.push(NO_OSD);
|
|
||||||
}
|
|
||||||
r['pg_'+pg.join('_')] = pg;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Generate purely random combinations
|
|
||||||
while (count > 0)
|
|
||||||
{
|
|
||||||
let host_idx = [];
|
|
||||||
const cur_hosts = [ ...hosts.map((h, i) => i) ];
|
|
||||||
const max_hosts = pg_size < hosts.length ? pg_size : hosts.length;
|
|
||||||
if (ordered)
|
|
||||||
{
|
|
||||||
for (let i = 0; i < max_hosts; i++)
|
|
||||||
{
|
|
||||||
const r = rng() % cur_hosts.length;
|
|
||||||
host_idx[i] = cur_hosts[r];
|
|
||||||
cur_hosts.splice(r, 1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
for (let i = 0; i < max_hosts; i++)
|
|
||||||
{
|
|
||||||
const r = rng() % (cur_hosts.length - (max_hosts - i - 1));
|
|
||||||
host_idx[i] = cur_hosts[r];
|
|
||||||
cur_hosts.splice(0, r+1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
let pg = host_idx.map(h => osds[hosts[h]][rng() % osds[hosts[h]].length]);
|
|
||||||
while (pg.length < pg_size)
|
|
||||||
{
|
|
||||||
pg.push(NO_OSD);
|
|
||||||
}
|
|
||||||
r['pg_'+pg.join('_')] = pg;
|
|
||||||
count--;
|
|
||||||
}
|
|
||||||
return r;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Super-stupid algorithm. Given the current OSD tree, generate all possible OSD combinations
|
|
||||||
// osd_tree = { failure_domain1: { osd1: size1, ... }, ... }
|
|
||||||
// ordered = return combinations without duplicates having different order
|
|
||||||
function all_combinations(osd_tree, pg_size, ordered, count)
|
|
||||||
{
|
|
||||||
const hosts = Object.keys(osd_tree).sort();
|
|
||||||
const osds = Object.keys(osd_tree).reduce((a, c) => { a[c] = Object.keys(osd_tree[c]).sort(); return a; }, {});
|
|
||||||
while (hosts.length < pg_size)
|
|
||||||
{
|
|
||||||
osds[NO_OSD] = [ NO_OSD ];
|
|
||||||
hosts.push(NO_OSD);
|
|
||||||
}
|
|
||||||
let host_idx = [];
|
|
||||||
let osd_idx = [];
|
|
||||||
for (let i = 0; i < pg_size; i++)
|
|
||||||
{
|
|
||||||
host_idx.push(i);
|
|
||||||
osd_idx.push(0);
|
|
||||||
}
|
|
||||||
const r = [];
|
|
||||||
while (!count || count < 0 || r.length < count)
|
|
||||||
{
|
|
||||||
r.push(host_idx.map((hi, i) => osds[hosts[hi]][osd_idx[i]]));
|
|
||||||
let inc = pg_size-1;
|
|
||||||
while (inc >= 0)
|
|
||||||
{
|
|
||||||
osd_idx[inc]++;
|
|
||||||
if (osd_idx[inc] >= osds[hosts[host_idx[inc]]].length)
|
|
||||||
{
|
|
||||||
osd_idx[inc] = 0;
|
|
||||||
inc--;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (inc < 0)
|
|
||||||
{
|
|
||||||
// no osds left in the current host combination, select the next one
|
|
||||||
inc = pg_size-1;
|
|
||||||
same_again: while (inc >= 0)
|
|
||||||
{
|
|
||||||
host_idx[inc]++;
|
|
||||||
for (let prev_host = 0; prev_host < inc; prev_host++)
|
|
||||||
{
|
|
||||||
if (host_idx[prev_host] == host_idx[inc])
|
|
||||||
{
|
|
||||||
continue same_again;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (host_idx[inc] < (ordered ? hosts.length-(pg_size-1-inc) : hosts.length))
|
|
||||||
{
|
|
||||||
while ((++inc) < pg_size)
|
|
||||||
{
|
|
||||||
host_idx[inc] = (ordered ? host_idx[inc-1]+1 : 0);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
inc--;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (inc < 0)
|
|
||||||
{
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return r;
|
|
||||||
}
|
|
||||||
|
|
||||||
function pg_weights_space_efficiency(weights, pg_count, osd_sizes)
|
function pg_weights_space_efficiency(weights, pg_count, osd_sizes)
|
||||||
{
|
{
|
||||||
const per_osd = {};
|
const per_osd = {};
|
||||||
|
@ -752,11 +548,8 @@ module.exports = {
|
||||||
pg_weights_space_efficiency,
|
pg_weights_space_efficiency,
|
||||||
pg_list_space_efficiency,
|
pg_list_space_efficiency,
|
||||||
pg_per_osd_space_efficiency,
|
pg_per_osd_space_efficiency,
|
||||||
flatten_tree,
|
|
||||||
|
|
||||||
lp_solve,
|
lp_solve,
|
||||||
make_int_pgs,
|
make_int_pgs,
|
||||||
align_pgs,
|
align_pgs,
|
||||||
random_combinations,
|
|
||||||
all_combinations,
|
|
||||||
};
|
};
|
||||||
|
|
159
mon/mon.js
159
mon/mon.js
|
@ -6,6 +6,8 @@ const http = require('http');
|
||||||
const crypto = require('crypto');
|
const crypto = require('crypto');
|
||||||
const os = require('os');
|
const os = require('os');
|
||||||
const WebSocket = require('ws');
|
const WebSocket = require('ws');
|
||||||
|
const { RuleCombinator, parse_level_indexes, parse_pg_dsl } = require('./dsl_pgs.js');
|
||||||
|
const { SimpleCombinator, flatten_tree } = require('./simple_pgs.js');
|
||||||
const LPOptimizer = require('./lp-optimizer.js');
|
const LPOptimizer = require('./lp-optimizer.js');
|
||||||
const stableStringify = require('./stable-stringify.js');
|
const stableStringify = require('./stable-stringify.js');
|
||||||
const PGUtil = require('./PGUtil.js');
|
const PGUtil = require('./PGUtil.js');
|
||||||
|
@ -63,6 +65,7 @@ const etcd_tree = {
|
||||||
mon_stats_timeout: 1000, // ms. min: 100
|
mon_stats_timeout: 1000, // ms. min: 100
|
||||||
osd_out_time: 600, // seconds. min: 0
|
osd_out_time: 600, // seconds. min: 0
|
||||||
placement_levels: { datacenter: 1, rack: 2, host: 3, osd: 4, ... },
|
placement_levels: { datacenter: 1, rack: 2, host: 3, osd: 4, ... },
|
||||||
|
use_old_pg_combinator: false,
|
||||||
// client and osd
|
// client and osd
|
||||||
tcp_header_buffer_size: 65536,
|
tcp_header_buffer_size: 65536,
|
||||||
use_sync_send_recv: false,
|
use_sync_send_recv: false,
|
||||||
|
@ -185,7 +188,12 @@ const etcd_tree = {
|
||||||
// number of parity chunks, required for EC
|
// number of parity chunks, required for EC
|
||||||
parity_chunks?: 1,
|
parity_chunks?: 1,
|
||||||
pg_count: 100,
|
pg_count: 100,
|
||||||
failure_domain: 'host',
|
// default is failure_domain=host
|
||||||
|
failure_domain?: 'host',
|
||||||
|
// additional failure domain rules; failure_domain=x is equivalent to x=123..N
|
||||||
|
level_placement?: 'dc=112233 host=123456',
|
||||||
|
raw_placement?: 'any, dc=1 host!=1, dc=1 host!=(1,2)',
|
||||||
|
old_combinator: false,
|
||||||
max_osd_combinations: 10000,
|
max_osd_combinations: 10000,
|
||||||
// block_size, bitmap_granularity, immediate_commit must match all OSDs used in that pool
|
// block_size, bitmap_granularity, immediate_commit must match all OSDs used in that pool
|
||||||
block_size: 131072,
|
block_size: 131072,
|
||||||
|
@ -930,7 +938,6 @@ class Mon
|
||||||
// Parent's level must be less than child's; OSDs must be leaves
|
// Parent's level must be less than child's; OSDs must be leaves
|
||||||
const parent = parent_level && parent_level < node_level ? node_cfg.parent : '';
|
const parent = parent_level && parent_level < node_level ? node_cfg.parent : '';
|
||||||
tree[parent].children.push(tree[node_id]);
|
tree[parent].children.push(tree[node_id]);
|
||||||
delete node_cfg.parent;
|
|
||||||
}
|
}
|
||||||
return { up_osds, levels, osd_tree: tree };
|
return { up_osds, levels, osd_tree: tree };
|
||||||
}
|
}
|
||||||
|
@ -1096,7 +1103,6 @@ class Mon
|
||||||
pool_cfg.pg_minsize = Math.floor(pool_cfg.pg_minsize);
|
pool_cfg.pg_minsize = Math.floor(pool_cfg.pg_minsize);
|
||||||
pool_cfg.parity_chunks = Math.floor(pool_cfg.parity_chunks) || undefined;
|
pool_cfg.parity_chunks = Math.floor(pool_cfg.parity_chunks) || undefined;
|
||||||
pool_cfg.pg_count = Math.floor(pool_cfg.pg_count);
|
pool_cfg.pg_count = Math.floor(pool_cfg.pg_count);
|
||||||
pool_cfg.failure_domain = pool_cfg.failure_domain || 'host';
|
|
||||||
pool_cfg.max_osd_combinations = Math.floor(pool_cfg.max_osd_combinations) || 10000;
|
pool_cfg.max_osd_combinations = Math.floor(pool_cfg.max_osd_combinations) || 10000;
|
||||||
if (!/^[1-9]\d*$/.exec(''+pool_id))
|
if (!/^[1-9]\d*$/.exec(''+pool_id))
|
||||||
{
|
{
|
||||||
|
@ -1176,10 +1182,32 @@ class Mon
|
||||||
console.log('Pool '+pool_id+' has invalid primary_affinity_tags (must be a string or array of strings)');
|
console.log('Pool '+pool_id+' has invalid primary_affinity_tags (must be a string or array of strings)');
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
if (!this.get_pg_rules(pool_id, pool_cfg, true))
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
filter_osds_by_tags(orig_tree, flat_tree, tags)
|
filter_osds_by_root_node(pool_tree, root_node)
|
||||||
|
{
|
||||||
|
if (!root_node)
|
||||||
|
{
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
pool_tree = pool_tree[pool_cfg.root_node];
|
||||||
|
const cur = [ ...(pool_tree||{}).children||[] ];
|
||||||
|
for (let i = 0; i < cur.length; i++)
|
||||||
|
{
|
||||||
|
if (cur.children)
|
||||||
|
{
|
||||||
|
cur.splice(i+1, 1, ...cur.children);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return cur;
|
||||||
|
}
|
||||||
|
|
||||||
|
filter_osds_by_tags(orig_tree, tags)
|
||||||
{
|
{
|
||||||
if (!tags)
|
if (!tags)
|
||||||
{
|
{
|
||||||
|
@ -1187,30 +1215,22 @@ class Mon
|
||||||
}
|
}
|
||||||
for (const tag of (tags instanceof Array ? tags : [ tags ]))
|
for (const tag of (tags instanceof Array ? tags : [ tags ]))
|
||||||
{
|
{
|
||||||
for (const host in flat_tree)
|
for (const osd in orig_tree)
|
||||||
{
|
{
|
||||||
let found = 0;
|
if (orig_tree[osd].level === 'osd' &&
|
||||||
for (const osd in flat_tree[host])
|
(!orig_tree[osd].tags || !orig_tree[osd].tags[tag]))
|
||||||
{
|
{
|
||||||
if (!orig_tree[osd].tags || !orig_tree[osd].tags[tag])
|
delete orig_tree[osd];
|
||||||
delete flat_tree[host][osd];
|
|
||||||
else
|
|
||||||
found++;
|
|
||||||
}
|
|
||||||
if (!found)
|
|
||||||
{
|
|
||||||
delete flat_tree[host];
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
filter_osds_by_block_layout(flat_tree, block_size, bitmap_granularity, immediate_commit)
|
filter_osds_by_block_layout(orig_tree, block_size, bitmap_granularity, immediate_commit)
|
||||||
{
|
{
|
||||||
for (const host in flat_tree)
|
for (const osd in orig_tree)
|
||||||
{
|
{
|
||||||
let found = 0;
|
if (orig_tree[osd].level === 'osd')
|
||||||
for (const osd in flat_tree[host])
|
|
||||||
{
|
{
|
||||||
const osd_stat = this.state.osd.stats[osd];
|
const osd_stat = this.state.osd.stats[osd];
|
||||||
if (osd_stat && (osd_stat.bs_block_size && osd_stat.bs_block_size != block_size ||
|
if (osd_stat && (osd_stat.bs_block_size && osd_stat.bs_block_size != block_size ||
|
||||||
|
@ -1218,16 +1238,8 @@ class Mon
|
||||||
osd_stat.immediate_commit == 'small' && immediate_commit == 'all' ||
|
osd_stat.immediate_commit == 'small' && immediate_commit == 'all' ||
|
||||||
osd_stat.immediate_commit == 'none' && immediate_commit != 'none'))
|
osd_stat.immediate_commit == 'none' && immediate_commit != 'none'))
|
||||||
{
|
{
|
||||||
delete flat_tree[host][osd];
|
delete orig_tree[host][osd];
|
||||||
}
|
}
|
||||||
else
|
|
||||||
{
|
|
||||||
found++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (!found)
|
|
||||||
{
|
|
||||||
delete flat_tree[host];
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1237,12 +1249,84 @@ class Mon
|
||||||
let aff_osds = up_osds;
|
let aff_osds = up_osds;
|
||||||
if (pool_cfg.primary_affinity_tags)
|
if (pool_cfg.primary_affinity_tags)
|
||||||
{
|
{
|
||||||
aff_osds = { ...up_osds };
|
aff_osds = Object.keys(up_osds).reduce((a, c) => { a[c] = osd_tree[c]; return a; }, {});
|
||||||
this.filter_osds_by_tags(osd_tree, { x: aff_osds }, pool_cfg.primary_affinity_tags);
|
this.filter_osds_by_tags(aff_osds, pool_cfg.primary_affinity_tags);
|
||||||
|
for (const osd in aff_osds)
|
||||||
|
{
|
||||||
|
aff_osds[osd] = true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return aff_osds;
|
return aff_osds;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
get_pg_rules(pool_id, pool_cfg, warn)
|
||||||
|
{
|
||||||
|
if (pool_cfg.level_placement)
|
||||||
|
{
|
||||||
|
const pg_size = (0|pool_cfg.pg_size);
|
||||||
|
let rules = pool_cfg.level_placement;
|
||||||
|
if (typeof rules === 'string')
|
||||||
|
{
|
||||||
|
rules = rules.split(/\s+/).map(s => s.split(/=/, 2)).reduce((a, c) => { a[c[0]] = c[1]; return a; }, {});
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
rules = { ...rules };
|
||||||
|
}
|
||||||
|
// Always add failure_domain to prevent rules from being totally incorrect
|
||||||
|
const all_diff = [];
|
||||||
|
for (let i = 1; i <= pg_size; i++)
|
||||||
|
{
|
||||||
|
all_diff.push(i);
|
||||||
|
}
|
||||||
|
rules[pool_cfg.failure_domain || 'host'] = all_diff;
|
||||||
|
const levels = this.config.placement_levels||{};
|
||||||
|
levels.host = levels.host || 100;
|
||||||
|
levels.osd = levels.osd || 101;
|
||||||
|
for (const k in rules)
|
||||||
|
{
|
||||||
|
if (!levels[k] || typeof rules[k] !== 'string' &&
|
||||||
|
(!rules[k] instanceof Array ||
|
||||||
|
rules[k].filter(s => typeof s !== 'string' && typeof s !== 'number').length > 0))
|
||||||
|
{
|
||||||
|
if (warn)
|
||||||
|
console.log('Pool '+pool_id+' configuration is invalid: level_placement should be { [level]: string | (string|number)[] }');
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
else if (rules[k].length != pg_size)
|
||||||
|
{
|
||||||
|
if (warn)
|
||||||
|
console.log('Pool '+pool_id+' configuration is invalid: values in level_placement should contain exactly pg_size ('+pg_size+') items');
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return parse_level_indexes(rules);
|
||||||
|
}
|
||||||
|
else if (typeof pool_cfg.raw_placement === 'string')
|
||||||
|
{
|
||||||
|
try
|
||||||
|
{
|
||||||
|
return parse_pg_dsl(pool_cfg.raw_placement);
|
||||||
|
}
|
||||||
|
catch (e)
|
||||||
|
{
|
||||||
|
if (warn)
|
||||||
|
console.log('Pool '+pool_id+' configuration is invalid: invalid raw_placement: '+e.message);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
let rules = [ [] ];
|
||||||
|
let prev = [ 1 ];
|
||||||
|
for (let i = 1; i < pool_cfg.pg_size; i++)
|
||||||
|
{
|
||||||
|
rules.push([ [ pool_cfg.failure_domain||'host', '!=', prev ] ]);
|
||||||
|
prev = [ ...prev, i+1 ];
|
||||||
|
}
|
||||||
|
return rules;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
async generate_pool_pgs(pool_id, osd_tree, levels)
|
async generate_pool_pgs(pool_id, osd_tree, levels)
|
||||||
{
|
{
|
||||||
const pool_cfg = this.state.config.pools[pool_id];
|
const pool_cfg = this.state.config.pools[pool_id];
|
||||||
|
@ -1250,10 +1334,9 @@ class Mon
|
||||||
{
|
{
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
let pool_tree = osd_tree[pool_cfg.root_node || ''];
|
let pool_tree = osd_tree;
|
||||||
pool_tree = pool_tree ? pool_tree.children : [];
|
this.filter_osds_by_root_node(pool_tree, pool_cfg.root_node);
|
||||||
pool_tree = LPOptimizer.flatten_tree(pool_tree, levels, pool_cfg.failure_domain, 'osd');
|
this.filter_osds_by_tags(pool_tree, pool_cfg.osd_tags);
|
||||||
this.filter_osds_by_tags(osd_tree, pool_tree, pool_cfg.osd_tags);
|
|
||||||
this.filter_osds_by_block_layout(
|
this.filter_osds_by_block_layout(
|
||||||
pool_tree,
|
pool_tree,
|
||||||
pool_cfg.block_size || this.config.block_size || 131072,
|
pool_cfg.block_size || this.config.block_size || 131072,
|
||||||
|
@ -1276,11 +1359,15 @@ class Mon
|
||||||
}
|
}
|
||||||
const old_pg_count = prev_pgs.length;
|
const old_pg_count = prev_pgs.length;
|
||||||
const optimize_cfg = {
|
const optimize_cfg = {
|
||||||
osd_tree: pool_tree,
|
osd_weights: Object.values(pool_tree).filter(item => item.level === 'osd').reduce((a, c) => { a[c.id] = c.size; return a; }, {}),
|
||||||
|
combinator: !this.config.use_old_pg_combinator || pool_cfg.level_placement || pool_cfg.raw_placement
|
||||||
|
// new algorithm:
|
||||||
|
? new RuleCombinator(osd_tree, this.get_pg_rules(pool_id, pool_cfg), pool_cfg.max_osd_combinations)
|
||||||
|
// old algorithm:
|
||||||
|
: new SimpleCombinator(flatten_tree(osd_tree[''].children, levels, pool_cfg.failure_domain, 'osd'), pool_cfg.pg_size, pool_cfg.max_osd_combinations),
|
||||||
pg_count: pool_cfg.pg_count,
|
pg_count: pool_cfg.pg_count,
|
||||||
pg_size: pool_cfg.pg_size,
|
pg_size: pool_cfg.pg_size,
|
||||||
pg_minsize: pool_cfg.pg_minsize,
|
pg_minsize: pool_cfg.pg_minsize,
|
||||||
max_combinations: pool_cfg.max_osd_combinations,
|
|
||||||
ordered: pool_cfg.scheme != 'replicated',
|
ordered: pool_cfg.scheme != 'replicated',
|
||||||
};
|
};
|
||||||
let optimize_result;
|
let optimize_result;
|
||||||
|
|
|
@ -0,0 +1,38 @@
|
||||||
|
// Pick one of <count> candidates by hashing: cb(index) returns the string to hash
// for each candidate and the index with the greatest murmur3() value is returned.
// Returns null when count is zero/empty. Index 0 is returned when no hash exceeds -1.
// On equal hashes the earliest index is kept.
function select_murmur3(count, cb)
{
    if (!count)
    {
        return null;
    }
    let best_idx = 0;
    let best_hash = -1;
    for (let idx = 0; idx < count; idx++)
    {
        const cur_hash = murmur3(cb(idx));
        if (cur_hash > best_hash)
        {
            best_hash = cur_hash;
            best_idx = idx;
        }
    }
    return best_idx;
}
|
||||||
|
|
||||||
|
// Hash a string into a 32-bit signed integer.
// Every UTF-16 code unit is xor-ed into the state, which is then multiplied
// by 0x5bd1e995, truncated to 32 bits and mixed with its own bits shifted right by 15.
// An empty string yields the seed 0x12345678.
// NOTE(review): the arithmetic must stay exactly as is - results may be used
// for reproducible selection; confirm before changing anything here.
function murmur3(s)
{
    let state = 0x12345678;
    let pos = 0;
    while (pos < s.length)
    {
        state = state ^ s.charCodeAt(pos);
        state = (state*0x5bd1e995) & 0xFFFFFFFF;
        state = state ^ (state >> 15);
        pos++;
    }
    return state;
}
|
||||||
|
|
||||||
|
module.exports = {
|
||||||
|
murmur3,
|
||||||
|
select_murmur3,
|
||||||
|
};
|
|
@ -0,0 +1,241 @@
|
||||||
|
const { select_murmur3 } = require('./murmur3.js');
|
||||||
|
|
||||||
|
const NO_OSD = 'Z';
|
||||||
|
|
||||||
|
// Combination generator object which wraps the plain functions of this file:
// it remembers the flat OSD tree and generation settings and passes them
// to random_combinations() / check_combinations().
class SimpleCombinator
{
    // flat_tree = two-level tree, presumably flatten_tree() output - verify against the caller
    // pg_size, max_combinations, ordered = passed to random_combinations() as is
    constructor(flat_tree, pg_size, max_combinations, ordered)
    {
        Object.assign(this, { osd_tree: flat_tree, pg_size, max_combinations, ordered });
    }

    // Generate combinations using the stored settings
    random_combinations()
    {
        const { osd_tree, pg_size, max_combinations, ordered } = this;
        return random_combinations(osd_tree, pg_size, max_combinations, ordered);
    }

    // Check <pgs> against the stored tree
    check_combinations(pgs)
    {
        const tree = this.osd_tree;
        return check_combinations(tree, pgs);
    }
}
|
||||||
|
|
||||||
|
// Flatten a multi-level tree into two levels: { dom<N>: { [osd_id]: size } }
// osd_tree = { level: number|string, id?: string, size?: number, children?: osd_tree }[]
// levels = { string: number } - maps level names to numbers; unknown levels are used as is
// Nodes with level less than failure_domain_level are descended into, every other node
// becomes a separate domain filled by extract_osds().
// domains and i are shared between recursive calls: the resulting hash and the domain counter.
function flatten_tree(osd_tree, levels, failure_domain_level, osd_level, domains = {}, i = { i: 1 })
{
    const level_num = (lvl) => levels[lvl] || lvl;
    osd_level = level_num(osd_level);
    failure_domain_level = level_num(failure_domain_level);
    for (const item of osd_tree)
    {
        if (level_num(item.level) < failure_domain_level)
        {
            // Still above the failure domain level - go deeper
            flatten_tree(item.children||[], levels, failure_domain_level, osd_level, domains, i);
            continue;
        }
        const dom_name = 'dom'+(i.i++);
        domains[dom_name] = extract_osds([ item ], levels, osd_level);
    }
    return domains;
}
|
||||||
|
|
||||||
|
// Collect { id: size } for every node of <osd_tree> whose level is at or past <osd_level>.
// Node levels are translated through <levels> (values missing from it are used as is);
// <osd_level> itself is compared as given. Other nodes are only descended into.
// <osds> is the accumulator shared between the recursive calls; it is also the return value.
function extract_osds(osd_tree, levels, osd_level, osds = {})
{
    for (const { level, id, size, children } of osd_tree)
    {
        const is_osd = (levels[level] || level) >= osd_level;
        if (is_osd)
        {
            osds[id] = size;
        }
        else
        {
            extract_osds(children||[], levels, osd_level, osds);
        }
    }
    return osds;
}
|
||||||
|
|
||||||
|
// Generate a set of PGs for a two-level OSD tree; all choices are delegated to select_murmur3()
// osd_tree = { failure_domain1: { osd1: size1, ... }, ... }
// count = number of purely random PGs generated after one PG was generated for each OSD
// ordered = don't treat (x,y) and (y,x) as equal
// Returns { 'pg_<osd>_<osd>...': [ osd, ... ] }. PGs shorter than pg_size are padded with NO_OSD.
function random_combinations(osd_tree, pg_size, count, ordered)
{
    const osds = {};
    for (const host of Object.keys(osd_tree))
    {
        osds[host] = Object.keys(osd_tree[host]).sort();
    }
    // Failure domains without OSDs don't take part in the generation
    const hosts = Object.keys(osd_tree).sort().filter(host => osds[host].length > 0);
    const result = {};
    // Pad the PG up to pg_size and store it under its name
    const add_pg = (pg) =>
    {
        while (pg.length < pg_size)
        {
            pg.push(NO_OSD);
        }
        result['pg_'+pg.join('_')] = pg;
    };
    // Generate random combinations including each OSD at least once
    hosts.forEach((first_host, first_idx) =>
    {
        for (const first_osd of osds[first_host])
        {
            const pg = [ first_osd ];
            const free_hosts = hosts.filter((_, idx) => idx !== first_idx);
            // Each following OSD is taken from a failure domain which isn't used in this PG yet
            while (pg.length < pg_size && pg.length < hosts.length)
            {
                const host_pos = select_murmur3(free_hosts.length, k => first_osd+':i:'+free_hosts[k]);
                const candidates = osds[free_hosts[host_pos]];
                const osd_pos = select_murmur3(candidates.length, k => first_osd+':i:'+candidates[k]);
                pg.push(candidates[osd_pos]);
                free_hosts.splice(host_pos, 1);
            }
            add_pg(pg);
        }
    });
    // Generate purely random combinations
    for (; count > 0; count--)
    {
        const free_idx = hosts.map((_, idx) => idx);
        const max_hosts = pg_size < hosts.length ? pg_size : hosts.length;
        const chosen = [];
        for (let n = 0; n < max_hosts; n++)
        {
            // Unordered mode only moves forward in the host list, so it has to
            // leave enough hosts for the remaining positions
            const range = ordered ? free_idx.length : free_idx.length - (max_hosts - n - 1);
            const pos = select_murmur3(range, k => count+':h:'+free_idx[k]);
            chosen.push(free_idx[pos]);
            if (ordered)
            {
                free_idx.splice(pos, 1);
            }
            else
            {
                free_idx.splice(0, pos+1);
            }
        }
        const pg = chosen.map((host_idx) =>
        {
            const candidates = osds[hosts[host_idx]];
            return candidates[select_murmur3(candidates.length, k => count+':o:'+candidates[k])];
        });
        add_pg(pg);
    }
    return result;
}
|
||||||
|
|
||||||
|
// Brute-force generator: given the current OSD tree, enumerate OSD combinations
// osd_tree = { failure_domain1: { osd1: size1, ... }, ... }
// ordered = return combinations without duplicates having different order
// count = stop after this number of combinations; a falsy or negative value means "no limit"
// When there are fewer failure domains than pg_size, NO_OSD is added in place of the missing ones.
function all_combinations(osd_tree, pg_size, ordered, count)
{
    const domains = Object.keys(osd_tree).sort();
    const domain_osds = {};
    for (const name of Object.keys(osd_tree))
    {
        domain_osds[name] = Object.keys(osd_tree[name]).sort();
    }
    while (domains.length < pg_size)
    {
        domain_osds[NO_OSD] = [ NO_OSD ];
        domains.push(NO_OSD);
    }
    // Two counters per PG position: the index of the failure domain used for it
    // and the index of the OSD inside that failure domain
    const dom_pos = [];
    const osd_pos = [];
    for (let p = 0; p < pg_size; p++)
    {
        dom_pos.push(p);
        osd_pos.push(0);
    }
    const unlimited = !count || count < 0;
    const found = [];
    while (unlimited || found.length < count)
    {
        found.push(dom_pos.map((d, p) => domain_osds[domains[d]][osd_pos[p]]));
        // Advance OSD indexes starting from the last position, carrying to the left on overflow
        let pos = pg_size-1;
        for (; pos >= 0; pos--)
        {
            osd_pos[pos]++;
            if (osd_pos[pos] >= domain_osds[domains[dom_pos[pos]]].length)
            {
                osd_pos[pos] = 0;
                continue;
            }
            break;
        }
        if (pos < 0)
        {
            // no osds left in the current host combination, select the next one
            pos = pg_size-1;
            next_domain: while (pos >= 0)
            {
                dom_pos[pos]++;
                for (let prev = 0; prev < pos; prev++)
                {
                    if (dom_pos[prev] == dom_pos[pos])
                    {
                        // This failure domain is already used by an earlier position, try the next one
                        continue next_domain;
                    }
                }
                const limit = ordered ? domains.length-(pg_size-1-pos) : domains.length;
                if (dom_pos[pos] < limit)
                {
                    // Found the next value for this position, reset all positions to the right of it
                    while ((++pos) < pg_size)
                    {
                        dom_pos[pos] = (ordered ? dom_pos[pos-1]+1 : 0);
                    }
                    break;
                }
                pos--;
            }
            if (pos < 0)
            {
                // All failure domain combinations are exhausted
                break;
            }
        }
    }
    return found;
}
|
||||||
|
|
||||||
|
// Filter a list of PGs, keeping only the ones which are valid for the given two-level OSD tree
// osd_tree = { failure_domain1: { osd1: size1, ... }, ... }
// pgs = iterable of PGs, each PG is an iterable of OSD ids
// A PG is kept only when every its OSD is present in osd_tree and all its OSDs belong
// to different failure domains. Returns a new array; the PGs themselves are not copied.
function check_combinations(osd_tree, pgs)
{
    // Lookup tables have no prototype so that ids like "constructor" or "toString"
    // can't be mistaken for known OSDs or already seen failure domains
    const host_per_osd = Object.create(null);
    for (const host in osd_tree)
    {
        for (const osd in osd_tree[host])
        {
            host_per_osd[osd] = host;
        }
    }
    const res = [];
    skip_pg: for (const pg of pgs)
    {
        const seen_hosts = Object.create(null);
        for (const osd of pg)
        {
            const host = host_per_osd[osd];
            if (!host || seen_hosts[host])
            {
                // Unknown OSD (including NO_OSD placeholders absent from the tree)
                // or a second OSD from the same failure domain
                continue skip_pg;
            }
            seen_hosts[host] = true;
        }
        res.push(pg);
    }
    return res;
}
|
||||||
|
|
||||||
|
// Convert optimizer parameters given in the { osd_tree, pg_size, max_combinations, ordered, ... }
// form into the form with explicit osd_weights and combinator.
// All original parameters are kept in the result. Added fields:
// osd_weights = { osd: size } merged from all failure domains of params.osd_tree
// combinator = SimpleCombinator for params.osd_tree (max_combinations defaults to 10000)
function compat(params)
{
    const { osd_tree, pg_size, max_combinations, ordered } = params;
    return {
        ...params,
        osd_weights: Object.assign({}, ...Object.values(osd_tree)),
        // "ordered" has to be forwarded too, otherwise the combinator
        // always generates combinations in the unordered mode
        combinator: new SimpleCombinator(osd_tree, pg_size, max_combinations||10000, ordered),
    };
}
|
||||||
|
|
||||||
|
module.exports = {
|
||||||
|
flatten_tree,
|
||||||
|
SimpleCombinator,
|
||||||
|
compat,
|
||||||
|
NO_OSD,
|
||||||
|
};
|
|
@ -7,6 +7,7 @@
|
||||||
// This leads to really uneven OSD fill ratio in Ceph even when PGs are perfectly balanced.
|
// This leads to really uneven OSD fill ratio in Ceph even when PGs are perfectly balanced.
|
||||||
// But we support this case with the "parity_space" parameter in optimize_initial()/optimize_change().
|
// But we support this case with the "parity_space" parameter in optimize_initial()/optimize_change().
|
||||||
|
|
||||||
|
const { SimpleCombinator } = require('./simple_pgs.js');
|
||||||
const LPOptimizer = require('./lp-optimizer.js');
|
const LPOptimizer = require('./lp-optimizer.js');
|
||||||
|
|
||||||
const osd_tree = {
|
const osd_tree = {
|
||||||
|
@ -114,16 +115,17 @@ Fine, let's try to optimize for it.
|
||||||
|
|
||||||
async function run()
|
async function run()
|
||||||
{
|
{
|
||||||
const all_weights = Object.assign({}, ...Object.values(osd_tree));
|
const osd_weights = Object.assign({}, ...Object.values(osd_tree));
|
||||||
const total_weight = Object.values(all_weights).reduce((a, c) => Number(a) + Number(c), 0);
|
const total_weight = Object.values(osd_weights).reduce((a, c) => Number(a) + Number(c), 0);
|
||||||
const eff = LPOptimizer.pg_list_space_efficiency(prev_pgs, all_weights, 2, 2.26);
|
const eff = LPOptimizer.pg_list_space_efficiency(prev_pgs, osd_weights, 2, 2.26);
|
||||||
const orig = eff*4.26 / total_weight;
|
const orig = eff*4.26 / total_weight;
|
||||||
console.log('Original efficiency was: '+Math.round(orig*10000)/100+' %');
|
console.log('Original efficiency was: '+Math.round(orig*10000)/100+' %');
|
||||||
|
|
||||||
let prev = await LPOptimizer.optimize_initial({ osd_tree, pg_size: 3, pg_count: 256, parity_space: 2.26 });
|
const combinator = new SimpleCombinator(osd_tree, 3, 10000);
|
||||||
|
let prev = await LPOptimizer.optimize_initial({ osd_weights, combinator, pg_size: 3, pg_count: 256, parity_space: 2.26 });
|
||||||
LPOptimizer.print_change_stats(prev);
|
LPOptimizer.print_change_stats(prev);
|
||||||
|
|
||||||
let next = await LPOptimizer.optimize_change({ prev_pgs, osd_tree, pg_size: 3, max_combinations: 10000, parity_space: 2.26 });
|
let next = await LPOptimizer.optimize_change({ prev_pgs, osd_weights, combinator, pg_size: 3, parity_space: 2.26 });
|
||||||
LPOptimizer.print_change_stats(next);
|
LPOptimizer.print_change_stats(next);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
// Copyright (c) Vitaliy Filippov, 2019+
|
// Copyright (c) Vitaliy Filippov, 2019+
|
||||||
// License: VNPL-1.1 (see README.md for details)
|
// License: VNPL-1.1 (see README.md for details)
|
||||||
|
|
||||||
|
const { compat } = require('./simple_pgs.js');
|
||||||
const LPOptimizer = require('./lp-optimizer.js');
|
const LPOptimizer = require('./lp-optimizer.js');
|
||||||
|
|
||||||
async function run()
|
async function run()
|
||||||
|
@ -14,26 +15,26 @@ async function run()
|
||||||
let res;
|
let res;
|
||||||
|
|
||||||
console.log('16 PGs, size=3');
|
console.log('16 PGs, size=3');
|
||||||
res = await LPOptimizer.optimize_initial({ osd_tree, pg_size: 3, pg_count: 16, ordered: false });
|
res = await LPOptimizer.optimize_initial(compat({ osd_tree, pg_size: 3, pg_count: 16, ordered: false }));
|
||||||
LPOptimizer.print_change_stats(res, false);
|
LPOptimizer.print_change_stats(res, false);
|
||||||
assert(res.space == 3, 'Initial distribution');
|
assert(res.space == 3, 'Initial distribution');
|
||||||
console.log('\nChange size to 2');
|
console.log('\nChange size to 2');
|
||||||
res = await LPOptimizer.optimize_change({ prev_pgs: res.int_pgs, osd_tree, pg_size: 2, ordered: false });
|
res = await LPOptimizer.optimize_change(compat({ prev_pgs: res.int_pgs, osd_tree, pg_size: 2, ordered: false }));
|
||||||
LPOptimizer.print_change_stats(res, false);
|
LPOptimizer.print_change_stats(res, false);
|
||||||
assert(res.space >= 3*14/16 && res.osd_differs == 0, 'Redistribution');
|
assert(res.space >= 3*14/16 && res.osd_differs == 0, 'Redistribution');
|
||||||
console.log('\nRemove OSD 3');
|
console.log('\nRemove OSD 3');
|
||||||
const no3_tree = { ...osd_tree };
|
const no3_tree = { ...osd_tree };
|
||||||
delete no3_tree['300'];
|
delete no3_tree['300'];
|
||||||
res = await LPOptimizer.optimize_change({ prev_pgs: res.int_pgs, osd_tree: no3_tree, pg_size: 2, ordered: false });
|
res = await LPOptimizer.optimize_change(compat({ prev_pgs: res.int_pgs, osd_tree: no3_tree, pg_size: 2, ordered: false }));
|
||||||
LPOptimizer.print_change_stats(res, false);
|
LPOptimizer.print_change_stats(res, false);
|
||||||
assert(res.space == 2, 'Redistribution after OSD removal');
|
assert(res.space == 2, 'Redistribution after OSD removal');
|
||||||
|
|
||||||
console.log('\n16 PGs, size=3, ordered');
|
console.log('\n16 PGs, size=3, ordered');
|
||||||
res = await LPOptimizer.optimize_initial({ osd_tree, pg_size: 3, pg_count: 16, ordered: true });
|
res = await LPOptimizer.optimize_initial(compat({ osd_tree, pg_size: 3, pg_count: 16, ordered: true }));
|
||||||
LPOptimizer.print_change_stats(res, false);
|
LPOptimizer.print_change_stats(res, false);
|
||||||
assert(res.space == 3, 'Initial distribution');
|
assert(res.space == 3, 'Initial distribution');
|
||||||
console.log('\nChange size to 2, ordered');
|
console.log('\nChange size to 2, ordered');
|
||||||
res = await LPOptimizer.optimize_change({ prev_pgs: res.int_pgs, osd_tree, pg_size: 2, ordered: true });
|
res = await LPOptimizer.optimize_change(compat({ prev_pgs: res.int_pgs, osd_tree, pg_size: 2, ordered: true }));
|
||||||
LPOptimizer.print_change_stats(res, false);
|
LPOptimizer.print_change_stats(res, false);
|
||||||
assert(res.space >= 3*14/16 && res.osd_differs < 8, 'Redistribution');
|
assert(res.space >= 3*14/16 && res.osd_differs < 8, 'Redistribution');
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
// Copyright (c) Vitaliy Filippov, 2019+
|
// Copyright (c) Vitaliy Filippov, 2019+
|
||||||
// License: VNPL-1.1 (see README.md for details)
|
// License: VNPL-1.1 (see README.md for details)
|
||||||
|
|
||||||
|
const { compat, flatten_tree } = require('./simple_pgs.js');
|
||||||
const LPOptimizer = require('./lp-optimizer.js');
|
const LPOptimizer = require('./lp-optimizer.js');
|
||||||
|
|
||||||
const crush_tree = [
|
const crush_tree = [
|
||||||
|
@ -36,44 +37,44 @@ const crush_tree = [
|
||||||
] },
|
] },
|
||||||
];
|
];
|
||||||
|
|
||||||
const osd_tree = LPOptimizer.flatten_tree(crush_tree, {}, 1, 3);
|
const osd_tree = flatten_tree(crush_tree, {}, 1, 3);
|
||||||
console.log(osd_tree);
|
console.log(osd_tree);
|
||||||
|
|
||||||
async function run()
|
async function run()
|
||||||
{
|
{
|
||||||
const cur_tree = {};
|
const cur_tree = {};
|
||||||
console.log('Empty tree:');
|
console.log('Empty tree:');
|
||||||
let res = await LPOptimizer.optimize_initial({ osd_tree: cur_tree, pg_size: 3, pg_count: 256 });
|
let res = await LPOptimizer.optimize_initial(compat({ osd_tree: cur_tree, pg_size: 3, pg_count: 256 }));
|
||||||
LPOptimizer.print_change_stats(res, false);
|
LPOptimizer.print_change_stats(res, false);
|
||||||
assert(res.space == 0);
|
assert(res.space == 0);
|
||||||
console.log('\nAdding 1st failure domain:');
|
console.log('\nAdding 1st failure domain:');
|
||||||
cur_tree['dom1'] = osd_tree['dom1'];
|
cur_tree['dom1'] = osd_tree['dom1'];
|
||||||
res = await LPOptimizer.optimize_change({ prev_pgs: res.int_pgs, osd_tree: cur_tree, pg_size: 3 });
|
res = await LPOptimizer.optimize_change(compat({ prev_pgs: res.int_pgs, osd_tree: cur_tree, pg_size: 3 }));
|
||||||
LPOptimizer.print_change_stats(res, false);
|
LPOptimizer.print_change_stats(res, false);
|
||||||
assert(res.space == 12 && res.total_space == 12);
|
assert(res.space == 12 && res.total_space == 12);
|
||||||
console.log('\nAdding 2nd failure domain:');
|
console.log('\nAdding 2nd failure domain:');
|
||||||
cur_tree['dom2'] = osd_tree['dom2'];
|
cur_tree['dom2'] = osd_tree['dom2'];
|
||||||
res = await LPOptimizer.optimize_change({ prev_pgs: res.int_pgs, osd_tree: cur_tree, pg_size: 3 });
|
res = await LPOptimizer.optimize_change(compat({ prev_pgs: res.int_pgs, osd_tree: cur_tree, pg_size: 3 }));
|
||||||
LPOptimizer.print_change_stats(res, false);
|
LPOptimizer.print_change_stats(res, false);
|
||||||
assert(res.space == 24 && res.total_space == 24);
|
assert(res.space == 24 && res.total_space == 24);
|
||||||
console.log('\nAdding 3rd failure domain:');
|
console.log('\nAdding 3rd failure domain:');
|
||||||
cur_tree['dom3'] = osd_tree['dom3'];
|
cur_tree['dom3'] = osd_tree['dom3'];
|
||||||
res = await LPOptimizer.optimize_change({ prev_pgs: res.int_pgs, osd_tree: cur_tree, pg_size: 3 });
|
res = await LPOptimizer.optimize_change(compat({ prev_pgs: res.int_pgs, osd_tree: cur_tree, pg_size: 3 }));
|
||||||
LPOptimizer.print_change_stats(res, false);
|
LPOptimizer.print_change_stats(res, false);
|
||||||
assert(res.space == 36 && res.total_space == 36);
|
assert(res.space == 36 && res.total_space == 36);
|
||||||
console.log('\nRemoving 3rd failure domain:');
|
console.log('\nRemoving 3rd failure domain:');
|
||||||
delete cur_tree['dom3'];
|
delete cur_tree['dom3'];
|
||||||
res = await LPOptimizer.optimize_change({ prev_pgs: res.int_pgs, osd_tree: cur_tree, pg_size: 3 });
|
res = await LPOptimizer.optimize_change(compat({ prev_pgs: res.int_pgs, osd_tree: cur_tree, pg_size: 3 }));
|
||||||
LPOptimizer.print_change_stats(res, false);
|
LPOptimizer.print_change_stats(res, false);
|
||||||
assert(res.space == 24 && res.total_space == 24);
|
assert(res.space == 24 && res.total_space == 24);
|
||||||
console.log('\nRemoving 2nd failure domain:');
|
console.log('\nRemoving 2nd failure domain:');
|
||||||
delete cur_tree['dom2'];
|
delete cur_tree['dom2'];
|
||||||
res = await LPOptimizer.optimize_change({ prev_pgs: res.int_pgs, osd_tree: cur_tree, pg_size: 3 });
|
res = await LPOptimizer.optimize_change(compat({ prev_pgs: res.int_pgs, osd_tree: cur_tree, pg_size: 3 }));
|
||||||
LPOptimizer.print_change_stats(res, false);
|
LPOptimizer.print_change_stats(res, false);
|
||||||
assert(res.space == 12 && res.total_space == 12);
|
assert(res.space == 12 && res.total_space == 12);
|
||||||
console.log('\nRemoving 1st failure domain:');
|
console.log('\nRemoving 1st failure domain:');
|
||||||
delete cur_tree['dom1'];
|
delete cur_tree['dom1'];
|
||||||
res = await LPOptimizer.optimize_change({ prev_pgs: res.int_pgs, osd_tree: cur_tree, pg_size: 3 });
|
res = await LPOptimizer.optimize_change(compat({ prev_pgs: res.int_pgs, osd_tree: cur_tree, pg_size: 3 }));
|
||||||
LPOptimizer.print_change_stats(res, false);
|
LPOptimizer.print_change_stats(res, false);
|
||||||
assert(res.space == 0);
|
assert(res.space == 0);
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
// Copyright (c) Vitaliy Filippov, 2019+
|
// Copyright (c) Vitaliy Filippov, 2019+
|
||||||
// License: VNPL-1.1 (see README.md for details)
|
// License: VNPL-1.1 (see README.md for details)
|
||||||
|
|
||||||
|
const { compat } = require('./simple_pgs.js');
|
||||||
const LPOptimizer = require('./lp-optimizer.js');
|
const LPOptimizer = require('./lp-optimizer.js');
|
||||||
|
|
||||||
const osd_tree = {
|
const osd_tree = {
|
||||||
|
@ -20,13 +21,13 @@ async function run()
|
||||||
{
|
{
|
||||||
let res;
|
let res;
|
||||||
console.log('256 PGs, 3+3 OSDs, size=2');
|
console.log('256 PGs, 3+3 OSDs, size=2');
|
||||||
res = await LPOptimizer.optimize_initial({ osd_tree, pg_size: 2, pg_count: 256 });
|
res = await LPOptimizer.optimize_initial(compat({ osd_tree, pg_size: 2, pg_count: 256 }));
|
||||||
LPOptimizer.print_change_stats(res, false);
|
LPOptimizer.print_change_stats(res, false);
|
||||||
|
|
||||||
// Should NOT fail with the "unfeasible or unbounded" exception
|
// Should NOT fail with the "unfeasible or unbounded" exception
|
||||||
console.log('\nRemoving osd.2');
|
console.log('\nRemoving osd.2');
|
||||||
delete osd_tree[100][2];
|
delete osd_tree[100][2];
|
||||||
res = await LPOptimizer.optimize_change({ prev_pgs: res.int_pgs, osd_tree, pg_size: 2 });
|
res = await LPOptimizer.optimize_change(compat({ prev_pgs: res.int_pgs, osd_tree, pg_size: 2 }));
|
||||||
LPOptimizer.print_change_stats(res, false);
|
LPOptimizer.print_change_stats(res, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
// Copyright (c) Vitaliy Filippov, 2019+
|
// Copyright (c) Vitaliy Filippov, 2019+
|
||||||
// License: VNPL-1.1 (see README.md for details)
|
// License: VNPL-1.1 (see README.md for details)
|
||||||
|
|
||||||
|
const { compat, flatten_tree } = require('./simple_pgs.js');
|
||||||
const LPOptimizer = require('./lp-optimizer.js');
|
const LPOptimizer = require('./lp-optimizer.js');
|
||||||
|
|
||||||
const osd_tree = {
|
const osd_tree = {
|
||||||
|
@ -84,31 +85,31 @@ async function run()
|
||||||
// Space efficiency is ~99% in all cases.
|
// Space efficiency is ~99% in all cases.
|
||||||
|
|
||||||
console.log('256 PGs, size=2');
|
console.log('256 PGs, size=2');
|
||||||
res = await LPOptimizer.optimize_initial({ osd_tree, pg_size: 2, pg_count: 256 });
|
res = await LPOptimizer.optimize_initial(compat({ osd_tree, pg_size: 2, pg_count: 256 }));
|
||||||
LPOptimizer.print_change_stats(res, false);
|
LPOptimizer.print_change_stats(res, false);
|
||||||
console.log('\nAdding osd.8');
|
console.log('\nAdding osd.8');
|
||||||
osd_tree[500][8] = 3.58589;
|
osd_tree[500][8] = 3.58589;
|
||||||
res = await LPOptimizer.optimize_change({ prev_pgs: res.int_pgs, osd_tree, pg_size: 2 });
|
res = await LPOptimizer.optimize_change(compat({ prev_pgs: res.int_pgs, osd_tree, pg_size: 2 }));
|
||||||
LPOptimizer.print_change_stats(res, false);
|
LPOptimizer.print_change_stats(res, false);
|
||||||
console.log('\nRemoving osd.8');
|
console.log('\nRemoving osd.8');
|
||||||
delete osd_tree[500][8];
|
delete osd_tree[500][8];
|
||||||
res = await LPOptimizer.optimize_change({ prev_pgs: res.int_pgs, osd_tree, pg_size: 2 });
|
res = await LPOptimizer.optimize_change(compat({ prev_pgs: res.int_pgs, osd_tree, pg_size: 2 }));
|
||||||
LPOptimizer.print_change_stats(res, false);
|
LPOptimizer.print_change_stats(res, false);
|
||||||
|
|
||||||
console.log('\n256 PGs, size=3');
|
console.log('\n256 PGs, size=3');
|
||||||
res = await LPOptimizer.optimize_initial({ osd_tree, pg_size: 3, pg_count: 256 });
|
res = await LPOptimizer.optimize_initial(compat({ osd_tree, pg_size: 3, pg_count: 256 }));
|
||||||
LPOptimizer.print_change_stats(res, false);
|
LPOptimizer.print_change_stats(res, false);
|
||||||
console.log('\nAdding osd.8');
|
console.log('\nAdding osd.8');
|
||||||
osd_tree[500][8] = 3.58589;
|
osd_tree[500][8] = 3.58589;
|
||||||
res = await LPOptimizer.optimize_change({ prev_pgs: res.int_pgs, osd_tree, pg_size: 3 });
|
res = await LPOptimizer.optimize_change(compat({ prev_pgs: res.int_pgs, osd_tree, pg_size: 3 }));
|
||||||
LPOptimizer.print_change_stats(res, false);
|
LPOptimizer.print_change_stats(res, false);
|
||||||
console.log('\nRemoving osd.8');
|
console.log('\nRemoving osd.8');
|
||||||
delete osd_tree[500][8];
|
delete osd_tree[500][8];
|
||||||
res = await LPOptimizer.optimize_change({ prev_pgs: res.int_pgs, osd_tree, pg_size: 3 });
|
res = await LPOptimizer.optimize_change(compat({ prev_pgs: res.int_pgs, osd_tree, pg_size: 3 }));
|
||||||
LPOptimizer.print_change_stats(res, false);
|
LPOptimizer.print_change_stats(res, false);
|
||||||
|
|
||||||
console.log('\n256 PGs, size=3, failure domain=rack');
|
console.log('\n256 PGs, size=3, failure domain=rack');
|
||||||
res = await LPOptimizer.optimize_initial({ osd_tree: LPOptimizer.flatten_tree(crush_tree, {}, 1, 3), pg_size: 3, pg_count: 256 });
|
res = await LPOptimizer.optimize_initial(compat({ osd_tree: flatten_tree(crush_tree, {}, 1, 3), pg_size: 3, pg_count: 256 }));
|
||||||
LPOptimizer.print_change_stats(res, false);
|
LPOptimizer.print_change_stats(res, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,118 @@
|
||||||
|
const { random_custom_combinations, index_tree, parse_level_indexes, parse_pg_dsl } = require('./dsl_pgs.js');
|
||||||
|
|
||||||
|
// Test helper: print <result> and compare it with <expected> by their JSON representations.
// On mismatch, print the expected value and terminate the process with exit code 1.
function check(result, expected)
{
    console.dir(result, { depth: null });
    const matches = JSON.stringify(result) === JSON.stringify(expected);
    if (matches)
    {
        return;
    }
    process.stderr.write('Unexpected value, expected: ');
    console.dir(expected, { depth: null });
    process.exit(1);
}
|
||||||
|
|
||||||
|
// Level index sets shared by several checks below.
// Built by functions so that every call receives its own fresh object.
const six_positions = () => ({ dc: '112233', host: 'ABCDEF' });
const twelve_positions = () => ({ dc: '112211223333', host: '123456789ABC' });

// parse_pg_dsl(): "any", references to previous positions and a list in parentheses
check(
    parse_pg_dsl("any, dc=1 host!=1, dc!=1, dc=3 host!=3, dc!=(1,3), dc=5 host!=5"),
    [
        [],
        [ [ 'dc', '=', 1 ], [ 'host', '!=', 1 ] ],
        [ [ 'dc', '!=', 1 ] ],
        [ [ 'dc', '=', 3 ], [ 'host', '!=', 3 ] ],
        [ [ 'dc', '!=', [ 1, 3 ] ] ],
        [ [ 'dc', '=', 5 ], [ 'host', '!=', 5 ] ],
    ]
);

// parse_pg_dsl(): non-numeric value and the ">" operator
check(
    parse_pg_dsl("dc=meow, dc!=1, dc>2"),
    [
        [ [ 'dc', '=', { id: 'meow' } ] ],
        [ [ 'dc', '!=', 1 ] ],
        [ [ 'dc', '>', 2 ] ],
    ]
);

// parse_level_indexes(): 6 positions, without the second argument
check(
    parse_level_indexes(six_positions()),
    [
        [],
        [ [ 'dc', '=', 1 ], [ 'host', '!=', [ 1 ] ] ],
        [ [ 'dc', '!=', [ 1 ] ], [ 'host', '!=', [ 1, 2 ] ] ],
        [ [ 'dc', '=', 3 ], [ 'host', '!=', [ 1, 2, 3 ] ] ],
        [ [ 'dc', '!=', [ 1, 3 ] ], [ 'host', '!=', [ 1, 2, 3, 4 ] ] ],
        [ [ 'dc', '=', 5 ], [ 'host', '!=', [ 1, 2, 3, 4, 5 ] ] ],
    ]
);

// parse_level_indexes(): 6 positions, with [ 'dc', 'host' ] as the second argument
check(
    parse_level_indexes(six_positions(), [ 'dc', 'host' ]),
    [
        [],
        [ [ 'dc', '=', 1 ], [ 'host', '!=', [ 1 ] ] ],
        [ [ 'dc', '!=', [ 1 ] ] ],
        [ [ 'dc', '=', 3 ], [ 'host', '!=', [ 3 ] ] ],
        [ [ 'dc', '!=', [ 1, 3 ] ] ],
        [ [ 'dc', '=', 5 ], [ 'host', '!=', [ 5 ] ] ],
    ]
);

// parse_level_indexes(): 12 positions, without the second argument
check(
    parse_level_indexes(twelve_positions()),
    [
        [],
        [ [ 'dc', '=', 1 ], [ 'host', '!=', [ 1 ] ] ],
        [ [ 'dc', '!=', [ 1 ] ], [ 'host', '!=', [ 1, 2 ] ] ],
        [ [ 'dc', '=', 3 ], [ 'host', '!=', [ 1, 2, 3 ] ] ],
        [ [ 'dc', '=', 1 ], [ 'host', '!=', [ 1, 2, 3, 4 ] ] ],
        [ [ 'dc', '=', 1 ], [ 'host', '!=', [ 1, 2, 3, 4, 5 ] ] ],
        [ [ 'dc', '=', 3 ], [ 'host', '!=', [ 1, 2, 3, 4, 5, 6 ] ] ],
        [ [ 'dc', '=', 3 ], [ 'host', '!=', [ 1, 2, 3, 4, 5, 6, 7 ] ] ],
        [ [ 'dc', '!=', [ 1, 3 ] ], [ 'host', '!=', [ 1, 2, 3, 4, 5, 6, 7, 8 ] ] ],
        [ [ 'dc', '=', 9 ], [ 'host', '!=', [ 1, 2, 3, 4, 5, 6, 7, 8, 9 ] ] ],
        [ [ 'dc', '=', 9 ], [ 'host', '!=', [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 ] ] ],
        [ [ 'dc', '=', 9 ], [ 'host', '!=', [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 ] ] ],
    ]
);

// parse_level_indexes(): 12 positions, with [ 'dc', 'host' ] as the second argument
check(
    parse_level_indexes(twelve_positions(), [ 'dc', 'host' ]),
    [
        [],
        [ [ 'dc', '=', 1 ], [ 'host', '!=', [ 1 ] ] ],
        [ [ 'dc', '!=', [ 1 ] ] ],
        [ [ 'dc', '=', 3 ], [ 'host', '!=', [ 3 ] ] ],
        [ [ 'dc', '=', 1 ], [ 'host', '!=', [ 1, 2 ] ] ],
        [ [ 'dc', '=', 1 ], [ 'host', '!=', [ 1, 2, 5 ] ] ],
        [ [ 'dc', '=', 3 ], [ 'host', '!=', [ 3, 4 ] ] ],
        [ [ 'dc', '=', 3 ], [ 'host', '!=', [ 3, 4, 7 ] ] ],
        [ [ 'dc', '!=', [ 1, 3 ] ] ],
        [ [ 'dc', '=', 9 ], [ 'host', '!=', [ 9 ] ] ],
        [ [ 'dc', '=', 9 ], [ 'host', '!=', [ 9, 10 ] ] ],
        [ [ 'dc', '=', 9 ], [ 'host', '!=', [ 9, 10, 11 ] ] ],
    ]
);

// random_custom_combinations(): 3 OSDs without parents, 2 OSDs per PG
const flat_osds = index_tree([
    { id: '1', size: 1, level: 'osd' },
    { id: '2', size: 2, level: 'osd' },
    { id: '3', size: 3, level: 'osd' },
]);
const flat_rules = parse_level_indexes({ osd: '12' });
check(
    Object.keys(random_custom_combinations(flat_osds, flat_rules, 10000)).sort(),
    [ 'pg_1_2', 'pg_1_3', 'pg_2_3' ]
);

// random_custom_combinations(): 4 OSDs per PG, 2 OSDs per host;
// h1 has only one OSD, the only expected PG consists of the OSDs of h2 and h3
const host_osds = index_tree([
    { id: 'h1', level: 'host' },
    { id: 'h2', level: 'host' },
    { id: 'h3', level: 'host' },
    { id: '1', size: 1, level: 'osd', parent: 'h1' },
    { id: '2', size: 1, level: 'osd', parent: 'h2' },
    { id: '3', size: 1, level: 'osd', parent: 'h2' },
    { id: '4', size: 1, level: 'osd', parent: 'h3' },
    { id: '5', size: 1, level: 'osd', parent: 'h3' },
]);
const host_rules = parse_level_indexes({ host: '1122', osd: '1234' });
check(
    Object.keys(random_custom_combinations(host_osds, host_rules, 10000)).sort(),
    [ 'pg_2_3_4_5' ]
);

console.log('OK');
|
|
@ -129,6 +129,8 @@ static const char* help_text =
|
||||||
" --block_size 128k Put pool only on OSDs with this data block size\n"
|
" --block_size 128k Put pool only on OSDs with this data block size\n"
|
||||||
" --bitmap_granularity 4k Put pool only on OSDs with this logical sector size\n"
|
" --bitmap_granularity 4k Put pool only on OSDs with this logical sector size\n"
|
||||||
" --immediate_commit none Put pool only on OSDs with this or larger immediate_commit (none < small < all)\n"
|
" --immediate_commit none Put pool only on OSDs with this or larger immediate_commit (none < small < all)\n"
|
||||||
|
" --level_placement <rules> Use additional failure domain rules (example: \"dc=112233\")\n"
|
||||||
|
" --raw_placement <rules> Specify raw PG generation rules (see documentation for details)\n"
|
||||||
" --primary_affinity_tags tags Prefer to put primary copies on OSDs with all specified tags\n"
|
" --primary_affinity_tags tags Prefer to put primary copies on OSDs with all specified tags\n"
|
||||||
" --scrub_interval <time> Enable regular scrubbing for this pool. Format: number + unit s/m/h/d/M/y\n"
|
" --scrub_interval <time> Enable regular scrubbing for this pool. Format: number + unit s/m/h/d/M/y\n"
|
||||||
" --used_for_fs <name> Mark pool as used for VitastorFS with metadata in image <name>\n"
|
" --used_for_fs <name> Mark pool as used for VitastorFS with metadata in image <name>\n"
|
||||||
|
@ -145,6 +147,7 @@ static const char* help_text =
|
||||||
" [-s|--pg_size <number>] [--pg_minsize <number>] [-n|--pg_count <count>]\n"
|
" [-s|--pg_size <number>] [--pg_minsize <number>] [-n|--pg_count <count>]\n"
|
||||||
" [--failure_domain <level>] [--root_node <node>] [--osd_tags <tags>] [--used_for_fs <name>]\n"
|
" [--failure_domain <level>] [--root_node <node>] [--osd_tags <tags>] [--used_for_fs <name>]\n"
|
||||||
" [--max_osd_combinations <number>] [--primary_affinity_tags <tags>] [--scrub_interval <time>]\n"
|
" [--max_osd_combinations <number>] [--primary_affinity_tags <tags>] [--scrub_interval <time>]\n"
|
||||||
|
" [--level_placement <rules>] [--raw_placement <rules>]\n"
|
||||||
" Non-modifiable parameters (changing them WILL lead to data loss):\n"
|
" Non-modifiable parameters (changing them WILL lead to data loss):\n"
|
||||||
" [--block_size <size>] [--bitmap_granularity <size>]\n"
|
" [--block_size <size>] [--bitmap_granularity <size>]\n"
|
||||||
" [--immediate_commit <all|small|none>] [--pg_stripe_size <size>]\n"
|
" [--immediate_commit <all|small|none>] [--pg_stripe_size <size>]\n"
|
||||||
|
|
|
@ -82,9 +82,38 @@ std::string validate_pool_config(json11::Json::object & new_cfg, json11::Json ol
|
||||||
value = value.uint64_value();
|
value = value.uint64_value();
|
||||||
}
|
}
|
||||||
else if (key == "name" || key == "scheme" || key == "immediate_commit" ||
|
else if (key == "name" || key == "scheme" || key == "immediate_commit" ||
|
||||||
key == "failure_domain" || key == "root_node" || key == "scrub_interval" || key == "used_for_fs")
|
key == "failure_domain" || key == "root_node" || key == "scrub_interval" || key == "used_for_fs" ||
|
||||||
|
key == "raw_placement")
|
||||||
{
|
{
|
||||||
// OK
|
if (!value.is_string())
|
||||||
|
{
|
||||||
|
return key+" must be a string";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (key == "level_placement")
|
||||||
|
{
|
||||||
|
// level=rule, level=rule, ...
|
||||||
|
if (!value.is_object())
|
||||||
|
{
|
||||||
|
json11::Json::object obj;
|
||||||
|
for (auto & item: explode(",", value.string_value(), true))
|
||||||
|
{
|
||||||
|
auto pair = explode("=", item, true);
|
||||||
|
if (pair.size() >= 2)
|
||||||
|
{
|
||||||
|
obj[pair[0]] = pair[1];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (obj.size())
|
||||||
|
{
|
||||||
|
value = obj;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
new_cfg.erase(kv_it++);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else if (key == "osd_tags" || key == "primary_affinity_tags")
|
else if (key == "osd_tags" || key == "primary_affinity_tags")
|
||||||
{
|
{
|
||||||
|
@ -184,6 +213,38 @@ std::string validate_pool_config(json11::Json::object & new_cfg, json11::Json ol
|
||||||
return "PG size can't be greater than 256";
|
return "PG size can't be greater than 256";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// PG rules
|
||||||
|
if (!cfg["level_placement"].is_null())
|
||||||
|
{
|
||||||
|
for (auto & lr: cfg["level_placement"].object_items())
|
||||||
|
{
|
||||||
|
int len = 0;
|
||||||
|
if (lr.second.is_array())
|
||||||
|
{
|
||||||
|
for (auto & lri: lr.second.array_items())
|
||||||
|
{
|
||||||
|
if (!lri.is_string() && !lri.is_number())
|
||||||
|
{
|
||||||
|
return "--level_placement contains an array with non-scalar value: "+lri.dump();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
len = lr.second.array_items().size();
|
||||||
|
}
|
||||||
|
else if (!lr.second.is_string())
|
||||||
|
{
|
||||||
|
return "--level_placement contains a non-array and non-string value: "+lr.second.dump();
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
len = lr.second.string_value().size();
|
||||||
|
}
|
||||||
|
if (len != pg_size)
|
||||||
|
{
|
||||||
|
return "values in --level_placement should be exactly pg_size ("+std::to_string(pg_size)+") long";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// parity_chunks
|
// parity_chunks
|
||||||
uint64_t parity_chunks = 1;
|
uint64_t parity_chunks = 1;
|
||||||
if (scheme == POOL_SCHEME_EC)
|
if (scheme == POOL_SCHEME_EC)
|
||||||
|
|
Loading…
Reference in New Issue