forked from vitalif/vitastor
Rename "jerasure" to "ec" in pool configuration, function names, fix documentation and Debian build scripts
Old pool configurations with "jerasure" also remain supported as an alias for "ec"
lrc-matrix
parent
c4eb46600d
commit
a0cae4c180
|
@ -1,5 +1,19 @@
|
||||||
|
vitastor (0.7.1-1) unstable; urgency=medium
|
||||||
|
|
||||||
|
* Bugfixes
|
||||||
|
|
||||||
|
-- Vitaliy Filippov <vitalif@yourcmc.ru> Fri, 03 Jun 2022 02:09:44 +0300
|
||||||
|
|
||||||
vitastor (0.7.0-1) unstable; urgency=medium
|
vitastor (0.7.0-1) unstable; urgency=medium
|
||||||
|
|
||||||
|
* Implement NFS proxy
|
||||||
|
* Add documentation
|
||||||
|
* Bugfixes
|
||||||
|
|
||||||
|
-- Vitaliy Filippov <vitalif@yourcmc.ru> Sun, 29 May 2022 23:39:13 +0300
|
||||||
|
|
||||||
|
vitastor (0.6.3-1) unstable; urgency=medium
|
||||||
|
|
||||||
* RDMA support
|
* RDMA support
|
||||||
* Bugfixes
|
* Bugfixes
|
||||||
|
|
||||||
|
|
|
@ -2,7 +2,7 @@ Source: vitastor
|
||||||
Section: admin
|
Section: admin
|
||||||
Priority: optional
|
Priority: optional
|
||||||
Maintainer: Vitaliy Filippov <vitalif@yourcmc.ru>
|
Maintainer: Vitaliy Filippov <vitalif@yourcmc.ru>
|
||||||
Build-Depends: debhelper, liburing-dev (>= 0.6), g++ (>= 8), libstdc++6 (>= 8), linux-libc-dev, libgoogle-perftools-dev, libjerasure-dev, libgf-complete-dev, libibverbs-dev
|
Build-Depends: debhelper, liburing-dev (>= 0.6), g++ (>= 8), libstdc++6 (>= 8), linux-libc-dev, libgoogle-perftools-dev, libjerasure-dev, libgf-complete-dev, libibverbs-dev, libisal-dev, pkgconf
|
||||||
Standards-Version: 4.5.0
|
Standards-Version: 4.5.0
|
||||||
Homepage: https://vitastor.io/
|
Homepage: https://vitastor.io/
|
||||||
Rules-Requires-Root: no
|
Rules-Requires-Root: no
|
||||||
|
|
|
@ -22,7 +22,7 @@ RUN apt-get update
|
||||||
RUN apt-get -y install fio liburing1 liburing-dev libgoogle-perftools-dev devscripts
|
RUN apt-get -y install fio liburing1 liburing-dev libgoogle-perftools-dev devscripts
|
||||||
RUN apt-get -y build-dep fio
|
RUN apt-get -y build-dep fio
|
||||||
RUN apt-get --download-only source fio
|
RUN apt-get --download-only source fio
|
||||||
RUN apt-get update && apt-get -y install libjerasure-dev cmake libibverbs-dev
|
RUN apt-get update && apt-get -y install libjerasure-dev cmake libibverbs-dev libisal-dev
|
||||||
|
|
||||||
ADD . /root/vitastor
|
ADD . /root/vitastor
|
||||||
RUN set -e -x; \
|
RUN set -e -x; \
|
||||||
|
|
|
@ -106,9 +106,10 @@ Pool name.
|
||||||
|
|
||||||
- Type: string
|
- Type: string
|
||||||
- Required
|
- Required
|
||||||
- One of: "replicated", "xor" or "jerasure"
|
- One of: "replicated", "xor", "ec" or "jerasure"
|
||||||
|
|
||||||
Redundancy scheme used for data in this pool.
|
Redundancy scheme used for data in this pool. "jerasure" is an alias for "ec",
|
||||||
|
both use Reed-Solomon-Vandermonde codes.
|
||||||
|
|
||||||
## pg_size
|
## pg_size
|
||||||
|
|
||||||
|
@ -243,7 +244,7 @@ of the OSDs containing a data chunk for a PG.
|
||||||
{
|
{
|
||||||
"2": {
|
"2": {
|
||||||
"name":"ecpool",
|
"name":"ecpool",
|
||||||
"scheme":"jerasure",
|
"scheme":"ec",
|
||||||
"pg_size":3,
|
"pg_size":3,
|
||||||
"parity_chunks":1,
|
"parity_chunks":1,
|
||||||
"pg_minsize":2,
|
"pg_minsize":2,
|
||||||
|
|
|
@ -106,9 +106,10 @@
|
||||||
|
|
||||||
- Тип: строка
|
- Тип: строка
|
||||||
- Обязательный
|
- Обязательный
|
||||||
- Возможные значения: "replicated", "xor" или "jerasure"
|
- Возможные значения: "replicated", "xor", "ec" или "jerasure"
|
||||||
|
|
||||||
Схема избыточности, используемая в данном пуле.
|
Схема избыточности, используемая в данном пуле. "jerasure" - синоним для "ec",
|
||||||
|
в обеих схемах используются коды Рида-Соломона-Вандермонда.
|
||||||
|
|
||||||
## pg_size
|
## pg_size
|
||||||
|
|
||||||
|
@ -242,7 +243,7 @@ PG в Vitastor эфемерны, то есть вы можете менят
|
||||||
{
|
{
|
||||||
"2": {
|
"2": {
|
||||||
"name":"ecpool",
|
"name":"ecpool",
|
||||||
"scheme":"jerasure",
|
"scheme":"ec",
|
||||||
"pg_size":3,
|
"pg_size":3,
|
||||||
"parity_chunks":1,
|
"parity_chunks":1,
|
||||||
"pg_minsize":2,
|
"pg_minsize":2,
|
||||||
|
|
|
@ -15,7 +15,8 @@
|
||||||
- gcc and g++ 8 or newer, clang 10 or newer, or other compiler with C++11 plus
|
- gcc and g++ 8 or newer, clang 10 or newer, or other compiler with C++11 plus
|
||||||
designated initializers support from C++20
|
designated initializers support from C++20
|
||||||
- CMake
|
- CMake
|
||||||
- liburing, jerasure headers
|
- liburing, jerasure headers and libraries
|
||||||
|
- ISA-L, libibverbs headers and libraries (optional)
|
||||||
- tcmalloc (google-perftools-dev)
|
- tcmalloc (google-perftools-dev)
|
||||||
|
|
||||||
## Basic instructions
|
## Basic instructions
|
||||||
|
|
|
@ -15,7 +15,8 @@
|
||||||
- gcc и g++ >= 8, либо clang >= 10, либо другой компилятор с поддержкой C++11 плюс
|
- gcc и g++ >= 8, либо clang >= 10, либо другой компилятор с поддержкой C++11 плюс
|
||||||
назначенных инициализаторов (designated initializers) из C++20
|
назначенных инициализаторов (designated initializers) из C++20
|
||||||
- CMake
|
- CMake
|
||||||
- Заголовки liburing, jerasure
|
- Заголовки и библиотеки liburing, jerasure
|
||||||
|
- Опционально - заголовки и библиотеки ISA-L, libibverbs
|
||||||
- tcmalloc (google-perftools-dev)
|
- tcmalloc (google-perftools-dev)
|
||||||
|
|
||||||
## Базовая инструкция
|
## Базовая инструкция
|
||||||
|
|
|
@ -15,7 +15,7 @@
|
||||||
- Basic part: highly-available block storage with symmetric clustering and no SPOF
|
- Basic part: highly-available block storage with symmetric clustering and no SPOF
|
||||||
- [Performance](../performance/comparison1.en.md) ;-D
|
- [Performance](../performance/comparison1.en.md) ;-D
|
||||||
- [Multiple redundancy schemes](../config/pool.en.md#scheme): Replication, XOR n+1, Reed-Solomon erasure codes
|
- [Multiple redundancy schemes](../config/pool.en.md#scheme): Replication, XOR n+1, Reed-Solomon erasure codes
|
||||||
based on jerasure library with any number of data and parity drives in a group
|
based on jerasure and ISA-L libraries with any number of data and parity drives in a group
|
||||||
- Configuration via simple JSON data structures in etcd (parameters, pools and images)
|
- Configuration via simple JSON data structures in etcd (parameters, pools and images)
|
||||||
- Automatic data distribution over OSDs, with support for:
|
- Automatic data distribution over OSDs, with support for:
|
||||||
- Mathematical optimization for better uniformity and less data movement
|
- Mathematical optimization for better uniformity and less data movement
|
||||||
|
|
|
@ -15,7 +15,7 @@
|
||||||
- Базовая часть - надёжное кластерное блочное хранилище без единой точки отказа
|
- Базовая часть - надёжное кластерное блочное хранилище без единой точки отказа
|
||||||
- [Производительность](../comparison1.ru.md) ;-D
|
- [Производительность](../comparison1.ru.md) ;-D
|
||||||
- [Несколько схем отказоустойчивости](../config/pool.ru.md#scheme): репликация, XOR n+1 (1 диск чётности), коды коррекции ошибок
|
- [Несколько схем отказоустойчивости](../config/pool.ru.md#scheme): репликация, XOR n+1 (1 диск чётности), коды коррекции ошибок
|
||||||
Рида-Соломона на основе библиотеки jerasure с любым числом дисков данных и чётности в группе
|
Рида-Соломона на основе библиотек jerasure и ISA-L с любым числом дисков данных и чётности в группе
|
||||||
- Конфигурация через простые человекочитаемые JSON-структуры в etcd
|
- Конфигурация через простые человекочитаемые JSON-структуры в etcd
|
||||||
- Автоматическое распределение данных по OSD, с поддержкой:
|
- Автоматическое распределение данных по OSD, с поддержкой:
|
||||||
- Математической оптимизации для лучшей равномерности распределения и минимизации перемещений данных
|
- Математической оптимизации для лучшей равномерности распределения и минимизации перемещений данных
|
||||||
|
|
|
@ -63,11 +63,11 @@ etcdctl --endpoints=... put /vitastor/config/pools '{"1":{"name":"testpool",
|
||||||
"scheme":"replicated","pg_size":2,"pg_minsize":1,"pg_count":256,"failure_domain":"host"}}'
|
"scheme":"replicated","pg_size":2,"pg_minsize":1,"pg_count":256,"failure_domain":"host"}}'
|
||||||
```
|
```
|
||||||
|
|
||||||
For jerasure pools the configuration should look like the following:
|
For EC pools the configuration should look like the following:
|
||||||
|
|
||||||
```
|
```
|
||||||
etcdctl --endpoints=... put /vitastor/config/pools '{"2":{"name":"ecpool",
|
etcdctl --endpoints=... put /vitastor/config/pools '{"2":{"name":"ecpool",
|
||||||
"scheme":"jerasure","pg_size":4,"parity_chunks":2,"pg_minsize":2,"pg_count":256,"failure_domain":"host"}}'
|
"scheme":"ec","pg_size":4,"parity_chunks":2,"pg_minsize":2,"pg_count":256,"failure_domain":"host"}}'
|
||||||
```
|
```
|
||||||
|
|
||||||
After you do this, one of the monitors will configure PGs and OSDs will start them.
|
After you do this, one of the monitors will configure PGs and OSDs will start them.
|
||||||
|
|
|
@ -75,7 +75,7 @@ etcdctl --endpoints=... put /vitastor/config/pools '{"1":{"name":"testpool",
|
||||||
|
|
||||||
```
|
```
|
||||||
etcdctl --endpoints=... put /vitastor/config/pools '{"2":{"name":"ecpool",
|
etcdctl --endpoints=... put /vitastor/config/pools '{"2":{"name":"ecpool",
|
||||||
"scheme":"jerasure","pg_size":4,"parity_chunks":2,"pg_minsize":2,"pg_count":256,"failure_domain":"host"}}'
|
"scheme":"ec","pg_size":4,"parity_chunks":2,"pg_minsize":2,"pg_count":256,"failure_domain":"host"}}'
|
||||||
```
|
```
|
||||||
|
|
||||||
После этого один из мониторов должен сконфигурировать PG, а OSD должны запустить их.
|
После этого один из мониторов должен сконфигурировать PG, а OSD должны запустить их.
|
||||||
|
|
16
mon/mon.js
16
mon/mon.js
|
@ -147,11 +147,11 @@ const etcd_tree = {
|
||||||
/* pools: {
|
/* pools: {
|
||||||
<id>: {
|
<id>: {
|
||||||
name: 'testpool',
|
name: 'testpool',
|
||||||
// jerasure uses Reed-Solomon-Vandermonde codes
|
// 'ec' uses Reed-Solomon-Vandermonde codes, 'jerasure' is an alias for 'ec'
|
||||||
scheme: 'replicated' | 'xor' | 'jerasure',
|
scheme: 'replicated' | 'xor' | 'ec' | 'jerasure',
|
||||||
pg_size: 3,
|
pg_size: 3,
|
||||||
pg_minsize: 2,
|
pg_minsize: 2,
|
||||||
// number of parity chunks, required for jerasure
|
// number of parity chunks, required for EC
|
||||||
parity_chunks?: 1,
|
parity_chunks?: 1,
|
||||||
pg_count: 100,
|
pg_count: 100,
|
||||||
failure_domain: 'host',
|
failure_domain: 'host',
|
||||||
|
@ -1013,14 +1013,15 @@ class Mon
|
||||||
console.log('Pool ID '+pool_id+' is invalid');
|
console.log('Pool ID '+pool_id+' is invalid');
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (pool_cfg.scheme !== 'xor' && pool_cfg.scheme !== 'replicated' && pool_cfg.scheme !== 'jerasure')
|
if (pool_cfg.scheme !== 'xor' && pool_cfg.scheme !== 'replicated' &&
|
||||||
|
pool_cfg.scheme !== 'ec' && pool_cfg.scheme !== 'jerasure')
|
||||||
{
|
{
|
||||||
if (warn)
|
if (warn)
|
||||||
console.log('Pool '+pool_id+' has invalid coding scheme (one of "xor", "replicated" and "jerasure" required)');
|
console.log('Pool '+pool_id+' has invalid coding scheme (one of "xor", "replicated", "ec" and "jerasure" required)');
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (!pool_cfg.pg_size || pool_cfg.pg_size < 1 || pool_cfg.pg_size > 256 ||
|
if (!pool_cfg.pg_size || pool_cfg.pg_size < 1 || pool_cfg.pg_size > 256 ||
|
||||||
(pool_cfg.scheme === 'xor' || pool_cfg.scheme == 'jerasure') && pool_cfg.pg_size < 3)
|
pool_cfg.scheme !== 'replicated' && pool_cfg.pg_size < 3)
|
||||||
{
|
{
|
||||||
if (warn)
|
if (warn)
|
||||||
console.log('Pool '+pool_id+' has invalid pg_size');
|
console.log('Pool '+pool_id+' has invalid pg_size');
|
||||||
|
@ -1039,7 +1040,8 @@ class Mon
|
||||||
console.log('Pool '+pool_id+' has invalid parity_chunks (must be 1)');
|
console.log('Pool '+pool_id+' has invalid parity_chunks (must be 1)');
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (pool_cfg.scheme === 'jerasure' && (pool_cfg.parity_chunks < 1 || pool_cfg.parity_chunks > pool_cfg.pg_size-2))
|
if ((pool_cfg.scheme === 'ec' || pool_cfg.scheme === 'jerasure') &&
|
||||||
|
(pool_cfg.parity_chunks < 1 || pool_cfg.parity_chunks > pool_cfg.pg_size-2))
|
||||||
{
|
{
|
||||||
if (warn)
|
if (warn)
|
||||||
console.log('Pool '+pool_id+' has invalid parity_chunks (must be between 1 and pg_size-2)');
|
console.log('Pool '+pool_id+' has invalid parity_chunks (must be between 1 and pg_size-2)');
|
||||||
|
|
|
@ -127,7 +127,7 @@ resume_1:
|
||||||
pool_stats[pool_cfg.id] = json11::Json::object {
|
pool_stats[pool_cfg.id] = json11::Json::object {
|
||||||
{ "name", pool_cfg.name },
|
{ "name", pool_cfg.name },
|
||||||
{ "pg_count", pool_cfg.pg_count },
|
{ "pg_count", pool_cfg.pg_count },
|
||||||
{ "scheme", pool_cfg.scheme == POOL_SCHEME_REPLICATED ? "replicated" : "jerasure" },
|
{ "scheme", pool_cfg.scheme == POOL_SCHEME_REPLICATED ? "replicated" : "ec" },
|
||||||
{ "scheme_name", pool_cfg.scheme == POOL_SCHEME_REPLICATED
|
{ "scheme_name", pool_cfg.scheme == POOL_SCHEME_REPLICATED
|
||||||
? std::to_string(pool_cfg.pg_size)+"/"+std::to_string(pool_cfg.pg_minsize)
|
? std::to_string(pool_cfg.pg_size)+"/"+std::to_string(pool_cfg.pg_minsize)
|
||||||
: "EC "+std::to_string(pool_cfg.pg_size-pool_cfg.parity_chunks)+"+"+std::to_string(pool_cfg.parity_chunks) },
|
: "EC "+std::to_string(pool_cfg.pg_size-pool_cfg.parity_chunks)+"+"+std::to_string(pool_cfg.parity_chunks) },
|
||||||
|
|
|
@ -673,18 +673,18 @@ void etcd_state_client_t::parse_state(const etcd_kv_t & kv)
|
||||||
pc.scheme = POOL_SCHEME_REPLICATED;
|
pc.scheme = POOL_SCHEME_REPLICATED;
|
||||||
else if (pool_item.second["scheme"] == "xor")
|
else if (pool_item.second["scheme"] == "xor")
|
||||||
pc.scheme = POOL_SCHEME_XOR;
|
pc.scheme = POOL_SCHEME_XOR;
|
||||||
else if (pool_item.second["scheme"] == "jerasure")
|
else if (pool_item.second["scheme"] == "ec" || pool_item.second["scheme"] == "jerasure")
|
||||||
pc.scheme = POOL_SCHEME_JERASURE;
|
pc.scheme = POOL_SCHEME_EC;
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
fprintf(stderr, "Pool %u has invalid coding scheme (one of \"xor\", \"replicated\" or \"jerasure\" required), skipping pool\n", pool_id);
|
fprintf(stderr, "Pool %u has invalid coding scheme (one of \"xor\", \"replicated\", \"ec\" or \"jerasure\" required), skipping pool\n", pool_id);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
// PG Size
|
// PG Size
|
||||||
pc.pg_size = pool_item.second["pg_size"].uint64_value();
|
pc.pg_size = pool_item.second["pg_size"].uint64_value();
|
||||||
if (pc.pg_size < 1 ||
|
if (pc.pg_size < 1 ||
|
||||||
pool_item.second["pg_size"].uint64_value() < 3 &&
|
pool_item.second["pg_size"].uint64_value() < 3 &&
|
||||||
(pc.scheme == POOL_SCHEME_XOR || pc.scheme == POOL_SCHEME_JERASURE) ||
|
(pc.scheme == POOL_SCHEME_XOR || pc.scheme == POOL_SCHEME_EC) ||
|
||||||
pool_item.second["pg_size"].uint64_value() > 256)
|
pool_item.second["pg_size"].uint64_value() > 256)
|
||||||
{
|
{
|
||||||
fprintf(stderr, "Pool %u has invalid pg_size, skipping pool\n", pool_id);
|
fprintf(stderr, "Pool %u has invalid pg_size, skipping pool\n", pool_id);
|
||||||
|
@ -701,7 +701,7 @@ void etcd_state_client_t::parse_state(const etcd_kv_t & kv)
|
||||||
}
|
}
|
||||||
pc.parity_chunks = 1;
|
pc.parity_chunks = 1;
|
||||||
}
|
}
|
||||||
if (pc.scheme == POOL_SCHEME_JERASURE &&
|
if (pc.scheme == POOL_SCHEME_EC &&
|
||||||
(pc.parity_chunks < 1 || pc.parity_chunks > pc.pg_size-2))
|
(pc.parity_chunks < 1 || pc.parity_chunks > pc.pg_size-2))
|
||||||
{
|
{
|
||||||
fprintf(stderr, "Pool %u has invalid parity_chunks (must be between 1 and pg_size-2), skipping pool\n", pool_id);
|
fprintf(stderr, "Pool %u has invalid parity_chunks (must be between 1 and pg_size-2), skipping pool\n", pool_id);
|
||||||
|
@ -710,7 +710,7 @@ void etcd_state_client_t::parse_state(const etcd_kv_t & kv)
|
||||||
// PG MinSize
|
// PG MinSize
|
||||||
pc.pg_minsize = pool_item.second["pg_minsize"].uint64_value();
|
pc.pg_minsize = pool_item.second["pg_minsize"].uint64_value();
|
||||||
if (pc.pg_minsize < 1 || pc.pg_minsize > pc.pg_size ||
|
if (pc.pg_minsize < 1 || pc.pg_minsize > pc.pg_size ||
|
||||||
(pc.scheme == POOL_SCHEME_XOR || pc.scheme == POOL_SCHEME_JERASURE) &&
|
(pc.scheme == POOL_SCHEME_XOR || pc.scheme == POOL_SCHEME_EC) &&
|
||||||
pc.pg_minsize < (pc.pg_size-pc.parity_chunks))
|
pc.pg_minsize < (pc.pg_size-pc.parity_chunks))
|
||||||
{
|
{
|
||||||
fprintf(stderr, "Pool %u has invalid pg_minsize, skipping pool\n", pool_id);
|
fprintf(stderr, "Pool %u has invalid pg_minsize, skipping pool\n", pool_id);
|
||||||
|
|
|
@ -676,9 +676,9 @@ void osd_t::apply_pg_config()
|
||||||
.all_peers = std::vector<osd_num_t>(all_peers.begin(), all_peers.end()),
|
.all_peers = std::vector<osd_num_t>(all_peers.begin(), all_peers.end()),
|
||||||
.target_set = pg_cfg.target_set,
|
.target_set = pg_cfg.target_set,
|
||||||
};
|
};
|
||||||
if (pg.scheme == POOL_SCHEME_JERASURE)
|
if (pg.scheme == POOL_SCHEME_EC)
|
||||||
{
|
{
|
||||||
use_jerasure(pg.pg_size, pg.pg_data_size, true);
|
use_ec(pg.pg_size, pg.pg_data_size, true);
|
||||||
}
|
}
|
||||||
this->pg_state_dirty.insert({ .pool_id = pool_id, .pg_num = pg_num });
|
this->pg_state_dirty.insert({ .pool_id = pool_id, .pg_num = pg_num });
|
||||||
pg.print_state();
|
pg.print_state();
|
||||||
|
@ -890,9 +890,9 @@ void osd_t::report_pg_states()
|
||||||
{
|
{
|
||||||
// Forget offline PGs after reporting their state
|
// Forget offline PGs after reporting their state
|
||||||
// (if the state wasn't changed again)
|
// (if the state wasn't changed again)
|
||||||
if (pg_it->second.scheme == POOL_SCHEME_JERASURE)
|
if (pg_it->second.scheme == POOL_SCHEME_EC)
|
||||||
{
|
{
|
||||||
use_jerasure(pg_it->second.pg_size, pg_it->second.pg_data_size, false);
|
use_ec(pg_it->second.pg_size, pg_it->second.pg_data_size, false);
|
||||||
}
|
}
|
||||||
this->pgs.erase(pg_it);
|
this->pgs.erase(pg_it);
|
||||||
}
|
}
|
||||||
|
|
|
@ -5,7 +5,7 @@
|
||||||
|
|
||||||
#define POOL_SCHEME_REPLICATED 1
|
#define POOL_SCHEME_REPLICATED 1
|
||||||
#define POOL_SCHEME_XOR 2
|
#define POOL_SCHEME_XOR 2
|
||||||
#define POOL_SCHEME_JERASURE 3
|
#define POOL_SCHEME_EC 3
|
||||||
#define POOL_ID_MAX 0x10000
|
#define POOL_ID_MAX 0x10000
|
||||||
#define POOL_ID_BITS 16
|
#define POOL_ID_BITS 16
|
||||||
#define INODE_POOL(inode) (pool_id_t)((inode) >> (64 - POOL_ID_BITS))
|
#define INODE_POOL(inode) (pool_id_t)((inode) >> (64 - POOL_ID_BITS))
|
||||||
|
|
|
@ -241,9 +241,9 @@ resume_2:
|
||||||
{
|
{
|
||||||
reconstruct_stripes_xor(stripes, op_data->pg_size, clean_entry_bitmap_size);
|
reconstruct_stripes_xor(stripes, op_data->pg_size, clean_entry_bitmap_size);
|
||||||
}
|
}
|
||||||
else if (op_data->scheme == POOL_SCHEME_JERASURE)
|
else if (op_data->scheme == POOL_SCHEME_EC)
|
||||||
{
|
{
|
||||||
reconstruct_stripes_jerasure(stripes, op_data->pg_size, op_data->pg_data_size, clean_entry_bitmap_size);
|
reconstruct_stripes_ec(stripes, op_data->pg_size, op_data->pg_data_size, clean_entry_bitmap_size);
|
||||||
}
|
}
|
||||||
cur_op->iov.push_back(op_data->stripes[0].bmp_buf, cur_op->reply.rw.bitmap_len);
|
cur_op->iov.push_back(op_data->stripes[0].bmp_buf, cur_op->reply.rw.bitmap_len);
|
||||||
for (int role = 0; role < op_data->pg_size; role++)
|
for (int role = 0; role < op_data->pg_size; role++)
|
||||||
|
|
|
@ -110,9 +110,9 @@ resume_1:
|
||||||
{
|
{
|
||||||
reconstruct_stripes_xor(local_stripes, pg.pg_size, clean_entry_bitmap_size);
|
reconstruct_stripes_xor(local_stripes, pg.pg_size, clean_entry_bitmap_size);
|
||||||
}
|
}
|
||||||
else if (pg.scheme == POOL_SCHEME_JERASURE)
|
else if (pg.scheme == POOL_SCHEME_EC)
|
||||||
{
|
{
|
||||||
reconstruct_stripes_jerasure(local_stripes, pg.pg_size, pg.pg_data_size, clean_entry_bitmap_size);
|
reconstruct_stripes_ec(local_stripes, pg.pg_size, pg.pg_data_size, clean_entry_bitmap_size);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -506,9 +506,9 @@ void osd_t::send_chained_read_results(pg_t & pg, osd_op_t *cur_op)
|
||||||
{
|
{
|
||||||
reconstruct_stripes_xor(stripes, pg.pg_size, clean_entry_bitmap_size);
|
reconstruct_stripes_xor(stripes, pg.pg_size, clean_entry_bitmap_size);
|
||||||
}
|
}
|
||||||
else if (op_data->scheme == POOL_SCHEME_JERASURE)
|
else if (op_data->scheme == POOL_SCHEME_EC)
|
||||||
{
|
{
|
||||||
reconstruct_stripes_jerasure(stripes, pg.pg_size, pg.pg_data_size, clean_entry_bitmap_size);
|
reconstruct_stripes_ec(stripes, pg.pg_size, pg.pg_data_size, clean_entry_bitmap_size);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -132,9 +132,9 @@ resume_3:
|
||||||
{
|
{
|
||||||
calc_rmw_parity_xor(op_data->stripes, pg.pg_size, op_data->prev_set, pg.cur_set.data(), bs_block_size, clean_entry_bitmap_size);
|
calc_rmw_parity_xor(op_data->stripes, pg.pg_size, op_data->prev_set, pg.cur_set.data(), bs_block_size, clean_entry_bitmap_size);
|
||||||
}
|
}
|
||||||
else if (pg.scheme == POOL_SCHEME_JERASURE)
|
else if (pg.scheme == POOL_SCHEME_EC)
|
||||||
{
|
{
|
||||||
calc_rmw_parity_jerasure(op_data->stripes, pg.pg_size, op_data->pg_data_size, op_data->prev_set, pg.cur_set.data(), bs_block_size, clean_entry_bitmap_size);
|
calc_rmw_parity_ec(op_data->stripes, pg.pg_size, op_data->pg_data_size, op_data->prev_set, pg.cur_set.data(), bs_block_size, clean_entry_bitmap_size);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Send writes
|
// Send writes
|
||||||
|
|
|
@ -4,7 +4,7 @@
|
||||||
#include <stdexcept>
|
#include <stdexcept>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
#include <jerasure/reed_sol.h>
|
#include <reed_sol.h>
|
||||||
#include <jerasure.h>
|
#include <jerasure.h>
|
||||||
#ifdef WITH_ISAL
|
#ifdef WITH_ISAL
|
||||||
#include <isa-l/erasure_code.h>
|
#include <isa-l/erasure_code.h>
|
||||||
|
@ -155,9 +155,9 @@ struct reed_sol_matrix_t
|
||||||
std::map<reed_sol_erased_t, void*> decodings;
|
std::map<reed_sol_erased_t, void*> decodings;
|
||||||
};
|
};
|
||||||
|
|
||||||
std::map<uint64_t, reed_sol_matrix_t> matrices;
|
static std::map<uint64_t, reed_sol_matrix_t> matrices;
|
||||||
|
|
||||||
void use_jerasure(int pg_size, int pg_minsize, bool use)
|
void use_ec(int pg_size, int pg_minsize, bool use)
|
||||||
{
|
{
|
||||||
uint64_t key = (uint64_t)pg_size | ((uint64_t)pg_minsize) << 32;
|
uint64_t key = (uint64_t)pg_size | ((uint64_t)pg_minsize) << 32;
|
||||||
auto rs_it = matrices.find(key);
|
auto rs_it = matrices.find(key);
|
||||||
|
@ -202,7 +202,7 @@ void use_jerasure(int pg_size, int pg_minsize, bool use)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
reed_sol_matrix_t* get_jerasure_matrix(int pg_size, int pg_minsize)
|
static reed_sol_matrix_t* get_ec_matrix(int pg_size, int pg_minsize)
|
||||||
{
|
{
|
||||||
uint64_t key = (uint64_t)pg_size | ((uint64_t)pg_minsize) << 32;
|
uint64_t key = (uint64_t)pg_size | ((uint64_t)pg_minsize) << 32;
|
||||||
auto rs_it = matrices.find(key);
|
auto rs_it = matrices.find(key);
|
||||||
|
@ -228,7 +228,7 @@ static void* get_jerasure_decoding_matrix(osd_rmw_stripe_t *stripes, int pg_size
|
||||||
edd++;
|
edd++;
|
||||||
if (edd == 0)
|
if (edd == 0)
|
||||||
return NULL;
|
return NULL;
|
||||||
reed_sol_matrix_t *matrix = get_jerasure_matrix(pg_size, pg_minsize);
|
reed_sol_matrix_t *matrix = get_ec_matrix(pg_size, pg_minsize);
|
||||||
auto dec_it = matrix->decodings.find((reed_sol_erased_t){ .data = erased, .size = pg_size });
|
auto dec_it = matrix->decodings.find((reed_sol_erased_t){ .data = erased, .size = pg_size });
|
||||||
if (dec_it == matrix->decodings.end())
|
if (dec_it == matrix->decodings.end())
|
||||||
{
|
{
|
||||||
|
@ -293,7 +293,7 @@ static void* get_jerasure_decoding_matrix(osd_rmw_stripe_t *stripes, int pg_size
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef WITH_ISAL
|
#ifdef WITH_ISAL
|
||||||
void reconstruct_stripes_jerasure(osd_rmw_stripe_t *stripes, int pg_size, int pg_minsize, uint32_t bitmap_size)
|
void reconstruct_stripes_ec(osd_rmw_stripe_t *stripes, int pg_size, int pg_minsize, uint32_t bitmap_size)
|
||||||
{
|
{
|
||||||
uint8_t *dectable = (uint8_t*)get_jerasure_decoding_matrix(stripes, pg_size, pg_minsize);
|
uint8_t *dectable = (uint8_t*)get_jerasure_decoding_matrix(stripes, pg_size, pg_minsize);
|
||||||
if (!dectable)
|
if (!dectable)
|
||||||
|
@ -342,7 +342,7 @@ void reconstruct_stripes_jerasure(osd_rmw_stripe_t *stripes, int pg_size, int pg
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
void reconstruct_stripes_jerasure(osd_rmw_stripe_t *stripes, int pg_size, int pg_minsize, uint32_t bitmap_size)
|
void reconstruct_stripes_ec(osd_rmw_stripe_t *stripes, int pg_size, int pg_minsize, uint32_t bitmap_size)
|
||||||
{
|
{
|
||||||
int *dm_ids = (int*)get_jerasure_decoding_matrix(stripes, pg_size, pg_minsize);
|
int *dm_ids = (int*)get_jerasure_decoding_matrix(stripes, pg_size, pg_minsize);
|
||||||
if (!dm_ids)
|
if (!dm_ids)
|
||||||
|
@ -792,12 +792,12 @@ void calc_rmw_parity_xor(osd_rmw_stripe_t *stripes, int pg_size, uint64_t *read_
|
||||||
calc_rmw_parity_copy_parity(stripes, pg_size, pg_minsize, read_osd_set, write_osd_set, chunk_size, start, end);
|
calc_rmw_parity_copy_parity(stripes, pg_size, pg_minsize, read_osd_set, write_osd_set, chunk_size, start, end);
|
||||||
}
|
}
|
||||||
|
|
||||||
void calc_rmw_parity_jerasure(osd_rmw_stripe_t *stripes, int pg_size, int pg_minsize,
|
void calc_rmw_parity_ec(osd_rmw_stripe_t *stripes, int pg_size, int pg_minsize,
|
||||||
uint64_t *read_osd_set, uint64_t *write_osd_set, uint32_t chunk_size, uint32_t bitmap_size)
|
uint64_t *read_osd_set, uint64_t *write_osd_set, uint32_t chunk_size, uint32_t bitmap_size)
|
||||||
{
|
{
|
||||||
uint32_t bitmap_granularity = bitmap_size > 0 ? chunk_size / bitmap_size / 8 : 0;
|
uint32_t bitmap_granularity = bitmap_size > 0 ? chunk_size / bitmap_size / 8 : 0;
|
||||||
reed_sol_matrix_t *matrix = get_jerasure_matrix(pg_size, pg_minsize);
|
reed_sol_matrix_t *matrix = get_ec_matrix(pg_size, pg_minsize);
|
||||||
reconstruct_stripes_jerasure(stripes, pg_size, pg_minsize, bitmap_size);
|
reconstruct_stripes_ec(stripes, pg_size, pg_minsize, bitmap_size);
|
||||||
uint32_t start = 0, end = 0;
|
uint32_t start = 0, end = 0;
|
||||||
calc_rmw_parity_copy_mod(stripes, pg_size, pg_minsize, read_osd_set, write_osd_set, chunk_size, bitmap_granularity, start, end);
|
calc_rmw_parity_copy_mod(stripes, pg_size, pg_minsize, read_osd_set, write_osd_set, chunk_size, bitmap_granularity, start, end);
|
||||||
if (end != 0)
|
if (end != 0)
|
||||||
|
|
|
@ -44,9 +44,9 @@ void* calc_rmw(void *request_buf, osd_rmw_stripe_t *stripes, uint64_t *read_osd_
|
||||||
void calc_rmw_parity_xor(osd_rmw_stripe_t *stripes, int pg_size, uint64_t *read_osd_set, uint64_t *write_osd_set,
|
void calc_rmw_parity_xor(osd_rmw_stripe_t *stripes, int pg_size, uint64_t *read_osd_set, uint64_t *write_osd_set,
|
||||||
uint32_t chunk_size, uint32_t bitmap_size);
|
uint32_t chunk_size, uint32_t bitmap_size);
|
||||||
|
|
||||||
void use_jerasure(int pg_size, int pg_minsize, bool use);
|
void use_ec(int pg_size, int pg_minsize, bool use);
|
||||||
|
|
||||||
void reconstruct_stripes_jerasure(osd_rmw_stripe_t *stripes, int pg_size, int pg_minsize, uint32_t bitmap_size);
|
void reconstruct_stripes_ec(osd_rmw_stripe_t *stripes, int pg_size, int pg_minsize, uint32_t bitmap_size);
|
||||||
|
|
||||||
void calc_rmw_parity_jerasure(osd_rmw_stripe_t *stripes, int pg_size, int pg_minsize,
|
void calc_rmw_parity_ec(osd_rmw_stripe_t *stripes, int pg_size, int pg_minsize,
|
||||||
uint64_t *read_osd_set, uint64_t *write_osd_set, uint32_t chunk_size, uint32_t bitmap_size);
|
uint64_t *read_osd_set, uint64_t *write_osd_set, uint32_t chunk_size, uint32_t bitmap_size);
|
||||||
|
|
|
@ -587,14 +587,14 @@ void test12()
|
||||||
input buffer: [ write0, write1 ],
|
input buffer: [ write0, write1 ],
|
||||||
rmw buffer: [ write2, write3, read0, read1 ],
|
rmw buffer: [ write2, write3, read0, read1 ],
|
||||||
}
|
}
|
||||||
then, after calc_rmw_parity_jerasure(): all the same
|
then, after calc_rmw_parity_ec(): all the same
|
||||||
then simulate read with read_osd_set=[0,0,3,4] and check read0,read1 buffers
|
then simulate read with read_osd_set=[0,0,3,4] and check read0,read1 buffers
|
||||||
|
|
||||||
***/
|
***/
|
||||||
|
|
||||||
void test13()
|
void test13()
|
||||||
{
|
{
|
||||||
use_jerasure(4, 2, true);
|
use_ec(4, 2, true);
|
||||||
osd_num_t osd_set[4] = { 1, 2, 0, 0 };
|
osd_num_t osd_set[4] = { 1, 2, 0, 0 };
|
||||||
osd_num_t write_osd_set[4] = { 1, 2, 3, 4 };
|
osd_num_t write_osd_set[4] = { 1, 2, 3, 4 };
|
||||||
osd_rmw_stripe_t stripes[4] = {};
|
osd_rmw_stripe_t stripes[4] = {};
|
||||||
|
@ -628,7 +628,7 @@ void test13()
|
||||||
set_pattern(write_buf, 8192, PATTERN3);
|
set_pattern(write_buf, 8192, PATTERN3);
|
||||||
set_pattern(stripes[0].read_buf, 128*1024-4096, PATTERN1);
|
set_pattern(stripes[0].read_buf, 128*1024-4096, PATTERN1);
|
||||||
set_pattern(stripes[1].read_buf, 128*1024-4096, PATTERN2);
|
set_pattern(stripes[1].read_buf, 128*1024-4096, PATTERN2);
|
||||||
calc_rmw_parity_jerasure(stripes, 4, 2, osd_set, write_osd_set, 128*1024, 0);
|
calc_rmw_parity_ec(stripes, 4, 2, osd_set, write_osd_set, 128*1024, 0);
|
||||||
assert(stripes[0].write_start == 128*1024-4096 && stripes[0].write_end == 128*1024);
|
assert(stripes[0].write_start == 128*1024-4096 && stripes[0].write_end == 128*1024);
|
||||||
assert(stripes[1].write_start == 0 && stripes[1].write_end == 4096);
|
assert(stripes[1].write_start == 0 && stripes[1].write_end == 4096);
|
||||||
assert(stripes[2].write_start == 0 && stripes[2].write_end == 128*1024);
|
assert(stripes[2].write_start == 0 && stripes[2].write_end == 128*1024);
|
||||||
|
@ -663,7 +663,7 @@ void test13()
|
||||||
assert(stripes[3].read_buf == (uint8_t*)read_buf+3*128*1024);
|
assert(stripes[3].read_buf == (uint8_t*)read_buf+3*128*1024);
|
||||||
memcpy((uint8_t*)read_buf+2*128*1024, rmw_buf, 128*1024);
|
memcpy((uint8_t*)read_buf+2*128*1024, rmw_buf, 128*1024);
|
||||||
memcpy((uint8_t*)read_buf+3*128*1024, (uint8_t*)rmw_buf+128*1024, 128*1024);
|
memcpy((uint8_t*)read_buf+3*128*1024, (uint8_t*)rmw_buf+128*1024, 128*1024);
|
||||||
reconstruct_stripes_jerasure(stripes, 4, 2, 0);
|
reconstruct_stripes_ec(stripes, 4, 2, 0);
|
||||||
check_pattern(stripes[0].read_buf, 128*1024-4096, PATTERN1);
|
check_pattern(stripes[0].read_buf, 128*1024-4096, PATTERN1);
|
||||||
check_pattern(stripes[0].read_buf+128*1024-4096, 4096, PATTERN3);
|
check_pattern(stripes[0].read_buf+128*1024-4096, 4096, PATTERN3);
|
||||||
check_pattern(stripes[1].read_buf, 4096, PATTERN3);
|
check_pattern(stripes[1].read_buf, 4096, PATTERN3);
|
||||||
|
@ -694,14 +694,14 @@ void test13()
|
||||||
assert(stripes[3].read_buf == (uint8_t*)read_buf+2*128*1024);
|
assert(stripes[3].read_buf == (uint8_t*)read_buf+2*128*1024);
|
||||||
memcpy((uint8_t*)read_buf+128*1024, rmw_buf, 128*1024);
|
memcpy((uint8_t*)read_buf+128*1024, rmw_buf, 128*1024);
|
||||||
memcpy((uint8_t*)read_buf+2*128*1024, (uint8_t*)rmw_buf+128*1024, 128*1024);
|
memcpy((uint8_t*)read_buf+2*128*1024, (uint8_t*)rmw_buf+128*1024, 128*1024);
|
||||||
reconstruct_stripes_jerasure(stripes, 4, 2, 0);
|
reconstruct_stripes_ec(stripes, 4, 2, 0);
|
||||||
check_pattern(stripes[0].read_buf, 128*1024-4096, PATTERN1);
|
check_pattern(stripes[0].read_buf, 128*1024-4096, PATTERN1);
|
||||||
check_pattern(stripes[0].read_buf+128*1024-4096, 4096, PATTERN3);
|
check_pattern(stripes[0].read_buf+128*1024-4096, 4096, PATTERN3);
|
||||||
free(read_buf);
|
free(read_buf);
|
||||||
// Huh done
|
// Huh done
|
||||||
free(rmw_buf);
|
free(rmw_buf);
|
||||||
free(write_buf);
|
free(write_buf);
|
||||||
use_jerasure(4, 2, false);
|
use_ec(4, 2, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
/***
|
/***
|
||||||
|
@ -714,7 +714,7 @@ void test13()
|
||||||
input buffer: [ write0, write1 ],
|
input buffer: [ write0, write1 ],
|
||||||
rmw buffer: [ write2, read0, read1 ],
|
rmw buffer: [ write2, read0, read1 ],
|
||||||
}
|
}
|
||||||
then, after calc_rmw_parity_jerasure(): all the same
|
then, after calc_rmw_parity_ec(): all the same
|
||||||
then simulate read with read_osd_set=[0,2,3] and check read0 buffer
|
then simulate read with read_osd_set=[0,2,3] and check read0 buffer
|
||||||
|
|
||||||
***/
|
***/
|
||||||
|
@ -722,7 +722,7 @@ void test13()
|
||||||
void test14()
|
void test14()
|
||||||
{
|
{
|
||||||
const int bmp = 4;
|
const int bmp = 4;
|
||||||
use_jerasure(3, 2, true);
|
use_ec(3, 2, true);
|
||||||
osd_num_t osd_set[3] = { 1, 2, 0 };
|
osd_num_t osd_set[3] = { 1, 2, 0 };
|
||||||
osd_num_t write_osd_set[3] = { 1, 2, 3 };
|
osd_num_t write_osd_set[3] = { 1, 2, 3 };
|
||||||
osd_rmw_stripe_t stripes[3] = {};
|
osd_rmw_stripe_t stripes[3] = {};
|
||||||
|
@ -757,7 +757,7 @@ void test14()
|
||||||
memset(stripes[0].bmp_buf, 0, bmp);
|
memset(stripes[0].bmp_buf, 0, bmp);
|
||||||
memset(stripes[1].bmp_buf, 0, bmp);
|
memset(stripes[1].bmp_buf, 0, bmp);
|
||||||
memset(stripes[2].bmp_buf, 0, bmp);
|
memset(stripes[2].bmp_buf, 0, bmp);
|
||||||
calc_rmw_parity_jerasure(stripes, 3, 2, osd_set, write_osd_set, 128*1024, bmp);
|
calc_rmw_parity_ec(stripes, 3, 2, osd_set, write_osd_set, 128*1024, bmp);
|
||||||
assert(*(uint32_t*)stripes[0].bmp_buf == 0x80000000);
|
assert(*(uint32_t*)stripes[0].bmp_buf == 0x80000000);
|
||||||
assert(*(uint32_t*)stripes[1].bmp_buf == 0x00000001);
|
assert(*(uint32_t*)stripes[1].bmp_buf == 0x00000001);
|
||||||
assert(*(uint32_t*)stripes[2].bmp_buf == 0x80000001); // jerasure 2+1 is still just XOR
|
assert(*(uint32_t*)stripes[2].bmp_buf == 0x80000001); // jerasure 2+1 is still just XOR
|
||||||
|
@ -793,12 +793,12 @@ void test14()
|
||||||
set_pattern(stripes[1].read_buf, 4096, PATTERN3);
|
set_pattern(stripes[1].read_buf, 4096, PATTERN3);
|
||||||
set_pattern(stripes[1].read_buf+4096, 128*1024-4096, PATTERN2);
|
set_pattern(stripes[1].read_buf+4096, 128*1024-4096, PATTERN2);
|
||||||
memcpy(stripes[2].read_buf, rmw_buf, 128*1024);
|
memcpy(stripes[2].read_buf, rmw_buf, 128*1024);
|
||||||
reconstruct_stripes_jerasure(stripes, 3, 2, bmp);
|
reconstruct_stripes_ec(stripes, 3, 2, bmp);
|
||||||
check_pattern(stripes[0].read_buf, 128*1024-4096, PATTERN1);
|
check_pattern(stripes[0].read_buf, 128*1024-4096, PATTERN1);
|
||||||
check_pattern(stripes[0].read_buf+128*1024-4096, 4096, PATTERN3);
|
check_pattern(stripes[0].read_buf+128*1024-4096, 4096, PATTERN3);
|
||||||
free(read_buf);
|
free(read_buf);
|
||||||
// Huh done
|
// Huh done
|
||||||
free(rmw_buf);
|
free(rmw_buf);
|
||||||
free(write_buf);
|
free(write_buf);
|
||||||
use_jerasure(3, 2, false);
|
use_ec(3, 2, false);
|
||||||
}
|
}
|
||||||
|
|
|
@ -46,7 +46,7 @@ if [ "$SCHEME" = "ec" ]; then
|
||||||
PG_SIZE=${PG_SIZE:-5}
|
PG_SIZE=${PG_SIZE:-5}
|
||||||
PG_MINSIZE=${PG_MINSIZE:-3}
|
PG_MINSIZE=${PG_MINSIZE:-3}
|
||||||
PG_DATA_SIZE=$PG_MINSIZE
|
PG_DATA_SIZE=$PG_MINSIZE
|
||||||
POOLCFG='"scheme":"jerasure","parity_chunks":'$((PG_SIZE-PG_MINSIZE))
|
POOLCFG='"scheme":"ec","parity_chunks":'$((PG_SIZE-PG_MINSIZE))
|
||||||
elif [ "$SCHEME" = "xor" ]; then
|
elif [ "$SCHEME" = "xor" ]; then
|
||||||
PG_SIZE=${PG_SIZE:-3}
|
PG_SIZE=${PG_SIZE:-3}
|
||||||
PG_MINSIZE=${PG_MINSIZE:-2}
|
PG_MINSIZE=${PG_MINSIZE:-2}
|
||||||
|
|
Loading…
Reference in New Issue