Move NBD netlink map&unmap to separate commands, add "netlink-revive" command
Test / buildenv (push) Successful in 11s
Details
Test / build (push) Successful in 3m31s
Details
Test / test_cas (push) Successful in 11s
Details
Test / make_test (push) Successful in 38s
Details
Test / test_change_pg_count (push) Successful in 36s
Details
Test / test_change_pg_size (push) Successful in 7s
Details
Test / test_change_pg_count_ec (push) Successful in 33s
Details
Test / test_create_nomaxid (push) Successful in 7s
Details
Test / test_etcd_fail (push) Successful in 1m27s
Details
Test / test_add_osd (push) Successful in 2m46s
Details
Test / test_interrupted_rebalance (push) Successful in 3m3s
Details
Test / test_interrupted_rebalance_imm (push) Successful in 3m7s
Details
Test / test_failure_domain (push) Successful in 11s
Details
Test / test_interrupted_rebalance_ec (push) Successful in 2m10s
Details
Test / test_snapshot (push) Successful in 42s
Details
Test / test_minsize_1 (push) Successful in 17s
Details
Test / test_snapshot_ec (push) Successful in 40s
Details
Test / test_rm (push) Successful in 16s
Details
Test / test_move_reappear (push) Successful in 22s
Details
Test / test_interrupted_rebalance_ec_imm (push) Successful in 2m19s
Details
Test / test_snapshot_down (push) Successful in 26s
Details
Test / test_snapshot_down_ec (push) Successful in 32s
Details
Test / test_splitbrain (push) Successful in 21s
Details
Test / test_snapshot_chain (push) Successful in 2m57s
Details
Test / test_snapshot_chain_ec (push) Successful in 3m18s
Details
Test / test_rebalance_verify_imm (push) Successful in 3m40s
Details
Test / test_rebalance_verify (push) Successful in 4m19s
Details
Test / test_switch_primary (push) Successful in 33s
Details
Test / test_write (push) Successful in 53s
Details
Test / test_write_xor (push) Successful in 58s
Details
Test / test_write_no_same (push) Successful in 13s
Details
Test / test_rebalance_verify_ec_imm (push) Successful in 4m29s
Details
Test / test_rebalance_verify_ec (push) Successful in 5m12s
Details
Test / test_heal_pg_size_2 (push) Successful in 3m50s
Details
Test / test_heal_ec (push) Successful in 3m46s
Details
Test / test_heal_csum_32k_dmj (push) Successful in 6m12s
Details
Test / test_heal_csum_32k_dj (push) Successful in 6m40s
Details
Test / test_heal_csum_32k (push) Successful in 6m52s
Details
Test / test_heal_csum_4k_dmj (push) Successful in 6m51s
Details
Test / test_enospc (push) Successful in 1m42s
Details
Test / test_enospc_xor (push) Successful in 2m23s
Details
Test / test_enospc_imm (push) Successful in 1m42s
Details
Test / test_heal_csum_4k_dj (push) Successful in 6m12s
Details
Test / test_heal_csum_4k (push) Successful in 5m40s
Details
Test / test_enospc_imm_xor (push) Successful in 1m26s
Details
Test / test_scrub_zero_osd_2 (push) Successful in 32s
Details
Test / test_scrub (push) Successful in 35s
Details
Test / test_scrub_xor (push) Successful in 27s
Details
Test / test_nfs (push) Successful in 23s
Details
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 32s
Details
Test / test_scrub_ec (push) Successful in 30s
Details
Test / test_scrub_pg_size_3 (push) Successful in 43s
Details
Test / buildenv (push) Successful in 11s
Details
Test / build (push) Successful in 3m31s
Details
Test / test_cas (push) Successful in 11s
Details
Test / make_test (push) Successful in 38s
Details
Test / test_change_pg_count (push) Successful in 36s
Details
Test / test_change_pg_size (push) Successful in 7s
Details
Test / test_change_pg_count_ec (push) Successful in 33s
Details
Test / test_create_nomaxid (push) Successful in 7s
Details
Test / test_etcd_fail (push) Successful in 1m27s
Details
Test / test_add_osd (push) Successful in 2m46s
Details
Test / test_interrupted_rebalance (push) Successful in 3m3s
Details
Test / test_interrupted_rebalance_imm (push) Successful in 3m7s
Details
Test / test_failure_domain (push) Successful in 11s
Details
Test / test_interrupted_rebalance_ec (push) Successful in 2m10s
Details
Test / test_snapshot (push) Successful in 42s
Details
Test / test_minsize_1 (push) Successful in 17s
Details
Test / test_snapshot_ec (push) Successful in 40s
Details
Test / test_rm (push) Successful in 16s
Details
Test / test_move_reappear (push) Successful in 22s
Details
Test / test_interrupted_rebalance_ec_imm (push) Successful in 2m19s
Details
Test / test_snapshot_down (push) Successful in 26s
Details
Test / test_snapshot_down_ec (push) Successful in 32s
Details
Test / test_splitbrain (push) Successful in 21s
Details
Test / test_snapshot_chain (push) Successful in 2m57s
Details
Test / test_snapshot_chain_ec (push) Successful in 3m18s
Details
Test / test_rebalance_verify_imm (push) Successful in 3m40s
Details
Test / test_rebalance_verify (push) Successful in 4m19s
Details
Test / test_switch_primary (push) Successful in 33s
Details
Test / test_write (push) Successful in 53s
Details
Test / test_write_xor (push) Successful in 58s
Details
Test / test_write_no_same (push) Successful in 13s
Details
Test / test_rebalance_verify_ec_imm (push) Successful in 4m29s
Details
Test / test_rebalance_verify_ec (push) Successful in 5m12s
Details
Test / test_heal_pg_size_2 (push) Successful in 3m50s
Details
Test / test_heal_ec (push) Successful in 3m46s
Details
Test / test_heal_csum_32k_dmj (push) Successful in 6m12s
Details
Test / test_heal_csum_32k_dj (push) Successful in 6m40s
Details
Test / test_heal_csum_32k (push) Successful in 6m52s
Details
Test / test_heal_csum_4k_dmj (push) Successful in 6m51s
Details
Test / test_enospc (push) Successful in 1m42s
Details
Test / test_enospc_xor (push) Successful in 2m23s
Details
Test / test_enospc_imm (push) Successful in 1m42s
Details
Test / test_heal_csum_4k_dj (push) Successful in 6m12s
Details
Test / test_heal_csum_4k (push) Successful in 5m40s
Details
Test / test_enospc_imm_xor (push) Successful in 1m26s
Details
Test / test_scrub_zero_osd_2 (push) Successful in 32s
Details
Test / test_scrub (push) Successful in 35s
Details
Test / test_scrub_xor (push) Successful in 27s
Details
Test / test_nfs (push) Successful in 23s
Details
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 32s
Details
Test / test_scrub_ec (push) Successful in 30s
Details
Test / test_scrub_pg_size_3 (push) Successful in 43s
Details
parent
b7a3275af3
commit
af9a853db6
|
@ -15,12 +15,21 @@ See also [VDUSE](qemu.en.md#vduse) as a better alternative to NBD.
|
||||||
|
|
||||||
Vitastor Kubernetes CSI driver uses NBD when VDUSE is unavailable.
|
Vitastor Kubernetes CSI driver uses NBD when VDUSE is unavailable.
|
||||||
|
|
||||||
## Map image
|
Supports the following commands:
|
||||||
|
|
||||||
|
- [map](#map)
|
||||||
|
- [unmap](#unmap)
|
||||||
|
- [ls](#ls)
|
||||||
|
- [netlink-map](#netlink-map)
|
||||||
|
- [netlink-unmap](#netlink-unmap)
|
||||||
|
- [netlink-revive](#netlink-revive)
|
||||||
|
|
||||||
|
## map
|
||||||
|
|
||||||
To create a local block device for a Vitastor image run:
|
To create a local block device for a Vitastor image run:
|
||||||
|
|
||||||
```
|
```
|
||||||
vitastor-nbd map --image testimg
|
vitastor-nbd map [/dev/nbdN] --image testimg
|
||||||
```
|
```
|
||||||
|
|
||||||
It will output a block device name like /dev/nbd0 which you can then use as a normal disk.
|
It will output a block device name like /dev/nbd0 which you can then use as a normal disk.
|
||||||
|
@ -29,25 +38,25 @@ You can also use `--pool <POOL> --inode <INODE> --size <SIZE>` instead of `--ima
|
||||||
|
|
||||||
vitastor-nbd supports all usual Vitastor configuration options like `--config_file <path_to_config>` plus NBD-specific:
|
vitastor-nbd supports all usual Vitastor configuration options like `--config_file <path_to_config>` plus NBD-specific:
|
||||||
|
|
||||||
* `--nbd_timeout 300` \
|
* `--nbd_timeout 0` \
|
||||||
Timeout for I/O operations in seconds after exceeding which the kernel stops
|
Timeout for I/O operations in seconds after exceeding which the kernel stops the device.
|
||||||
the device. You can set it to 0 to disable the timeout, but beware that you
|
Before Linux 5.19, if nbd_timeout is 0, a dead NBD device can't be removed from
|
||||||
won't be able to stop the device at all if vitastor-nbd process dies.
|
the system at all without rebooting.
|
||||||
* `--nbd_max_devices 64 --nbd_max_part 3` \
|
* `--nbd_max_devices 64 --nbd_max_part 3` \
|
||||||
Options for the `nbd` kernel module when modprobing it (`nbds_max` and `max_part`).
|
Options for the `nbd` kernel module when modprobing it (`nbds_max` and `max_part`).
|
||||||
note that maximum allowed (nbds_max)*(1+max_part) is 256.
|
|
||||||
* `--logfile /path/to/log/file.txt` \
|
* `--logfile /path/to/log/file.txt` \
|
||||||
Write log messages to the specified file instead of dropping them (in background mode)
|
Write log messages to the specified file instead of dropping them (in background mode)
|
||||||
or printing them to the standard output (in foreground mode).
|
or printing them to the standard output (in foreground mode).
|
||||||
* `--dev_num N` \
|
* `--dev_num N` \
|
||||||
Use the specified device /dev/nbdN instead of automatic selection.
|
Use the specified device /dev/nbdN instead of automatic selection (alternative syntax
|
||||||
|
to /dev/nbdN positional parameter).
|
||||||
* `--foreground 1` \
|
* `--foreground 1` \
|
||||||
Stay in foreground, do not daemonize.
|
Stay in foreground, do not daemonize.
|
||||||
|
|
||||||
Note that `nbd_timeout`, `nbd_max_devices` and `nbd_max_part` options may also be specified
|
Note that `nbd_timeout`, `nbd_max_devices` and `nbd_max_part` options may also be specified
|
||||||
in `/etc/vitastor/vitastor.conf` or in other configuration file specified with `--config_file`.
|
in `/etc/vitastor/vitastor.conf` or in other configuration file specified with `--config_file`.
|
||||||
|
|
||||||
## Unmap image
|
## unmap
|
||||||
|
|
||||||
To unmap the device run:
|
To unmap the device run:
|
||||||
|
|
||||||
|
@ -55,12 +64,14 @@ To unmap the device run:
|
||||||
vitastor-nbd unmap /dev/nbd0
|
vitastor-nbd unmap /dev/nbd0
|
||||||
```
|
```
|
||||||
|
|
||||||
## List mapped images
|
## ls
|
||||||
|
|
||||||
```
|
```
|
||||||
vitastor-nbd ls [--json]
|
vitastor-nbd ls [--json]
|
||||||
```
|
```
|
||||||
|
|
||||||
|
List mapped images.
|
||||||
|
|
||||||
Example output (normal format):
|
Example output (normal format):
|
||||||
|
|
||||||
```
|
```
|
||||||
|
@ -78,3 +89,45 @@ Example output (JSON format):
|
||||||
```
|
```
|
||||||
{"/dev/nbd0": {"image": "bench", "pid": 584536}, "/dev/nbd1": {"image": "bench1", "pid": 584546}}
|
{"/dev/nbd0": {"image": "bench", "pid": 584536}, "/dev/nbd1": {"image": "bench1", "pid": 584546}}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## netlink-map
|
||||||
|
|
||||||
|
```
|
||||||
|
vitastor-nbd netlink-map [/dev/nbdN] (--image <image> | --pool <pool> --inode <inode> --size <size in bytes>)
|
||||||
|
```
|
||||||
|
|
||||||
|
On recent kernel versions it's also possible to map NBD devices using netlink interface.
|
||||||
|
|
||||||
|
This is an experimental feature because it doesn't solve all issues of NBD. Differences from regular ioctl-based 'map':
|
||||||
|
|
||||||
|
1. netlink-map can create new `/dev/nbdN` devices (those not present in /dev/).
|
||||||
|
2. netlink-mapped devices can be unmapped only using `netlink-unmap` command.
|
||||||
|
3. netlink-mapped devices don't show up in `ls` output (yet).
|
||||||
|
4. Dead netlink-mapped devices can be 'revived' using `netlink-revive`.
|
||||||
|
However, old I/O requests will hang forever if `nbd_timeout` is not specified.
|
||||||
|
5. netlink-map supports additional options:
|
||||||
|
|
||||||
|
* `--nbd_conn_timeout 0` \
|
||||||
|
Disconnect a dead device automatically after this number of seconds.
|
||||||
|
* `--nbd_destroy_on_disconnect 1` \
|
||||||
|
Delete the nbd device on disconnect.
|
||||||
|
* `--nbd_disconnect_on_close 1` \
|
||||||
|
Disconnect the nbd device on close by last opener.
|
||||||
|
* `--nbd_ro 1` \
|
||||||
|
Set device into read only mode.
|
||||||
|
|
||||||
|
## netlink-unmap
|
||||||
|
|
||||||
|
```
|
||||||
|
vitastor-nbd netlink-unmap /dev/nbdN
|
||||||
|
```
|
||||||
|
|
||||||
|
Unmap a device using netlink interface. Works with both netlink and ioctl mapped devices.
|
||||||
|
|
||||||
|
## netlink-revive
|
||||||
|
|
||||||
|
```
|
||||||
|
vitastor-nbd netlink-revive /dev/nbdX (--image <image> | --pool <pool> --inode <inode> --size <size in bytes>)
|
||||||
|
```
|
||||||
|
|
||||||
|
Restart a dead NBD netlink-mapped device without removing it. Supports the same options as `netlink-map`.
|
||||||
|
|
|
@ -18,12 +18,21 @@ NBD немного снижает производительность из-за
|
||||||
|
|
||||||
CSI-драйвер Kubernetes Vitastor использует NBD, когда VDUSE недоступен.
|
CSI-драйвер Kubernetes Vitastor использует NBD, когда VDUSE недоступен.
|
||||||
|
|
||||||
## Подключить устройство
|
Поддерживаются следующие команды:
|
||||||
|
|
||||||
|
- [map](#map)
|
||||||
|
- [unmap](#unmap)
|
||||||
|
- [ls](#ls)
|
||||||
|
- [netlink-map](#netlink-map)
|
||||||
|
- [netlink-unmap](#netlink-unmap)
|
||||||
|
- [netlink-revive](#netlink-revive)
|
||||||
|
|
||||||
|
## map
|
||||||
|
|
||||||
Чтобы создать локальное блочное устройство для образа, выполните команду:
|
Чтобы создать локальное блочное устройство для образа, выполните команду:
|
||||||
|
|
||||||
```
|
```
|
||||||
vitastor-nbd map --image testimg
|
vitastor-nbd map [/dev/nbdN] --image testimg
|
||||||
```
|
```
|
||||||
|
|
||||||
Команда напечатает название блочного устройства вида /dev/nbd0, которое потом можно
|
Команда напечатает название блочного устройства вида /dev/nbd0, которое потом можно
|
||||||
|
@ -35,16 +44,13 @@ vitastor-nbd map --image testimg
|
||||||
vitastor-nbd поддерживает все обычные опции Vitastor, например, `--config_file <path_to_config>`,
|
vitastor-nbd поддерживает все обычные опции Vitastor, например, `--config_file <path_to_config>`,
|
||||||
плюс специфичные для NBD:
|
плюс специфичные для NBD:
|
||||||
|
|
||||||
* `--nbd_timeout 30` \
|
* `--nbd_timeout 0` \
|
||||||
Максимальное время выполнения любой операции чтения/записи в секундах, при
|
Максимальное время выполнения любой операции чтения/записи в секундах, при
|
||||||
превышении которого ядро остановит NBD-устройство. Вы можете установить опцию
|
превышении которого ядро остановит NBD-устройство. На ядрах Linux старее 5.19,
|
||||||
в 0, чтобы отключить ограничение времени, но имейте в виду, что в этом случае
|
если таймаут установлен в 0, NBD-устройство вообще невозможно отключить из системы
|
||||||
вы вообще не сможете отключить NBD-устройство при нештатном завершении процесса
|
при нештатном завершении процесса.
|
||||||
vitastor-nbd.
|
|
||||||
* `--nbd_max_devices 64 --nbd_max_part 3` \
|
* `--nbd_max_devices 64 --nbd_max_part 3` \
|
||||||
Опции, передаваемые модулю ядра nbd, если его загружает vitastor-nbd
|
Опции, передаваемые модулю ядра nbd, если его загружает vitastor-nbd (`nbds_max` и `max_part`).
|
||||||
(`nbds_max` и `max_part`). Имейте в виду, что (nbds_max)*(1+max_part)
|
|
||||||
обычно не должно превышать 256.
|
|
||||||
* `--logfile /path/to/log/file.txt` \
|
* `--logfile /path/to/log/file.txt` \
|
||||||
Писать сообщения о процессе работы в заданный файл, вместо пропуска их
|
Писать сообщения о процессе работы в заданный файл, вместо пропуска их
|
||||||
при фоновом режиме запуска или печати на стандартный вывод при запуске
|
при фоновом режиме запуска или печати на стандартный вывод при запуске
|
||||||
|
@ -58,7 +64,7 @@ vitastor-nbd поддерживает все обычные опции Vitastor,
|
||||||
также задавать в `/etc/vitastor/vitastor.conf` или в другом файле конфигурации,
|
также задавать в `/etc/vitastor/vitastor.conf` или в другом файле конфигурации,
|
||||||
заданном опцией `--config_file`.
|
заданном опцией `--config_file`.
|
||||||
|
|
||||||
## Отключить устройство
|
## unmap
|
||||||
|
|
||||||
Для отключения устройства выполните:
|
Для отключения устройства выполните:
|
||||||
|
|
||||||
|
@ -66,12 +72,14 @@ vitastor-nbd поддерживает все обычные опции Vitastor,
|
||||||
vitastor-nbd unmap /dev/nbd0
|
vitastor-nbd unmap /dev/nbd0
|
||||||
```
|
```
|
||||||
|
|
||||||
## Вывести подключённые устройства
|
## ls
|
||||||
|
|
||||||
```
|
```
|
||||||
vitastor-nbd ls [--json]
|
vitastor-nbd ls [--json]
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Вывести подключённые устройства.
|
||||||
|
|
||||||
Пример вывода в обычном формате:
|
Пример вывода в обычном формате:
|
||||||
|
|
||||||
```
|
```
|
||||||
|
@ -89,3 +97,46 @@ pid: 584546
|
||||||
```
|
```
|
||||||
{"/dev/nbd0": {"image": "bench", "pid": 584536}, "/dev/nbd1": {"image": "bench1", "pid": 584546}}
|
{"/dev/nbd0": {"image": "bench", "pid": 584536}, "/dev/nbd1": {"image": "bench1", "pid": 584546}}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## netlink-map
|
||||||
|
|
||||||
|
```
|
||||||
|
vitastor-nbd netlink-map [/dev/nbdN] (--image <image> | --pool <POOL> --inode <INODE> --size <SIZE>)
|
||||||
|
```
|
||||||
|
|
||||||
|
На свежих версиях ядра Linux также возможно подключать NBD-устройства через интерфейс netlink.
|
||||||
|
|
||||||
|
Это экспериментальная функция, так как она не решает всех проблем NBD. Отличия от обычного 'map':
|
||||||
|
|
||||||
|
1. Можно создавать новые `/dev/nbdN` устройства (отсутствующие в /dev/).
|
||||||
|
2. Отключать netlink-устройства можно только командой `netlink-unmap`.
|
||||||
|
3. netlink-устройства не видно в выводе `ls` (пока что).
|
||||||
|
4. Мёртвые netlink-устройства можно "оживить" командой `netlink-revive`. Правда, предыдущие
|
||||||
|
запросы ввода-вывода всё равно зависнут навсегда, если `nbd_timeout` не задан.
|
||||||
|
5. Поддерживаются дополнительные опции:
|
||||||
|
|
||||||
|
* `--nbd_conn_timeout 0` \
|
||||||
|
Отключать мёртвое устройство автоматически через данное число секунд.
|
||||||
|
* `--nbd_destroy_on_disconnect 1` \
|
||||||
|
Удалять NBD-устройство при отключении.
|
||||||
|
* `--nbd_disconnect_on_close 1` \
|
||||||
|
Отключать NBD-устройство автоматически, когда его все закроют.
|
||||||
|
* `--nbd_ro 1` \
|
||||||
|
Установить для NBD-устройства режим "только для чтения".
|
||||||
|
|
||||||
|
## netlink-unmap
|
||||||
|
|
||||||
|
```
|
||||||
|
vitastor-nbd netlink-unmap /dev/nbdN
|
||||||
|
```
|
||||||
|
|
||||||
|
Отключить устройство через интерфейс netlink. Работает и с обычными, и с netlink-устройствами.
|
||||||
|
|
||||||
|
## netlink-revive
|
||||||
|
|
||||||
|
```
|
||||||
|
vitastor-nbd netlink-revive /dev/nbdX (--image <image> | --pool <pool> --inode <inode> --size <size in bytes>)
|
||||||
|
```
|
||||||
|
|
||||||
|
Оживить мёртвое NBD-устройство, ранее подключённое через netlink, без удаления. Поддерживает
|
||||||
|
те же опции, что и `netlink-map`.
|
||||||
|
|
|
@ -22,6 +22,7 @@
|
||||||
|
|
||||||
#include "cluster_client.h"
|
#include "cluster_client.h"
|
||||||
#include "epoll_manager.h"
|
#include "epoll_manager.h"
|
||||||
|
#include "str_util.h"
|
||||||
|
|
||||||
#ifdef HAVE_NBD_NETLINK_H
|
#ifdef HAVE_NBD_NETLINK_H
|
||||||
#include <netlink/attr.h>
|
#include <netlink/attr.h>
|
||||||
|
@ -108,18 +109,27 @@ static int netlink_status_cb(struct nl_msg *sk_msg, void *devnum)
|
||||||
}
|
}
|
||||||
|
|
||||||
static int netlink_configure(const int *sockfd, int sock_size, int dev_num, uint64_t size,
|
static int netlink_configure(const int *sockfd, int sock_size, int dev_num, uint64_t size,
|
||||||
uint64_t blocksize, uint64_t flags, uint64_t cflags, uint64_t timeout, uint64_t conn_timeout)
|
uint64_t blocksize, uint64_t flags, uint64_t cflags, uint64_t timeout, uint64_t conn_timeout,
|
||||||
|
const char *backend, bool reconfigure)
|
||||||
{
|
{
|
||||||
struct netlink_ctx ctx;
|
struct netlink_ctx ctx;
|
||||||
struct nlattr *msg_attr, *msg_opt_attr;
|
struct nlattr *msg_attr, *msg_opt_attr;
|
||||||
struct nl_msg *msg;
|
struct nl_msg *msg;
|
||||||
int i, err, sock;
|
int i, err, sock;
|
||||||
uint32_t devnum;
|
uint32_t devnum = dev_num;
|
||||||
|
|
||||||
|
if (reconfigure && dev_num < 0)
|
||||||
|
{
|
||||||
|
return -NLE_INVAL;
|
||||||
|
}
|
||||||
|
|
||||||
netlink_sock_alloc(&ctx);
|
netlink_sock_alloc(&ctx);
|
||||||
|
|
||||||
|
if (!reconfigure)
|
||||||
|
{
|
||||||
// A callback we set for a response we get on send
|
// A callback we set for a response we get on send
|
||||||
nl_socket_modify_cb(ctx.sk, NL_CB_VALID, NL_CB_CUSTOM, netlink_status_cb, &devnum);
|
nl_socket_modify_cb(ctx.sk, NL_CB_VALID, NL_CB_CUSTOM, netlink_status_cb, &devnum);
|
||||||
|
}
|
||||||
|
|
||||||
msg = nlmsg_alloc();
|
msg = nlmsg_alloc();
|
||||||
if (!msg)
|
if (!msg)
|
||||||
|
@ -128,7 +138,8 @@ static int netlink_configure(const int *sockfd, int sock_size, int dev_num, uint
|
||||||
fail("Failed to allocate netlink message\n");
|
fail("Failed to allocate netlink message\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, ctx.driver_id, 0, 0, NBD_CMD_CONNECT, 0);
|
genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, ctx.driver_id, 0, 0,
|
||||||
|
reconfigure ? NBD_CMD_RECONFIGURE : NBD_CMD_CONNECT, 0);
|
||||||
|
|
||||||
if (dev_num >= 0)
|
if (dev_num >= 0)
|
||||||
{
|
{
|
||||||
|
@ -150,6 +161,13 @@ static int netlink_configure(const int *sockfd, int sock_size, int dev_num, uint
|
||||||
NLA_PUT_U64(msg, NBD_ATTR_DEAD_CONN_TIMEOUT, conn_timeout);
|
NLA_PUT_U64(msg, NBD_ATTR_DEAD_CONN_TIMEOUT, conn_timeout);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (backend)
|
||||||
|
{
|
||||||
|
// Backend is an attribute useful for identification of the device
|
||||||
|
// Also it prevents reconfiguration of the device with a different backend string
|
||||||
|
NLA_PUT_STRING(msg, NBD_ATTR_BACKEND_IDENTIFIER, backend);
|
||||||
|
}
|
||||||
|
|
||||||
msg_attr = nla_nest_start(msg, NBD_ATTR_SOCKETS);
|
msg_attr = nla_nest_start(msg, NBD_ATTR_SOCKETS);
|
||||||
if (!msg_attr)
|
if (!msg_attr)
|
||||||
{
|
{
|
||||||
|
@ -172,7 +190,7 @@ static int netlink_configure(const int *sockfd, int sock_size, int dev_num, uint
|
||||||
|
|
||||||
nla_nest_end(msg, msg_attr);
|
nla_nest_end(msg, msg_attr);
|
||||||
|
|
||||||
if ((err = nl_send_sync(ctx.sk, msg)) < 0)
|
if ((err = nl_send_sync(ctx.sk, msg)) != 0)
|
||||||
{
|
{
|
||||||
netlink_sock_free(&ctx);
|
netlink_sock_free(&ctx);
|
||||||
return err;
|
return err;
|
||||||
|
@ -232,14 +250,78 @@ nla_put_failure:
|
||||||
|
|
||||||
const char *exe_name = NULL;
|
const char *exe_name = NULL;
|
||||||
|
|
||||||
|
const char *help_text =
|
||||||
|
"Vitastor NBD proxy " VERSION "\n"
|
||||||
|
"(c) Vitaliy Filippov, 2020+ (VNPL-1.1)\n"
|
||||||
|
"\n"
|
||||||
|
"COMMANDS:\n"
|
||||||
|
"\n"
|
||||||
|
"vitastor-nbd map [OPTIONS] [/dev/nbdN] (--image <image> | --pool <pool> --inode <inode> --size <size in bytes>)\n"
|
||||||
|
" Map an NBD device using ioctl interface. Options:\n"
|
||||||
|
" --nbd_timeout 0\n"
|
||||||
|
" Timeout for I/O operations in seconds after exceeding which the kernel stops the device.\n"
|
||||||
|
" Before Linux 5.19, if nbd_timeout is 0, a dead NBD device can't be removed from\n"
|
||||||
|
" the system at all without rebooting.\n"
|
||||||
|
" --nbd_max_devices 64 --nbd_max_part 3\n"
|
||||||
|
" Options for the \"nbd\" kernel module when modprobing it (nbds_max and max_part).\n"
|
||||||
|
" --logfile /path/to/log/file.txt\n"
|
||||||
|
" Write log messages to the specified file instead of dropping them (in background mode)\n"
|
||||||
|
" or printing them to the standard output (in foreground mode).\n"
|
||||||
|
" --dev_num N\n"
|
||||||
|
" Use the specified device /dev/nbdN instead of automatic selection (alternative syntax\n"
|
||||||
|
" to /dev/nbdN positional parameter).\n"
|
||||||
|
" --foreground 1\n"
|
||||||
|
" Stay in foreground, do not daemonize.\n"
|
||||||
|
"\n"
|
||||||
|
"vitastor-nbd unmap /dev/nbdN\n"
|
||||||
|
" Unmap an ioctl-mapped NBD device.\n"
|
||||||
|
"\n"
|
||||||
|
"vitastor-nbd ls [--json]\n"
|
||||||
|
" List ioctl-mapped Vitastor NBD devices, optionally in JSON format.\n"
|
||||||
|
"\n"
|
||||||
|
#ifdef HAVE_NBD_NETLINK_H
|
||||||
|
"vitastor-nbd netlink-map [/dev/nbd<number>] (--image <image> | --pool <pool> --inode <inode> --size <size in bytes>)\n"
|
||||||
|
" Map a device using netlink interface. Experimental mode. Differences from 'map':\n"
|
||||||
|
" 1) netlink-map can create new /dev/nbdN devices.\n"
|
||||||
|
" 2) netlink-mapped devices can be unmapped only using netlink-unmap command.\n"
|
||||||
|
" 3) netlink-mapped devices don't show up `ls` output (yet).\n"
|
||||||
|
" 4) dead netlink-mapped devices can be 'revived' (however, old I/O may hang forever without timeout).\n"
|
||||||
|
" 5) netlink-map supports additional options:\n"
|
||||||
|
" --nbd_conn_timeout 0\n"
|
||||||
|
" Disconnect a dead device automatically after this number of seconds.\n"
|
||||||
|
#ifdef NBD_CFLAG_DESTROY_ON_DISCONNECT
|
||||||
|
" --nbd_destroy_on_disconnect 1\n"
|
||||||
|
" Delete the nbd device on disconnect.\n"
|
||||||
|
#endif
|
||||||
|
#ifdef NBD_CFLAG_DISCONNECT_ON_CLOSE
|
||||||
|
" --nbd_disconnect_on_close 1\n"
|
||||||
|
" Disconnect the nbd device on close by last opener.\n"
|
||||||
|
#endif
|
||||||
|
#ifdef NBD_FLAG_READ_ONLY
|
||||||
|
" --nbd_ro 1\n"
|
||||||
|
" Set device into read only mode.\n"
|
||||||
|
#endif
|
||||||
|
"\n"
|
||||||
|
"vitastor-nbd netlink-unmap /dev/nbdN\n"
|
||||||
|
" Unmap a device using netlink interface. Works with both netlink and ioctl mapped devices.\n"
|
||||||
|
"\n"
|
||||||
|
"vitastor-nbd netlink-revive /dev/nbdN (--image <image> | --pool <pool> --inode <inode> --size <size in bytes>)\n"
|
||||||
|
" Restart a dead NBD device without removing it. Supports the same options as netlink-map.\n"
|
||||||
|
"\n"
|
||||||
|
#endif
|
||||||
|
"Use vitastor-nbd --help <command> for command details or vitastor-nbd --help --all for all details.\n"
|
||||||
|
"\n"
|
||||||
|
"All usual Vitastor config options like --config_file <path_to_config> may also be specified in CLI.\n"
|
||||||
|
;
|
||||||
|
|
||||||
class nbd_proxy
|
class nbd_proxy
|
||||||
{
|
{
|
||||||
protected:
|
protected:
|
||||||
std::string image_name;
|
std::string image_name;
|
||||||
uint64_t inode = 0;
|
uint64_t inode = 0;
|
||||||
uint64_t device_size = 0;
|
uint64_t device_size = 0;
|
||||||
uint64_t nbd_lease = 0;
|
uint64_t nbd_conn_timeout = 0;
|
||||||
int nbd_timeout = 300;
|
int nbd_timeout = 0;
|
||||||
int nbd_max_devices = 64;
|
int nbd_max_devices = 64;
|
||||||
int nbd_max_part = 3;
|
int nbd_max_part = 3;
|
||||||
inode_watch_t *watch = NULL;
|
inode_watch_t *watch = NULL;
|
||||||
|
@ -283,19 +365,19 @@ public:
|
||||||
{
|
{
|
||||||
if (!strcmp(args[i], "-h") || !strcmp(args[i], "--help"))
|
if (!strcmp(args[i], "-h") || !strcmp(args[i], "--help"))
|
||||||
{
|
{
|
||||||
help();
|
cfg["help"] = 1;
|
||||||
}
|
}
|
||||||
else if (args[i][0] == '-' && args[i][1] == '-')
|
else if (args[i][0] == '-' && args[i][1] == '-')
|
||||||
{
|
{
|
||||||
const char *opt = args[i]+2;
|
const char *opt = args[i]+2;
|
||||||
cfg[opt] = !strcmp(opt, "json") || i == narg-1 ? "1" : args[++i];
|
cfg[opt] = !strcmp(opt, "json") || !strcmp(opt, "all") || i == narg-1 ? "1" : args[++i];
|
||||||
}
|
}
|
||||||
else if (pos == 0)
|
else if (pos == 0)
|
||||||
{
|
{
|
||||||
cfg["command"] = args[i];
|
cfg["command"] = args[i];
|
||||||
pos++;
|
pos++;
|
||||||
}
|
}
|
||||||
else if (pos == 1 && (cfg["command"] == "map" || cfg["command"] == "unmap"))
|
else if (pos == 1)
|
||||||
{
|
{
|
||||||
int n = 0;
|
int n = 0;
|
||||||
if (sscanf(args[i], "/dev/nbd%d", &n) > 0)
|
if (sscanf(args[i], "/dev/nbd%d", &n) > 0)
|
||||||
|
@ -310,9 +392,13 @@ public:
|
||||||
|
|
||||||
void exec(json11::Json cfg)
|
void exec(json11::Json cfg)
|
||||||
{
|
{
|
||||||
|
if (cfg["help"].bool_value())
|
||||||
|
{
|
||||||
|
goto help;
|
||||||
|
}
|
||||||
if (cfg["command"] == "map")
|
if (cfg["command"] == "map")
|
||||||
{
|
{
|
||||||
start(cfg);
|
start(cfg, false, false);
|
||||||
}
|
}
|
||||||
else if (cfg["command"] == "unmap")
|
else if (cfg["command"] == "unmap")
|
||||||
{
|
{
|
||||||
|
@ -323,18 +409,26 @@ public:
|
||||||
}
|
}
|
||||||
if (cfg["netlink"].is_null())
|
if (cfg["netlink"].is_null())
|
||||||
{
|
{
|
||||||
unmap(cfg["dev_num"].uint64_value());
|
ioctl_unmap(cfg["dev_num"].uint64_value());
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
}
|
||||||
|
}
|
||||||
#ifdef HAVE_NBD_NETLINK_H
|
#ifdef HAVE_NBD_NETLINK_H
|
||||||
|
else if (cfg["command"] == "netlink-map")
|
||||||
|
{
|
||||||
|
start(cfg, true, false);
|
||||||
|
}
|
||||||
|
else if (cfg["command"] == "netlink-revive")
|
||||||
|
{
|
||||||
|
start(cfg, true, true);
|
||||||
|
}
|
||||||
|
else if (cfg["command"] == "netlink-unmap")
|
||||||
|
{
|
||||||
netlink_disconnect(cfg["dev_num"].uint64_value());
|
netlink_disconnect(cfg["dev_num"].uint64_value());
|
||||||
#else
|
}
|
||||||
fprintf(stderr, "netlink support is disabled in this build\n");
|
|
||||||
exit(1);
|
|
||||||
#endif
|
#endif
|
||||||
}
|
|
||||||
}
|
|
||||||
else if (cfg["command"] == "ls" || cfg["command"] == "list" || cfg["command"] == "list-mapped")
|
else if (cfg["command"] == "ls" || cfg["command"] == "list" || cfg["command"] == "list-mapped")
|
||||||
{
|
{
|
||||||
auto mapped = list_mapped();
|
auto mapped = list_mapped();
|
||||||
|
@ -342,55 +436,13 @@ public:
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
help();
|
help:
|
||||||
}
|
print_help(help_text, "vitastor-nbd", cfg["command"].string_value(), cfg["all"].bool_value());
|
||||||
}
|
|
||||||
|
|
||||||
static void help()
|
|
||||||
{
|
|
||||||
printf(
|
|
||||||
"Vitastor NBD proxy\n"
|
|
||||||
"(c) Vitaliy Filippov, 2020+ (VNPL-1.1)\n\n"
|
|
||||||
"USAGE:\n"
|
|
||||||
" %s map [OPTIONS] (--image <image> | --pool <pool> --inode <inode> --size <size in bytes>)\n"
|
|
||||||
" %s unmap /dev/nbd0\n"
|
|
||||||
" %s ls [--json]\n"
|
|
||||||
"OPTIONS:\n"
|
|
||||||
" All usual Vitastor config options like --config_file <path_to_config> plus NBD-specific:\n"
|
|
||||||
" --nbd_timeout 300\n"
|
|
||||||
" Timeout for I/O operations in seconds after exceeding which the kernel stops\n"
|
|
||||||
" the device. You can set it to 0 to disable the timeout, but beware that you\n"
|
|
||||||
" won't be able to stop the device at all if vitastor-nbd process dies.\n"
|
|
||||||
" --nbd_max_devices 64 --nbd_max_part 3\n"
|
|
||||||
" Options for the \"nbd\" kernel module when modprobing it (nbds_max and max_part).\n"
|
|
||||||
" Maximum allowed (nbds_max)*(1+max_part) is 2^20.\n"
|
|
||||||
" Note that nbd_timeout, nbd_max_devices and nbd_max_part options may also be specified\n"
|
|
||||||
" in /etc/vitastor/vitastor.conf or in other configuration file specified with --config_file.\n"
|
|
||||||
" --nbd_lease 60\n"
|
|
||||||
" Timeout in seconds which is waited at max before nbd device\n"
|
|
||||||
" is returned after no I/O is performed on device.\n"
|
|
||||||
" By default is not set.\n"
|
|
||||||
" --nbd_destroy_on_disconnect 1\n"
|
|
||||||
" Delete the nbd device on disconnect.\n"
|
|
||||||
" --nbd_disconnect_on_close 1\n"
|
|
||||||
" Disconnect the nbd device on close by last opener.\n"
|
|
||||||
" --nbd_ro 1\n"
|
|
||||||
" Set device into read only mode.\n"
|
|
||||||
" --logfile /path/to/log/file.txt\n"
|
|
||||||
" Write log messages to the specified file instead of dropping them (in background mode)\n"
|
|
||||||
" or printing them to the standard output (in foreground mode).\n"
|
|
||||||
" --dev_num N\n"
|
|
||||||
" Use the specified device /dev/nbdN instead of automatic selection.\n"
|
|
||||||
" --foreground 1\n"
|
|
||||||
" Stay in foreground, do not daemonize.\n"
|
|
||||||
" --netlink 1\n"
|
|
||||||
" Use netlink to configure NBD device.\n",
|
|
||||||
exe_name, exe_name, exe_name
|
|
||||||
);
|
|
||||||
exit(0);
|
exit(0);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void unmap(int dev_num)
|
void ioctl_unmap(int dev_num)
|
||||||
{
|
{
|
||||||
char path[64] = { 0 };
|
char path[64] = { 0 };
|
||||||
sprintf(path, "/dev/nbd%d", dev_num);
|
sprintf(path, "/dev/nbd%d", dev_num);
|
||||||
|
@ -409,7 +461,7 @@ public:
|
||||||
close(nbd);
|
close(nbd);
|
||||||
}
|
}
|
||||||
|
|
||||||
void start(json11::Json cfg)
|
void start(json11::Json cfg, bool netlink, bool revive)
|
||||||
{
|
{
|
||||||
// Check options
|
// Check options
|
||||||
if (cfg["image"].string_value() != "")
|
if (cfg["image"].string_value() != "")
|
||||||
|
@ -439,24 +491,6 @@ public:
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
auto file_config = osd_messenger_t::read_config(cfg);
|
|
||||||
if (file_config["nbd_max_devices"].is_number() || file_config["nbd_max_devices"].is_string())
|
|
||||||
{
|
|
||||||
nbd_max_devices = file_config["nbd_max_devices"].uint64_value();
|
|
||||||
}
|
|
||||||
if (file_config["nbd_max_part"].is_number() || file_config["nbd_max_part"].is_string())
|
|
||||||
{
|
|
||||||
nbd_max_part = file_config["nbd_max_part"].uint64_value();
|
|
||||||
}
|
|
||||||
if (file_config["nbd_timeout"].is_number() || file_config["nbd_timeout"].is_string())
|
|
||||||
{
|
|
||||||
nbd_timeout = file_config["nbd_timeout"].uint64_value();
|
|
||||||
}
|
|
||||||
if (cfg["nbd_lease"].is_number() || cfg["nbd_lease"].is_string())
|
|
||||||
{
|
|
||||||
nbd_lease = cfg["nbd_lease"].uint64_value();
|
|
||||||
}
|
|
||||||
|
|
||||||
if (cfg["client_writeback_allowed"].is_null())
|
if (cfg["client_writeback_allowed"].is_null())
|
||||||
{
|
{
|
||||||
// NBD is always aware of fsync, so we allow write-back cache
|
// NBD is always aware of fsync, so we allow write-back cache
|
||||||
|
@ -465,6 +499,7 @@ public:
|
||||||
obj["client_writeback_allowed"] = true;
|
obj["client_writeback_allowed"] = true;
|
||||||
cfg = obj;
|
cfg = obj;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Create client
|
// Create client
|
||||||
ringloop = new ring_loop_t(RINGLOOP_DEFAULT_SIZE);
|
ringloop = new ring_loop_t(RINGLOOP_DEFAULT_SIZE);
|
||||||
epmgr = new epoll_manager_t(ringloop);
|
epmgr = new epoll_manager_t(ringloop);
|
||||||
|
@ -489,6 +524,24 @@ public:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// cli->config contains merged config
|
||||||
|
if (cli->config.find("nbd_max_devices") != cli->config.end())
|
||||||
|
{
|
||||||
|
nbd_max_devices = cli->config["nbd_max_devices"].uint64_value();
|
||||||
|
}
|
||||||
|
if (cli->config.find("nbd_max_part") != cli->config.end())
|
||||||
|
{
|
||||||
|
nbd_max_part = cli->config["nbd_max_part"].uint64_value();
|
||||||
|
}
|
||||||
|
if (cli->config.find("nbd_timeout") != cli->config.end())
|
||||||
|
{
|
||||||
|
nbd_timeout = cli->config["nbd_timeout"].uint64_value();
|
||||||
|
}
|
||||||
|
if (cli->config.find("nbd_conn_timeout") != cli->config.end())
|
||||||
|
{
|
||||||
|
nbd_conn_timeout = cli->config["nbd_conn_timeout"].uint64_value();
|
||||||
|
}
|
||||||
|
|
||||||
// Initialize NBD
|
// Initialize NBD
|
||||||
int sockfd[2];
|
int sockfd[2];
|
||||||
if (socketpair(AF_UNIX, SOCK_STREAM, 0, sockfd) < 0)
|
if (socketpair(AF_UNIX, SOCK_STREAM, 0, sockfd) < 0)
|
||||||
|
@ -502,7 +555,7 @@ public:
|
||||||
load_module();
|
load_module();
|
||||||
bool bg = cfg["foreground"].is_null();
|
bool bg = cfg["foreground"].is_null();
|
||||||
|
|
||||||
if (!cfg["netlink"].is_null())
|
if (netlink)
|
||||||
{
|
{
|
||||||
#ifdef HAVE_NBD_NETLINK_H
|
#ifdef HAVE_NBD_NETLINK_H
|
||||||
int devnum = -1;
|
int devnum = -1;
|
||||||
|
@ -524,13 +577,14 @@ public:
|
||||||
if (!cfg["nbd_disconnect_on_close"].is_null())
|
if (!cfg["nbd_disconnect_on_close"].is_null())
|
||||||
cflags |= NBD_CFLAG_DISCONNECT_ON_CLOSE;
|
cflags |= NBD_CFLAG_DISCONNECT_ON_CLOSE;
|
||||||
#endif
|
#endif
|
||||||
int err = netlink_configure(sockfd + 1, 1, devnum, device_size, 4096, flags, cflags, nbd_timeout, nbd_lease);
|
int err = netlink_configure(sockfd + 1, 1, devnum, device_size, 4096, flags, cflags, nbd_timeout, nbd_conn_timeout, NULL, revive);
|
||||||
if (err < 0)
|
if (err < 0)
|
||||||
{
|
{
|
||||||
errno = (err == -NLE_BUSY ? EBUSY : EIO);
|
errno = (err == -NLE_BUSY ? EBUSY : EIO);
|
||||||
fprintf(stderr, "netlink_configure failed: %s (code %d)\n", nl_geterror(err), err);
|
fprintf(stderr, "netlink_configure failed: %s (code %d)\n", nl_geterror(err), err);
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
close(sockfd[1]);
|
||||||
printf("/dev/nbd%d\n", err);
|
printf("/dev/nbd%d\n", err);
|
||||||
#else
|
#else
|
||||||
fprintf(stderr, "netlink support is disabled in this build\n");
|
fprintf(stderr, "netlink support is disabled in this build\n");
|
||||||
|
@ -647,9 +701,10 @@ public:
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
int r;
|
int r;
|
||||||
// Kernel built-in default is 16 devices with up to 16 partitions per device which is a big shit
|
// NBD module creates ALL <nbd_max_devices> devices in /dev/ when loaded
|
||||||
// 64 also isn't too high, but the possible maximum is nbds_max=256 max_part=0 and it won't reserve
|
// Kernel built-in default is 16 devices with up to 16 partitions per device which is a bit too low.
|
||||||
// any block device minor numbers for partitions
|
// ...and ioctl setup method can't create additional devices.
|
||||||
|
// netlink setup method, however, CAN create additional devices.
|
||||||
if ((r = system(("modprobe nbd nbds_max="+std::to_string(nbd_max_devices)+" max_part="+std::to_string(nbd_max_part)).c_str())) != 0)
|
if ((r = system(("modprobe nbd nbds_max="+std::to_string(nbd_max_devices)+" max_part="+std::to_string(nbd_max_part)).c_str())) != 0)
|
||||||
{
|
{
|
||||||
if (r < 0)
|
if (r < 0)
|
||||||
|
|
|
@ -209,7 +209,7 @@ void print_help(const char *help_text, std::string exe_name, std::string cmd, bo
|
||||||
const char *var_end = var_start;
|
const char *var_end = var_start;
|
||||||
while (*var_end && !isspace(*var_end))
|
while (*var_end && !isspace(*var_end))
|
||||||
var_end++;
|
var_end++;
|
||||||
if ((std::string(var_start, var_end-var_start)+"|").find(cmd+"|") != std::string::npos)
|
if (("|"+std::string(var_start, var_end-var_start)+"|").find("|"+cmd+"|") != std::string::npos)
|
||||||
found = matched = true;
|
found = matched = true;
|
||||||
}
|
}
|
||||||
else if (*next_line && isspace(*next_line))
|
else if (*next_line && isspace(*next_line))
|
||||||
|
|
Loading…
Reference in New Issue