Compare commits

...

4 Commits

Author SHA1 Message Date
Vitaliy Filippov 3a67d365e0 Move NBD netlink map&unmap to separate commands, add "netlink-revive" command
Test / test_splitbrain (push) Successful in 21s Details
Test / test_snapshot_chain (push) Successful in 2m33s Details
Test / test_snapshot_chain_ec (push) Successful in 2m53s Details
Test / test_rebalance_verify_imm (push) Successful in 2m56s Details
Test / test_rebalance_verify (push) Successful in 3m30s Details
Test / test_switch_primary (push) Successful in 34s Details
Test / test_write (push) Successful in 49s Details
Test / test_write_no_same (push) Successful in 14s Details
Test / test_write_xor (push) Successful in 1m3s Details
Test / test_rebalance_verify_ec (push) Successful in 3m54s Details
Test / test_rebalance_verify_ec_imm (push) Successful in 3m31s Details
Test / test_heal_pg_size_2 (push) Successful in 4m4s Details
Test / test_heal_ec (push) Successful in 4m49s Details
Test / test_heal_csum_32k_dmj (push) Successful in 6m3s Details
Test / test_heal_csum_32k_dj (push) Successful in 5m54s Details
Test / test_heal_csum_4k_dj (push) Has started running Details
Test / test_heal_csum_4k (push) Has started running Details
Test / test_enospc (push) Has been cancelled Details
Test / test_enospc_xor (push) Has been cancelled Details
Test / test_enospc_imm (push) Has been cancelled Details
Test / test_enospc_imm_xor (push) Has been cancelled Details
Test / test_heal_csum_32k (push) Has been cancelled Details
Test / test_scrub (push) Has been cancelled Details
Test / test_scrub_zero_osd_2 (push) Has been cancelled Details
Test / test_scrub_xor (push) Has been cancelled Details
Test / test_scrub_pg_size_3 (push) Has been cancelled Details
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Has been cancelled Details
Test / test_scrub_ec (push) Has been cancelled Details
Test / test_nfs (push) Has been cancelled Details
Test / test_heal_csum_4k_dmj (push) Has been cancelled Details
2024-04-08 15:36:36 +03:00
Vitaliy Filippov b7a3275af3 Make netlink optional 2024-04-08 01:51:28 +03:00
Vitaliy Filippov 64c5c4ca26 Fix code style 2024-04-08 01:35:03 +03:00
idelson 442a9d838d nbd-proxy: add configuration via netlink to support kinds of timeouts.
PR #58 - https://github.com/vitalif/vitastor/pull/58/commits

By MIND Software LLC

By submitting this pull request, I accept Vitastor CLA
2024-04-08 00:50:08 +03:00
11 changed files with 438 additions and 96 deletions

2
debian/control vendored
View File

@ -2,7 +2,7 @@ Source: vitastor
Section: admin
Priority: optional
Maintainer: Vitaliy Filippov <vitalif@yourcmc.ru>
Build-Depends: debhelper, liburing-dev (>= 0.6), g++ (>= 8), libstdc++6 (>= 8), linux-libc-dev, libgoogle-perftools-dev, libjerasure-dev, libgf-complete-dev, libibverbs-dev, libisal-dev, cmake, pkg-config
Build-Depends: debhelper, liburing-dev (>= 0.6), g++ (>= 8), libstdc++6 (>= 8), linux-libc-dev, libgoogle-perftools-dev, libjerasure-dev, libgf-complete-dev, libibverbs-dev, libisal-dev, cmake, pkg-config, libnl-3-dev, libnl-genl-3-dev
Standards-Version: 4.5.0
Homepage: https://vitastor.io/
Rules-Requires-Root: no

View File

@ -25,7 +25,7 @@ RUN apt-get update
RUN apt-get -y install fio liburing-dev libgoogle-perftools-dev devscripts
RUN apt-get -y build-dep fio
RUN apt-get --download-only source fio
RUN apt-get update && apt-get -y install libjerasure-dev cmake libibverbs-dev libisal-dev
RUN apt-get update && apt-get -y install libjerasure-dev cmake libibverbs-dev libisal-dev libnl-3-dev libnl-genl-3-dev
ADD . /root/vitastor
RUN set -e -x; \

View File

@ -10,7 +10,7 @@ RUN rm -f /etc/yum.repos.d/CentOS-Media.repo
RUN yum -y --enablerepo=extras install centos-release-scl epel-release yum-utils rpm-build
RUN yum -y install https://vitastor.io/rpms/centos/7/vitastor-release-1.0-1.el7.noarch.rpm
RUN yum -y install devtoolset-9-gcc-c++ devtoolset-9-libatomic-devel gcc make cmake gperftools-devel \
fio rh-nodejs12 jerasure-devel libisa-l-devel gf-complete-devel rdma-core-devel
fio rh-nodejs12 jerasure-devel libisa-l-devel gf-complete-devel rdma-core-devel libnl3-devel
RUN yumdownloader --disablerepo=centos-sclo-rh --source fio
RUN rpm --nomd5 -i fio*.src.rpm
RUN rm -f /etc/yum.repos.d/CentOS-Media.repo

View File

@ -17,6 +17,7 @@ BuildRequires: libisa-l-devel
BuildRequires: gf-complete-devel
BuildRequires: libibverbs-devel
BuildRequires: cmake3
BuildRequires: libnl3-devel
Requires: vitastor-osd = %{version}-%{release}
Requires: vitastor-mon = %{version}-%{release}
Requires: vitastor-client = %{version}-%{release}

View File

@ -11,7 +11,7 @@ RUN dnf -y install centos-release-advanced-virtualization epel-release dnf-plugi
RUN sed -i 's/^mirrorlist=/#mirrorlist=/; s!#baseurl=.*!baseurl=http://vault.centos.org/centos/8.4.2105/virt/$basearch/$avdir/!; s!^baseurl=.*Source/.*!baseurl=http://vault.centos.org/centos/8.4.2105/virt/Source/advanced-virtualization/!' /etc/yum.repos.d/CentOS-Advanced-Virtualization.repo
RUN yum -y install https://vitastor.io/rpms/centos/8/vitastor-release-1.0-1.el8.noarch.rpm
RUN dnf -y install gcc-toolset-9 gcc-toolset-9-gcc-c++ gperftools-devel \
fio nodejs rpm-build jerasure-devel libisa-l-devel gf-complete-devel libibverbs-devel libarchive cmake
fio nodejs rpm-build jerasure-devel libisa-l-devel gf-complete-devel libibverbs-devel libarchive cmake libnl3-devel
RUN dnf download --source fio
RUN rpm --nomd5 -i fio*.src.rpm
RUN cd ~/rpmbuild/SPECS && dnf builddep -y --enablerepo=powertools --spec fio.spec

View File

@ -16,6 +16,7 @@ BuildRequires: libisa-l-devel
BuildRequires: gf-complete-devel
BuildRequires: libibverbs-devel
BuildRequires: cmake
BuildRequires: libnl3-devel
Requires: vitastor-osd = %{version}-%{release}
Requires: vitastor-mon = %{version}-%{release}
Requires: vitastor-client = %{version}-%{release}

View File

@ -8,7 +8,7 @@ WORKDIR /root
RUN sed -i 's/enabled=0/enabled=1/' /etc/yum.repos.d/*.repo
RUN dnf -y install epel-release dnf-plugins-core
RUN dnf -y install https://vitastor.io/rpms/centos/9/vitastor-release-1.0-1.el9.noarch.rpm
RUN dnf -y install gcc-c++ gperftools-devel fio nodejs rpm-build jerasure-devel libisa-l-devel gf-complete-devel rdma-core-devel libarchive liburing-devel cmake
RUN dnf -y install gcc-c++ gperftools-devel fio nodejs rpm-build jerasure-devel libisa-l-devel gf-complete-devel rdma-core-devel libarchive liburing-devel cmake libnl3-devel
RUN dnf download --source fio
RUN rpm --nomd5 -i fio*.src.rpm
RUN cd ~/rpmbuild/SPECS && dnf builddep -y --spec fio.spec

View File

@ -16,6 +16,7 @@ BuildRequires: libisa-l-devel
BuildRequires: gf-complete-devel
BuildRequires: rdma-core-devel
BuildRequires: cmake
BuildRequires: libnl3-devel
Requires: vitastor-osd = %{version}-%{release}
Requires: vitastor-mon = %{version}-%{release}
Requires: vitastor-client = %{version}-%{release}

View File

@ -4,6 +4,9 @@ project(vitastor)
include(GNUInstallDirs)
include(CTest)
include(CheckIncludeFile)
find_package(PkgConfig)
set(WITH_QEMU false CACHE BOOL "Build QEMU driver inside Vitastor source tree")
set(WITH_FIO true CACHE BOOL "Build FIO driver")
@ -43,6 +46,8 @@ macro(install_symlink filepath sympath)
install(CODE "message(\"-- Created symlink: ${sympath} -> ${filepath}\")")
endmacro(install_symlink)
check_include_file("linux/nbd-netlink.h" HAVE_NBD_NETLINK_H)
find_package(PkgConfig)
pkg_check_modules(LIBURING REQUIRED liburing)
if (${WITH_QEMU})
@ -178,12 +183,15 @@ if (${WITH_FIO})
endif (${WITH_FIO})
# vitastor-nbd
pkg_check_modules(NL3 libnl-3.0 libnl-genl-3.0)
add_executable(vitastor-nbd
nbd_proxy.cpp
)
target_link_libraries(vitastor-nbd
vitastor_client
)
target_include_directories(vitastor-nbd PUBLIC ${NL3_INCLUDE_DIRS})
target_link_libraries(vitastor-nbd vitastor_client ${NL3_LIBRARIES})
if (HAVE_NBD_NETLINK_H AND NL3_LIBRARIES)
target_compile_definitions(vitastor-nbd PUBLIC HAVE_NBD_NETLINK_H)
endif (HAVE_NBD_NETLINK_H AND NL3_LIBRARIES)
# libvitastor_kv.so
add_library(vitastor_kv SHARED

View File

@ -2,21 +2,247 @@
// License: VNPL-1.1 (see README.md for details)
// Similar to qemu-nbd, but sets timeout and uses io_uring
#include <cerrno>
#include <cstdint>
#include <cstdio>
#include <linux/genetlink.h>
#include <linux/nbd.h>
#include <linux/netlink.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <fcntl.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <arpa/inet.h>
#include <sys/un.h>
#include <sys/epoll.h>
#include <unistd.h>
#include <fcntl.h>
#include <signal.h>
#include <sys/epoll.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <unistd.h>
#include "epoll_manager.h"
#include "cluster_client.h"
#include "epoll_manager.h"
#include "str_util.h"
#ifdef HAVE_NBD_NETLINK_H
#include <netlink/attr.h>
#include <netlink/genl/ctrl.h>
#include <netlink/genl/genl.h>
#include <netlink/handlers.h>
#include <netlink/msg.h>
#include <netlink/netlink.h>
#include <netlink/socket.h>
#include <netlink/errno.h>
#include <linux/nbd-netlink.h>
#define fail(...) { fprintf(stderr, __VA_ARGS__); exit(1); }
// State of one generic netlink session used to talk to the kernel "nbd" family.
// Members carry defaults so that a context which was never set up holds
// recognizable values instead of stack garbage.
struct netlink_ctx
{
    // libnl socket handle; NULL while no socket is held
    struct nl_sock *sk = NULL;
    // Generic netlink family id filled in by netlink_sock_alloc(); -1 until resolved
    int driver_id = -1;
};
// Opens a generic netlink socket and resolves the "nbd" family on it.
// On success stores the socket in ctx->sk and the family id in ctx->driver_id.
// On any failure the partially set up socket is released, a message is printed
// to stderr and the process exits via fail().
static void netlink_sock_alloc(struct netlink_ctx *ctx)
{
    struct nl_sock *nl = nl_socket_alloc();
    if (nl == NULL)
    {
        fail("Failed to alloc netlink socket\n");
    }
    if (genl_connect(nl) != 0)
    {
        nl_socket_free(nl);
        fail("Couldn't connect to the generic netlink socket\n");
    }
    const int family_id = genl_ctrl_resolve(nl, "nbd");
    if (family_id < 0)
    {
        nl_socket_free(nl);
        fail("Couldn't resolve the nbd netlink family\n");
    }
    ctx->sk = nl;
    ctx->driver_id = family_id;
}
static void netlink_sock_free(struct netlink_ctx *ctx)
{
free(ctx->sk);
ctx->sk = NULL;
}
static int netlink_status_cb(struct nl_msg *sk_msg, void *devnum)
{
struct nlmsghdr *nl_hdr;
struct genlmsghdr *gnl_hdr;
struct nlattr *msg_attr[NBD_ATTR_MAX + 1];
struct nlattr *attr_data;
int attr_len;
uint32_t* dev_num;
dev_num = (uint32_t*)devnum;
nl_hdr = nlmsg_hdr(sk_msg);
gnl_hdr = (struct genlmsghdr *)nlmsg_data(nl_hdr);
attr_data = genlmsg_attrdata(gnl_hdr, 0);
attr_len = genlmsg_attrlen(gnl_hdr, 0);
if (nla_parse(msg_attr, NBD_ATTR_MAX, attr_data, attr_len, NULL))
{
fail("Failed to parse netlink response\n");
}
if (!msg_attr[NBD_ATTR_INDEX])
{
fail("Got malformed netlink reponse\n");
}
*dev_num = nla_get_u32(msg_attr[NBD_ATTR_INDEX]);
return NL_OK;
}
// Sends NBD_CMD_CONNECT (or NBD_CMD_RECONFIGURE when reconfigure is true) to
// the kernel nbd driver over generic netlink.
//
//   sockfd, sock_size - array of socket descriptors handed to the kernel and its length
//   dev_num           - requested device index; a negative value omits NBD_ATTR_INDEX
//                       (not allowed together with reconfigure)
//   size, blocksize   - values for NBD_ATTR_SIZE_BYTES / NBD_ATTR_BLOCK_SIZE_BYTES
//   flags, cflags     - values for NBD_ATTR_SERVER_FLAGS / NBD_ATTR_CLIENT_FLAGS
//   timeout           - NBD_ATTR_TIMEOUT, attribute is skipped when 0
//   conn_timeout      - NBD_ATTR_DEAD_CONN_TIMEOUT, attribute is skipped when 0
//   backend           - optional NBD_ATTR_BACKEND_IDENTIFIER string, skipped when NULL
//
// Returns the device index on success (for a connect it is taken from the
// reply via netlink_status_cb), -NLE_INVAL for a reconfigure without a device
// index, or the non-zero result of nl_send_sync(). Failures to allocate or
// build the message terminate the process via fail().
static int netlink_configure(const int *sockfd, int sock_size, int dev_num, uint64_t size,
    uint64_t blocksize, uint64_t flags, uint64_t cflags, uint64_t timeout, uint64_t conn_timeout,
    const char *backend, bool reconfigure)
{
    // All locals are declared up front: the code below jumps to nla_put_failure
    // and C++ does not allow a goto to cross an initialization.
    struct netlink_ctx ctx;
    struct nlattr *msg_attr, *msg_opt_attr;
    struct nl_msg *msg;
    int i, err, sock;
    uint32_t devnum = dev_num;

    if (reconfigure && dev_num < 0)
    {
        return -NLE_INVAL;
    }

    netlink_sock_alloc(&ctx);

    if (!reconfigure)
    {
        // A callback we set for a response we get on send
        nl_socket_modify_cb(ctx.sk, NL_CB_VALID, NL_CB_CUSTOM, netlink_status_cb, &devnum);
    }

    msg = nlmsg_alloc();
    if (!msg)
    {
        netlink_sock_free(&ctx);
        fail("Failed to allocate netlink message\n");
    }

    // genlmsg_put() reports failure by returning NULL; do not keep building
    // a message that has no generic netlink header
    if (!genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, ctx.driver_id, 0, 0,
        reconfigure ? NBD_CMD_RECONFIGURE : NBD_CMD_CONNECT, 0))
    {
        goto nla_put_failure;
    }

    if (dev_num >= 0)
    {
        NLA_PUT_U32(msg, NBD_ATTR_INDEX, (uint32_t)dev_num);
    }

    NLA_PUT_U64(msg, NBD_ATTR_SIZE_BYTES, size);
    NLA_PUT_U64(msg, NBD_ATTR_BLOCK_SIZE_BYTES, blocksize);
    NLA_PUT_U64(msg, NBD_ATTR_SERVER_FLAGS, flags);
    NLA_PUT_U64(msg, NBD_ATTR_CLIENT_FLAGS, cflags);

    if (timeout)
    {
        NLA_PUT_U64(msg, NBD_ATTR_TIMEOUT, timeout);
    }

    if (conn_timeout)
    {
        NLA_PUT_U64(msg, NBD_ATTR_DEAD_CONN_TIMEOUT, conn_timeout);
    }

    if (backend)
    {
        // Backend is an attribute useful for identification of the device
        // Also it prevents reconfiguration of the device with a different backend string
        NLA_PUT_STRING(msg, NBD_ATTR_BACKEND_IDENTIFIER, backend);
    }

    // Nested attribute list: NBD_ATTR_SOCKETS { NBD_SOCK_ITEM { NBD_SOCK_FD } ... }
    msg_attr = nla_nest_start(msg, NBD_ATTR_SOCKETS);
    if (!msg_attr)
    {
        goto nla_put_failure;
    }

    for (i = 0; i < sock_size; i++)
    {
        msg_opt_attr = nla_nest_start(msg, NBD_SOCK_ITEM);
        if (!msg_opt_attr)
        {
            goto nla_put_failure;
        }
        sock = sockfd[i];
        NLA_PUT_U32(msg, NBD_SOCK_FD, sock);
        nla_nest_end(msg, msg_opt_attr);
    }

    nla_nest_end(msg, msg_attr);

    // msg is deliberately not freed after this call on either path
    // NOTE(review): relies on nl_send_sync() releasing the message itself - see libnl docs
    if ((err = nl_send_sync(ctx.sk, msg)) != 0)
    {
        netlink_sock_free(&ctx);
        return err;
    }

    netlink_sock_free(&ctx);

    return devnum;

nla_put_failure:
    nlmsg_free(msg);
    netlink_sock_free(&ctx);
    fail("Failed to create netlink message\n");
}
// Sends NBD_CMD_DISCONNECT for device index dev_num over generic netlink.
// Any failure (socket setup, message allocation/construction, send) prints a
// message to stderr and terminates the process via fail().
static void netlink_disconnect(uint32_t dev_num)
{
    struct netlink_ctx ctx;
    struct nl_msg *msg;
    int err;

    netlink_sock_alloc(&ctx);

    msg = nlmsg_alloc();
    if (!msg)
    {
        netlink_sock_free(&ctx);
        fail("Failed to allocate netlink message\n");
    }

    // genlmsg_put() reports failure by returning NULL; do not send a message
    // that has no generic netlink header
    if (!genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, ctx.driver_id, 0, 0, NBD_CMD_DISCONNECT, 0))
    {
        goto nla_put_failure;
    }
    NLA_PUT_U32(msg, NBD_ATTR_INDEX, dev_num);

    // msg is deliberately not freed after this call
    // NOTE(review): relies on nl_send_sync() releasing the message itself - see libnl docs
    if ((err = nl_send_sync(ctx.sk, msg)) < 0)
    {
        netlink_sock_free(&ctx);
        fail("Failed to send netlink message %d\n", err);
    }

    netlink_sock_free(&ctx);
    return;

nla_put_failure:
    nlmsg_free(msg);
    netlink_sock_free(&ctx);
    fail("Failed to create netlink message\n");
}
#undef fail
#endif
#ifndef MSG_ZEROCOPY
#define MSG_ZEROCOPY 0
@ -24,13 +250,75 @@
const char *exe_name = NULL;
// Command-line help shown by `vitastor-nbd --help` (passed to print_help()).
// Netlink commands are listed only when the build has netlink support; the
// closing hints about --help and generic config options apply to every build
// and therefore stay outside of the HAVE_NBD_NETLINK_H section.
const char *help_text =
    "Vitastor NBD proxy " VERSION "\n"
    "(c) Vitaliy Filippov, 2020+ (VNPL-1.1)\n"
    "\n"
    "COMMANDS:\n"
    "\n"
    "vitastor-nbd map [OPTIONS] [/dev/nbdX] (--image <image> | --pool <pool> --inode <inode> --size <size in bytes>)\n"
    " Map an NBD device using ioctl interface. Options:\n"
    " --nbd_timeout 0\n"
    " Timeout for I/O operations in seconds after exceeding which the kernel stops the device.\n"
    " Before Linux 5.19, if nbd_timeout is empty, a dead NBD device can't be removed from\n"
    " the system at all without rebooting.\n"
    " --nbd_max_devices 64 --nbd_max_part 3\n"
    " Options for the \"nbd\" kernel module when modprobing it (nbds_max and max_part).\n"
    " --logfile /path/to/log/file.txt\n"
    " Write log messages to the specified file instead of dropping them (in background mode)\n"
    " or printing them to the standard output (in foreground mode).\n"
    " --foreground 1\n"
    " Stay in foreground, do not daemonize.\n"
    "\n"
    "vitastor-nbd unmap /dev/nbdX\n"
    " Unmap an ioctl-mapped NBD device.\n"
    "\n"
    "vitastor-nbd ls [--json]\n"
    " List ioctl-mapped Vitastor NBD devices, optionally in JSON format.\n"
    "\n"
#ifdef HAVE_NBD_NETLINK_H
    "vitastor-nbd netlink-map [/dev/nbd<number>] (--image <image> | --pool <pool> --inode <inode> --size <size in bytes>)\n"
    " Map a device using netlink interface. Experimental mode. Differences from 'map':\n"
    " 1) netlink-map can create new /dev/nbdXXX devices.\n"
    " 2) netlink-mapped devices can be unmapped only using netlink-unmap command.\n"
    " 3) netlink-mapped devices don't show up `ls` output (yet).\n"
    " 4) dead netlink-mapped devices can be 'revived' (however, losing old I/O requests).\n"
    " 5) netlink-map supports additional options:\n"
    " --nbd_conn_timeout 0\n"
    " Disconnect a dead device automatically after this number of seconds.\n"
#ifdef NBD_CFLAG_DESTROY_ON_DISCONNECT
    " --nbd_destroy_on_disconnect 1\n"
    " Delete the nbd device on disconnect.\n"
#endif
#ifdef NBD_CFLAG_DISCONNECT_ON_CLOSE
    " --nbd_disconnect_on_close 1\n"
    " Disconnect the nbd device on close by last opener.\n"
#endif
#ifdef NBD_FLAG_READ_ONLY
    " --nbd_ro 1\n"
    " Set device into read only mode.\n"
#endif
    "\n"
    "vitastor-nbd netlink-unmap /dev/nbdX\n"
    " Unmap a device using netlink interface. Works with both netlink and ioctl mapped devices.\n"
    "\n"
    "vitastor-nbd netlink-revive /dev/nbdX (--image <image> | --pool <pool> --inode <inode> --size <size in bytes>)\n"
    " Restart a dead NBD device without removing it. Supports the same options as netlink-map.\n"
    "\n"
#endif
    "Use vitastor-nbd --help <command> for command details or vitastor-nbd --help --all for all details.\n"
    "\n"
    "All usual Vitastor config options like --config_file <path_to_config> may also be specified in CLI.\n"
;
class nbd_proxy
{
protected:
std::string image_name;
uint64_t inode = 0;
uint64_t device_size = 0;
int nbd_timeout = 300;
uint64_t nbd_conn_timeout = 0;
int nbd_timeout = 0;
int nbd_max_devices = 64;
int nbd_max_part = 3;
inode_watch_t *watch = NULL;
@ -74,19 +362,19 @@ public:
{
if (!strcmp(args[i], "-h") || !strcmp(args[i], "--help"))
{
help();
cfg["help"] = 1;
}
else if (args[i][0] == '-' && args[i][1] == '-')
{
const char *opt = args[i]+2;
cfg[opt] = !strcmp(opt, "json") || i == narg-1 ? "1" : args[++i];
cfg[opt] = !strcmp(opt, "json") || !strcmp(opt, "all") || i == narg-1 ? "1" : args[++i];
}
else if (pos == 0)
{
cfg["command"] = args[i];
pos++;
}
else if (pos == 1 && (cfg["command"] == "map" || cfg["command"] == "unmap"))
else if (pos == 1)
{
int n = 0;
if (sscanf(args[i], "/dev/nbd%d", &n) > 0)
@ -101,9 +389,13 @@ public:
void exec(json11::Json cfg)
{
if (cfg["help"].bool_value())
{
goto help;
}
if (cfg["command"] == "map")
{
start(cfg);
start(cfg, false, false);
}
else if (cfg["command"] == "unmap")
{
@ -112,8 +404,28 @@ public:
fprintf(stderr, "device name or number is missing\n");
exit(1);
}
unmap(cfg["dev_num"].uint64_value());
if (cfg["netlink"].is_null())
{
ioctl_unmap(cfg["dev_num"].uint64_value());
}
else
{
}
}
#ifdef HAVE_NBD_NETLINK_H
else if (cfg["command"] == "netlink-map")
{
start(cfg, true, false);
}
else if (cfg["command"] == "netlink-revive")
{
start(cfg, true, true);
}
else if (cfg["command"] == "netlink-unmap")
{
netlink_disconnect(cfg["dev_num"].uint64_value());
}
#endif
else if (cfg["command"] == "ls" || cfg["command"] == "list" || cfg["command"] == "list-mapped")
{
auto mapped = list_mapped();
@ -121,43 +433,13 @@ public:
}
else
{
help();
help:
print_help(help_text, "vitastor-nbd", cfg["command"].string_value(), cfg["all"].bool_value());
exit(0);
}
}
static void help()
{
printf(
"Vitastor NBD proxy\n"
"(c) Vitaliy Filippov, 2020-2021 (VNPL-1.1)\n\n"
"USAGE:\n"
" %s map [OPTIONS] (--image <image> | --pool <pool> --inode <inode> --size <size in bytes>)\n"
" %s unmap /dev/nbd0\n"
" %s ls [--json]\n"
"OPTIONS:\n"
" All usual Vitastor config options like --config_file <path_to_config> plus NBD-specific:\n"
" --nbd_timeout 300\n"
" Timeout for I/O operations in seconds after exceeding which the kernel stops\n"
" the device. You can set it to 0 to disable the timeout, but beware that you\n"
" won't be able to stop the device at all if vitastor-nbd process dies.\n"
" --nbd_max_devices 64 --nbd_max_part 3\n"
" Options for the \"nbd\" kernel module when modprobing it (nbds_max and max_part).\n"
" note that maximum allowed (nbds_max)*(1+max_part) is 256.\n"
" Note that nbd_timeout, nbd_max_devices and nbd_max_part options may also be specified\n"
" in /etc/vitastor/vitastor.conf or in other configuration file specified with --config_file.\n"
" --logfile /path/to/log/file.txt\n"
" Write log messages to the specified file instead of dropping them (in background mode)\n"
" or printing them to the standard output (in foreground mode).\n"
" --dev_num N\n"
" Use the specified device /dev/nbdN instead of automatic selection.\n"
" --foreground 1\n"
" Stay in foreground, do not daemonize.\n",
exe_name, exe_name, exe_name
);
exit(0);
}
void unmap(int dev_num)
void ioctl_unmap(int dev_num)
{
char path[64] = { 0 };
sprintf(path, "/dev/nbd%d", dev_num);
@ -176,7 +458,7 @@ public:
close(nbd);
}
void start(json11::Json cfg)
void start(json11::Json cfg, bool netlink, bool revive)
{
// Check options
if (cfg["image"].string_value() != "")
@ -206,19 +488,6 @@ public:
exit(1);
}
}
auto file_config = osd_messenger_t::read_config(cfg);
if (file_config["nbd_max_devices"].is_number() || file_config["nbd_max_devices"].is_string())
{
nbd_max_devices = file_config["nbd_max_devices"].uint64_value();
}
if (file_config["nbd_max_part"].is_number() || file_config["nbd_max_part"].is_string())
{
nbd_max_part = file_config["nbd_max_part"].uint64_value();
}
if (file_config["nbd_timeout"].is_number() || file_config["nbd_timeout"].is_string())
{
nbd_timeout = file_config["nbd_timeout"].uint64_value();
}
if (cfg["client_writeback_allowed"].is_null())
{
// NBD is always aware of fsync, so we allow write-back cache
@ -227,6 +496,7 @@ public:
obj["client_writeback_allowed"] = true;
cfg = obj;
}
// Create client
ringloop = new ring_loop_t(RINGLOOP_DEFAULT_SIZE);
epmgr = new epoll_manager_t(ringloop);
@ -250,6 +520,25 @@ public:
exit(1);
}
}
// cli->config contains merged config
if (cli->config.find("nbd_max_devices") != cli->config.end())
{
nbd_max_devices = cli->config["nbd_max_devices"].uint64_value();
}
if (cli->config.find("nbd_max_part") != cli->config.end())
{
nbd_max_part = cli->config["nbd_max_part"].uint64_value();
}
if (cli->config.find("nbd_timeout") != cli->config.end())
{
nbd_timeout = cli->config["nbd_timeout"].uint64_value();
}
if (cli->config.find("nbd_conn_timeout") != cli->config.end())
{
nbd_conn_timeout = cli->config["nbd_conn_timeout"].uint64_value();
}
// Initialize NBD
int sockfd[2];
if (socketpair(AF_UNIX, SOCK_STREAM, 0, sockfd) < 0)
@ -257,46 +546,87 @@ public:
perror("socketpair");
exit(1);
}
fcntl(sockfd[0], F_SETFL, fcntl(sockfd[0], F_GETFL, 0) | O_NONBLOCK);
nbd_fd = sockfd[0];
load_module();
bool bg = cfg["foreground"].is_null();
if (!cfg["dev_num"].is_null())
if (netlink)
{
if (run_nbd(sockfd, cfg["dev_num"].int64_value(), device_size, NBD_FLAG_SEND_FLUSH, nbd_timeout, bg) < 0)
#ifdef HAVE_NBD_NETLINK_H
int devnum = -1;
if (!cfg["dev_num"].is_null())
{
perror("run_nbd");
devnum = (int)cfg["dev_num"].uint64_value();
}
uint64_t flags = NBD_FLAG_SEND_FLUSH;
uint64_t cflags = 0;
#ifdef NBD_FLAG_READ_ONLY
if (!cfg["nbd_ro"].is_null())
flags |= NBD_FLAG_READ_ONLY;
#endif
#ifdef NBD_CFLAG_DESTROY_ON_DISCONNECT
if (!cfg["nbd_destroy_on_disconnect"].is_null())
cflags |= NBD_CFLAG_DESTROY_ON_DISCONNECT;
#endif
#ifdef NBD_CFLAG_DISCONNECT_ON_CLOSE
if (!cfg["nbd_disconnect_on_close"].is_null())
cflags |= NBD_CFLAG_DISCONNECT_ON_CLOSE;
#endif
int err = netlink_configure(sockfd + 1, 1, devnum, device_size, 4096, flags, cflags, nbd_timeout, nbd_conn_timeout, NULL, revive);
if (err < 0)
{
errno = (err == -NLE_BUSY ? EBUSY : EIO);
fprintf(stderr, "netlink_configure failed: %s (code %d)\n", nl_geterror(err), err);
exit(1);
}
close(sockfd[1]);
printf("/dev/nbd%d\n", err);
#else
fprintf(stderr, "netlink support is disabled in this build\n");
exit(1);
#endif
}
else
{
// Find an unused device
int i = 0;
while (true)
if (!cfg["dev_num"].is_null())
{
int r = run_nbd(sockfd, i, device_size, NBD_FLAG_SEND_FLUSH, nbd_timeout, bg);
if (r == 0)
if (run_nbd(sockfd, cfg["dev_num"].int64_value(), device_size, NBD_FLAG_SEND_FLUSH, nbd_timeout, bg) < 0)
{
printf("/dev/nbd%d\n", i);
break;
}
else if (r == -1 && errno == ENOENT)
{
fprintf(stderr, "No free NBD devices found\n");
exit(1);
}
else if (r == -2 && errno == EBUSY)
{
i++;
}
else
{
printf("%d %d\n", r, errno);
perror("run_nbd");
exit(1);
}
}
else
{
// Find an unused device
int i = 0;
while (true)
{
int r = run_nbd(sockfd, i, device_size, NBD_FLAG_SEND_FLUSH, nbd_timeout, bg);
if (r == 0)
{
printf("/dev/nbd%d\n", i);
break;
}
else if (r == -1 && errno == ENOENT)
{
fprintf(stderr, "No free NBD devices found\n");
exit(1);
}
else if (r == -2 && errno == EBUSY)
{
i++;
}
else
{
printf("%d %d\n", r, errno);
perror("run_nbd");
exit(1);
}
}
}
}
if (cfg["logfile"].string_value() != "")
{
@ -368,9 +698,10 @@ public:
return;
}
int r;
// Kernel built-in default is 16 devices with up to 16 partitions per device which is a big shit
// 64 also isn't too high, but the possible maximum is nbds_max=256 max_part=0 and it won't reserve
// any block device minor numbers for partitions
// NBD module creates ALL <nbd_max_devices> devices in /dev/ when loaded
// Kernel built-in default is 16 devices with up to 16 partitions per device which is a bit too low.
// ...and ioctl setup method can't create additional devices.
// netlink setup method, however, CAN create additional devices.
if ((r = system(("modprobe nbd nbds_max="+std::to_string(nbd_max_devices)+" max_part="+std::to_string(nbd_max_part)).c_str())) != 0)
{
if (r < 0)

View File

@ -209,7 +209,7 @@ void print_help(const char *help_text, std::string exe_name, std::string cmd, bo
const char *var_end = var_start;
while (*var_end && !isspace(*var_end))
var_end++;
if ((std::string(var_start, var_end-var_start)+"|").find(cmd+"|") != std::string::npos)
if (("|"+std::string(var_start, var_end-var_start)+"|").find("|"+cmd+"|") != std::string::npos)
found = matched = true;
}
else if (*next_line && isspace(*next_line))