Compare commits
4 Commits
6366972fe8
...
3a67d365e0
Author | SHA1 | Date |
---|---|---|
Vitaliy Filippov | 3a67d365e0 | |
Vitaliy Filippov | b7a3275af3 | |
Vitaliy Filippov | 64c5c4ca26 | |
idelson | 442a9d838d |
|
@ -2,7 +2,7 @@ Source: vitastor
|
|||
Section: admin
|
||||
Priority: optional
|
||||
Maintainer: Vitaliy Filippov <vitalif@yourcmc.ru>
|
||||
Build-Depends: debhelper, liburing-dev (>= 0.6), g++ (>= 8), libstdc++6 (>= 8), linux-libc-dev, libgoogle-perftools-dev, libjerasure-dev, libgf-complete-dev, libibverbs-dev, libisal-dev, cmake, pkg-config
|
||||
Build-Depends: debhelper, liburing-dev (>= 0.6), g++ (>= 8), libstdc++6 (>= 8), linux-libc-dev, libgoogle-perftools-dev, libjerasure-dev, libgf-complete-dev, libibverbs-dev, libisal-dev, cmake, pkg-config, libnl-3-dev, libnl-genl-3-dev
|
||||
Standards-Version: 4.5.0
|
||||
Homepage: https://vitastor.io/
|
||||
Rules-Requires-Root: no
|
||||
|
|
|
@ -25,7 +25,7 @@ RUN apt-get update
|
|||
RUN apt-get -y install fio liburing-dev libgoogle-perftools-dev devscripts
|
||||
RUN apt-get -y build-dep fio
|
||||
RUN apt-get --download-only source fio
|
||||
RUN apt-get update && apt-get -y install libjerasure-dev cmake libibverbs-dev libisal-dev
|
||||
RUN apt-get update && apt-get -y install libjerasure-dev cmake libibverbs-dev libisal-dev libnl-3-dev libnl-genl-3-dev
|
||||
|
||||
ADD . /root/vitastor
|
||||
RUN set -e -x; \
|
||||
|
|
|
@ -10,7 +10,7 @@ RUN rm -f /etc/yum.repos.d/CentOS-Media.repo
|
|||
RUN yum -y --enablerepo=extras install centos-release-scl epel-release yum-utils rpm-build
|
||||
RUN yum -y install https://vitastor.io/rpms/centos/7/vitastor-release-1.0-1.el7.noarch.rpm
|
||||
RUN yum -y install devtoolset-9-gcc-c++ devtoolset-9-libatomic-devel gcc make cmake gperftools-devel \
|
||||
fio rh-nodejs12 jerasure-devel libisa-l-devel gf-complete-devel rdma-core-devel
|
||||
fio rh-nodejs12 jerasure-devel libisa-l-devel gf-complete-devel rdma-core-devel libnl3-devel
|
||||
RUN yumdownloader --disablerepo=centos-sclo-rh --source fio
|
||||
RUN rpm --nomd5 -i fio*.src.rpm
|
||||
RUN rm -f /etc/yum.repos.d/CentOS-Media.repo
|
||||
|
|
|
@ -17,6 +17,7 @@ BuildRequires: libisa-l-devel
|
|||
BuildRequires: gf-complete-devel
|
||||
BuildRequires: libibverbs-devel
|
||||
BuildRequires: cmake3
|
||||
BuildRequires: libnl3-devel
|
||||
Requires: vitastor-osd = %{version}-%{release}
|
||||
Requires: vitastor-mon = %{version}-%{release}
|
||||
Requires: vitastor-client = %{version}-%{release}
|
||||
|
|
|
@ -11,7 +11,7 @@ RUN dnf -y install centos-release-advanced-virtualization epel-release dnf-plugi
|
|||
RUN sed -i 's/^mirrorlist=/#mirrorlist=/; s!#baseurl=.*!baseurl=http://vault.centos.org/centos/8.4.2105/virt/$basearch/$avdir/!; s!^baseurl=.*Source/.*!baseurl=http://vault.centos.org/centos/8.4.2105/virt/Source/advanced-virtualization/!' /etc/yum.repos.d/CentOS-Advanced-Virtualization.repo
|
||||
RUN yum -y install https://vitastor.io/rpms/centos/8/vitastor-release-1.0-1.el8.noarch.rpm
|
||||
RUN dnf -y install gcc-toolset-9 gcc-toolset-9-gcc-c++ gperftools-devel \
|
||||
fio nodejs rpm-build jerasure-devel libisa-l-devel gf-complete-devel libibverbs-devel libarchive cmake
|
||||
fio nodejs rpm-build jerasure-devel libisa-l-devel gf-complete-devel libibverbs-devel libarchive cmake libnl3-devel
|
||||
RUN dnf download --source fio
|
||||
RUN rpm --nomd5 -i fio*.src.rpm
|
||||
RUN cd ~/rpmbuild/SPECS && dnf builddep -y --enablerepo=powertools --spec fio.spec
|
||||
|
|
|
@ -16,6 +16,7 @@ BuildRequires: libisa-l-devel
|
|||
BuildRequires: gf-complete-devel
|
||||
BuildRequires: libibverbs-devel
|
||||
BuildRequires: cmake
|
||||
BuildRequires: libnl3-devel
|
||||
Requires: vitastor-osd = %{version}-%{release}
|
||||
Requires: vitastor-mon = %{version}-%{release}
|
||||
Requires: vitastor-client = %{version}-%{release}
|
||||
|
|
|
@ -8,7 +8,7 @@ WORKDIR /root
|
|||
RUN sed -i 's/enabled=0/enabled=1/' /etc/yum.repos.d/*.repo
|
||||
RUN dnf -y install epel-release dnf-plugins-core
|
||||
RUN dnf -y install https://vitastor.io/rpms/centos/9/vitastor-release-1.0-1.el9.noarch.rpm
|
||||
RUN dnf -y install gcc-c++ gperftools-devel fio nodejs rpm-build jerasure-devel libisa-l-devel gf-complete-devel rdma-core-devel libarchive liburing-devel cmake
|
||||
RUN dnf -y install gcc-c++ gperftools-devel fio nodejs rpm-build jerasure-devel libisa-l-devel gf-complete-devel rdma-core-devel libarchive liburing-devel cmake libnl3-devel
|
||||
RUN dnf download --source fio
|
||||
RUN rpm --nomd5 -i fio*.src.rpm
|
||||
RUN cd ~/rpmbuild/SPECS && dnf builddep -y --spec fio.spec
|
||||
|
|
|
@ -16,6 +16,7 @@ BuildRequires: libisa-l-devel
|
|||
BuildRequires: gf-complete-devel
|
||||
BuildRequires: rdma-core-devel
|
||||
BuildRequires: cmake
|
||||
BuildRequires: libnl3-devel
|
||||
Requires: vitastor-osd = %{version}-%{release}
|
||||
Requires: vitastor-mon = %{version}-%{release}
|
||||
Requires: vitastor-client = %{version}-%{release}
|
||||
|
|
|
@ -4,6 +4,9 @@ project(vitastor)
|
|||
|
||||
include(GNUInstallDirs)
|
||||
include(CTest)
|
||||
include(CheckIncludeFile)
|
||||
|
||||
find_package(PkgConfig)
|
||||
|
||||
set(WITH_QEMU false CACHE BOOL "Build QEMU driver inside Vitastor source tree")
|
||||
set(WITH_FIO true CACHE BOOL "Build FIO driver")
|
||||
|
@ -43,6 +46,8 @@ macro(install_symlink filepath sympath)
|
|||
install(CODE "message(\"-- Created symlink: ${sympath} -> ${filepath}\")")
|
||||
endmacro(install_symlink)
|
||||
|
||||
check_include_file("linux/nbd-netlink.h" HAVE_NBD_NETLINK_H)
|
||||
|
||||
find_package(PkgConfig)
|
||||
pkg_check_modules(LIBURING REQUIRED liburing)
|
||||
if (${WITH_QEMU})
|
||||
|
@ -178,12 +183,15 @@ if (${WITH_FIO})
|
|||
endif (${WITH_FIO})
|
||||
|
||||
# vitastor-nbd
|
||||
pkg_check_modules(NL3 libnl-3.0 libnl-genl-3.0)
|
||||
add_executable(vitastor-nbd
|
||||
nbd_proxy.cpp
|
||||
)
|
||||
target_link_libraries(vitastor-nbd
|
||||
vitastor_client
|
||||
)
|
||||
target_include_directories(vitastor-nbd PUBLIC ${NL3_INCLUDE_DIRS})
|
||||
target_link_libraries(vitastor-nbd vitastor_client ${NL3_LIBRARIES})
|
||||
if (HAVE_NBD_NETLINK_H AND NL3_LIBRARIES)
|
||||
target_compile_definitions(vitastor-nbd PUBLIC HAVE_NBD_NETLINK_H)
|
||||
endif (HAVE_NBD_NETLINK_H AND NL3_LIBRARIES)
|
||||
|
||||
# libvitastor_kv.so
|
||||
add_library(vitastor_kv SHARED
|
||||
|
|
|
@ -2,21 +2,247 @@
|
|||
// License: VNPL-1.1 (see README.md for details)
|
||||
// Similar to qemu-nbd, but sets timeout and uses io_uring
|
||||
|
||||
#include <cerrno>
|
||||
#include <cstdint>
|
||||
#include <cstdio>
|
||||
#include <linux/genetlink.h>
|
||||
#include <linux/nbd.h>
|
||||
#include <linux/netlink.h>
|
||||
#include <sys/ioctl.h>
|
||||
|
||||
#include <sys/socket.h>
|
||||
#include <arpa/inet.h>
|
||||
#include <fcntl.h>
|
||||
#include <netinet/in.h>
|
||||
#include <netinet/tcp.h>
|
||||
#include <arpa/inet.h>
|
||||
#include <sys/un.h>
|
||||
#include <sys/epoll.h>
|
||||
#include <unistd.h>
|
||||
#include <fcntl.h>
|
||||
#include <signal.h>
|
||||
#include <sys/epoll.h>
|
||||
#include <sys/socket.h>
|
||||
#include <sys/un.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "epoll_manager.h"
|
||||
#include "cluster_client.h"
|
||||
#include "epoll_manager.h"
|
||||
#include "str_util.h"
|
||||
|
||||
#ifdef HAVE_NBD_NETLINK_H
|
||||
#include <netlink/attr.h>
|
||||
#include <netlink/genl/ctrl.h>
|
||||
#include <netlink/genl/genl.h>
|
||||
#include <netlink/handlers.h>
|
||||
#include <netlink/msg.h>
|
||||
#include <netlink/netlink.h>
|
||||
#include <netlink/socket.h>
|
||||
#include <netlink/errno.h>
|
||||
#include <linux/nbd-netlink.h>
|
||||
|
||||
#define fail(...) { fprintf(stderr, __VA_ARGS__); exit(1); }
|
||||
|
||||
// State of one generic netlink session with the kernel "nbd" family.
// Filled in by netlink_sock_alloc() and released by netlink_sock_free().
struct netlink_ctx
{
    // libnl socket handle connected to generic netlink
    struct nl_sock *sk;
    // Generic netlink family id resolved for "nbd"; used as the family of every request
    int driver_id;
};
|
||||
|
||||
static void netlink_sock_alloc(struct netlink_ctx *ctx)
|
||||
{
|
||||
struct nl_sock *sk;
|
||||
int nl_driver_id;
|
||||
|
||||
sk = nl_socket_alloc();
|
||||
if (!sk)
|
||||
{
|
||||
fail("Failed to alloc netlink socket\n");
|
||||
}
|
||||
|
||||
if (genl_connect(sk))
|
||||
{
|
||||
nl_socket_free(sk);
|
||||
fail("Couldn't connect to the generic netlink socket\n");
|
||||
}
|
||||
|
||||
nl_driver_id = genl_ctrl_resolve(sk, "nbd");
|
||||
if (nl_driver_id < 0)
|
||||
{
|
||||
nl_socket_free(sk);
|
||||
fail("Couldn't resolve the nbd netlink family\n");
|
||||
}
|
||||
|
||||
ctx->driver_id = nl_driver_id;
|
||||
ctx->sk = sk;
|
||||
}
|
||||
|
||||
static void netlink_sock_free(struct netlink_ctx *ctx)
|
||||
{
|
||||
free(ctx->sk);
|
||||
ctx->sk = NULL;
|
||||
}
|
||||
|
||||
static int netlink_status_cb(struct nl_msg *sk_msg, void *devnum)
|
||||
{
|
||||
struct nlmsghdr *nl_hdr;
|
||||
struct genlmsghdr *gnl_hdr;
|
||||
struct nlattr *msg_attr[NBD_ATTR_MAX + 1];
|
||||
struct nlattr *attr_data;
|
||||
int attr_len;
|
||||
uint32_t* dev_num;
|
||||
|
||||
dev_num = (uint32_t*)devnum;
|
||||
|
||||
nl_hdr = nlmsg_hdr(sk_msg);
|
||||
gnl_hdr = (struct genlmsghdr *)nlmsg_data(nl_hdr);
|
||||
attr_data = genlmsg_attrdata(gnl_hdr, 0);
|
||||
attr_len = genlmsg_attrlen(gnl_hdr, 0);
|
||||
|
||||
if (nla_parse(msg_attr, NBD_ATTR_MAX, attr_data, attr_len, NULL))
|
||||
{
|
||||
fail("Failed to parse netlink response\n");
|
||||
}
|
||||
|
||||
if (!msg_attr[NBD_ATTR_INDEX])
|
||||
{
|
||||
fail("Got malformed netlink reponse\n");
|
||||
}
|
||||
|
||||
*dev_num = nla_get_u32(msg_attr[NBD_ATTR_INDEX]);
|
||||
|
||||
return NL_OK;
|
||||
}
|
||||
|
||||
static int netlink_configure(const int *sockfd, int sock_size, int dev_num, uint64_t size,
|
||||
uint64_t blocksize, uint64_t flags, uint64_t cflags, uint64_t timeout, uint64_t conn_timeout,
|
||||
const char *backend, bool reconfigure)
|
||||
{
|
||||
struct netlink_ctx ctx;
|
||||
struct nlattr *msg_attr, *msg_opt_attr;
|
||||
struct nl_msg *msg;
|
||||
int i, err, sock;
|
||||
uint32_t devnum = dev_num;
|
||||
|
||||
if (reconfigure && dev_num < 0)
|
||||
{
|
||||
return -NLE_INVAL;
|
||||
}
|
||||
|
||||
netlink_sock_alloc(&ctx);
|
||||
|
||||
if (!reconfigure)
|
||||
{
|
||||
// A callback we set for a response we get on send
|
||||
nl_socket_modify_cb(ctx.sk, NL_CB_VALID, NL_CB_CUSTOM, netlink_status_cb, &devnum);
|
||||
}
|
||||
|
||||
msg = nlmsg_alloc();
|
||||
if (!msg)
|
||||
{
|
||||
netlink_sock_free(&ctx);
|
||||
fail("Failed to allocate netlink message\n");
|
||||
}
|
||||
|
||||
genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, ctx.driver_id, 0, 0,
|
||||
reconfigure ? NBD_CMD_RECONFIGURE : NBD_CMD_CONNECT, 0);
|
||||
|
||||
if (dev_num >= 0)
|
||||
{
|
||||
NLA_PUT_U32(msg, NBD_ATTR_INDEX, (uint32_t)dev_num);
|
||||
}
|
||||
|
||||
NLA_PUT_U64(msg, NBD_ATTR_SIZE_BYTES, size);
|
||||
NLA_PUT_U64(msg, NBD_ATTR_BLOCK_SIZE_BYTES, blocksize);
|
||||
NLA_PUT_U64(msg, NBD_ATTR_SERVER_FLAGS, flags);
|
||||
NLA_PUT_U64(msg, NBD_ATTR_CLIENT_FLAGS, cflags);
|
||||
|
||||
if (timeout)
|
||||
{
|
||||
NLA_PUT_U64(msg, NBD_ATTR_TIMEOUT, timeout);
|
||||
}
|
||||
|
||||
if (conn_timeout)
|
||||
{
|
||||
NLA_PUT_U64(msg, NBD_ATTR_DEAD_CONN_TIMEOUT, conn_timeout);
|
||||
}
|
||||
|
||||
if (backend)
|
||||
{
|
||||
// Backend is an attribute useful for identication of the device
|
||||
// Also it prevents reconfiguration of the device with a different backend string
|
||||
NLA_PUT_STRING(msg, NBD_ATTR_BACKEND_IDENTIFIER, backend);
|
||||
}
|
||||
|
||||
msg_attr = nla_nest_start(msg, NBD_ATTR_SOCKETS);
|
||||
if (!msg_attr)
|
||||
{
|
||||
goto nla_put_failure;
|
||||
}
|
||||
|
||||
for (i = 0; i < sock_size; i++)
|
||||
{
|
||||
msg_opt_attr = nla_nest_start(msg, NBD_SOCK_ITEM);
|
||||
if (!msg_opt_attr)
|
||||
{
|
||||
goto nla_put_failure;
|
||||
}
|
||||
|
||||
sock = sockfd[i];
|
||||
NLA_PUT_U32(msg, NBD_SOCK_FD, sock);
|
||||
|
||||
nla_nest_end(msg, msg_opt_attr);
|
||||
}
|
||||
|
||||
nla_nest_end(msg, msg_attr);
|
||||
|
||||
if ((err = nl_send_sync(ctx.sk, msg)) != 0)
|
||||
{
|
||||
netlink_sock_free(&ctx);
|
||||
return err;
|
||||
}
|
||||
|
||||
netlink_sock_free(&ctx);
|
||||
|
||||
return devnum;
|
||||
|
||||
nla_put_failure:
|
||||
nlmsg_free(msg);
|
||||
netlink_sock_free(&ctx);
|
||||
fail("Failed to create netlink message\n");
|
||||
}
|
||||
|
||||
static void netlink_disconnect(uint32_t dev_num)
|
||||
{
|
||||
struct netlink_ctx ctx;
|
||||
struct nl_msg *msg;
|
||||
int err;
|
||||
|
||||
netlink_sock_alloc(&ctx);
|
||||
|
||||
msg = nlmsg_alloc();
|
||||
if (!msg)
|
||||
{
|
||||
netlink_sock_free(&ctx);
|
||||
fail("Failed to allocate netlink message\n");
|
||||
}
|
||||
|
||||
genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, ctx.driver_id, 0, 0, NBD_CMD_DISCONNECT, 0);
|
||||
NLA_PUT_U32(msg, NBD_ATTR_INDEX, dev_num);
|
||||
|
||||
if ((err = nl_send_sync(ctx.sk, msg)) < 0)
|
||||
{
|
||||
netlink_sock_free(&ctx);
|
||||
fail("Failed to send netlink message %d\n", err);
|
||||
}
|
||||
|
||||
netlink_sock_free(&ctx);
|
||||
|
||||
return;
|
||||
|
||||
nla_put_failure:
|
||||
nlmsg_free(msg);
|
||||
netlink_sock_free(&ctx);
|
||||
fail("Failed to create netlink message\n");
|
||||
}
|
||||
|
||||
#undef fail
|
||||
|
||||
#endif
|
||||
|
||||
#ifndef MSG_ZEROCOPY
|
||||
#define MSG_ZEROCOPY 0
|
||||
|
@ -24,13 +250,75 @@
|
|||
|
||||
const char *exe_name = NULL;
|
||||
|
||||
const char *help_text =
|
||||
"Vitastor NBD proxy " VERSION "\n"
|
||||
"(c) Vitaliy Filippov, 2020+ (VNPL-1.1)\n"
|
||||
"\n"
|
||||
"COMMANDS:\n"
|
||||
"\n"
|
||||
"vitastor-nbd map [OPTIONS] [/dev/nbdX] (--image <image> | --pool <pool> --inode <inode> --size <size in bytes>)\n"
|
||||
" Map an NBD device using ioctl interface. Options:\n"
|
||||
" --nbd_timeout 0\n"
|
||||
" Timeout for I/O operations in seconds after exceeding which the kernel stops the device.\n"
|
||||
" Before Linux 5.19, if nbd_timeout is empty, a dead NBD device can't be removed from\n"
|
||||
" the system at all without rebooting.\n"
|
||||
" --nbd_max_devices 64 --nbd_max_part 3\n"
|
||||
" Options for the \"nbd\" kernel module when modprobing it (nbds_max and max_part).\n"
|
||||
" --logfile /path/to/log/file.txt\n"
|
||||
" Write log messages to the specified file instead of dropping them (in background mode)\n"
|
||||
" or printing them to the standard output (in foreground mode).\n"
|
||||
" --foreground 1\n"
|
||||
" Stay in foreground, do not daemonize.\n"
|
||||
"\n"
|
||||
"vitastor-nbd unmap /dev/nbdX\n"
|
||||
" Unmap an ioctl-mapped NBD device.\n"
|
||||
"\n"
|
||||
"vitastor-nbd ls [--json]\n"
|
||||
" List ioctl-mapped Vitastor NBD devices, optionally in JSON format.\n"
|
||||
"\n"
|
||||
#ifdef HAVE_NBD_NETLINK_H
|
||||
"vitastor-nbd netlink-map [/dev/nbd<number>] (--image <image> | --pool <pool> --inode <inode> --size <size in bytes>)\n"
|
||||
" Map a device using netlink interface. Experimental mode. Differences from 'map':\n"
|
||||
" 1) netlink-map can create new /dev/nbdXXX devices.\n"
|
||||
" 2) netlink-mapped devices can be unmapped only using netlink-unmap command.\n"
|
||||
" 3) netlink-mapped devices don't show up `ls` output (yet).\n"
|
||||
" 4) dead netlink-mapped devices can be 'revived' (however, losing old I/O requests).\n"
|
||||
" 5) netlink-map supports additional options:\n"
|
||||
" --nbd_conn_timeout 0\n"
|
||||
" Disconnect a dead device automatically after this number of seconds.\n"
|
||||
#ifdef NBD_CFLAG_DESTROY_ON_DISCONNECT
|
||||
" --nbd_destroy_on_disconnect 1\n"
|
||||
" Delete the nbd device on disconnect.\n"
|
||||
#endif
|
||||
#ifdef NBD_CFLAG_DISCONNECT_ON_CLOSE
|
||||
" --nbd_disconnect_on_close 1\n"
|
||||
" Disconnect the nbd device on close by last opener.\n"
|
||||
#endif
|
||||
#ifdef NBD_FLAG_READ_ONLY
|
||||
" --nbd_ro 1\n"
|
||||
" Set device into read only mode.\n"
|
||||
#endif
|
||||
"\n"
|
||||
"vitastor-nbd netlink-unmap /dev/nbdX\n"
|
||||
" Unmap a device using netlink interface. Works with both netlink and ioctl mapped devices.\n"
|
||||
"\n"
|
||||
"vitastor-nbd netlink-revive /dev/nbdX (--image <image> | --pool <pool> --inode <inode> --size <size in bytes>)\n"
|
||||
" Restart a dead NBD device without removing it. Supports the same options as netlink-map.\n"
|
||||
"\n"
|
||||
"Use vitastor-nbd --help <command> for command details or vitastor-nbd --help --all for all details.\n"
|
||||
"\n"
|
||||
"All usual Vitastor config options like --config_file <path_to_config> may also be specified in CLI.\n"
|
||||
#endif
|
||||
;
|
||||
|
||||
class nbd_proxy
|
||||
{
|
||||
protected:
|
||||
std::string image_name;
|
||||
uint64_t inode = 0;
|
||||
uint64_t device_size = 0;
|
||||
int nbd_timeout = 300;
|
||||
uint64_t nbd_conn_timeout = 0;
|
||||
int nbd_timeout = 0;
|
||||
int nbd_max_devices = 64;
|
||||
int nbd_max_part = 3;
|
||||
inode_watch_t *watch = NULL;
|
||||
|
@ -74,19 +362,19 @@ public:
|
|||
{
|
||||
if (!strcmp(args[i], "-h") || !strcmp(args[i], "--help"))
|
||||
{
|
||||
help();
|
||||
cfg["help"] = 1;
|
||||
}
|
||||
else if (args[i][0] == '-' && args[i][1] == '-')
|
||||
{
|
||||
const char *opt = args[i]+2;
|
||||
cfg[opt] = !strcmp(opt, "json") || i == narg-1 ? "1" : args[++i];
|
||||
cfg[opt] = !strcmp(opt, "json") || !strcmp(opt, "all") || i == narg-1 ? "1" : args[++i];
|
||||
}
|
||||
else if (pos == 0)
|
||||
{
|
||||
cfg["command"] = args[i];
|
||||
pos++;
|
||||
}
|
||||
else if (pos == 1 && (cfg["command"] == "map" || cfg["command"] == "unmap"))
|
||||
else if (pos == 1)
|
||||
{
|
||||
int n = 0;
|
||||
if (sscanf(args[i], "/dev/nbd%d", &n) > 0)
|
||||
|
@ -101,9 +389,13 @@ public:
|
|||
|
||||
void exec(json11::Json cfg)
|
||||
{
|
||||
if (cfg["help"].bool_value())
|
||||
{
|
||||
goto help;
|
||||
}
|
||||
if (cfg["command"] == "map")
|
||||
{
|
||||
start(cfg);
|
||||
start(cfg, false, false);
|
||||
}
|
||||
else if (cfg["command"] == "unmap")
|
||||
{
|
||||
|
@ -112,8 +404,28 @@ public:
|
|||
fprintf(stderr, "device name or number is missing\n");
|
||||
exit(1);
|
||||
}
|
||||
unmap(cfg["dev_num"].uint64_value());
|
||||
if (cfg["netlink"].is_null())
|
||||
{
|
||||
ioctl_unmap(cfg["dev_num"].uint64_value());
|
||||
}
|
||||
else
|
||||
{
|
||||
}
|
||||
}
|
||||
#ifdef HAVE_NBD_NETLINK_H
|
||||
else if (cfg["command"] == "netlink-map")
|
||||
{
|
||||
start(cfg, true, false);
|
||||
}
|
||||
else if (cfg["command"] == "netlink-revive")
|
||||
{
|
||||
start(cfg, true, true);
|
||||
}
|
||||
else if (cfg["command"] == "netlink-unmap")
|
||||
{
|
||||
netlink_disconnect(cfg["dev_num"].uint64_value());
|
||||
}
|
||||
#endif
|
||||
else if (cfg["command"] == "ls" || cfg["command"] == "list" || cfg["command"] == "list-mapped")
|
||||
{
|
||||
auto mapped = list_mapped();
|
||||
|
@ -121,43 +433,13 @@ public:
|
|||
}
|
||||
else
|
||||
{
|
||||
help();
|
||||
help:
|
||||
print_help(help_text, "vitastor-nbd", cfg["command"].string_value(), cfg["all"].bool_value());
|
||||
exit(0);
|
||||
}
|
||||
}
|
||||
|
||||
static void help()
|
||||
{
|
||||
printf(
|
||||
"Vitastor NBD proxy\n"
|
||||
"(c) Vitaliy Filippov, 2020-2021 (VNPL-1.1)\n\n"
|
||||
"USAGE:\n"
|
||||
" %s map [OPTIONS] (--image <image> | --pool <pool> --inode <inode> --size <size in bytes>)\n"
|
||||
" %s unmap /dev/nbd0\n"
|
||||
" %s ls [--json]\n"
|
||||
"OPTIONS:\n"
|
||||
" All usual Vitastor config options like --config_file <path_to_config> plus NBD-specific:\n"
|
||||
" --nbd_timeout 300\n"
|
||||
" Timeout for I/O operations in seconds after exceeding which the kernel stops\n"
|
||||
" the device. You can set it to 0 to disable the timeout, but beware that you\n"
|
||||
" won't be able to stop the device at all if vitastor-nbd process dies.\n"
|
||||
" --nbd_max_devices 64 --nbd_max_part 3\n"
|
||||
" Options for the \"nbd\" kernel module when modprobing it (nbds_max and max_part).\n"
|
||||
" note that maximum allowed (nbds_max)*(1+max_part) is 256.\n"
|
||||
" Note that nbd_timeout, nbd_max_devices and nbd_max_part options may also be specified\n"
|
||||
" in /etc/vitastor/vitastor.conf or in other configuration file specified with --config_file.\n"
|
||||
" --logfile /path/to/log/file.txt\n"
|
||||
" Write log messages to the specified file instead of dropping them (in background mode)\n"
|
||||
" or printing them to the standard output (in foreground mode).\n"
|
||||
" --dev_num N\n"
|
||||
" Use the specified device /dev/nbdN instead of automatic selection.\n"
|
||||
" --foreground 1\n"
|
||||
" Stay in foreground, do not daemonize.\n",
|
||||
exe_name, exe_name, exe_name
|
||||
);
|
||||
exit(0);
|
||||
}
|
||||
|
||||
void unmap(int dev_num)
|
||||
void ioctl_unmap(int dev_num)
|
||||
{
|
||||
char path[64] = { 0 };
|
||||
sprintf(path, "/dev/nbd%d", dev_num);
|
||||
|
@ -176,7 +458,7 @@ public:
|
|||
close(nbd);
|
||||
}
|
||||
|
||||
void start(json11::Json cfg)
|
||||
void start(json11::Json cfg, bool netlink, bool revive)
|
||||
{
|
||||
// Check options
|
||||
if (cfg["image"].string_value() != "")
|
||||
|
@ -206,19 +488,6 @@ public:
|
|||
exit(1);
|
||||
}
|
||||
}
|
||||
auto file_config = osd_messenger_t::read_config(cfg);
|
||||
if (file_config["nbd_max_devices"].is_number() || file_config["nbd_max_devices"].is_string())
|
||||
{
|
||||
nbd_max_devices = file_config["nbd_max_devices"].uint64_value();
|
||||
}
|
||||
if (file_config["nbd_max_part"].is_number() || file_config["nbd_max_part"].is_string())
|
||||
{
|
||||
nbd_max_part = file_config["nbd_max_part"].uint64_value();
|
||||
}
|
||||
if (file_config["nbd_timeout"].is_number() || file_config["nbd_timeout"].is_string())
|
||||
{
|
||||
nbd_timeout = file_config["nbd_timeout"].uint64_value();
|
||||
}
|
||||
if (cfg["client_writeback_allowed"].is_null())
|
||||
{
|
||||
// NBD is always aware of fsync, so we allow write-back cache
|
||||
|
@ -227,6 +496,7 @@ public:
|
|||
obj["client_writeback_allowed"] = true;
|
||||
cfg = obj;
|
||||
}
|
||||
|
||||
// Create client
|
||||
ringloop = new ring_loop_t(RINGLOOP_DEFAULT_SIZE);
|
||||
epmgr = new epoll_manager_t(ringloop);
|
||||
|
@ -250,6 +520,25 @@ public:
|
|||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
// cli->config contains merged config
|
||||
if (cli->config.find("nbd_max_devices") != cli->config.end())
|
||||
{
|
||||
nbd_max_devices = cli->config["nbd_max_devices"].uint64_value();
|
||||
}
|
||||
if (cli->config.find("nbd_max_part") != cli->config.end())
|
||||
{
|
||||
nbd_max_part = cli->config["nbd_max_part"].uint64_value();
|
||||
}
|
||||
if (cli->config.find("nbd_timeout") != cli->config.end())
|
||||
{
|
||||
nbd_timeout = cli->config["nbd_timeout"].uint64_value();
|
||||
}
|
||||
if (cli->config.find("nbd_conn_timeout") != cli->config.end())
|
||||
{
|
||||
nbd_conn_timeout = cli->config["nbd_conn_timeout"].uint64_value();
|
||||
}
|
||||
|
||||
// Initialize NBD
|
||||
int sockfd[2];
|
||||
if (socketpair(AF_UNIX, SOCK_STREAM, 0, sockfd) < 0)
|
||||
|
@ -257,46 +546,87 @@ public:
|
|||
perror("socketpair");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
fcntl(sockfd[0], F_SETFL, fcntl(sockfd[0], F_GETFL, 0) | O_NONBLOCK);
|
||||
nbd_fd = sockfd[0];
|
||||
load_module();
|
||||
bool bg = cfg["foreground"].is_null();
|
||||
if (!cfg["dev_num"].is_null())
|
||||
|
||||
if (netlink)
|
||||
{
|
||||
if (run_nbd(sockfd, cfg["dev_num"].int64_value(), device_size, NBD_FLAG_SEND_FLUSH, nbd_timeout, bg) < 0)
|
||||
#ifdef HAVE_NBD_NETLINK_H
|
||||
int devnum = -1;
|
||||
if (!cfg["dev_num"].is_null())
|
||||
{
|
||||
perror("run_nbd");
|
||||
devnum = (int)cfg["dev_num"].uint64_value();
|
||||
}
|
||||
uint64_t flags = NBD_FLAG_SEND_FLUSH;
|
||||
uint64_t cflags = 0;
|
||||
#ifdef NBD_FLAG_READ_ONLY
|
||||
if (!cfg["nbd_ro"].is_null())
|
||||
flags |= NBD_FLAG_READ_ONLY;
|
||||
#endif
|
||||
#ifdef NBD_CFLAG_DESTROY_ON_DISCONNECT
|
||||
if (!cfg["nbd_destroy_on_disconnect"].is_null())
|
||||
cflags |= NBD_CFLAG_DESTROY_ON_DISCONNECT;
|
||||
#endif
|
||||
#ifdef NBD_CFLAG_DISCONNECT_ON_CLOSE
|
||||
if (!cfg["nbd_disconnect_on_close"].is_null())
|
||||
cflags |= NBD_CFLAG_DISCONNECT_ON_CLOSE;
|
||||
#endif
|
||||
int err = netlink_configure(sockfd + 1, 1, devnum, device_size, 4096, flags, cflags, nbd_timeout, nbd_conn_timeout, NULL, revive);
|
||||
if (err < 0)
|
||||
{
|
||||
errno = (err == -NLE_BUSY ? EBUSY : EIO);
|
||||
fprintf(stderr, "netlink_configure failed: %s (code %d)\n", nl_geterror(err), err);
|
||||
exit(1);
|
||||
}
|
||||
close(sockfd[1]);
|
||||
printf("/dev/nbd%d\n", err);
|
||||
#else
|
||||
fprintf(stderr, "netlink support is disabled in this build\n");
|
||||
exit(1);
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
// Find an unused device
|
||||
int i = 0;
|
||||
while (true)
|
||||
if (!cfg["dev_num"].is_null())
|
||||
{
|
||||
int r = run_nbd(sockfd, i, device_size, NBD_FLAG_SEND_FLUSH, nbd_timeout, bg);
|
||||
if (r == 0)
|
||||
if (run_nbd(sockfd, cfg["dev_num"].int64_value(), device_size, NBD_FLAG_SEND_FLUSH, nbd_timeout, bg) < 0)
|
||||
{
|
||||
printf("/dev/nbd%d\n", i);
|
||||
break;
|
||||
}
|
||||
else if (r == -1 && errno == ENOENT)
|
||||
{
|
||||
fprintf(stderr, "No free NBD devices found\n");
|
||||
exit(1);
|
||||
}
|
||||
else if (r == -2 && errno == EBUSY)
|
||||
{
|
||||
i++;
|
||||
}
|
||||
else
|
||||
{
|
||||
printf("%d %d\n", r, errno);
|
||||
perror("run_nbd");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Find an unused device
|
||||
int i = 0;
|
||||
while (true)
|
||||
{
|
||||
int r = run_nbd(sockfd, i, device_size, NBD_FLAG_SEND_FLUSH, nbd_timeout, bg);
|
||||
if (r == 0)
|
||||
{
|
||||
printf("/dev/nbd%d\n", i);
|
||||
break;
|
||||
}
|
||||
else if (r == -1 && errno == ENOENT)
|
||||
{
|
||||
fprintf(stderr, "No free NBD devices found\n");
|
||||
exit(1);
|
||||
}
|
||||
else if (r == -2 && errno == EBUSY)
|
||||
{
|
||||
i++;
|
||||
}
|
||||
else
|
||||
{
|
||||
printf("%d %d\n", r, errno);
|
||||
perror("run_nbd");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (cfg["logfile"].string_value() != "")
|
||||
{
|
||||
|
@ -368,9 +698,10 @@ public:
|
|||
return;
|
||||
}
|
||||
int r;
|
||||
// Kernel built-in default is 16 devices with up to 16 partitions per device which is a big shit
|
||||
// 64 also isn't too high, but the possible maximum is nbds_max=256 max_part=0 and it won't reserve
|
||||
// any block device minor numbers for partitions
|
||||
// NBD module creates ALL <nbd_max_devices> devices in /dev/ when loaded
|
||||
// Kernel built-in default is 16 devices with up to 16 partitions per device which is a bit too low.
|
||||
// ...and ioctl setup method can't create additional devices.
|
||||
// netlink setup method, however, CAN create additional devices.
|
||||
if ((r = system(("modprobe nbd nbds_max="+std::to_string(nbd_max_devices)+" max_part="+std::to_string(nbd_max_part)).c_str())) != 0)
|
||||
{
|
||||
if (r < 0)
|
||||
|
|
|
@ -209,7 +209,7 @@ void print_help(const char *help_text, std::string exe_name, std::string cmd, bo
|
|||
const char *var_end = var_start;
|
||||
while (*var_end && !isspace(*var_end))
|
||||
var_end++;
|
||||
if ((std::string(var_start, var_end-var_start)+"|").find(cmd+"|") != std::string::npos)
|
||||
if (("|"+std::string(var_start, var_end-var_start)+"|").find("|"+cmd+"|") != std::string::npos)
|
||||
found = matched = true;
|
||||
}
|
||||
else if (*next_line && isspace(*next_line))
|
||||
|
|
Loading…
Reference in New Issue