Compare commits

..

18 Commits

Author SHA1 Message Date
1f6c4c79d6 vmsplice+splice experiment in stub_osd to test it too 2021-11-22 01:20:12 +03:00
4936c42132 Splice via io_uring - bad result too
40% CPU according to perf is lost inside do_splice() -> unix_stream_sendpage()
without io_uring and in various exc_page_fault() with io_uring
2021-11-22 00:13:27 +03:00
6c3248a36c Experiment: vmsplice+splice "zero-copy" read in NBD 2021-11-22 00:12:39 +03:00
a863013cb2 Add a patch for qemu 6.1 and replace _ with - in qemu options 2021-11-21 16:16:46 +03:00
660c3f7b0d Change default RDMA settings to 128x 129K buffers
129K to leave extra space for the header

The problem with 8x 1M buffers is that the following happens with,
for example, 2 OSDs and 4M T1Q1 write:
- Server posts 8 receives
- Client posts 8 sends
- WRs are processed by the RDMA stack, but the OSD doesn't have the time
  to handle them and doesn't refill buffers
- Client posts 1 more send
- RNR retransmission happens and performance drops to zero

Overall it seems that RDMA support should be reworked to use real 'RDMA'
operations i.e. operations writing into remote memory. This has an
additional advantage of avoiding a copy at the receive side of the OSD.
2021-11-21 12:05:52 +03:00
f0ebfae3b8 Fix vitastor-cli alloc-osd, use vitastor-cli in make-osd.sh 2021-11-21 00:01:03 +03:00
eb7ad2c114 Fix empty size syntax, use C version of simple-offsets in tests 2021-11-20 23:51:26 +03:00
cd21ff0b6a Rewrite simple-offsets.js in C/C++ 2021-11-19 02:39:56 +03:00
d3903f039c Implement alloc-osd (allocate a new OSD number) command 2021-11-19 02:39:37 +03:00
66fe1a469b Additionally balance parity chunks over OSDs using round-robin when generating initial distribution 2021-11-16 21:02:39 +03:00
24409bd4c4 Oops # 2. Fix vitastor-cli create broken due to json11 :D 2021-11-16 12:52:35 +03:00
c5029961ea Oops. Fix vitastor-cli ls 2021-11-16 12:39:41 +03:00
1ca1143d4a Add fio_version and qemu_version variable example files back 2021-11-15 17:21:01 +03:00
920345f7b6 Release 0.6.8
- Build separate packages for OSD, monitor, client, C header, fio and QEMU drivers
  instead of one package which included everything
2021-11-15 00:49:21 +03:00
75b47a6298 Generate pkg-config file 2021-11-15 00:49:21 +03:00
6e446653ae Include README 2021-11-15 00:00:02 +03:00
e51edf2542 Split into multiple rpm packages 2021-11-14 23:48:50 +03:00
ce170af91f Split into multiple Debian packages 2021-11-14 23:31:30 +03:00
62 changed files with 1177 additions and 309 deletions

View File

@@ -2,6 +2,6 @@ cmake_minimum_required(VERSION 2.8)
project(vitastor)
set(VERSION "0.6.7")
set(VERSION "0.6.8")
add_subdirectory(src)

View File

@@ -1,4 +1,4 @@
VERSION ?= v0.6.7
VERSION ?= v0.6.8
all: build push

View File

@@ -49,7 +49,7 @@ spec:
capabilities:
add: ["SYS_ADMIN"]
allowPrivilegeEscalation: true
image: vitalif/vitastor-csi:v0.6.7
image: vitalif/vitastor-csi:v0.6.8
args:
- "--node=$(NODE_ID)"
- "--endpoint=$(CSI_ENDPOINT)"

View File

@@ -116,7 +116,7 @@ spec:
privileged: true
capabilities:
add: ["SYS_ADMIN"]
image: vitalif/vitastor-csi:v0.6.7
image: vitalif/vitastor-csi:v0.6.8
args:
- "--node=$(NODE_ID)"
- "--endpoint=$(CSI_ENDPOINT)"

View File

@@ -5,7 +5,7 @@ package vitastor
const (
vitastorCSIDriverName = "csi.vitastor.io"
vitastorCSIDriverVersion = "0.6.7"
vitastorCSIDriverVersion = "0.6.8"
)
// Config struct fills the parameters of request or user input

2
debian/changelog vendored
View File

@@ -1,4 +1,4 @@
vitastor (0.6.7-1) unstable; urgency=medium
vitastor (0.6.8-1) unstable; urgency=medium
* RDMA support
* Bugfixes

40
debian/control vendored
View File

@@ -9,9 +9,47 @@ Rules-Requires-Root: no
Package: vitastor
Architecture: amd64
Depends: ${shlibs:Depends}, ${misc:Depends}, fio (= ${dep:fio}), qemu (= ${dep:qemu}), nodejs (>= 10), node-sprintf-js, node-ws (>= 7), libjerasure2, lp-solve
Depends: vitastor-osd, vitastor-mon, vitastor-client, vitastor-client-dev, vitastor-fio, vitastor-qemu
Description: Vitastor, a fast software-defined clustered block storage
Vitastor is a small, simple and fast clustered block storage (storage for VM drives),
architecturally similar to Ceph which means strong consistency, primary-replication,
symmetric clustering and automatic data distribution over any number of drives of any
size with configurable redundancy (replication or erasure codes/XOR).
Package: vitastor-osd
Architecture: amd64
Depends: ${shlibs:Depends}, ${misc:Depends}, vitastor-client (= ${binary:Version})
Description: Vitastor, a fast software-defined clustered block storage - object storage daemon
Vitastor object storage daemon, i.e. server program that stores data.
Package: vitastor-mon
Architecture: amd64
Depends: ${misc:Depends}, nodejs (>= 10), node-sprintf-js, node-ws (>= 7), lp-solve
Description: Vitastor, a fast software-defined clustered block storage - monitor
Vitastor monitor, i.e. server program responsible for watching cluster state and
scheduling cluster-level operations.
Package: vitastor-client
Architecture: amd64
Depends: ${shlibs:Depends}, ${misc:Depends}
Description: Vitastor, a fast software-defined clustered block storage - client
Vitastor client library and command-line interface.
Package: vitastor-client-dev
Section: devel
Architecture: amd64
Depends: ${misc:Depends}, vitastor-client (= ${binary:Version})
Description: Vitastor, a fast software-defined clustered block storage - development files
Vitastor library headers for development.
Package: vitastor-fio
Architecture: amd64
Depends: ${shlibs:Depends}, ${misc:Depends}, vitastor-client (= ${binary:Version}), fio (= ${dep:fio})
Description: Vitastor, a fast software-defined clustered block storage - fio drivers
Vitastor fio drivers for benchmarking.
Package: vitastor-qemu
Architecture: amd64
Depends: ${shlibs:Depends}, ${misc:Depends}, vitastor-client (= ${binary:Version}), qemu (= ${dep:qemu})
Description: Vitastor, a fast software-defined clustered block storage - QEMU driver
Vitastor QEMU block device driver.

1
debian/fio_version vendored Normal file
View File

@@ -0,0 +1 @@
dep:fio=3.16-1

3
debian/install vendored
View File

@@ -1,3 +1,4 @@
VNPL-1.1.txt usr/share/doc/vitastor
GPL-2.0.txt usr/share/doc/vitastor
mon usr/lib/vitastor
README.md usr/share/doc/vitastor
README-ru.md usr/share/doc/vitastor

View File

@@ -7,11 +7,11 @@ ARG REL=
WORKDIR /root
RUN if [ "$REL" = "buster" ]; then \
echo 'deb http://deb.debian.org/debian buster-backports main' >> /etc/apt/sources.list; \
RUN if [ "$REL" = "buster" -o "$REL" = "bullseye" ]; then \
echo "deb http://deb.debian.org/debian $REL-backports main" >> /etc/apt/sources.list; \
echo >> /etc/apt/preferences; \
echo 'Package: *' >> /etc/apt/preferences; \
echo 'Pin: release a=buster-backports' >> /etc/apt/preferences; \
echo "Pin: release a=$REL-backports" >> /etc/apt/preferences; \
echo 'Pin-Priority: 500' >> /etc/apt/preferences; \
fi; \
grep '^deb ' /etc/apt/sources.list | perl -pe 's/^deb/deb-src/' >> /etc/apt/sources.list; \
@@ -27,15 +27,20 @@ RUN apt-get -y build-dep fio
RUN apt-get --download-only source qemu
RUN apt-get --download-only source fio
ADD patches/qemu-5.0-vitastor.patch patches/qemu-5.1-vitastor.patch /root/vitastor/patches/
ADD patches/qemu-5.0-vitastor.patch patches/qemu-5.1-vitastor.patch patches/qemu-6.1-vitastor.patch /root/vitastor/patches/
RUN set -e; \
mkdir -p /root/packages/qemu-$REL; \
rm -rf /root/packages/qemu-$REL/*; \
cd /root/packages/qemu-$REL; \
dpkg-source -x /root/qemu*.dsc; \
if [ -d /root/packages/qemu-$REL/qemu-5.0 ]; then \
cp /root/vitastor/patches/qemu-5.0-vitastor.patch /root/packages/qemu-$REL/qemu-5.0/debian/patches; \
echo qemu-5.0-vitastor.patch >> /root/packages/qemu-$REL/qemu-5.0/debian/patches/series; \
if ls -d /root/packages/qemu-$REL/qemu-5.0*; then \
D=$(ls -d /root/packages/qemu-$REL/qemu-5.0*); \
cp /root/vitastor/patches/qemu-5.0-vitastor.patch $D/debian/patches; \
echo qemu-5.0-vitastor.patch >> $D/debian/patches/series; \
elif ls /root/packages/qemu-$REL/qemu-6.1*; then \
D=$(ls -d /root/packages/qemu-$REL/qemu-6.1*); \
cp /root/vitastor/patches/qemu-6.1-vitastor.patch $D/debian/patches; \
echo qemu-6.1-vitastor.patch >> $D/debian/patches/series; \
else \
cp /root/vitastor/patches/qemu-5.1-vitastor.patch /root/packages/qemu-$REL/qemu-*/debian/patches; \
P=`ls -d /root/packages/qemu-$REL/qemu-*/debian/patches`; \

1
debian/qemu_version vendored Normal file
View File

@@ -0,0 +1 @@
dep:qemu=1:5.2+dfsg-10+vitastor1

3
debian/rules vendored
View File

@@ -5,5 +5,6 @@ export DH_VERBOSE = 1
dh $@
override_dh_installdeb:
cat debian/substvars >> debian/vitastor.substvars
cat debian/fio_version >> debian/vitastor-fio.substvars
cat debian/qemu_version >> debian/vitastor-qemu.substvars
dh_installdeb

2
debian/substvars vendored
View File

@@ -1,2 +0,0 @@
dep:fio=3.16-1
dep:qemu=1:5.1+dfsg-4+vitastor1

2
debian/vitastor-client-dev.install vendored Normal file
View File

@@ -0,0 +1,2 @@
usr/include
usr/lib/*/pkgconfig

5
debian/vitastor-client.install vendored Normal file
View File

@@ -0,0 +1,5 @@
usr/bin/vita
usr/bin/vitastor-cli
usr/bin/vitastor-rm
usr/bin/vitastor-nbd
usr/lib/*/libvitastor*.so*

1
debian/vitastor-fio.install vendored Normal file
View File

@@ -0,0 +1 @@
usr/lib/*/libfio*.so*

1
debian/vitastor-mon.install vendored Normal file
View File

@@ -0,0 +1 @@
mon usr/lib/vitastor

3
debian/vitastor-osd.install vendored Normal file
View File

@@ -0,0 +1,3 @@
usr/bin/vitastor-osd
usr/bin/vitastor-dump-journal
mon/make-osd.sh /usr/lib/vitastor

1
debian/vitastor-qemu.install vendored Normal file
View File

@@ -0,0 +1 @@
usr/lib/*/qemu/*

View File

@@ -42,10 +42,10 @@ RUN set -e -x; \
mkdir -p /root/packages/vitastor-$REL; \
rm -rf /root/packages/vitastor-$REL/*; \
cd /root/packages/vitastor-$REL; \
cp -r /root/vitastor vitastor-0.6.7; \
ln -s /root/packages/qemu-$REL/qemu-*/ vitastor-0.6.7/qemu; \
ln -s /root/fio-build/fio-*/ vitastor-0.6.7/fio; \
cd vitastor-0.6.7; \
cp -r /root/vitastor vitastor-0.6.8; \
ln -s /root/packages/qemu-$REL/qemu-*/ vitastor-0.6.8/qemu; \
ln -s /root/fio-build/fio-*/ vitastor-0.6.8/fio; \
cd vitastor-0.6.8; \
FIO=$(head -n1 fio/debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
QEMU=$(head -n1 qemu/debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
sh copy-qemu-includes.sh; \
@@ -58,11 +58,11 @@ RUN set -e -x; \
echo qemu-fio-headers.patch >> debian/patches/series; \
rm -rf a b; \
rm -rf /root/packages/qemu-$REL/qemu*/; \
echo "dep:fio=$FIO" > debian/substvars; \
echo "dep:qemu=$QEMU" >> debian/substvars; \
echo "dep:fio=$FIO" > debian/fio_version; \
echo "dep:qemu=$QEMU" > debian/qemu_version; \
cd /root/packages/vitastor-$REL; \
tar --sort=name --mtime='2020-01-01' --owner=0 --group=0 --exclude=debian -cJf vitastor_0.6.7.orig.tar.xz vitastor-0.6.7; \
cd vitastor-0.6.7; \
tar --sort=name --mtime='2020-01-01' --owner=0 --group=0 --exclude=debian -cJf vitastor_0.6.8.orig.tar.xz vitastor-0.6.8; \
cd vitastor-0.6.8; \
V=$(head -n1 debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
DEBFULLNAME="Vitaliy Filippov <vitalif@yourcmc.ru>" dch -D $REL -v "$V""$REL" "Rebuild for $REL"; \
DEB_BUILD_OPTIONS=nocheck dpkg-buildpackage --jobs=auto -sa; \

2
json11

Submodule json11 updated: 34781ed7ee...55363fc265

View File

@@ -50,7 +50,7 @@ async function lp_solve(text)
return { score, vars };
}
async function optimize_initial({ osd_tree, pg_count, pg_size = 3, pg_minsize = 2, max_combinations = 10000, parity_space = 1 })
async function optimize_initial({ osd_tree, pg_count, pg_size = 3, pg_minsize = 2, max_combinations = 10000, parity_space = 1, round_robin = false })
{
if (!pg_count || !osd_tree)
{
@@ -92,7 +92,7 @@ async function optimize_initial({ osd_tree, pg_count, pg_size = 3, pg_minsize =
console.log(lp);
throw new Error('Problem is infeasible or unbounded - is it a bug?');
}
const int_pgs = make_int_pgs(lp_result.vars, pg_count);
const int_pgs = make_int_pgs(lp_result.vars, pg_count, round_robin);
const eff = pg_list_space_efficiency(int_pgs, all_weights, pg_minsize, parity_space);
const res = {
score: lp_result.score,
@@ -115,7 +115,7 @@ function shuffle(array)
}
}
function make_int_pgs(weights, pg_count)
function make_int_pgs(weights, pg_count, round_robin)
{
const total_weight = Object.values(weights).reduce((a, c) => Number(a) + Number(c), 0);
let int_pgs = [];
@@ -123,10 +123,15 @@ function make_int_pgs(weights, pg_count)
let weight_left = total_weight;
for (const pg_name in weights)
{
let cur_pg = pg_name.substr(3).split('_');
let n = Math.round(weights[pg_name] / weight_left * pg_left);
for (let i = 0; i < n; i++)
{
int_pgs.push(pg_name.substr(3).split('_'));
int_pgs.push([ ...cur_pg ]);
if (round_robin)
{
cur_pg.push(cur_pg.shift());
}
}
weight_left -= weights[pg_name];
pg_left -= n;

View File

@@ -17,23 +17,14 @@ ETCD_MON=$(echo $ETCD_HOSTS | perl -pe 's/:2380/:2379/g; s/etcd\d*=//g;')
D=`dirname $0`
# Create OSDs on all passed devices
OSD_NUM=1
for DEV in $*; do
# Ugly :) -> node.js rework pending
while true; do
ST=$(etcdctl --endpoints="$ETCD_MON" get --print-value-only /vitastor/osd/stats/$OSD_NUM)
if [ "$ST" = "" ]; then
break
fi
OSD_NUM=$((OSD_NUM+1))
done
etcdctl --endpoints="$ETCD_MON" put /vitastor/osd/stats/$OSD_NUM '{}'
OSD_NUM=$(vitastor-cli alloc-osd)
echo Creating OSD $OSD_NUM on $DEV
OPT=`node $D/simple-offsets.js --device $DEV --format options | tr '\n' ' '`
META=`echo $OPT | grep -Po '(?<=data_offset )\d+'`
OPT=$(vitastor-cli simple-offsets --format options $DEV | tr '\n' ' ')
META=$(vitastor-cli simple-offsets --format json $DEV | jq .data_offset)
dd if=/dev/zero of=$DEV bs=1048576 count=$(((META+1048575)/1048576)) oflag=direct
cat >/etc/systemd/system/vitastor-osd$OSD_NUM.service <<EOF

View File

@@ -1022,6 +1022,7 @@ class Mon
pg_size: pool_cfg.pg_size,
pg_minsize: pool_cfg.pg_minsize,
max_combinations: pool_cfg.max_osd_combinations,
round_robin: pool_cfg.scheme != 'replicated',
};
let optimize_result;
if (old_pg_count > 0)

View File

@@ -50,7 +50,7 @@ from cinder.volume import configuration
from cinder.volume import driver
from cinder.volume import volume_utils
VERSION = '0.6.7'
VERSION = '0.6.8'
LOG = logging.getLogger(__name__)
@@ -489,7 +489,7 @@ class VitastorDriver(driver.CloneableImageVD,
# FIXME use etcd_address in qemu driver
kk = 'etcd_host'
if v:
args += ':'+kk+'='+v.replace(':', '\\:')
args += ':'+kk.replace('_', '-')+'='+v.replace(':', '\\:')
return args
def delete_volume(self, volume):

View File

@@ -245,9 +245,9 @@ index cbf0aa4..096700d 100644
+
+ if (virJSONValueObjectCreate(&ret,
+ "s:driver", "vitastor",
+ "S:etcd_host", etcd,
+ "S:etcd_prefix", src->relPath,
+ "S:config_path", src->configFile,
+ "S:etcd-host", etcd,
+ "S:etcd-prefix", src->relPath,
+ "S:config-path", src->configFile,
+ "s:image", src->path,
+ NULL) < 0)
+ goto cleanup;
@@ -293,7 +293,7 @@ index 822d5f8..e375cef 100644
+ virBufferStrcat(&buf, "vitastor:image=", src->path, NULL);
+
+ if (src->nhosts > 0) {
+ virBufferAddLit(&buf, ":etcd_host=");
+ virBufferAddLit(&buf, ":etcd-host=");
+ for (i = 0; i < src->nhosts; i++) {
+ if (i)
+ virBufferAddLit(&buf, ",");
@@ -311,10 +311,10 @@ index 822d5f8..e375cef 100644
+ }
+
+ if (src->configFile)
+ virBufferEscape(&buf, '\\', ":", ":config_path=%s", src->configFile);
+ virBufferEscape(&buf, '\\', ":", ":config-path=%s", src->configFile);
+
+ if (src->relPath)
+ virBufferEscape(&buf, '\\', ":", ":etcd_prefix=%s", src->relPath);
+ virBufferEscape(&buf, '\\', ":", ":etcd-prefix=%s", src->relPath);
+
+ ret = virBufferContentAndReset(&buf);
+ break;
@@ -438,16 +438,16 @@ index bd4b027..b323cd6 100644
+ if (STRPREFIX(p, "image=")) {
+ if (VIR_STRDUP(src->path, p + strlen("image=")) < 0)
+ return -1;
+ } else if (STRPREFIX(p, "etcd_prefix=")) {
+ if (VIR_STRDUP(src->relPath, p + strlen("etcd_prefix=")) < 0)
+ } else if (STRPREFIX(p, "etcd-prefix=")) {
+ if (VIR_STRDUP(src->relPath, p + strlen("etcd-prefix=")) < 0)
+ return -1;
+ } else if (STRPREFIX(p, "config_file=")) {
+ if (VIR_STRDUP(src->configFile, p + strlen("config_file=")) < 0)
+ } else if (STRPREFIX(p, "config-path=")) {
+ if (VIR_STRDUP(src->configFile, p + strlen("config-path=")) < 0)
+ return -1;
+ } else if (STRPREFIX(p, "etcd_host=")) {
+ } else if (STRPREFIX(p, "etcd-host=")) {
+ char *h, *sep;
+
+ h = p + strlen("etcd_host=");
+ h = p + strlen("etcd-host=");
+ while (h < e) {
+ for (sep = h; sep < e; ++sep) {
+ if (*sep == '\\' && (sep[1] == ',' ||
@@ -507,8 +507,8 @@ index bd4b027..b323cd6 100644
+{
+ const char *filename;
+ const char *image = virJSONValueObjectGetString(json, "image");
+ const char *conf = virJSONValueObjectGetString(json, "config_path");
+ const char *etcd_prefix = virJSONValueObjectGetString(json, "etcd_prefix");
+ const char *conf = virJSONValueObjectGetString(json, "config-path");
+ const char *etcd_prefix = virJSONValueObjectGetString(json, "etcd-prefix");
+ virJSONValuePtr servers = virJSONValueObjectGetArray(json, "server");
+ size_t nservers;
+ size_t i;

View File

@@ -244,9 +244,9 @@ index f9c6da2..922dde5 100644
+ }
+
+ if (virJSONValueObjectCreate(&ret,
+ "S:etcd_host", etcd,
+ "S:etcd_prefix", src->query,
+ "S:config_path", src->configFile,
+ "S:etcd-host", etcd,
+ "S:etcd-prefix", src->query,
+ "S:config-path", src->configFile,
+ "s:image", src->path,
+ NULL) < 0)
+ return NULL;
@@ -311,7 +311,7 @@ index 6f970a3..10b39ca 100644
+ virBufferStrcat(&buf, "vitastor:image=", src->path, NULL);
+
+ if (src->nhosts > 0) {
+ virBufferAddLit(&buf, ":etcd_host=");
+ virBufferAddLit(&buf, ":etcd-host=");
+ for (i = 0; i < src->nhosts; i++) {
+ if (i)
+ virBufferAddLit(&buf, ",");
@@ -329,10 +329,10 @@ index 6f970a3..10b39ca 100644
+ }
+
+ if (src->configFile)
+ virBufferEscape(&buf, '\\', ":", ":config_path=%s", src->configFile);
+ virBufferEscape(&buf, '\\', ":", ":config-path=%s", src->configFile);
+
+ if (src->query)
+ virBufferEscape(&buf, '\\', ":", ":etcd_prefix=%s", src->query);
+ virBufferEscape(&buf, '\\', ":", ":etcd-prefix=%s", src->query);
+
+ ret = virBufferContentAndReset(&buf);
+ break;
@@ -462,14 +462,14 @@ index 0d3c2af..36e3afc 100644
+
+ if (STRPREFIX(p, "image=")) {
+ src->path = g_strdup(p + strlen("image="));
+ } else if (STRPREFIX(p, "etcd_prefix=")) {
+ src->query = g_strdup(p + strlen("etcd_prefix="));
+ } else if (STRPREFIX(p, "config_file=")) {
+ src->configFile = g_strdup(p + strlen("config_file="));
+ } else if (STRPREFIX(p, "etcd_host=")) {
+ } else if (STRPREFIX(p, "etcd-prefix=")) {
+ src->query = g_strdup(p + strlen("etcd-prefix="));
+ } else if (STRPREFIX(p, "config-path=")) {
+ src->configFile = g_strdup(p + strlen("config-path="));
+ } else if (STRPREFIX(p, "etcd-host=")) {
+ char *h, *sep;
+
+ h = p + strlen("etcd_host=");
+ h = p + strlen("etcd-host=");
+ while (h < e) {
+ for (sep = h; sep < e; ++sep) {
+ if (*sep == '\\' && (sep[1] == ',' ||
@@ -526,8 +526,8 @@ index 0d3c2af..36e3afc 100644
+{
+ const char *filename;
+ const char *image = virJSONValueObjectGetString(json, "image");
+ const char *conf = virJSONValueObjectGetString(json, "config_path");
+ const char *etcd_prefix = virJSONValueObjectGetString(json, "etcd_prefix");
+ const char *conf = virJSONValueObjectGetString(json, "config-path");
+ const char *etcd_prefix = virJSONValueObjectGetString(json, "etcd-prefix");
+ virJSONValuePtr servers = virJSONValueObjectGetArray(json, "server");
+ size_t nservers;
+ size_t i;

View File

@@ -276,9 +276,9 @@ index 6627d04..c33f428 100644
+ }
+
+ if (virJSONValueObjectCreate(&ret,
+ "S:etcd_host", etcd,
+ "S:etcd_prefix", src->query,
+ "S:config_path", src->configFile,
+ "S:etcd-host", etcd,
+ "S:etcd-prefix", src->query,
+ "S:config-path", src->configFile,
+ "s:image", src->path,
+ NULL) < 0)
+ return NULL;
@@ -343,7 +343,7 @@ index ea51369..8258632 100644
+ virBufferStrcat(&buf, "vitastor:image=", src->path, NULL);
+
+ if (src->nhosts > 0) {
+ virBufferAddLit(&buf, ":etcd_host=");
+ virBufferAddLit(&buf, ":etcd-host=");
+ for (i = 0; i < src->nhosts; i++) {
+ if (i)
+ virBufferAddLit(&buf, ",");
@@ -361,10 +361,10 @@ index ea51369..8258632 100644
+ }
+
+ if (src->configFile)
+ virBufferEscape(&buf, '\\', ":", ":config_path=%s", src->configFile);
+ virBufferEscape(&buf, '\\', ":", ":config-path=%s", src->configFile);
+
+ if (src->query)
+ virBufferEscape(&buf, '\\', ":", ":etcd_prefix=%s", src->query);
+ virBufferEscape(&buf, '\\', ":", ":etcd-prefix=%s", src->query);
+
+ ret = virBufferContentAndReset(&buf);
+ break;
@@ -474,14 +474,14 @@ index e48ae72..d7a9b72 100644
+
+ if (STRPREFIX(p, "image=")) {
+ src->path = g_strdup(p + strlen("image="));
+ } else if (STRPREFIX(p, "etcd_prefix=")) {
+ src->query = g_strdup(p + strlen("etcd_prefix="));
+ } else if (STRPREFIX(p, "config_file=")) {
+ src->configFile = g_strdup(p + strlen("config_file="));
+ } else if (STRPREFIX(p, "etcd_host=")) {
+ } else if (STRPREFIX(p, "etcd-prefix=")) {
+ src->query = g_strdup(p + strlen("etcd-prefix="));
+ } else if (STRPREFIX(p, "config-path=")) {
+ src->configFile = g_strdup(p + strlen("config-path="));
+ } else if (STRPREFIX(p, "etcd-host=")) {
+ char *h, *sep;
+
+ h = p + strlen("etcd_host=");
+ h = p + strlen("etcd-host=");
+ while (h < e) {
+ for (sep = h; sep < e; ++sep) {
+ if (*sep == '\\' && (sep[1] == ',' ||
@@ -538,8 +538,8 @@ index e48ae72..d7a9b72 100644
+{
+ const char *filename;
+ const char *image = virJSONValueObjectGetString(json, "image");
+ const char *conf = virJSONValueObjectGetString(json, "config_path");
+ const char *etcd_prefix = virJSONValueObjectGetString(json, "etcd_prefix");
+ const char *conf = virJSONValueObjectGetString(json, "config-path");
+ const char *etcd_prefix = virJSONValueObjectGetString(json, "etcd-prefix");
+ virJSONValue *servers = virJSONValueObjectGetArray(json, "server");
+ size_t nservers;
+ size_t i;

View File

@@ -124,7 +124,7 @@ index 391231c527..34dc60dcdd 100644
+ if kk == 'etcd_address':
+ # FIXME use etcd_address in qemu driver
+ kk = 'etcd_host'
+ path += ":"+kk+"="+connection_info['data'][k].replace(':', '\\:')
+ path += ":"+kk.replace('_', '-')+"="+connection_info['data'][k].replace(':', '\\:')
else:
path = 'unknown'
raise exception.DiskNotFound(location='unknown')

View File

@@ -23,18 +23,18 @@ Index: qemu-3.1+dfsg/qapi/block-core.json
+# @inode: Inode number
+# @pool: Pool ID
+# @size: Desired image size in bytes
+# @config_path: Path to Vitastor configuration
+# @etcd_host: etcd connection address(es)
+# @etcd_prefix: etcd key/value prefix
+# @config-path: Path to Vitastor configuration
+# @etcd-host: etcd connection address(es)
+# @etcd-prefix: etcd key/value prefix
+##
+{ 'struct': 'BlockdevOptionsVitastor',
+ 'data': { '*inode': 'uint64',
+ '*pool': 'uint64',
+ '*size': 'uint64',
+ '*image': 'str',
+ '*config_path': 'str',
+ '*etcd_host': 'str',
+ '*etcd_prefix': 'str' } }
+ '*config-path': 'str',
+ '*etcd-host': 'str',
+ '*etcd-prefix': 'str' } }
+
+##
# @ReplicationMode:

View File

@@ -23,18 +23,18 @@ Index: qemu/qapi/block-core.json
+# @inode: Inode number
+# @pool: Pool ID
+# @size: Desired image size in bytes
+# @config_path: Path to Vitastor configuration
+# @etcd_host: etcd connection address(es)
+# @etcd_prefix: etcd key/value prefix
+# @config-path: Path to Vitastor configuration
+# @etcd-host: etcd connection address(es)
+# @etcd-prefix: etcd key/value prefix
+##
+{ 'struct': 'BlockdevOptionsVitastor',
+ 'data': { '*inode': 'uint64',
+ '*pool': 'uint64',
+ '*size': 'uint64',
+ '*image': 'str',
+ '*config_path': 'str',
+ '*etcd_host': 'str',
+ '*etcd_prefix': 'str' } }
+ '*config-path': 'str',
+ '*etcd-host': 'str',
+ '*etcd-prefix': 'str' } }
+
+##
# @ReplicationMode:

View File

@@ -23,18 +23,18 @@ Index: qemu/qapi/block-core.json
+# @inode: Inode number
+# @pool: Pool ID
+# @size: Desired image size in bytes
+# @config_path: Path to Vitastor configuration
+# @etcd_host: etcd connection address(es)
+# @etcd_prefix: etcd key/value prefix
+# @config-path: Path to Vitastor configuration
+# @etcd-host: etcd connection address(es)
+# @etcd-prefix: etcd key/value prefix
+##
+{ 'struct': 'BlockdevOptionsVitastor',
+ 'data': { '*inode': 'uint64',
+ '*pool': 'uint64',
+ '*size': 'uint64',
+ '*image': 'str',
+ '*config_path': 'str',
+ '*etcd_host': 'str',
+ '*etcd_prefix': 'str' } }
+ '*config-path': 'str',
+ '*etcd-host': 'str',
+ '*etcd-prefix': 'str' } }
+
+##
# @ReplicationMode:

View File

@@ -23,18 +23,18 @@ Index: qemu-5.1+dfsg/qapi/block-core.json
+# @inode: Inode number
+# @pool: Pool ID
+# @size: Desired image size in bytes
+# @config_path: Path to Vitastor configuration
+# @etcd_host: etcd connection address(es)
+# @etcd_prefix: etcd key/value prefix
+# @config-path: Path to Vitastor configuration
+# @etcd-host: etcd connection address(es)
+# @etcd-prefix: etcd key/value prefix
+##
+{ 'struct': 'BlockdevOptionsVitastor',
+ 'data': { '*inode': 'uint64',
+ '*pool': 'uint64',
+ '*size': 'uint64',
+ '*image': 'str',
+ '*config_path': 'str',
+ '*etcd_host': 'str',
+ '*etcd_prefix': 'str' } }
+ '*config-path': 'str',
+ '*etcd-host': 'str',
+ '*etcd-prefix': 'str' } }
+
+##
# @ReplicationMode:

View File

@@ -0,0 +1,88 @@
Index: qemu-6.1+dfsg/qapi/block-core.json
===================================================================
--- qemu-6.1+dfsg.orig/qapi/block-core.json
+++ qemu-6.1+dfsg/qapi/block-core.json
@@ -2838,7 +2838,7 @@
'luks', 'nbd', 'nfs', 'null-aio', 'null-co', 'nvme', 'parallels',
'preallocate', 'qcow', 'qcow2', 'qed', 'quorum', 'raw', 'rbd',
{ 'name': 'replication', 'if': 'defined(CONFIG_REPLICATION)' },
- 'ssh', 'throttle', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat' ] }
+ 'ssh', 'throttle', 'vdi', 'vhdx', 'vitastor', 'vmdk', 'vpc', 'vvfat' ] }
##
# @BlockdevOptionsFile:
@@ -3763,6 +3763,28 @@
'*server': ['InetSocketAddressBase'] } }
##
+# @BlockdevOptionsVitastor:
+#
+# Driver specific block device options for vitastor
+#
+# @image: Image name
+# @inode: Inode number
+# @pool: Pool ID
+# @size: Desired image size in bytes
+# @config-path: Path to Vitastor configuration
+# @etcd-host: etcd connection address(es)
+# @etcd-prefix: etcd key/value prefix
+##
+{ 'struct': 'BlockdevOptionsVitastor',
+ 'data': { '*inode': 'uint64',
+ '*pool': 'uint64',
+ '*size': 'uint64',
+ '*image': 'str',
+ '*config-path': 'str',
+ '*etcd-host': 'str',
+ '*etcd-prefix': 'str' } }
+
+##
# @ReplicationMode:
#
# An enumeration of replication modes.
@@ -4134,6 +4156,7 @@
'throttle': 'BlockdevOptionsThrottle',
'vdi': 'BlockdevOptionsGenericFormat',
'vhdx': 'BlockdevOptionsGenericFormat',
+ 'vitastor': 'BlockdevOptionsVitastor',
'vmdk': 'BlockdevOptionsGenericCOWFormat',
'vpc': 'BlockdevOptionsGenericFormat',
'vvfat': 'BlockdevOptionsVVFAT'
@@ -4523,6 +4546,17 @@
'*encrypt' : 'RbdEncryptionCreateOptions' } }
##
+# @BlockdevCreateOptionsVitastor:
+#
+# Driver specific image creation options for Vitastor.
+#
+# @size: Size of the virtual disk in bytes
+##
+{ 'struct': 'BlockdevCreateOptionsVitastor',
+ 'data': { 'location': 'BlockdevOptionsVitastor',
+ 'size': 'size' } }
+
+##
# @BlockdevVmdkSubformat:
#
# Subformat options for VMDK images
@@ -4718,6 +4752,7 @@
'ssh': 'BlockdevCreateOptionsSsh',
'vdi': 'BlockdevCreateOptionsVdi',
'vhdx': 'BlockdevCreateOptionsVhdx',
+ 'vitastor': 'BlockdevCreateOptionsVitastor',
'vmdk': 'BlockdevCreateOptionsVmdk',
'vpc': 'BlockdevCreateOptionsVpc'
} }
Index: qemu-6.1+dfsg/scripts/modules/module_block.py
===================================================================
--- qemu-6.1+dfsg.orig/scripts/modules/module_block.py
+++ qemu-6.1+dfsg/scripts/modules/module_block.py
@@ -86,6 +86,7 @@ if __name__ == '__main__':
output_file = sys.argv[1]
with open(output_file, 'w') as fheader:
print_top(fheader)
+ add_module(fheader, "vitastor", "vitastor", "vitastor")
for filename in sys.argv[2:]:
if os.path.isfile(filename):

View File

@@ -48,4 +48,4 @@ FIO=`rpm -qi fio | perl -e 'while(<>) { /^Epoch[\s:]+(\S+)/ && print "$1:"; /^Ve
QEMU=`rpm -qi qemu qemu-kvm | perl -e 'while(<>) { /^Epoch[\s:]+(\S+)/ && print "$1:"; /^Version[\s:]+(\S+)/ && print $1; /^Release[\s:]+(\S+)/ && print "-$1"; }'`
perl -i -pe 's/(Requires:\s*fio)([^\n]+)?/$1 = '$FIO'/' $VITASTOR/rpm/vitastor-el$EL.spec
perl -i -pe 's/(Requires:\s*qemu(?:-kvm)?)([^\n]+)?/$1 = '$QEMU'/' $VITASTOR/rpm/vitastor-el$EL.spec
tar --transform 's#^#vitastor-0.6.7/#' --exclude 'rpm/*.rpm' -czf $VITASTOR/../vitastor-0.6.7$(rpm --eval '%dist').tar.gz *
tar --transform 's#^#vitastor-0.6.8/#' --exclude 'rpm/*.rpm' -czf $VITASTOR/../vitastor-0.6.8$(rpm --eval '%dist').tar.gz *

View File

@@ -38,7 +38,7 @@ ADD . /root/vitastor
RUN set -e; \
cd /root/vitastor/rpm; \
sh build-tarball.sh; \
cp /root/vitastor-0.6.7.el7.tar.gz ~/rpmbuild/SOURCES; \
cp /root/vitastor-0.6.8.el7.tar.gz ~/rpmbuild/SOURCES; \
cp vitastor-el7.spec ~/rpmbuild/SPECS/vitastor.spec; \
cd ~/rpmbuild/SPECS/; \
rpmbuild -ba vitastor.spec; \

View File

@@ -1,11 +1,11 @@
Name: vitastor
Version: 0.6.7
Version: 0.6.8
Release: 1%{?dist}
Summary: Vitastor, a fast software-defined clustered block storage
License: Vitastor Network Public License 1.1
URL: https://vitastor.io/
Source0: vitastor-0.6.7.el7.tar.gz
Source0: vitastor-0.6.8.el7.tar.gz
BuildRequires: liburing-devel >= 0.6
BuildRequires: gperftools-devel
@@ -16,13 +16,12 @@ BuildRequires: jerasure-devel
BuildRequires: gf-complete-devel
BuildRequires: libibverbs-devel
BuildRequires: cmake
Requires: fio = 3.7-1.el7
Requires: qemu-kvm = 2.0.0-1.el7.6
Requires: rh-nodejs12
Requires: rh-nodejs12-npm
Requires: liburing >= 0.6
Requires: libJerasure2
Requires: lpsolve
Requires: vitastor-osd = %{version}-%{release}
Requires: vitastor-mon = %{version}-%{release}
Requires: vitastor-client = %{version}-%{release}
Requires: vitastor-client-devel = %{version}-%{release}
Requires: vitastor-fio = %{version}-%{release}
Requires: vitastor-qemu = %{version}-%{release}
%description
Vitastor is a small, simple and fast clustered block storage (storage for VM drives),
@@ -31,6 +30,70 @@ symmetric clustering and automatic data distribution over any number of drives o
size with configurable redundancy (replication or erasure codes/XOR).
%package -n vitastor-osd
Summary: Vitastor - OSD
Requires: libJerasure2
Requires: liburing >= 0.6
Requires: vitastor-client = %{version}-%{release}
%description -n vitastor-osd
Vitastor object storage daemon, i.e. server program that stores data.
%package -n vitastor-mon
Summary: Vitastor - monitor
Requires: rh-nodejs12
Requires: rh-nodejs12-npm
Requires: lpsolve
%description -n vitastor-mon
Vitastor monitor, i.e. server program responsible for watching cluster state and
scheduling cluster-level operations.
%package -n vitastor-client
Summary: Vitastor - client
Requires: liburing >= 0.6
%description -n vitastor-client
Vitastor client library and command-line interface.
%package -n vitastor-client-devel
Summary: Vitastor - development files
Group: Development/Libraries
Requires: vitastor-client = %{version}-%{release}
%description -n vitastor-client-devel
Vitastor library headers for development.
%package -n vitastor-fio
Summary: Vitastor - fio drivers
Group: Development/Libraries
Requires: vitastor-client = %{version}-%{release}
Requires: fio = 3.7-1.el7
%description -n vitastor-fio
Vitastor fio drivers for benchmarking.
%package -n vitastor-qemu
Summary: Vitastor - QEMU driver
Group: Development/Libraries
Requires: vitastor-client = %{version}-%{release}
Requires: qemu-kvm = 2.0.0-1.el7.6
%description -n vitastor-qemu
Vitastor QEMU block device driver.
%prep
%setup -q
@@ -49,25 +112,46 @@ cd mon
npm install
cd ..
mkdir -p %buildroot/usr/lib/vitastor
cp -r mon %buildroot/usr/lib/vitastor/mon
cp mon/make-osd.sh %buildroot/usr/lib/vitastor
cp -r mon %buildroot/usr/lib/vitastor
%files
%doc
%_bindir/vitastor-dump-journal
%_bindir/vitastor-nbd
%doc GPL-2.0.txt VNPL-1.1.txt README.md README-ru.md
%files -n vitastor-osd
%_bindir/vitastor-osd
%_bindir/vitastor-dump-journal
/usr/lib/vitastor/make-osd.sh
%files -n vitastor-mon
/usr/lib/vitastor/mon
%files -n vitastor-client
%_bindir/vitastor-nbd
%_bindir/vitastor-cli
%_bindir/vitastor-rm
%_bindir/vita
%_libdir/qemu-kvm/block-vitastor.so
%_libdir/libvitastor_blk.so*
%_libdir/libvitastor_client.so*
%files -n vitastor-client-devel
%_includedir/vitastor_c.h
%_libdir/pkgconfig
%files -n vitastor-fio
%_libdir/libfio_vitastor.so
%_libdir/libfio_vitastor_blk.so
%_libdir/libfio_vitastor_sec.so
%_libdir/libvitastor_blk.so*
%_libdir/libvitastor_client.so*
%_includedir/vitastor_c.h
/usr/lib/vitastor
%files -n vitastor-qemu
%_libdir/qemu-kvm/block-vitastor.so
%changelog

View File

@@ -36,7 +36,7 @@ ADD . /root/vitastor
RUN set -e; \
cd /root/vitastor/rpm; \
sh build-tarball.sh; \
cp /root/vitastor-0.6.7.el8.tar.gz ~/rpmbuild/SOURCES; \
cp /root/vitastor-0.6.8.el8.tar.gz ~/rpmbuild/SOURCES; \
cp vitastor-el8.spec ~/rpmbuild/SPECS/vitastor.spec; \
cd ~/rpmbuild/SPECS/; \
rpmbuild -ba vitastor.spec; \

View File

@@ -1,11 +1,11 @@
Name: vitastor
Version: 0.6.7
Version: 0.6.8
Release: 1%{?dist}
Summary: Vitastor, a fast software-defined clustered block storage
License: Vitastor Network Public License 1.1
URL: https://vitastor.io/
Source0: vitastor-0.6.7.el8.tar.gz
Source0: vitastor-0.6.8.el8.tar.gz
BuildRequires: liburing-devel >= 0.6
BuildRequires: gperftools-devel
@@ -15,12 +15,12 @@ BuildRequires: jerasure-devel
BuildRequires: gf-complete-devel
BuildRequires: libibverbs-devel
BuildRequires: cmake
Requires: fio = 3.7-3.el8
Requires: qemu-kvm = 4.2.0-29.el8.6
Requires: nodejs >= 10
Requires: liburing >= 0.6
Requires: libJerasure2
Requires: lpsolve
Requires: vitastor-osd = %{version}-%{release}
Requires: vitastor-mon = %{version}-%{release}
Requires: vitastor-client = %{version}-%{release}
Requires: vitastor-client-devel = %{version}-%{release}
Requires: vitastor-fio = %{version}-%{release}
Requires: vitastor-qemu = %{version}-%{release}
%description
Vitastor is a small, simple and fast clustered block storage (storage for VM drives),
@@ -29,6 +29,69 @@ symmetric clustering and automatic data distribution over any number of drives o
size with configurable redundancy (replication or erasure codes/XOR).
%package -n vitastor-osd
Summary: Vitastor - OSD
Requires: libJerasure2
Requires: liburing >= 0.6
Requires: vitastor-client = %{version}-%{release}
%description -n vitastor-osd
Vitastor object storage daemon, i.e. server program that stores data.
%package -n vitastor-mon
Summary: Vitastor - monitor
Requires: nodejs >= 10
Requires: lpsolve
%description -n vitastor-mon
Vitastor monitor, i.e. server program responsible for watching cluster state and
scheduling cluster-level operations.
%package -n vitastor-client
Summary: Vitastor - client
Requires: liburing >= 0.6
%description -n vitastor-client
Vitastor client library and command-line interface.
%package -n vitastor-client-devel
Summary: Vitastor - development files
Group: Development/Libraries
Requires: vitastor-client = %{version}-%{release}
%description -n vitastor-client-devel
Vitastor library headers for development.
%package -n vitastor-fio
Summary: Vitastor - fio drivers
Group: Development/Libraries
Requires: vitastor-client = %{version}-%{release}
Requires: fio = 3.7-3.el8
%description -n vitastor-fio
Vitastor fio drivers for benchmarking.
%package -n vitastor-qemu
Summary: Vitastor - QEMU driver
Group: Development/Libraries
Requires: vitastor-client = %{version}-%{release}
Requires: qemu-kvm = 4.2.0-29.el8.6
%description -n vitastor-qemu
Vitastor QEMU block device driver.
%prep
%setup -q
@@ -46,25 +109,46 @@ cd mon
npm install
cd ..
mkdir -p %buildroot/usr/lib/vitastor
cp mon/make-osd.sh %buildroot/usr/lib/vitastor
cp -r mon %buildroot/usr/lib/vitastor
%files
%doc
%_bindir/vitastor-dump-journal
%_bindir/vitastor-nbd
%doc GPL-2.0.txt VNPL-1.1.txt README.md README-ru.md
%files -n vitastor-osd
%_bindir/vitastor-osd
%_bindir/vitastor-dump-journal
/usr/lib/vitastor/make-osd.sh
%files -n vitastor-mon
/usr/lib/vitastor/mon
%files -n vitastor-client
%_bindir/vitastor-nbd
%_bindir/vitastor-cli
%_bindir/vitastor-rm
%_bindir/vita
%_libdir/qemu-kvm/block-vitastor.so
%_libdir/libvitastor_blk.so*
%_libdir/libvitastor_client.so*
%files -n vitastor-client-devel
%_includedir/vitastor_c.h
%_libdir/pkgconfig
%files -n vitastor-fio
%_libdir/libfio_vitastor.so
%_libdir/libfio_vitastor_blk.so
%_libdir/libfio_vitastor_sec.so
%_libdir/libvitastor_blk.so*
%_libdir/libvitastor_client.so*
%_includedir/vitastor_c.h
/usr/lib/vitastor
%files -n vitastor-qemu
%_libdir/qemu-kvm/block-vitastor.so
%changelog

View File

@@ -15,7 +15,7 @@ if("${CMAKE_INSTALL_PREFIX}" MATCHES "^/usr/local/?$")
set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}")
endif()
add_definitions(-DVERSION="0.6.7")
add_definitions(-DVERSION="0.6.8")
add_definitions(-Wall -Wno-sign-compare -Wno-comment -Wno-parentheses -Wno-pointer-arith -fdiagnostics-color=always -I ${CMAKE_SOURCE_DIR}/src)
if (${WITH_ASAN})
add_definitions(-fsanitize=address -fno-omit-frame-pointer)
@@ -145,7 +145,7 @@ endif (${WITH_FIO})
# vitastor-nbd
add_executable(vitastor-nbd
nbd_proxy.cpp
nbd_proxy.cpp mmap_manager.cpp
)
target_link_libraries(vitastor-nbd
vitastor_client
@@ -153,11 +153,13 @@ target_link_libraries(vitastor-nbd
# vitastor-cli
add_executable(vitastor-cli
cli.cpp cli_ls.cpp cli_create.cpp cli_modify.cpp cli_flatten.cpp cli_merge.cpp cli_rm.cpp cli_snap_rm.cpp
cli.cpp cli_alloc_osd.cpp cli_simple_offsets.cpp
cli_ls.cpp cli_create.cpp cli_modify.cpp cli_flatten.cpp cli_merge.cpp cli_rm.cpp cli_snap_rm.cpp
)
target_link_libraries(vitastor-cli
vitastor_client
)
configure_file(vitastor.pc.in vitastor.pc @ONLY)
# vitastor-dump-journal
add_executable(vitastor-dump-journal
@@ -186,7 +188,7 @@ endif (${WITH_QEMU})
### Test stubs
# stub_osd, stub_bench, osd_test
add_executable(stub_osd stub_osd.cpp rw_blocking.cpp)
add_executable(stub_osd stub_osd.cpp rw_blocking.cpp mmap_manager.cpp)
target_link_libraries(stub_osd tcmalloc_minimal)
add_executable(stub_bench stub_bench.cpp rw_blocking.cpp)
target_link_libraries(stub_bench tcmalloc_minimal)
@@ -248,6 +250,7 @@ install(
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
)
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/vitastor.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig)
if (${WITH_FIO})
install(TARGETS fio_vitastor fio_vitastor_blk fio_vitastor_sec LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR})
endif (${WITH_FIO})

View File

@@ -130,6 +130,18 @@ void cli_tool_t::help()
" <to> must be a child of <from> and <target> may be one of the layers between\n"
" <from> and <to>, including <from> and <to>.\n"
"\n"
"%s alloc-osd\n"
" Allocate a new OSD number and reserve it by creating empty /osd/stats/<n> key.\n"
"%s simple-offsets <device>\n"
" Calculate offsets for simple&stupid (no superblock) OSD deployment. Options:\n"
" --object_size 128k Set blockstore block size\n"
" --bitmap_granularity 4k Set bitmap granularity\n"
" --journal_size 16M Set journal size\n"
" --device_block_size 4k Set device block size\n"
" --journal_offset 0 Set journal offset\n"
" --device_size 0 Set device size\n"
" --format text Result format: json, options, env, or text\n"
"\n"
"GLOBAL OPTIONS:\n"
" --etcd_address <etcd_address>\n"
" --iodepth N Send N operations in parallel to each OSD when possible (default 32)\n"
@@ -139,7 +151,7 @@ void cli_tool_t::help()
" --no-color Disable colored output\n"
" --json JSON output\n"
,
exe_name, exe_name, exe_name, exe_name, exe_name, exe_name, exe_name, exe_name, exe_name
exe_name, exe_name, exe_name, exe_name, exe_name, exe_name, exe_name, exe_name, exe_name, exe_name, exe_name
);
exit(0);
}
@@ -268,6 +280,16 @@ void cli_tool_t::run(json11::Json cfg)
// Remove multiple snapshots and rebase their children
action_cb = start_snap_rm(cfg);
}
else if (cmd[0] == "alloc-osd")
{
// Allocate a new OSD number
action_cb = start_alloc_osd(cfg);
}
else if (cmd[0] == "simple-offsets")
{
// Calculate offsets for simple & stupid OSD deployment without superblock
action_cb = simple_offsets(cfg);
}
else
{
fprintf(stderr, "unknown command: %s\n", cmd[0].string_value().c_str());

View File

@@ -50,11 +50,16 @@ public:
friend struct snap_flattener_t;
friend struct snap_remover_t;
std::function<bool(void)> start_ls(json11::Json cfg);
std::function<bool(void)> start_create(json11::Json cfg);
std::function<bool(void)> start_modify(json11::Json cfg);
std::function<bool(void)> start_ls(json11::Json);
std::function<bool(void)> start_create(json11::Json);
std::function<bool(void)> start_modify(json11::Json);
std::function<bool(void)> start_rm(json11::Json);
std::function<bool(void)> start_merge(json11::Json);
std::function<bool(void)> start_flatten(json11::Json);
std::function<bool(void)> start_snap_rm(json11::Json);
std::function<bool(void)> start_alloc_osd(json11::Json cfg, uint64_t *out = NULL);
std::function<bool(void)> simple_offsets(json11::Json cfg);
};
std::string format_size(uint64_t size);
uint64_t parse_size(std::string size_str);

141
src/cli_alloc_osd.cpp Normal file
View File

@@ -0,0 +1,141 @@
// Copyright (c) Vitaliy Filippov, 2019+
// License: VNPL-1.1 (see README.md for details)
#include <ctype.h>
#include "cli.h"
#include "cluster_client.h"
#include "base64.h"
#include <algorithm>
// Safely allocate an OSD number
struct alloc_osd_t
{
cli_tool_t *parent;
json11::Json result;
uint64_t new_id = 1;
int state = 0;
bool is_done()
{
return state == 100;
}
void loop()
{
if (state == 1)
goto resume_1;
do
{
etcd_txn(json11::Json::object {
{ "compare", json11::Json::array {
json11::Json::object {
{ "target", "VERSION" },
{ "version", 0 },
{ "key", base64_encode(
parent->cli->st_cli.etcd_prefix+"/osd/stats/"+std::to_string(new_id)
) },
},
} },
{ "success", json11::Json::array {
json11::Json::object {
{ "request_put", json11::Json::object {
{ "key", base64_encode(
parent->cli->st_cli.etcd_prefix+"/osd/stats/"+std::to_string(new_id)
) },
{ "value", base64_encode("{}") },
} },
},
} },
{ "failure", json11::Json::array {
json11::Json::object {
{ "request_range", json11::Json::object {
{ "key", base64_encode(parent->cli->st_cli.etcd_prefix+"/osd/stats/") },
{ "range_end", base64_encode(parent->cli->st_cli.etcd_prefix+"/osd/stats0") },
{ "keys_only", true },
} },
},
} },
});
resume_1:
state = 1;
if (parent->waiting > 0)
return;
if (!result["succeeded"].bool_value())
{
std::vector<osd_num_t> used;
for (auto kv: result["responses"][0]["response_range"]["kvs"].array_items())
{
std::string key = base64_decode(kv["key"].string_value());
osd_num_t cur_osd;
char null_byte = 0;
sscanf(key.c_str() + parent->cli->st_cli.etcd_prefix.length(), "/osd/stats/%lu%c", &cur_osd, &null_byte);
if (!cur_osd || null_byte != 0)
{
fprintf(stderr, "Invalid key in etcd: %s\n", key.c_str());
continue;
}
used.push_back(cur_osd);
}
std::sort(used.begin(), used.end());
if (used[used.size()-1] == used.size())
{
new_id = used.size()+1;
}
else
{
int s = 0, e = used.size();
while (e > s+1)
{
int c = (s+e)/2;
if (used[c] == c+1)
s = c;
else
e = c;
}
new_id = used[e-1]+1;
}
}
} while (!result["succeeded"].bool_value());
state = 100;
}
void etcd_txn(json11::Json txn)
{
parent->waiting++;
parent->cli->st_cli.etcd_txn(txn, ETCD_SLOW_TIMEOUT, [this](std::string err, json11::Json res)
{
parent->waiting--;
if (err != "")
{
fprintf(stderr, "Error reading from etcd: %s\n", err.c_str());
exit(1);
}
this->result = res;
parent->ringloop->wakeup();
});
}
};
std::function<bool(void)> cli_tool_t::start_alloc_osd(json11::Json cfg, uint64_t *out)
{
json11::Json::array cmd = cfg["command"].array_items();
auto alloc_osd = new alloc_osd_t();
alloc_osd->parent = this;
return [alloc_osd, &out]()
{
alloc_osd->loop();
if (alloc_osd->is_done())
{
if (out)
*out = alloc_osd->new_id;
else if (alloc_osd->new_id)
printf("%lu\n", alloc_osd->new_id);
delete alloc_osd;
return true;
}
return false;
};
}

View File

@@ -94,6 +94,11 @@ struct image_creator_t
goto resume_2;
else if (state == 3)
goto resume_3;
if (!size)
{
fprintf(stderr, "Image size is missing\n");
exit(1);
}
for (auto & ic: parent->cli->st_cli.inode_config)
{
if (ic.second.name == image_name)
@@ -432,6 +437,31 @@ resume_3:
}
};
uint64_t parse_size(std::string size_str)
{
uint64_t mul = 1;
char type_char = tolower(size_str[size_str.length()-1]);
if (type_char == 'k' || type_char == 'm' || type_char == 'g' || type_char == 't')
{
if (type_char == 'k')
mul = 1l<<10;
else if (type_char == 'm')
mul = 1l<<20;
else if (type_char == 'g')
mul = 1l<<30;
else /*if (type_char == 't')*/
mul = 1l<<40;
size_str = size_str.substr(0, size_str.length()-1);
}
uint64_t size = json11::Json(size_str).uint64_value() * mul;
if (size == 0 && size_str != "0" && (size_str != "" || mul != 1))
{
fprintf(stderr, "Invalid syntax for size: %s\n", size_str.c_str());
exit(1);
}
return size;
}
std::function<bool(void)> cli_tool_t::start_create(json11::Json cfg)
{
json11::Json::array cmd = cfg["command"].array_items();
@@ -458,33 +488,12 @@ std::function<bool(void)> cli_tool_t::start_create(json11::Json cfg)
image_creator->new_parent = cfg["parent"].string_value();
if (cfg["size"].string_value() != "")
{
std::string size_str = cfg["size"].string_value();
uint64_t mul = 1;
char type_char = tolower(size_str[size_str.length()-1]);
if (type_char == 'k' || type_char == 'm' || type_char == 'g' || type_char == 't')
image_creator->size = parse_size(cfg["size"].string_value());
if (image_creator->size % 4096)
{
if (type_char == 'k')
mul = 1l<<10;
else if (type_char == 'm')
mul = 1l<<20;
else if (type_char == 'g')
mul = 1l<<30;
else /*if (type_char == 't')*/
mul = 1l<<40;
size_str = size_str.substr(0, size_str.length()-1);
}
uint64_t size = json11::Json(size_str).uint64_value() * mul;
if (size == 0)
{
fprintf(stderr, "Invalid syntax for size: %s\n", cfg["size"].string_value().c_str());
fprintf(stderr, "Size should be a multiple of 4096\n");
exit(1);
}
if (size % 4096)
{
fprintf(stderr, "Image size should be a multiple of 4096\n");
exit(1);
}
image_creator->size = size;
if (image_creator->new_snap != "")
{
fprintf(stderr, "--size can't be specified for snapshots\n");

View File

@@ -505,7 +505,7 @@ std::function<bool(void)> cli_tool_t::start_ls(json11::Json cfg)
lister->sort_field = cfg["sort"].string_value();
lister->reverse = cfg["reverse"].bool_value();
lister->max_count = cfg["count"].uint64_value();
for (int i = 0; i < cmd.size(); i++)
for (int i = 1; i < cmd.size(); i++)
{
lister->only_names.insert(cmd[i].string_value());
}

143
src/cli_simple_offsets.cpp Normal file
View File

@@ -0,0 +1,143 @@
// Copyright (c) Vitaliy Filippov, 2019+
// License: VNPL-1.1 (see README.md for details)
#include <fcntl.h>
#include <sys/ioctl.h>
#include <ctype.h>
#include <unistd.h>
#include "cli.h"
#include "cluster_client.h"
#include "base64.h"
// Calculate offsets for a block device and print OSD command line parameters
std::function<bool(void)> cli_tool_t::simple_offsets(json11::Json cfg)
{
std::string device = cfg["command"][1].string_value();
uint64_t object_size = parse_size(cfg["object_size"].string_value());
uint64_t bitmap_granularity = parse_size(cfg["bitmap_granularity"].string_value());
uint64_t journal_size = parse_size(cfg["journal_size"].string_value());
uint64_t device_block_size = parse_size(cfg["device_block_size"].string_value());
uint64_t journal_offset = parse_size(cfg["journal_offset"].string_value());
uint64_t device_size = parse_size(cfg["device_size"].string_value());
std::string format = cfg["format"].string_value();
if (json_output)
format = "json";
if (!object_size)
object_size = DEFAULT_BLOCK_SIZE;
if (!bitmap_granularity)
bitmap_granularity = DEFAULT_BITMAP_GRANULARITY;
if (!journal_size)
journal_size = 16*1024*1024;
if (!device_block_size)
device_block_size = 4096;
uint64_t orig_device_size = device_size;
if (!device_size)
{
struct stat st;
if (stat(device.c_str(), &st) < 0)
{
fprintf(stderr, "Can't stat %s: %s\n", device.c_str(), strerror(errno));
exit(1);
}
if (S_ISBLK(st.st_mode))
{
int fd = open(device.c_str(), O_DIRECT|O_RDONLY);
if (fd < 0 || ioctl(fd, BLKGETSIZE64, &device_size) < 0)
{
fprintf(stderr, "Failed to get device size for %s: %s\n", device.c_str(), strerror(errno));
exit(1);
}
close(fd);
if (st.st_blksize < device_block_size)
{
fprintf(
stderr, "Warning: %s reports %lu byte blocks, but we use %lu."
" Set --device_block_size=%lu if you're sure it works well with %lu byte blocks.\n",
device.c_str(), st.st_blksize, device_block_size, st.st_blksize, st.st_blksize
);
}
}
else if (S_ISREG(st.st_mode))
{
device_size = st.st_size;
}
else
{
fprintf(stderr, "%s is neither a block device nor a regular file\n", device.c_str());
exit(1);
}
}
if (!device_size)
{
fprintf(stderr, "Failed to get device size for %s\n", device.c_str());
exit(1);
}
if (device_block_size < 512 || device_block_size > 1048576 ||
device_block_size & (device_block_size-1) != 0)
{
fprintf(stderr, "Invalid device block size specified: %lu\n", device_block_size);
exit(1);
}
if (object_size < device_block_size || object_size > MAX_BLOCK_SIZE ||
object_size & (object_size-1) != 0)
{
fprintf(stderr, "Invalid object size specified: %lu\n", object_size);
exit(1);
}
if (bitmap_granularity < device_block_size || bitmap_granularity > object_size ||
bitmap_granularity & (bitmap_granularity-1) != 0)
{
fprintf(stderr, "Invalid bitmap granularity specified: %lu\n", bitmap_granularity);
exit(1);
}
journal_offset = ((journal_offset+device_block_size-1)/device_block_size)*device_block_size;
uint64_t meta_offset = journal_offset + ((journal_size+device_block_size-1)/device_block_size)*device_block_size;
uint64_t entries_per_block = (device_block_size / (24 + 2*object_size/bitmap_granularity/8));
uint64_t object_count = ((device_size-meta_offset)/object_size);
uint64_t meta_size = (1 + (object_count+entries_per_block-1)/entries_per_block) * device_block_size;
uint64_t data_offset = meta_offset + meta_size;
if (format == "json")
{
// JSON
printf("%s\n", json11::Json(json11::Json::object {
{ "meta_block_size", device_block_size },
{ "journal_block_size", device_block_size },
{ "data_size", device_size-data_offset },
{ "data_device", device },
{ "journal_offset", journal_offset },
{ "meta_offset", meta_offset },
{ "data_offset", data_offset },
}).dump().c_str());
}
else if (format == "env")
{
// Env
printf(
"meta_block_size=%lu\njournal_block_size=%lu\ndata_size=%lu\n"
"data_device=%s\njournal_offset=%lu\nmeta_offset=%lu\ndata_offset=%lu\n",
device_block_size, device_block_size, device_size-data_offset,
device.c_str(), journal_offset, meta_offset, data_offset
);
}
else
{
// OSD command-line options
if (format != "options")
{
fprintf(stderr, "Metadata size: %s\nOptions for the OSD:\n", format_size(meta_size).c_str());
}
if (device_block_size != 4096)
{
printf("--meta_block_size %lu\n--journal_block_size %lu\n", device_block_size, device_block_size);
}
if (orig_device_size)
{
printf("--data_size %lu\n", device_size-data_offset);
}
printf(
"--data_device %s\n--journal_offset %lu\n--meta_offset %lu\n--data_offset %lu\n",
device.c_str(), journal_offset, meta_offset, data_offset
);
}
return NULL;
}

View File

@@ -1,6 +1,7 @@
// Copyright (c) Vitaliy Filippov, 2019+
// License: VNPL-1.1 (see README.md for details)
#include <fcntl.h>
#include "cli.h"
#include "cluster_client.h"
#include "base64.h"

View File

@@ -147,13 +147,13 @@ void osd_messenger_t::parse_config(const json11::Json & config)
this->rdma_max_sge = 128;
this->rdma_max_send = config["rdma_max_send"].uint64_value();
if (!this->rdma_max_send)
this->rdma_max_send = 32;
this->rdma_max_send = 1;
this->rdma_max_recv = config["rdma_max_recv"].uint64_value();
if (!this->rdma_max_recv)
this->rdma_max_recv = 8;
this->rdma_max_recv = 128;
this->rdma_max_msg = config["rdma_max_msg"].uint64_value();
if (!this->rdma_max_msg || this->rdma_max_msg > 128*1024*1024)
this->rdma_max_msg = 1024*1024;
this->rdma_max_msg = 129*1024;
#endif
this->receive_buffer_size = (uint32_t)config["tcp_header_buffer_size"].uint64_value();
if (!this->receive_buffer_size || this->receive_buffer_size > 1024*1024*1024)

View File

@@ -133,7 +133,7 @@ protected:
std::string rdma_device;
uint64_t rdma_port_num = 1, rdma_gid_index = 0, rdma_mtu = 0;
msgr_rdma_context_t *rdma_context = NULL;
uint64_t rdma_max_sge = 0, rdma_max_send = 0, rdma_max_recv = 8;
uint64_t rdma_max_sge = 0, rdma_max_send = 0, rdma_max_recv = 0;
uint64_t rdma_max_msg = 0;
#endif

82
src/mmap_manager.cpp Normal file
View File

@@ -0,0 +1,82 @@
#include <stdexcept>
#include <cassert>
#include <sys/mman.h>
#include "mmap_manager.h"
mmap_manager_t::mmap_manager_t(uint64_t mmap_size)
{
this->mmap_size = mmap_size;
}
mmap_manager_t::~mmap_manager_t()
{
for (auto & kv: past_buffers)
{
munmap(kv.second.addr, kv.second.size);
}
if (active_buffer.addr != NULL)
{
munmap(active_buffer.addr, active_buffer.size);
}
}
void *mmap_manager_t::alloc(uint64_t size)
{
if (!active_buffer.addr || (active_buffer.pos + size) > active_buffer.size)
{
if (active_buffer.addr)
{
if (active_buffer.freed >= active_buffer.pos)
munmap(active_buffer.addr, active_buffer.size);
else
past_buffers[active_buffer.addr] = active_buffer;
active_buffer = { 0 };
}
uint64_t new_size = size < mmap_size ? mmap_size : size;
void *buf = mmap(NULL, new_size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
if (!buf)
throw std::runtime_error(std::string("can't mmap "+std::to_string(new_size)+" bytes"));
active_buffer = {
.addr = buf,
.size = new_size,
.freed = 0,
.pos = 0,
};
}
void *res = active_buffer.addr + active_buffer.pos;
active_buffer.pos += size;
return res;
}
void mmap_manager_t::free(void *addr, uint64_t size)
{
auto it = past_buffers.upper_bound(addr);
if (it != past_buffers.begin())
{
if (it == past_buffers.end())
{
it--;
if (addr < it->second.addr || addr >= it->second.addr+it->second.size)
it = past_buffers.end();
}
else
it--;
}
else
it = past_buffers.end();
if (it != past_buffers.end())
{
assert(addr >= it->second.addr && addr+size <= it->second.addr+it->second.size);
it->second.freed += size;
if (it->second.freed >= it->second.pos)
{
munmap(it->second.addr, it->second.size);
past_buffers.erase(it);
}
}
else
{
assert(addr < active_buffer.addr+active_buffer.size);
active_buffer.freed += size;
}
}

26
src/mmap_manager.h Normal file
View File

@@ -0,0 +1,26 @@
#pragma once
#include <stdint.h>
#include <map>
struct mmap_buffer_t
{
void *addr = NULL;
uint64_t size = 0;
uint64_t freed = 0;
uint64_t pos = 0;
};
class mmap_manager_t
{
protected:
uint64_t mmap_size = 32*1024*1024;
std::map<void*, mmap_buffer_t> past_buffers;
mmap_buffer_t active_buffer;
public:
mmap_manager_t(uint64_t mmap_size = 32*1024*1024);
~mmap_manager_t();
void *alloc(uint64_t size);
void free(void *addr, uint64_t size);
};

View File

@@ -17,6 +17,8 @@
#include "epoll_manager.h"
#include "cluster_client.h"
#include "mmap_manager.h"
#include <stdexcept>
#ifndef MSG_ZEROCOPY
#define MSG_ZEROCOPY 0
@@ -24,6 +26,24 @@
const char *exe_name = NULL;
static inline void my_uring_prep_splice(struct io_uring_sqe *sqe,
int fd_in, int64_t off_in,
int fd_out, int64_t off_out,
unsigned int nbytes,
unsigned int splice_flags)
{
my_uring_prep_rw(IORING_OP_SPLICE, sqe, fd_out, NULL, nbytes, (__u64) off_out);
sqe->splice_off_in = (__u64) off_in;
sqe->splice_fd_in = fd_in;
sqe->splice_flags = splice_flags;
}
struct buf_to_free_t
{
void *buf = NULL;
uint64_t unmap = 0;
};
class nbd_proxy
{
protected:
@@ -38,7 +58,7 @@ protected:
ring_consumer_t consumer;
std::vector<iovec> send_list, next_send_list;
std::vector<void*> to_free;
std::vector<buf_to_free_t> to_free;
int nbd_fd = -1;
void *recv_buf = NULL;
int receive_buffer_size = 9000;
@@ -51,6 +71,10 @@ protected:
msghdr read_msg = { 0 }, send_msg = { 0 };
iovec read_iov = { 0 };
mmap_manager_t mm;
int pipe_fd[2];
int vmspliced = 0;
public:
static json11::Json::object parse_args(int narg, const char *args[])
{
@@ -174,6 +198,12 @@ public:
exit(1);
}
}
// Create pipe for splicing
if (pipe(pipe_fd) < 0)
{
fprintf(stderr, "pipe failed: %s\n", strerror(errno));
exit(1);
}
// Create client
ringloop = new ring_loop_t(512);
epmgr = new epoll_manager_t(ringloop);
@@ -522,16 +552,76 @@ protected:
{
return;
}
io_uring_sqe* sqe = ringloop->get_sqe();
if (!sqe)
int i;
//uint64_t len = 0;
for (i = 0; i < send_list.size(); i++)
{
return;
if (to_free[i].unmap)
{
break;
}
//len += send_list[i].iov_len;
}
//if (true)
if (i > 0)
{
/*io_uring_sqe* sqe = ringloop->get_sqe();
if (!sqe)
{
return;
}
ring_data_t* data = ((ring_data_t*)sqe->user_data);
data->callback = [this](ring_data_t *data) { handle_send(data->res); };*/
send_msg.msg_iov = send_list.data();
//send_msg.msg_iovlen = send_list.size();
send_msg.msg_iovlen = i;
//my_uring_prep_sendmsg(sqe, nbd_fd, &send_msg, MSG_ZEROCOPY);
int res = sendmsg(nbd_fd, &send_msg, MSG_ZEROCOPY);
if (res < 0)
res = -errno;
handle_send(res);
//int r = sendmsg(int sockfd, const struct msghdr *msg, int flags);
}
else
{
io_uring_sqe* sqe = ringloop->get_sqe();
if (!sqe)
{
return;
}
if (vmspliced <= 0)
{
vmspliced = vmsplice(pipe_fd[1], send_list.data(), 1, SPLICE_F_GIFT);
if (vmspliced < 0)
{
throw std::runtime_error(std::string("vmsplice: ")+strerror(errno));
}
}
send_msg.msg_iovlen = 1;
ring_data_t* data = ((ring_data_t*)sqe->user_data);
data->callback = [this](ring_data_t *data)
{
if (data->res > 0)
vmspliced -= data->res;
handle_send(data->res);
};
my_uring_prep_splice(sqe, pipe_fd[0], -1l, nbd_fd, -1l, vmspliced, SPLICE_F_MOVE);
/*int sent = res, spl = res;
while (spl > 0)
{
res = splice(pipe_fd[0], NULL, nbd_fd, NULL, spl, SPLICE_F_MOVE);
if (res < 0)
{
if (errno != EAGAIN)
throw std::runtime_error(std::string("splice: ")+strerror(errno));
}
else
{
spl -= res;
}
}
handle_send(sent);*/
}
ring_data_t* data = ((ring_data_t*)sqe->user_data);
data->callback = [this](ring_data_t *data) { handle_send(data->res); };
send_msg.msg_iov = send_list.data();
send_msg.msg_iovlen = send_list.size();
my_uring_prep_sendmsg(sqe, nbd_fd, &send_msg, MSG_ZEROCOPY);
}
void handle_send(int result)
@@ -547,7 +637,10 @@ protected:
{
if (result >= send_list[to_eat].iov_len)
{
free(to_free[to_eat]);
if (to_free[to_eat].unmap)
mm.free(to_free[to_eat].buf, to_free[to_eat].unmap);
else
free(to_free[to_eat].buf);
result -= send_list[to_eat].iov_len;
to_eat++;
}
@@ -659,6 +752,7 @@ protected:
printf("request %lx +%x %lx\n", be64toh(cur_req.from), be32toh(cur_req.len), handle);
#endif
void *buf = NULL;
nbd_reply *reply = NULL;
cluster_op_t *op = new cluster_op_t;
if (req_type == NBD_CMD_READ || req_type == NBD_CMD_WRITE)
{
@@ -666,36 +760,51 @@ protected:
op->inode = inode ? inode : watch->cfg.num;
op->offset = be64toh(cur_req.from);
op->len = be32toh(cur_req.len);
buf = malloc_or_die(sizeof(nbd_reply) + op->len);
op->iov.push_back(buf + sizeof(nbd_reply), op->len);
if (req_type == NBD_CMD_WRITE)
{
buf = malloc_or_die(sizeof(nbd_reply) + op->len);
reply = (nbd_reply*)buf;
op->iov.push_back(buf + sizeof(nbd_reply), op->len);
}
else
{
buf = mm.alloc(op->len);
reply = (nbd_reply*)malloc_or_die(sizeof(nbd_reply));
op->iov.push_back(buf, op->len);
}
}
else if (req_type == NBD_CMD_FLUSH)
{
op->opcode = OSD_OP_SYNC;
buf = malloc_or_die(sizeof(nbd_reply));
reply = (nbd_reply*)malloc_or_die(sizeof(nbd_reply));
}
op->callback = [this, buf, handle](cluster_op_t *op)
op->callback = [this, buf, reply, handle](cluster_op_t *op)
{
#ifdef DEBUG
printf("reply %lx e=%d\n", handle, op->retval);
#endif
nbd_reply *reply = (nbd_reply*)buf;
reply->magic = htobe32(NBD_REPLY_MAGIC);
memcpy(reply->handle, &handle, 8);
reply->error = htobe32(op->retval < 0 ? -op->retval : 0);
auto & to_list = send_msg.msg_iovlen > 0 ? next_send_list : send_list;
if (op->retval < 0 || op->opcode != OSD_OP_READ)
to_list.push_back({ .iov_base = buf, .iov_len = sizeof(nbd_reply) });
else
to_list.push_back({ .iov_base = buf, .iov_len = sizeof(nbd_reply) + op->len });
to_free.push_back(buf);
to_list.push_back((iovec){ .iov_base = reply, .iov_len = sizeof(nbd_reply) });
to_free.push_back((buf_to_free_t){ .buf = reply, .unmap = 0 });
if (op->retval >= 0 && op->opcode == OSD_OP_READ)
{
to_list.push_back((iovec){ .iov_base = buf, .iov_len = op->len });
to_free.push_back((buf_to_free_t){ .buf = buf, .unmap = op->len });
}
else if (op->opcode == OSD_OP_READ)
{
mm.free(buf, op->len);
}
delete op;
ringloop->wakeup();
};
if (req_type == NBD_CMD_WRITE)
{
cur_op = op;
cur_buf = buf + sizeof(nbd_reply);
cur_buf = buf;
cur_left = op->len;
read_state = CL_READ_DATA;
}

View File

@@ -132,16 +132,19 @@ static void vitastor_parse_filename(const char *filename, QDict *options, Error
error_setg(errp, "conf option %s has no value", name);
break;
}
for (int i = 0; i < strlen(name); i++)
if (name[i] == '_')
name[i] = '-';
qemu_vitastor_unescape(name);
value = qemu_vitastor_next_tok(p, ':', &p);
qemu_vitastor_unescape(value);
if (!strcmp(name, "inode") ||
!strcmp(name, "pool") ||
!strcmp(name, "size") ||
!strcmp(name, "use_rdma") ||
!strcmp(name, "rdma_port_num") ||
!strcmp(name, "rdma_gid_index") ||
!strcmp(name, "rdma_mtu"))
!strcmp(name, "use-rdma") ||
!strcmp(name, "rdma-port_num") ||
!strcmp(name, "rdma-gid-index") ||
!strcmp(name, "rdma-mtu"))
{
unsigned long long num_val;
if (parse_uint_full(value, &num_val, 0))
@@ -202,15 +205,15 @@ static int vitastor_file_open(BlockDriverState *bs, QDict *options, int flags, E
VitastorClient *client = bs->opaque;
int64_t ret = 0;
qemu_mutex_init(&client->mutex);
client->config_path = g_strdup(qdict_get_try_str(options, "config_path"));
client->config_path = g_strdup(qdict_get_try_str(options, "config-path"));
// FIXME: Rename to etcd_address
client->etcd_host = g_strdup(qdict_get_try_str(options, "etcd_host"));
client->etcd_prefix = g_strdup(qdict_get_try_str(options, "etcd_prefix"));
client->use_rdma = qdict_get_try_int(options, "use_rdma", -1);
client->rdma_device = g_strdup(qdict_get_try_str(options, "rdma_device"));
client->rdma_port_num = qdict_get_try_int(options, "rdma_port_num", 0);
client->rdma_gid_index = qdict_get_try_int(options, "rdma_gid_index", 0);
client->rdma_mtu = qdict_get_try_int(options, "rdma_mtu", 0);
client->etcd_host = g_strdup(qdict_get_try_str(options, "etcd-host"));
client->etcd_prefix = g_strdup(qdict_get_try_str(options, "etcd-prefix"));
client->use_rdma = qdict_get_try_int(options, "use-rdma", -1);
client->rdma_device = g_strdup(qdict_get_try_str(options, "rdma-device"));
client->rdma_port_num = qdict_get_try_int(options, "rdma-port-num", 0);
client->rdma_gid_index = qdict_get_try_int(options, "rdma-gid-index", 0);
client->rdma_mtu = qdict_get_try_int(options, "rdma-mtu", 0);
client->proxy = vitastor_c_create_qemu(
(QEMUSetFDHandler*)aio_set_fd_handler, bdrv_get_aio_context(bs), client->config_path, client->etcd_host, client->etcd_prefix,
client->use_rdma, client->rdma_device, client->rdma_port_num, client->rdma_gid_index, client->rdma_mtu, 0
@@ -264,14 +267,14 @@ static int vitastor_file_open(BlockDriverState *bs, QDict *options, int flags, E
}
bs->total_sectors = client->size / BDRV_SECTOR_SIZE;
//client->aio_context = bdrv_get_aio_context(bs);
qdict_del(options, "use_rdma");
qdict_del(options, "rdma_mtu");
qdict_del(options, "rdma_gid_index");
qdict_del(options, "rdma_port_num");
qdict_del(options, "rdma_device");
qdict_del(options, "config_path");
qdict_del(options, "etcd_host");
qdict_del(options, "etcd_prefix");
qdict_del(options, "use-rdma");
qdict_del(options, "rdma-mtu");
qdict_del(options, "rdma-gid-index");
qdict_del(options, "rdma-port-num");
qdict_del(options, "rdma-device");
qdict_del(options, "config-path");
qdict_del(options, "etcd-host");
qdict_del(options, "etcd-prefix");
qdict_del(options, "image");
qdict_del(options, "inode");
qdict_del(options, "pool");
@@ -515,9 +518,9 @@ static QEMUOptionParameter vitastor_create_opts[] = {
static const char *vitastor_strong_runtime_opts[] = {
"inode",
"pool",
"config_path",
"etcd_host",
"etcd_prefix",
"config-path",
"etcd-host",
"etcd-prefix",
NULL
};

View File

@@ -37,6 +37,7 @@
#include <stdexcept>
#include "mmap_manager.h"
#include "rw_blocking.h"
#include "osd_ops.h"
@@ -115,6 +116,12 @@ int bind_stub(const char *bind_address, int bind_port)
void run_stub(int peer_fd)
{
mmap_manager_t mm;
int pipe_fd[2];
if (pipe(pipe_fd) < 0)
{
throw std::runtime_error(std::string("pipe: ") + strerror(errno));
}
osd_any_op_t op;
osd_any_reply_t reply;
void *buf = NULL;
@@ -136,11 +143,39 @@ void run_stub(int peer_fd)
if (op.hdr.opcode == OSD_OP_SEC_READ)
{
reply.hdr.retval = op.sec_rw.len;
buf = malloc(op.sec_rw.len);
//buf = malloc(op.sec_rw.len);
buf = mm.alloc(op.sec_rw.len);
r = write_blocking(peer_fd, reply.buf, OSD_PACKET_SIZE);
if (r == OSD_PACKET_SIZE)
r = write_blocking(peer_fd, &buf, op.sec_rw.len);
free(buf);
{
size_t offset = 0;
while (offset < op.sec_rw.len)
{
iovec iov = { .iov_base = buf+offset, .iov_len = op.sec_rw.len-offset };
int vmspliced = vmsplice(pipe_fd[1], &iov, 1, SPLICE_F_GIFT);
if (vmspliced < 0)
{
throw std::runtime_error(std::string("vmsplice: ")+strerror(errno));
}
int spliced = 0;
while (spliced < vmspliced)
{
int r2 = splice(pipe_fd[0], NULL, peer_fd, NULL, vmspliced-spliced, SPLICE_F_MOVE);
if (r2 < 0)
{
if (errno != EAGAIN)
throw std::runtime_error(std::string("splice: ")+strerror(errno));
}
else
spliced += r2;
}
offset += vmspliced;
}
r = offset;
//r = write_blocking(peer_fd, &buf, op.sec_rw.len);
}
mm.free(buf, op.sec_rw.len);
buf = NULL;
if (r < op.sec_rw.len)
break;
}
@@ -170,5 +205,6 @@ void run_stub(int peer_fd)
break;
}
}
free(buf);
close(pipe_fd[0]);
close(pipe_fd[1]);
}

12
src/vitastor.pc.in Normal file
View File

@@ -0,0 +1,12 @@
prefix=@CMAKE_INSTALL_PREFIX@
exec_prefix=${prefix}
libdir=${prefix}/@CMAKE_INSTALL_LIBDIR@
includedir=${prefix}/@CMAKE_INSTALL_INCLUDEDIR@
Name: Vitastor
Description: Vitastor client library
Version: 0.6.8
Libs: -L${libdir} -lvitastor_client
Cflags: -I${includedir}

View File

@@ -10,7 +10,7 @@ SCHEME=${SCHEME:-ec}
for i in $(seq 1 $OSD_COUNT); do
dd if=/dev/zero of=./testdata/test_osd$i.bin bs=1024 count=1 seek=$((OSD_SIZE*1024-1))
build/src/vitastor-osd --osd_num $i --bind_address 127.0.0.1 $OSD_ARGS --etcd_address $ETCD_URL $(node mon/simple-offsets.js --format options --device ./testdata/test_osd$i.bin 2>/dev/null) &>./testdata/osd$i.log &
build/src/vitastor-osd --osd_num $i --bind_address 127.0.0.1 $OSD_ARGS --etcd_address $ETCD_URL $(build/src/vitastor-cli simple-offsets --format options ./testdata/test_osd$i.bin 2>/dev/null) &>./testdata/osd$i.log &
eval OSD${i}_PID=$!
done

View File

@@ -10,25 +10,14 @@ else
NOBJ=1024
fi
dd if=/dev/zero of=./testdata/test_osd1.bin bs=1024 count=1 seek=$((1024*1024-1))
dd if=/dev/zero of=./testdata/test_osd2.bin bs=1024 count=1 seek=$((1024*1024-1))
dd if=/dev/zero of=./testdata/test_osd3.bin bs=1024 count=1 seek=$((1024*1024-1))
dd if=/dev/zero of=./testdata/test_osd4.bin bs=1024 count=1 seek=$((1024*1024-1))
dd if=/dev/zero of=./testdata/test_osd5.bin bs=1024 count=1 seek=$((1024*1024-1))
dd if=/dev/zero of=./testdata/test_osd6.bin bs=1024 count=1 seek=$((1024*1024-1))
build/src/vitastor-osd --osd_num 1 --bind_address 127.0.0.1 --etcd_address $ETCD_URL $(node mon/simple-offsets.js --format options --device ./testdata/test_osd1.bin 2>/dev/null) 2>&1 >>./testdata/osd1.log &
OSD1_PID=$!
build/src/vitastor-osd --osd_num 2 --bind_address 127.0.0.1 --etcd_address $ETCD_URL $(node mon/simple-offsets.js --format options --device ./testdata/test_osd2.bin 2>/dev/null) 2>&1 >>./testdata/osd2.log &
OSD2_PID=$!
build/src/vitastor-osd --osd_num 3 --bind_address 127.0.0.1 --etcd_address $ETCD_URL $(node mon/simple-offsets.js --format options --device ./testdata/test_osd3.bin 2>/dev/null) 2>&1 >>./testdata/osd3.log &
OSD3_PID=$!
build/src/vitastor-osd --osd_num 4 --bind_address 127.0.0.1 --etcd_address $ETCD_URL $(node mon/simple-offsets.js --format options --device ./testdata/test_osd4.bin 2>/dev/null) 2>&1 >>./testdata/osd4.log &
OSD4_PID=$!
build/src/vitastor-osd --osd_num 5 --bind_address 127.0.0.1 --etcd_address $ETCD_URL $(node mon/simple-offsets.js --format options --device ./testdata/test_osd5.bin 2>/dev/null) 2>&1 >>./testdata/osd5.log &
OSD5_PID=$!
build/src/vitastor-osd --osd_num 6 --bind_address 127.0.0.1 --etcd_address $ETCD_URL $(node mon/simple-offsets.js --format options --device ./testdata/test_osd6.bin 2>/dev/null) 2>&1 >>./testdata/osd6.log &
OSD6_PID=$!
OSD_SIZE=1024
OSD_COUNT=6
OSD_ARGS=
for i in $(seq 1 $OSD_COUNT); do
dd if=/dev/zero of=./testdata/test_osd$i.bin bs=1024 count=1 seek=$((OSD_SIZE*1024-1))
build/src/vitastor-osd --osd_num $i --bind_address 127.0.0.1 $OSD_ARGS --etcd_address $ETCD_URL $(build/src/vitastor-cli simple-offsets --format options ./testdata/test_osd$i.bin 2>/dev/null) &>./testdata/osd$i.log &
eval OSD${i}_PID=$!
done
cd mon
npm install

View File

@@ -2,16 +2,14 @@
. `dirname $0`/common.sh
dd if=/dev/zero of=./testdata/test_osd1.bin bs=1024 count=1 seek=$((1024*1024-1))
dd if=/dev/zero of=./testdata/test_osd2.bin bs=1024 count=1 seek=$((1024*1024-1))
dd if=/dev/zero of=./testdata/test_osd3.bin bs=1024 count=1 seek=$((1024*1024-1))
build/src/vitastor-osd --osd_num 1 --bind_address 127.0.0.1 --etcd_address $ETCD_URL $(node mon/simple-offsets.js --format options --device ./testdata/test_osd1.bin 2>/dev/null) &>./testdata/osd1.log &
OSD1_PID=$!
build/src/vitastor-osd --osd_num 2 --bind_address 127.0.0.1 --etcd_address $ETCD_URL $(node mon/simple-offsets.js --format options --device ./testdata/test_osd2.bin 2>/dev/null) &>./testdata/osd2.log &
OSD2_PID=$!
build/src/vitastor-osd --osd_num 3 --bind_address 127.0.0.1 --etcd_address $ETCD_URL $(node mon/simple-offsets.js --format options --device ./testdata/test_osd3.bin 2>/dev/null) &>./testdata/osd3.log &
OSD3_PID=$!
OSD_SIZE=1024
OSD_COUNT=3
OSD_ARGS=
for i in $(seq 1 $OSD_COUNT); do
dd if=/dev/zero of=./testdata/test_osd$i.bin bs=1024 count=1 seek=$((OSD_SIZE*1024-1))
build/src/vitastor-osd --osd_num $i --bind_address 127.0.0.1 $OSD_ARGS --etcd_address $ETCD_URL $(build/src/vitastor-cli simple-offsets --format options ./testdata/test_osd$i.bin 2>/dev/null) &>./testdata/osd$i.log &
eval OSD${i}_PID=$!
done
cd mon
npm install

View File

@@ -10,28 +10,14 @@ else
$ETCDCTL put /vitastor/config/global '{"recovery_queue_depth":1,"osd_out_time":5}'
fi
dd if=/dev/zero of=./testdata/test_osd1.bin bs=1024 count=1 seek=$((1024*1024-1))
dd if=/dev/zero of=./testdata/test_osd2.bin bs=1024 count=1 seek=$((1024*1024-1))
dd if=/dev/zero of=./testdata/test_osd3.bin bs=1024 count=1 seek=$((1024*1024-1))
dd if=/dev/zero of=./testdata/test_osd4.bin bs=1024 count=1 seek=$((1024*1024-1))
dd if=/dev/zero of=./testdata/test_osd5.bin bs=1024 count=1 seek=$((1024*1024-1))
dd if=/dev/zero of=./testdata/test_osd6.bin bs=1024 count=1 seek=$((1024*1024-1))
dd if=/dev/zero of=./testdata/test_osd7.bin bs=1024 count=1 seek=$((1024*1024-1))
build/src/vitastor-osd --osd_num 1 --bind_address 127.0.0.1 $NO_SAME --etcd_address $ETCD_URL $(node mon/simple-offsets.js --format options --device ./testdata/test_osd1.bin 2>/dev/null) &>./testdata/osd1.log &
OSD1_PID=$!
build/src/vitastor-osd --osd_num 2 --bind_address 127.0.0.1 $NO_SAME --etcd_address $ETCD_URL $(node mon/simple-offsets.js --format options --device ./testdata/test_osd2.bin 2>/dev/null) &>./testdata/osd2.log &
OSD2_PID=$!
build/src/vitastor-osd --osd_num 3 --bind_address 127.0.0.1 $NO_SAME --etcd_address $ETCD_URL $(node mon/simple-offsets.js --format options --device ./testdata/test_osd3.bin 2>/dev/null) &>./testdata/osd3.log &
OSD3_PID=$!
build/src/vitastor-osd --osd_num 4 --bind_address 127.0.0.1 $NO_SAME --etcd_address $ETCD_URL $(node mon/simple-offsets.js --format options --device ./testdata/test_osd4.bin 2>/dev/null) &>./testdata/osd4.log &
OSD4_PID=$!
build/src/vitastor-osd --osd_num 5 --bind_address 127.0.0.1 $NO_SAME --etcd_address $ETCD_URL $(node mon/simple-offsets.js --format options --device ./testdata/test_osd5.bin 2>/dev/null) &>./testdata/osd5.log &
OSD5_PID=$!
build/src/vitastor-osd --osd_num 6 --bind_address 127.0.0.1 $NO_SAME --etcd_address $ETCD_URL $(node mon/simple-offsets.js --format options --device ./testdata/test_osd6.bin 2>/dev/null) &>./testdata/osd6.log &
OSD6_PID=$!
build/src/vitastor-osd --osd_num 7 --bind_address 127.0.0.1 $NO_SAME --etcd_address $ETCD_URL $(node mon/simple-offsets.js --format options --device ./testdata/test_osd7.bin 2>/dev/null) &>./testdata/osd7.log &
OSD7_PID=$!
OSD_SIZE=1024
OSD_COUNT=7
OSD_ARGS=
for i in $(seq 1 $OSD_COUNT); do
dd if=/dev/zero of=./testdata/test_osd$i.bin bs=1024 count=1 seek=$((OSD_SIZE*1024-1))
build/src/vitastor-osd --osd_num $i --bind_address 127.0.0.1 $OSD_ARGS --etcd_address $ETCD_URL $(build/src/vitastor-cli simple-offsets --format options ./testdata/test_osd$i.bin 2>/dev/null) &>./testdata/osd$i.log &
eval OSD${i}_PID=$!
done
cd mon
npm install

View File

@@ -2,22 +2,14 @@
. `dirname $0`/common.sh
dd if=/dev/zero of=./testdata/test_osd1.bin bs=1024 count=1 seek=$((1024*1024-1))
dd if=/dev/zero of=./testdata/test_osd2.bin bs=1024 count=1 seek=$((1024*1024-1))
dd if=/dev/zero of=./testdata/test_osd3.bin bs=1024 count=1 seek=$((1024*1024-1))
dd if=/dev/zero of=./testdata/test_osd4.bin bs=1024 count=1 seek=$((1024*1024-1))
dd if=/dev/zero of=./testdata/test_osd5.bin bs=1024 count=1 seek=$((1024*1024-1))
build/src/vitastor-osd --osd_num 1 --bind_address 127.0.0.1 --etcd_address $ETCD_URL $(node mon/simple-offsets.js --format options --device ./testdata/test_osd1.bin 2>/dev/null) &>./testdata/osd1.log &
OSD1_PID=$!
build/src/vitastor-osd --osd_num 2 --bind_address 127.0.0.1 --etcd_address $ETCD_URL $(node mon/simple-offsets.js --format options --device ./testdata/test_osd2.bin 2>/dev/null) &>./testdata/osd2.log &
OSD2_PID=$!
build/src/vitastor-osd --osd_num 3 --bind_address 127.0.0.1 --etcd_address $ETCD_URL $(node mon/simple-offsets.js --format options --device ./testdata/test_osd3.bin 2>/dev/null) &>./testdata/osd3.log &
OSD3_PID=$!
build/src/vitastor-osd --osd_num 4 --bind_address 127.0.0.1 --etcd_address $ETCD_URL $(node mon/simple-offsets.js --format options --device ./testdata/test_osd4.bin 2>/dev/null) &>./testdata/osd4.log &
OSD4_PID=$!
build/src/vitastor-osd --osd_num 5 --bind_address 127.0.0.1 --etcd_address $ETCD_URL $(node mon/simple-offsets.js --format options --device ./testdata/test_osd5.bin 2>/dev/null) &>./testdata/osd5.log &
OSD5_PID=$!
OSD_SIZE=1024
OSD_COUNT=5
OSD_ARGS=
for i in $(seq 1 $OSD_COUNT); do
dd if=/dev/zero of=./testdata/test_osd$i.bin bs=1024 count=1 seek=$((OSD_SIZE*1024-1))
build/src/vitastor-osd --osd_num $i --bind_address 127.0.0.1 $OSD_ARGS --etcd_address $ETCD_URL $(build/src/vitastor-cli simple-offsets --format options ./testdata/test_osd$i.bin 2>/dev/null) &>./testdata/osd$i.log &
eval OSD${i}_PID=$!
done
$ETCDCTL put /vitastor/config/pools '{"1":{"name":"testpool","scheme":"replicated","pg_size":2,"pg_minsize":1,"pg_count":1,"failure_domain":"osd"}}'

View File

@@ -20,7 +20,7 @@ LD_PRELOAD=libasan.so.5 \
# Kill OSD 2, start OSD 1
kill $OSD2_PID
build/src/vitastor-osd --osd_num 1 --bind_address 127.0.0.1 $OSD_ARGS --etcd_address $ETCD_URL $(node mon/simple-offsets.js --format options --device ./testdata/test_osd2.bin 2>/dev/null) >>./testdata/osd2.log 2>&1 &
build/src/vitastor-osd --osd_num 1 --bind_address 127.0.0.1 $OSD_ARGS --etcd_address $ETCD_URL $(build/src/vitastor-cli simple-offsets --format options --device ./testdata/test_osd2.bin 2>/dev/null) >>./testdata/osd2.log 2>&1 &
sleep 2
# Check PG state - it should NOT become active

View File

@@ -7,12 +7,12 @@ etcdctl --endpoints=http://127.0.0.1:12379/v3 del --prefix /vitastor/mon/master
etcdctl --endpoints=http://127.0.0.1:12379/v3 del --prefix /vitastor/pg/state
etcdctl --endpoints=http://127.0.0.1:12379/v3 del --prefix /vitastor/osd/state
build/src/vitastor-osd --osd_num 1 --bind_address 127.0.0.1 --etcd_address $ETCD_URL $(node mon/simple-offsets.js --format options --device ./testdata/test_osd1.bin 2>/dev/null) &>./testdata/osd1.log &
OSD1_PID=$!
build/src/vitastor-osd --osd_num 2 --bind_address 127.0.0.1 --etcd_address $ETCD_URL $(node mon/simple-offsets.js --format options --device ./testdata/test_osd2.bin 2>/dev/null) &>./testdata/osd2.log &
OSD2_PID=$!
build/src/vitastor-osd --osd_num 3 --bind_address 127.0.0.1 --etcd_address $ETCD_URL $(node mon/simple-offsets.js --format options --device ./testdata/test_osd3.bin 2>/dev/null) &>./testdata/osd3.log &
OSD3_PID=$!
OSD_COUNT=3
OSD_ARGS=
for i in $(seq 1 $OSD_COUNT); do
build/src/vitastor-osd --osd_num $i --bind_address 127.0.0.1 $OSD_ARGS --etcd_address $ETCD_URL $(build/src/vitastor-cli simple-offsets --format options ./testdata/test_osd$i.bin 2>/dev/null) &>./testdata/osd$i.log &
eval OSD${i}_PID=$!
done
node mon/mon-main.js --etcd_url http://$ETCD_URL --etcd_prefix "/vitastor" &>./testdata/mon.log &
MON_PID=$!