Compare commits

...

18 Commits

Author SHA1 Message Date
Vitaliy Filippov cf60b6818c Extract PG generation into pg_gen.js
Test / test_snapshot_chain_ec (push) Successful in 2m56s Details
Test / test_rebalance_verify_imm (push) Successful in 3m2s Details
Test / test_root_node (push) Successful in 12s Details
Test / test_rebalance_verify (push) Successful in 3m35s Details
Test / test_switch_primary (push) Successful in 33s Details
Test / test_write (push) Successful in 40s Details
Test / test_write_no_same (push) Successful in 19s Details
Test / test_write_xor (push) Successful in 1m3s Details
Test / test_rebalance_verify_ec (push) Successful in 4m0s Details
Test / test_rebalance_verify_ec_imm (push) Successful in 3m39s Details
Test / test_heal_pg_size_2 (push) Successful in 3m29s Details
Test / test_heal_csum_32k_dmj (push) Successful in 5m22s Details
Test / test_heal_ec (push) Successful in 6m27s Details
Test / test_heal_csum_32k_dj (push) Successful in 5m40s Details
Test / test_heal_csum_32k (push) Successful in 6m50s Details
Test / test_osd_tags (push) Successful in 25s Details
Test / test_heal_csum_4k_dmj (push) Successful in 6m28s Details
Test / test_enospc (push) Successful in 2m13s Details
Test / test_heal_csum_4k_dj (push) Successful in 6m10s Details
Test / test_heal_csum_4k (push) Successful in 6m12s Details
Test / test_scrub (push) Successful in 44s Details
Test / test_enospc_imm (push) Successful in 1m1s Details
Test / test_enospc_xor (push) Successful in 1m24s Details
Test / test_enospc_imm_xor (push) Successful in 1m5s Details
Test / test_scrub_zero_osd_2 (push) Successful in 27s Details
Test / test_scrub_xor (push) Successful in 28s Details
Test / test_nfs (push) Successful in 19s Details
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 37s Details
Test / test_scrub_pg_size_3 (push) Successful in 48s Details
Test / test_scrub_ec (push) Successful in 29s Details
2024-06-05 11:22:06 +03:00
Vitaliy Filippov 1a4a7cdc37 Extract OSD Tree generation functions to osd_tree.js 2024-06-05 11:19:35 +03:00
Vitaliy Filippov 1b48085e21 Extract remote etcd interaction to etcd_adapter.js 2024-06-05 11:19:35 +03:00
Vitaliy Filippov a71847244e Rename PGUtil.js to pg_utils.js 2024-06-05 10:51:20 +03:00
Vitaliy Filippov 848c2d2722 Move LPOptimizer, DSL and tests to lp_optimizer/ 2024-06-05 10:51:20 +03:00
Vitaliy Filippov 86832dc43f Add eslint import/no-unresolved 2024-06-05 10:51:20 +03:00
Vitaliy Filippov 1f6da79463 Extract stats calculation into a separate file 2024-06-05 10:51:20 +03:00
Vitaliy Filippov 9bf57c3760 Mention generic Toshiba MG instead of specific MGxx, fix russian vitastorfs link 2024-06-05 02:08:09 +03:00
Vitaliy Filippov a0305b5b4a Extract pool configuration validation into a separate file 2024-06-05 02:08:08 +03:00
Vitaliy Filippov 1546f8e447 Extract etcd data "schema" into a separate file 2024-06-05 02:07:53 +03:00
Vitaliy Filippov 8ce962b312 Move scripts 2024-06-05 02:07:53 +03:00
Vitaliy Filippov 50e56b3b92 Add vitastor_c_inode_get_immediate_commit
Test / test_snapshot_chain_ec (push) Successful in 2m48s Details
Test / test_rebalance_verify_imm (push) Successful in 2m58s Details
Test / test_root_node (push) Successful in 9s Details
Test / test_rebalance_verify (push) Successful in 3m33s Details
Test / test_switch_primary (push) Successful in 35s Details
Test / test_write (push) Successful in 40s Details
Test / test_write_no_same (push) Successful in 18s Details
Test / test_write_xor (push) Successful in 1m3s Details
Test / test_rebalance_verify_ec (push) Successful in 4m12s Details
Test / test_rebalance_verify_ec_imm (push) Successful in 4m41s Details
Test / test_heal_pg_size_2 (push) Successful in 3m30s Details
Test / test_heal_ec (push) Successful in 4m46s Details
Test / test_heal_csum_32k_dmj (push) Successful in 5m6s Details
Test / test_heal_csum_32k_dj (push) Successful in 6m33s Details
Test / test_heal_csum_32k (push) Successful in 6m38s Details
Test / test_osd_tags (push) Successful in 33s Details
Test / test_heal_csum_4k_dmj (push) Successful in 6m59s Details
Test / test_enospc (push) Successful in 2m16s Details
Test / test_heal_csum_4k_dj (push) Successful in 6m48s Details
Test / test_enospc_imm (push) Successful in 1m42s Details
Test / test_enospc_xor (push) Successful in 2m26s Details
Test / test_enospc_imm_xor (push) Successful in 2m11s Details
Test / test_heal_csum_4k (push) Successful in 6m15s Details
Test / test_scrub_zero_osd_2 (push) Successful in 29s Details
Test / test_scrub (push) Successful in 37s Details
Test / test_scrub_xor (push) Successful in 36s Details
Test / test_scrub_ec (push) Successful in 38s Details
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 40s Details
Test / test_scrub_pg_size_3 (push) Successful in 47s Details
Test / test_nfs (push) Successful in 15s Details
2024-05-19 01:57:18 +03:00
Vitaliy Filippov a12d328793 Rename cli/ to cmd/, fix cmake install
Test / test_snapshot_chain_ec (push) Successful in 2m39s Details
Test / test_rebalance_verify_imm (push) Successful in 3m2s Details
Test / test_root_node (push) Successful in 9s Details
Test / test_rebalance_verify (push) Successful in 3m39s Details
Test / test_switch_primary (push) Successful in 37s Details
Test / test_write (push) Successful in 41s Details
Test / test_write_no_same (push) Successful in 18s Details
Test / test_write_xor (push) Successful in 1m4s Details
Test / test_rebalance_verify_ec_imm (push) Successful in 3m44s Details
Test / test_rebalance_verify_ec (push) Successful in 4m17s Details
Test / test_heal_pg_size_2 (push) Successful in 3m30s Details
Test / test_heal_ec (push) Successful in 5m4s Details
Test / test_heal_csum_32k_dmj (push) Successful in 5m53s Details
Test / test_heal_csum_32k_dj (push) Successful in 5m46s Details
Test / test_heal_csum_32k (push) Successful in 6m31s Details
Test / test_osd_tags (push) Successful in 25s Details
Test / test_heal_csum_4k_dmj (push) Successful in 7m7s Details
Test / test_enospc (push) Successful in 2m18s Details
Test / test_heal_csum_4k_dj (push) Successful in 5m57s Details
Test / test_heal_csum_4k (push) Successful in 6m27s Details
Test / test_enospc_imm (push) Successful in 1m6s Details
Test / test_enospc_xor (push) Successful in 1m18s Details
Test / test_scrub (push) Successful in 30s Details
Test / test_enospc_imm_xor (push) Successful in 1m5s Details
Test / test_scrub_zero_osd_2 (push) Successful in 25s Details
Test / test_scrub_xor (push) Successful in 31s Details
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 37s Details
Test / test_scrub_ec (push) Successful in 26s Details
Test / test_nfs (push) Successful in 14s Details
Test / test_scrub_pg_size_3 (push) Successful in 46s Details
2024-05-15 23:04:50 +03:00
Vitaliy Filippov c79b38bd26 Move all sources to subdirs
Test / test_snapshot_chain_ec (push) Successful in 2m50s Details
Test / test_rebalance_verify_imm (push) Successful in 3m17s Details
Test / test_root_node (push) Successful in 9s Details
Test / test_rebalance_verify (push) Successful in 4m6s Details
Test / test_switch_primary (push) Successful in 33s Details
Test / test_write (push) Successful in 53s Details
Test / test_write_no_same (push) Successful in 12s Details
Test / test_write_xor (push) Successful in 51s Details
Test / test_rebalance_verify_ec_imm (push) Successful in 3m26s Details
Test / test_rebalance_verify_ec (push) Successful in 4m24s Details
Test / test_heal_pg_size_2 (push) Successful in 5m11s Details
Test / test_heal_ec (push) Successful in 5m8s Details
Test / test_heal_csum_32k_dmj (push) Successful in 5m1s Details
Test / test_heal_csum_32k_dj (push) Successful in 6m0s Details
Test / test_heal_csum_4k_dmj (push) Successful in 7m3s Details
Test / test_heal_csum_32k (push) Successful in 7m6s Details
Test / test_heal_csum_4k_dj (push) Successful in 6m53s Details
Test / test_osd_tags (push) Successful in 28s Details
Test / test_enospc (push) Successful in 1m10s Details
Test / test_enospc_imm (push) Successful in 57s Details
Test / test_enospc_xor (push) Successful in 1m24s Details
Test / test_heal_csum_4k (push) Successful in 6m58s Details
Test / test_scrub_zero_osd_2 (push) Successful in 29s Details
Test / test_scrub (push) Successful in 33s Details
Test / test_scrub_xor (push) Successful in 26s Details
Test / test_enospc_imm_xor (push) Successful in 51s Details
Test / test_nfs (push) Successful in 24s Details
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 34s Details
Test / test_scrub_ec (push) Successful in 32s Details
Test / test_scrub_pg_size_3 (push) Successful in 39s Details
2024-05-15 11:06:01 +03:00
Vitaliy Filippov 44692d148a Make vitastor_kv.h header public
Test / test_snapshot_chain_ec (push) Successful in 2m47s Details
Test / test_rebalance_verify_imm (push) Successful in 2m42s Details
Test / test_root_node (push) Successful in 9s Details
Test / test_rebalance_verify (push) Successful in 3m21s Details
Test / test_switch_primary (push) Successful in 37s Details
Test / test_write (push) Successful in 46s Details
Test / test_write_no_same (push) Successful in 17s Details
Test / test_write_xor (push) Successful in 49s Details
Test / test_rebalance_verify_ec (push) Successful in 4m51s Details
Test / test_rebalance_verify_ec_imm (push) Successful in 4m31s Details
Test / test_heal_pg_size_2 (push) Successful in 3m36s Details
Test / test_heal_ec (push) Successful in 3m37s Details
Test / test_heal_csum_32k_dmj (push) Successful in 6m7s Details
Test / test_heal_csum_32k_dj (push) Successful in 6m12s Details
Test / test_heal_csum_32k (push) Successful in 7m20s Details
Test / test_heal_csum_4k_dmj (push) Successful in 7m7s Details
Test / test_osd_tags (push) Successful in 19s Details
Test / test_enospc (push) Successful in 1m27s Details
Test / test_enospc_xor (push) Successful in 2m24s Details
Test / test_heal_csum_4k_dj (push) Successful in 5m42s Details
Test / test_enospc_imm (push) Successful in 1m39s Details
Test / test_heal_csum_4k (push) Successful in 6m0s Details
Test / test_scrub_zero_osd_2 (push) Successful in 47s Details
Test / test_scrub (push) Successful in 50s Details
Test / test_enospc_imm_xor (push) Successful in 1m15s Details
Test / test_scrub_xor (push) Successful in 25s Details
Test / test_nfs (push) Successful in 23s Details
Test / test_scrub_ec (push) Successful in 32s Details
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 35s Details
Test / test_scrub_pg_size_3 (push) Successful in 41s Details
2024-05-15 01:49:38 +03:00
Vitaliy Filippov ba52359611 Fix last master commit 2024-05-15 01:49:31 +03:00
Vitaliy Filippov 23a9aa93b5 Fix pool create/modify --block_size validation
Test / test_splitbrain (push) Has been skipped Details
Test / test_rebalance_verify (push) Has been skipped Details
Test / test_rebalance_verify_imm (push) Has been skipped Details
Test / test_rebalance_verify_ec (push) Has been skipped Details
Test / test_rebalance_verify_ec_imm (push) Has been skipped Details
Test / test_root_node (push) Has been skipped Details
Test / test_switch_primary (push) Has been skipped Details
Test / test_write (push) Has been skipped Details
Test / test_write_xor (push) Has been skipped Details
Test / test_write_no_same (push) Has been skipped Details
Test / test_heal_pg_size_2 (push) Has been skipped Details
Test / test_heal_ec (push) Has been skipped Details
Test / test_heal_csum_32k_dmj (push) Has been skipped Details
Test / test_heal_csum_32k_dj (push) Has been skipped Details
Test / test_heal_csum_32k (push) Has been skipped Details
Test / test_heal_csum_4k_dmj (push) Has been skipped Details
Test / test_heal_csum_4k_dj (push) Has been skipped Details
Test / test_heal_csum_4k (push) Has been skipped Details
Test / test_osd_tags (push) Has been skipped Details
Test / test_enospc (push) Has been skipped Details
Test / test_enospc_xor (push) Has been skipped Details
Test / test_enospc_imm (push) Has been skipped Details
Test / test_enospc_imm_xor (push) Has been skipped Details
Test / test_scrub (push) Has been skipped Details
Test / test_scrub_zero_osd_2 (push) Has been skipped Details
Test / test_scrub_xor (push) Has been skipped Details
Test / test_scrub_pg_size_3 (push) Has been skipped Details
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Has been skipped Details
Test / test_scrub_ec (push) Has been skipped Details
Test / test_nfs (push) Has been skipped Details
2024-05-04 16:33:22 +03:00
Vitaliy Filippov 2412d9e239 Fix TTL comparison for lease/keepalive
Test / test_snapshot_chain_ec (push) Successful in 3m5s Details
Test / test_rebalance_verify_imm (push) Successful in 3m29s Details
Test / test_root_node (push) Successful in 9s Details
Test / test_rebalance_verify (push) Successful in 4m3s Details
Test / test_switch_primary (push) Successful in 35s Details
Test / test_write (push) Successful in 54s Details
Test / test_write_no_same (push) Successful in 13s Details
Test / test_write_xor (push) Successful in 54s Details
Test / test_rebalance_verify_ec_imm (push) Successful in 3m58s Details
Test / test_rebalance_verify_ec (push) Successful in 4m58s Details
Test / test_heal_pg_size_2 (push) Successful in 4m6s Details
Test / test_heal_ec (push) Successful in 4m15s Details
Test / test_heal_csum_32k_dmj (push) Successful in 5m52s Details
Test / test_heal_csum_32k_dj (push) Successful in 5m59s Details
Test / test_heal_csum_32k (push) Successful in 7m7s Details
Test / test_heal_csum_4k_dmj (push) Successful in 6m57s Details
Test / test_osd_tags (push) Successful in 28s Details
Test / test_enospc (push) Successful in 1m58s Details
Test / test_heal_csum_4k_dj (push) Successful in 6m53s Details
Test / test_heal_csum_4k (push) Successful in 6m20s Details
Test / test_enospc_xor (push) Successful in 2m9s Details
Test / test_enospc_imm (push) Successful in 41s Details
Test / test_scrub_zero_osd_2 (push) Successful in 35s Details
Test / test_scrub (push) Successful in 38s Details
Test / test_scrub_xor (push) Successful in 34s Details
Test / test_enospc_imm_xor (push) Successful in 58s Details
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 35s Details
Test / test_scrub_ec (push) Successful in 33s Details
Test / test_nfs (push) Successful in 19s Details
Test / test_scrub_pg_size_3 (push) Successful in 41s Details
2024-04-30 01:53:05 +03:00
254 changed files with 2304 additions and 2109 deletions

13
.gitignore vendored
View File

@ -3,16 +3,3 @@
package-lock.json package-lock.json
fio fio
qemu qemu
osd
stub_osd
stub_uring_osd
stub_bench
osd_test
osd_peering_pg_test
dump_journal
nbd_proxy
rm_inode
test_allocator
test_blockstore
test_shit
osd_rmw_test

View File

@ -1,6 +1,6 @@
#!/bin/bash #!/bin/bash
gcc -I. -E -o fio_headers.i src/fio_headers.h gcc -I. -E -o fio_headers.i src/util/fio_headers.h
rm -rf fio-copy rm -rf fio-copy
for i in `grep -Po 'fio/[^"]+' fio_headers.i | sort | uniq`; do for i in `grep -Po 'fio/[^"]+' fio_headers.i | sort | uniq`; do

View File

@ -5,7 +5,7 @@
#cd b/qemu; make qapi #cd b/qemu; make qapi
gcc -I qemu/b/qemu `pkg-config glib-2.0 --cflags` \ gcc -I qemu/b/qemu `pkg-config glib-2.0 --cflags` \
-I qemu/include -E -o qemu_driver.i src/qemu_driver.c -I qemu/include -E -o qemu_driver.i src/client/qemu_driver.c
rm -rf qemu-copy rm -rf qemu-copy
for i in `grep -Po 'qemu/[^"]+' qemu_driver.i | sort | uniq`; do for i in `grep -Po 'qemu/[^"]+' qemu_driver.i | sort | uniq`; do

View File

@ -27,7 +27,7 @@ RUN apt-get -y build-dep qemu
RUN apt-get --download-only source qemu RUN apt-get --download-only source qemu
ADD patches /root/vitastor/patches ADD patches /root/vitastor/patches
ADD src/qemu_driver.c /root/vitastor/src/qemu_driver.c ADD src/client/qemu_driver.c /root/qemu_driver.c
#RUN set -e; \ #RUN set -e; \
# apt-get install -y wget; \ # apt-get install -y wget; \
@ -52,7 +52,7 @@ RUN set -e; \
cd /root/packages/qemu-$REL/qemu-*/; \ cd /root/packages/qemu-$REL/qemu-*/; \
quilt push -a; \ quilt push -a; \
quilt add block/vitastor.c; \ quilt add block/vitastor.c; \
cp /root/vitastor/src/qemu_driver.c block/vitastor.c; \ cp /root/qemu_driver.c block/vitastor.c; \
quilt refresh; \ quilt refresh; \
V=$(head -n1 debian/changelog | perl -pe 's/5\.2\+dfsg-9/5.2+dfsg-11/; s/^.*\((.*?)(~bpo[\d\+]*)?\).*$/$1/')+vitastor4; \ V=$(head -n1 debian/changelog | perl -pe 's/5\.2\+dfsg-9/5.2+dfsg-11/; s/^.*\((.*?)(~bpo[\d\+]*)?\).*$/$1/')+vitastor4; \
if [ "$REL" = bullseye ]; then V=${V}bullseye; fi; \ if [ "$REL" = bullseye ]; then V=${V}bullseye; fi; \

View File

@ -1,2 +1,3 @@
mon usr/lib/vitastor mon usr/lib/vitastor/mon
mon/vitastor-mon.service /lib/systemd/system mon/scripts/make-etcd usr/lib/vitastor/mon
mon/scripts/vitastor-mon.service /lib/systemd/system

View File

@ -1,6 +1,6 @@
usr/bin/vitastor-osd usr/bin/vitastor-osd
usr/bin/vitastor-disk usr/bin/vitastor-disk
usr/bin/vitastor-dump-journal usr/bin/vitastor-dump-journal
mon/vitastor-osd@.service /lib/systemd/system mon/scripts/vitastor-osd@.service /lib/systemd/system
mon/vitastor.target /lib/systemd/system mon/scripts/vitastor.target /lib/systemd/system
mon/90-vitastor.rules /lib/udev/rules.d mon/scripts/90-vitastor.rules /lib/udev/rules.d

View File

@ -41,7 +41,7 @@ It's recommended to build the QEMU driver (qemu_driver.c) in-tree, as a part of
QEMU build process. To do that: QEMU build process. To do that:
- Install vitastor client library headers (from source or from vitastor-client-dev package) - Install vitastor client library headers (from source or from vitastor-client-dev package)
- Take a corresponding patch from `patches/qemu-*-vitastor.patch` and apply it to QEMU source - Take a corresponding patch from `patches/qemu-*-vitastor.patch` and apply it to QEMU source
- Copy `src/qemu_driver.c` to QEMU source directory as `block/vitastor.c` - Copy `src/client/qemu_driver.c` to QEMU source directory as `block/vitastor.c`
- Build QEMU as usual - Build QEMU as usual
But it is also possible to build it out-of-tree. To do that: But it is also possible to build it out-of-tree. To do that:

View File

@ -41,7 +41,7 @@ cmake .. && make -j8 install
Драйвер QEMU (qemu_driver.c) рекомендуется собирать вместе с самим QEMU. Для этого: Драйвер QEMU (qemu_driver.c) рекомендуется собирать вместе с самим QEMU. Для этого:
- Установите заголовки клиентской библиотеки Vitastor (из исходников или из пакета vitastor-client-dev) - Установите заголовки клиентской библиотеки Vitastor (из исходников или из пакета vitastor-client-dev)
- Возьмите соответствующий патч из `patches/qemu-*-vitastor.patch` и примените его к исходникам QEMU - Возьмите соответствующий патч из `patches/qemu-*-vitastor.patch` и примените его к исходникам QEMU
- Скопируйте [src/qemu_driver.c](../../src/qemu_driver.c) в директорию исходников QEMU как `block/vitastor.c` - Скопируйте [src/client/qemu_driver.c](../../src/client/qemu_driver.c) в директорию исходников QEMU как `block/vitastor.c`
- Соберите QEMU как обычно - Соберите QEMU как обычно
Однако в целях отладки драйвер также можно собирать отдельно от QEMU. Для этого: Однако в целях отладки драйвер также можно собирать отдельно от QEMU. Для этого:

View File

@ -22,7 +22,7 @@
with lazy fsync, but prepare for inferior single-thread latency. Read more about capacitors with lazy fsync, but prepare for inferior single-thread latency. Read more about capacitors
[here](../config/layout-cluster.en.md#immediate_commit). [here](../config/layout-cluster.en.md#immediate_commit).
- If you want to use HDDs, get modern HDDs with Media Cache or SSD Cache: HGST Ultrastar, - If you want to use HDDs, get modern HDDs with Media Cache or SSD Cache: HGST Ultrastar,
Toshiba MG08, Seagate EXOS or something similar. If your drives don't have such cache then Toshiba MG, Seagate EXOS or something similar. If your drives don't have such cache then
you also need small SSDs for journal and metadata (even 2 GB per 1 TB of HDD space is enough). you also need small SSDs for journal and metadata (even 2 GB per 1 TB of HDD space is enough).
- Get a fast network (at least 10 Gbit/s). Something like Mellanox ConnectX-4 with RoCEv2 is ideal. - Get a fast network (at least 10 Gbit/s). Something like Mellanox ConnectX-4 with RoCEv2 is ideal.
- Disable CPU powersaving: `cpupower idle-set -D 0 && cpupower frequency-set -g performance`. - Disable CPU powersaving: `cpupower idle-set -D 0 && cpupower frequency-set -g performance`.
@ -33,7 +33,7 @@
- SATA SSD: Micron 5100/5200/5300/5400, Samsung PM863/PM883/PM893, Intel D3-S4510/4520/4610/4620, Kingston DC500M - SATA SSD: Micron 5100/5200/5300/5400, Samsung PM863/PM883/PM893, Intel D3-S4510/4520/4610/4620, Kingston DC500M
- NVMe: Micron 9100/9200/9300/9400, Micron 7300/7450, Samsung PM983/PM9A3, Samsung PM1723/1735/1743, - NVMe: Micron 9100/9200/9300/9400, Micron 7300/7450, Samsung PM983/PM9A3, Samsung PM1723/1735/1743,
Intel DC-P3700/P4500/P4600, Intel D7-P5500/P5600, Intel Optane, Kingston DC1000B/DC1500M Intel DC-P3700/P4500/P4600, Intel D7-P5500/P5600, Intel Optane, Kingston DC1000B/DC1500M
- HDD: HGST Ultrastar, Toshiba MG06/MG07/MG08, Seagate EXOS - HDD: HGST Ultrastar, Toshiba MG, Seagate EXOS
## Configure monitors ## Configure monitors

View File

@ -123,4 +123,4 @@ vitastor-cli create -s 10G testimg
Если вы хотите использовать не только блочные образы виртуальных машин или контейнеров, Если вы хотите использовать не только блочные образы виртуальных машин или контейнеров,
а также кластерную файловую систему, то: а также кластерную файловую систему, то:
- [Следуйте инструкциям](../usage/nfs.en.md#vitastorfs) - [Следуйте инструкциям](../usage/nfs.ru.md#vitastorfs)

View File

@ -11,6 +11,7 @@ module.exports = {
"ecmaVersion": 2020 "ecmaVersion": 2020
}, },
"plugins": [ "plugins": [
"import"
], ],
"rules": { "rules": {
"indent": [ "indent": [
@ -44,6 +45,10 @@ module.exports = {
], ],
"node/shebang": [ "node/shebang": [
"off" "off"
],
"import/no-unresolved": [
2,
{ "commonjs": true }
] ]
} }
}; };

320
mon/etcd_adapter.js Normal file
View File

@ -0,0 +1,320 @@
// Copyright (c) Vitaliy Filippov, 2019+
// License: VNPL-1.1 (see README.md for details)
const http = require('http');
const WebSocket = require('ws');
class EtcdAdapter
{
constructor(mon)
{
this.mon = mon;
this.ws = null;
this.ws_alive = false;
this.ws_keepalive_timer = null;
}
parse_config(config)
{
this.parse_etcd_addresses(config.etcd_address||config.etcd_url);
}
parse_etcd_addresses(addrs)
{
const is_local_ip = this.mon.local_ips(true).reduce((a, c) => { a[c] = true; return a; }, {});
this.etcd_local = [];
this.etcd_urls = [];
this.selected_etcd_url = null;
this.etcd_urls_to_try = [];
if (!(addrs instanceof Array))
addrs = addrs ? (''+(addrs||'')).split(/,/) : [];
if (!addrs.length)
{
console.error('Vitastor etcd address(es) not specified. Please set on the command line or in the config file');
process.exit(1);
}
for (let url of addrs)
{
let scheme = 'http';
url = url.trim().replace(/^(https?):\/\//, (m, m1) => { scheme = m1; return ''; });
const slash = url.indexOf('/');
const colon = url.indexOf(':');
const is_local = is_local_ip[colon >= 0 ? url.substr(0, colon) : (slash >= 0 ? url.substr(0, slash) : url)];
url = scheme+'://'+(slash >= 0 ? url : url+'/v3');
if (is_local)
this.etcd_local.push(url);
else
this.etcd_urls.push(url);
}
}
pick_next_etcd()
{
if (this.selected_etcd_url)
return this.selected_etcd_url;
if (!this.etcd_urls_to_try || !this.etcd_urls_to_try.length)
{
this.etcd_urls_to_try = [ ...this.etcd_local ];
const others = [ ...this.etcd_urls ];
while (others.length)
{
const url = others.splice(0|(others.length*Math.random()), 1);
this.etcd_urls_to_try.push(url[0]);
}
}
this.selected_etcd_url = this.etcd_urls_to_try.shift();
return this.selected_etcd_url;
}
restart_watcher(cur_addr)
{
if (this.ws)
{
this.ws.close();
this.ws = null;
}
if (this.ws_keepalive_timer)
{
clearInterval(this.ws_keepalive_timer);
this.ws_keepalive_timer = null;
}
if (this.selected_etcd_url == cur_addr)
{
this.selected_etcd_url = null;
}
this.start_watcher(this.mon.config.etcd_mon_retries).catch(this.mon.die);
}
async start_watcher(retries)
{
let retry = 0;
if (!retries || retries < 1)
{
retries = 1;
}
const tried = {};
while (retries < 0 || retry < retries)
{
const cur_addr = this.pick_next_etcd();
const base = 'ws'+cur_addr.substr(4);
let now = Date.now();
if (tried[base] && now-tried[base] < this.mon.etcd_start_timeout)
{
await new Promise(ok => setTimeout(ok, this.mon.etcd_start_timeout-(now-tried[base])));
now = Date.now();
}
tried[base] = now;
const ok = await new Promise(ok =>
{
const timer_id = setTimeout(() =>
{
this.ws.close();
this.ws = null;
ok(false);
}, this.mon.config.etcd_mon_timeout);
this.ws = new WebSocket(base+'/watch');
const fail = () =>
{
ok(false);
};
this.ws.on('error', fail);
this.ws.on('open', () =>
{
this.ws.removeListener('error', fail);
if (timer_id)
clearTimeout(timer_id);
ok(true);
});
});
if (ok)
break;
if (this.selected_etcd_url == cur_addr)
this.selected_etcd_url = null;
this.ws = null;
retry++;
}
if (!this.ws)
{
this.mon.failconnect('Failed to open etcd watch websocket');
}
const cur_addr = this.selected_etcd_url;
this.ws_alive = true;
this.ws_keepalive_timer = setInterval(() =>
{
if (this.ws_alive)
{
this.ws_alive = false;
this.ws.send(JSON.stringify({ progress_request: {} }));
}
else
{
console.log('etcd websocket timed out, restarting it');
this.restart_watcher(cur_addr);
}
}, (Number(this.mon.config.etcd_ws_keepalive_interval) || 30)*1000);
this.ws.on('error', () => this.restart_watcher(cur_addr));
this.ws.send(JSON.stringify({
create_request: {
key: b64(this.mon.etcd_prefix+'/'),
range_end: b64(this.mon.etcd_prefix+'0'),
start_revision: ''+this.mon.etcd_watch_revision,
watch_id: 1,
progress_notify: true,
},
}));
this.ws.on('message', (msg) =>
{
this.ws_alive = true;
let data;
try
{
data = JSON.parse(msg);
}
catch (e)
{
}
if (!data || !data.result)
{
console.error('Unknown message received from watch websocket: '+msg);
}
else if (data.result.canceled)
{
// etcd watch canceled
if (data.result.compact_revision)
{
// we may miss events if we proceed
console.error('Revisions before '+data.result.compact_revision+' were compacted by etcd, exiting');
this.mon.on_stop(1);
}
console.error('Watch canceled by etcd, reason: '+data.result.cancel_reason+', exiting');
this.mon.on_stop(1);
}
else if (data.result.created)
{
// etcd watch created
}
else
{
this.mon.on_message(data.result);
}
});
}
async become_master()
{
const state = { ...this.mon.get_mon_state(), id: ''+this.mon.etcd_lease_id };
// eslint-disable-next-line no-constant-condition
while (1)
{
const res = await this.etcd_call('/kv/txn', {
compare: [ { target: 'CREATE', create_revision: 0, key: b64(this.mon.etcd_prefix+'/mon/master') } ],
success: [ { requestPut: { key: b64(this.mon.etcd_prefix+'/mon/master'), value: b64(JSON.stringify(state)), lease: ''+this.mon.etcd_lease_id } } ],
}, this.mon.etcd_start_timeout, 0);
if (res.succeeded)
{
break;
}
console.log('Waiting to become master');
await new Promise(ok => setTimeout(ok, this.mon.etcd_start_timeout));
}
console.log('Became master');
}
async etcd_call(path, body, timeout, retries)
{
let retry = 0;
if (retries >= 0 && retries < 1)
{
retries = 1;
}
const tried = {};
while (retries < 0 || retry < retries)
{
retry++;
const base = this.pick_next_etcd();
let now = Date.now();
if (tried[base] && now-tried[base] < timeout)
{
await new Promise(ok => setTimeout(ok, timeout-(now-tried[base])));
now = Date.now();
}
tried[base] = now;
const res = await POST(base+path, body, timeout);
if (res.error)
{
if (this.selected_etcd_url == base)
this.selected_etcd_url = null;
console.error('failed to query etcd: '+res.error);
continue;
}
if (res.json)
{
if (res.json.error)
{
console.error('etcd returned error: '+res.json.error);
break;
}
return res.json;
}
}
this.mon.failconnect();
}
}
function POST(url, body, timeout)
{
return new Promise(ok =>
{
const body_text = Buffer.from(JSON.stringify(body));
let timer_id = timeout > 0 ? setTimeout(() =>
{
if (req)
req.abort();
req = null;
ok({ error: 'timeout' });
}, timeout) : null;
let req = http.request(url, { method: 'POST', headers: {
'Content-Type': 'application/json',
'Content-Length': body_text.length,
} }, (res) =>
{
if (!req)
{
return;
}
clearTimeout(timer_id);
let res_body = '';
res.setEncoding('utf8');
res.on('error', (error) => ok({ error }));
res.on('data', chunk => { res_body += chunk; });
res.on('end', () =>
{
if (res.statusCode != 200)
{
ok({ error: res_body, code: res.statusCode });
return;
}
try
{
res_body = JSON.parse(res_body);
ok({ response: res, json: res_body });
}
catch (e)
{
ok({ error: e, response: res, body: res_body });
}
});
});
req.on('error', (error) => ok({ error }));
req.on('close', () => ok({ error: new Error('Connection closed prematurely') }));
req.write(body_text);
req.end();
});
}
function b64(str)
{
return Buffer.from(str).toString('base64');
}
module.exports = EtcdAdapter;

391
mon/etcd_schema.js Normal file
View File

@ -0,0 +1,391 @@
// Copyright (c) Vitaliy Filippov, 2019+
// License: VNPL-1.1 (see README.md for details)
// FIXME document all etcd keys and config variables in the form of JSON schema or similar
const etcd_nonempty_keys = {
'config/global': 1,
'config/node_placement': 1,
'config/pools': 1,
'config/pgs': 1,
'history/last_clean_pgs': 1,
'stats': 1,
};
const etcd_allow = new RegExp('^'+[
'config/global',
'config/node_placement',
'config/pools',
'config/osd/[1-9]\\d*',
'config/pgs',
'config/inode/[1-9]\\d*/[1-9]\\d*',
'osd/state/[1-9]\\d*',
'osd/stats/[1-9]\\d*',
'osd/inodestats/[1-9]\\d*',
'osd/space/[1-9]\\d*',
'mon/master',
'mon/member/[a-f0-9]+',
'pg/state/[1-9]\\d*/[1-9]\\d*',
'pg/stats/[1-9]\\d*/[1-9]\\d*',
'pg/history/[1-9]\\d*/[1-9]\\d*',
'history/last_clean_pgs',
'inode/stats/[1-9]\\d*/\\d+',
'pool/stats/[1-9]\\d*',
'stats',
'index/image/.*',
'index/maxid/[1-9]\\d*',
].join('$|^')+'$');
const etcd_tree = {
config: {
/* global: {
// WARNING: NOT ALL OF THESE ARE ACTUALLY CONFIGURABLE HERE
// THIS IS JUST A POOR MAN'S CONFIG DOCUMENTATION
// etcd connection
config_path: "/etc/vitastor/vitastor.conf",
etcd_prefix: "/vitastor",
// etcd connection - configurable online
etcd_address: "10.0.115.10:2379/v3",
// mon
etcd_mon_ttl: 5, // min: 1
etcd_mon_timeout: 1000, // ms. min: 0
etcd_mon_retries: 5, // min: 0
mon_change_timeout: 1000, // ms. min: 100
mon_retry_change_timeout: 50, // ms. min: 10
mon_stats_timeout: 1000, // ms. min: 100
osd_out_time: 600, // seconds. min: 0
placement_levels: { datacenter: 1, rack: 2, host: 3, osd: 4, ... },
use_old_pg_combinator: false,
// client and osd
tcp_header_buffer_size: 65536,
use_sync_send_recv: false,
use_rdma: true,
rdma_device: null, // for example, "rocep5s0f0"
rdma_port_num: 1,
rdma_gid_index: 0,
rdma_mtu: 4096,
rdma_max_sge: 128,
rdma_max_send: 8,
rdma_max_recv: 16,
rdma_max_msg: 132096,
block_size: 131072,
disk_alignment: 4096,
bitmap_granularity: 4096,
immediate_commit: false, // 'all' or 'small'
// client - configurable online
client_max_dirty_bytes: 33554432,
client_max_dirty_ops: 1024,
client_enable_writeback: false,
client_max_buffered_bytes: 33554432,
client_max_buffered_ops: 1024,
client_max_writeback_iodepth: 256,
client_retry_interval: 50, // ms. min: 10
client_eio_retry_interval: 1000, // ms
client_retry_enospc: true,
osd_nearfull_ratio: 0.95,
// client and osd - configurable online
log_level: 0,
peer_connect_interval: 5, // seconds. min: 1
peer_connect_timeout: 5, // seconds. min: 1
osd_idle_timeout: 5, // seconds. min: 1
osd_ping_timeout: 5, // seconds. min: 1
max_etcd_attempts: 5,
etcd_quick_timeout: 1000, // ms
etcd_slow_timeout: 5000, // ms
etcd_keepalive_timeout: 30, // seconds, default is max(30, etcd_report_interval*2)
etcd_ws_keepalive_interval: 30, // seconds
// osd
etcd_report_interval: 5, // seconds
etcd_stats_interval: 30, // seconds
run_primary: true,
osd_network: null, // "192.168.7.0/24" or an array of masks
bind_address: "0.0.0.0",
bind_port: 0,
readonly: false,
osd_memlock: false,
// osd - configurable online
autosync_interval: 5,
autosync_writes: 128,
client_queue_depth: 128, // unused
recovery_queue_depth: 1,
recovery_sleep_us: 0,
recovery_tune_util_low: 0.1,
recovery_tune_client_util_low: 0,
recovery_tune_util_high: 1.0,
recovery_tune_client_util_high: 0.5,
recovery_tune_interval: 1,
recovery_tune_agg_interval: 10, // 10 times recovery_tune_interval
recovery_tune_sleep_min_us: 10, // 10 microseconds
recovery_pg_switch: 128,
recovery_sync_batch: 16,
no_recovery: false,
no_rebalance: false,
print_stats_interval: 3,
slow_log_interval: 10,
inode_vanish_time: 60,
auto_scrub: false,
no_scrub: false,
scrub_interval: '30d', // 1s/1m/1h/1d
scrub_queue_depth: 1,
scrub_sleep: 0, // milliseconds
scrub_list_limit: 1000, // objects to list on one scrub iteration
scrub_find_best: true,
scrub_ec_max_bruteforce: 100, // maximum EC error locator brute-force iterators
// blockstore - fixed in superblock
block_size,
disk_alignment,
journal_block_size,
meta_block_size,
bitmap_granularity,
journal_device,
journal_offset,
journal_size,
disable_journal_fsync,
data_device,
data_offset,
data_size,
disable_data_fsync,
meta_device,
meta_offset,
disable_meta_fsync,
disable_device_lock,
// blockstore - configurable offline
inmemory_metadata,
inmemory_journal,
journal_sector_buffer_count,
journal_no_same_sector_overwrites,
// blockstore - configurable online
max_write_iodepth,
min_flusher_count: 1,
max_flusher_count: 256,
throttle_small_writes: false,
throttle_target_iops: 100,
throttle_target_mbs: 100,
throttle_target_parallelism: 1,
throttle_threshold_us: 50,
}, */
global: {},
/* node_placement: {
host1: { level: 'host', parent: 'rack1' },
...
}, */
node_placement: {},
/* pools: {
<id>: {
name: 'testpool',
// 'ec' uses Reed-Solomon-Vandermonde codes, 'jerasure' is an alias for 'ec'
scheme: 'replicated' | 'xor' | 'ec' | 'jerasure',
pg_size: 3,
pg_minsize: 2,
// number of parity chunks, required for EC
parity_chunks?: 1,
pg_count: 100,
// default is failure_domain=host
failure_domain?: 'host',
// additional failure domain rules; failure_domain=x is equivalent to x=123..N
level_placement?: 'dc=112233 host=123456',
raw_placement?: 'any, dc=1 host!=1, dc=1 host!=(1,2)',
old_combinator: false,
max_osd_combinations: 10000,
// block_size, bitmap_granularity, immediate_commit must match all OSDs used in that pool
block_size: 131072,
bitmap_granularity: 4096,
// 'all'/'small'/'none', same as in OSD options
immediate_commit: 'none',
pg_stripe_size: 0,
root_node?: 'rack1',
// restrict pool to OSDs having all of these tags
osd_tags?: 'nvme' | [ 'nvme', ... ],
// prefer to put primary on OSD with these tags
primary_affinity_tags?: 'nvme' | [ 'nvme', ... ],
// scrub interval
scrub_interval?: '30d',
},
...
}, */
pools: {},
osd: {
/* <id>: { reweight?: 1, tags?: [ 'nvme', ... ], noout?: true }, ... */
},
/* pgs: {
hash: string,
items: {
<pool_id>: {
<pg_id>: {
osd_set: [ 1, 2, 3 ],
primary: 1,
pause: false,
}
}
}
}, */
pgs: {},
/* inode: {
<pool_id>: {
<inode_t>: {
name: string,
size?: uint64_t, // bytes
parent_pool?: <pool_id>,
parent_id?: <inode_t>,
readonly?: boolean,
}
}
}, */
inode: {},
},
osd: {
state: {
/* <osd_num_t>: {
state: "up",
addresses: string[],
host: string,
port: uint16_t,
primary_enabled: boolean,
blockstore_enabled: boolean,
}, */
},
stats: {
/* <osd_num_t>: {
time: number, // unix time
blockstore_ready: boolean,
size: uint64_t, // bytes
free: uint64_t, // bytes
host: string,
op_stats: {
<string>: { count: uint64_t, usec: uint64_t, bytes: uint64_t },
},
subop_stats: {
<string>: { count: uint64_t, usec: uint64_t },
},
recovery_stats: {
degraded: { count: uint64_t, bytes: uint64_t },
misplaced: { count: uint64_t, bytes: uint64_t },
},
}, */
},
inodestats: {
/* <pool_id>: {
<inode_t>: {
read: { count: uint64_t, usec: uint64_t, bytes: uint64_t },
write: { count: uint64_t, usec: uint64_t, bytes: uint64_t },
delete: { count: uint64_t, usec: uint64_t, bytes: uint64_t },
},
}, */
},
space: {
/* <osd_num_t>: {
<pool_id>: {
<inode_t>: uint64_t, // bytes
},
}, */
},
},
mon: {
master: {
/* ip: [ string ], id: uint64_t */
},
standby: {
/* <uint64_t>: { ip: [ string ] }, */
},
},
pg: {
state: {
/* <pool_id>: {
<pg_id>: {
primary: osd_num_t,
state: ("starting"|"peering"|"incomplete"|"active"|"repeering"|"stopping"|"offline"|
"degraded"|"has_incomplete"|"has_degraded"|"has_misplaced"|"has_unclean"|
"has_invalid"|"has_inconsistent"|"has_corrupted"|"left_on_dead"|"scrubbing")[],
}
}, */
},
stats: {
/* <pool_id>: {
<pg_id>: {
object_count: uint64_t,
clean_count: uint64_t,
misplaced_count: uint64_t,
degraded_count: uint64_t,
incomplete_count: uint64_t,
write_osd_set: osd_num_t[],
},
}, */
},
history: {
/* <pool_id>: {
<pg_id>: {
osd_sets: osd_num_t[][],
all_peers: osd_num_t[],
epoch: uint64_t,
next_scrub: uint64_t,
},
}, */
},
},
inode: {
stats: {
/* <pool_id>: {
<inode_t>: {
raw_used: uint64_t, // raw used bytes on OSDs
read: { count: uint64_t, usec: uint64_t, bytes: uint64_t, bps: uint64_t, iops: uint64_t, lat: uint64_t },
write: { count: uint64_t, usec: uint64_t, bytes: uint64_t, bps: uint64_t, iops: uint64_t, lat: uint64_t },
delete: { count: uint64_t, usec: uint64_t, bytes: uint64_t, bps: uint64_t, iops: uint64_t, lat: uint64_t },
},
}, */
},
},
pool: {
stats: {
/* <pool_id>: {
used_raw_tb: float, // used raw space in the pool
total_raw_tb: float, // maximum amount of space in the pool
raw_to_usable: float, // raw to usable ratio
space_efficiency: float, // 0..1
} */
},
},
stats: {
/* op_stats: {
<string>: { count: uint64_t, usec: uint64_t, bytes: uint64_t, bps: uint64_t, iops: uint64_t, lat: uint64_t },
},
subop_stats: {
<string>: { count: uint64_t, usec: uint64_t, iops: uint64_t, lat: uint64_t },
},
recovery_stats: {
degraded: { count: uint64_t, bytes: uint64_t, bps: uint64_t, iops: uint64_t },
misplaced: { count: uint64_t, bytes: uint64_t, bps: uint64_t, iops: uint64_t },
},
object_counts: {
object: uint64_t,
clean: uint64_t,
misplaced: uint64_t,
degraded: uint64_t,
incomplete: uint64_t,
},
object_bytes: {
total: uint64_t,
clean: uint64_t,
misplaced: uint64_t,
degraded: uint64_t,
incomplete: uint64_t,
}, */
},
history: {
last_clean_pgs: {},
},
index: {
image: {
/* <name>: {
id: uint64_t,
pool_id: uint64_t,
}, */
},
maxid: {
/* <pool_id>: uint64_t, */
},
},
};
module.exports = {
etcd_nonempty_keys,
etcd_allow,
etcd_tree,
};

View File

@ -8,7 +8,7 @@
// But we support this case with the "parity_space" parameter in optimize_initial()/optimize_change(). // But we support this case with the "parity_space" parameter in optimize_initial()/optimize_change().
const { SimpleCombinator } = require('./simple_pgs.js'); const { SimpleCombinator } = require('./simple_pgs.js');
const LPOptimizer = require('./lp-optimizer.js'); const LPOptimizer = require('./lp_optimizer.js');
const osd_tree = { const osd_tree = {
ripper5: { ripper5: {

View File

@ -2,7 +2,7 @@
// License: VNPL-1.1 (see README.md for details) // License: VNPL-1.1 (see README.md for details)
const { compat } = require('./simple_pgs.js'); const { compat } = require('./simple_pgs.js');
const LPOptimizer = require('./lp-optimizer.js'); const LPOptimizer = require('./lp_optimizer.js');
async function run() async function run()
{ {

View File

@ -2,7 +2,7 @@
// License: VNPL-1.1 (see README.md for details) // License: VNPL-1.1 (see README.md for details)
const { compat, flatten_tree } = require('./simple_pgs.js'); const { compat, flatten_tree } = require('./simple_pgs.js');
const LPOptimizer = require('./lp-optimizer.js'); const LPOptimizer = require('./lp_optimizer.js');
const crush_tree = [ const crush_tree = [
{ level: 1, children: [ { level: 1, children: [

View File

@ -2,7 +2,7 @@
// License: VNPL-1.1 (see README.md for details) // License: VNPL-1.1 (see README.md for details)
const { compat } = require('./simple_pgs.js'); const { compat } = require('./simple_pgs.js');
const LPOptimizer = require('./lp-optimizer.js'); const LPOptimizer = require('./lp_optimizer.js');
const osd_tree = { const osd_tree = {
100: { 100: {

View File

@ -2,7 +2,7 @@
// License: VNPL-1.1 (see README.md for details) // License: VNPL-1.1 (see README.md for details)
const { compat, flatten_tree } = require('./simple_pgs.js'); const { compat, flatten_tree } = require('./simple_pgs.js');
const LPOptimizer = require('./lp-optimizer.js'); const LPOptimizer = require('./lp_optimizer.js');
const osd_tree = { const osd_tree = {
100: { 100: {

1641
mon/mon.js

File diff suppressed because it is too large Load Diff

215
mon/osd_tree.js Normal file
View File

@ -0,0 +1,215 @@
// Copyright (c) Vitaliy Filippov, 2019+
// License: VNPL-1.1 (see README.md for details)
function get_osd_tree(global_config, state)
{
const levels = global_config.placement_levels||{};
levels.host = levels.host || 100;
levels.osd = levels.osd || 101;
const tree = {};
let up_osds = {};
// This requires monitor system time to be in sync with OSD system times (at least to some extent)
const down_time = Date.now()/1000 - global_config.osd_out_time;
for (const osd_num of Object.keys(state.osd.stats).sort((a, b) => a - b))
{
const stat = state.osd.stats[osd_num];
const osd_cfg = state.config.osd[osd_num];
let reweight = osd_cfg == null ? 1 : Number(osd_cfg.reweight);
if (reweight < 0 || isNaN(reweight))
reweight = 1;
if (stat && stat.size && reweight && (state.osd.state[osd_num] || Number(stat.time) >= down_time ||
osd_cfg && osd_cfg.noout))
{
// Numeric IDs are reserved for OSDs
if (state.osd.state[osd_num] && reweight > 0)
{
// React to down OSDs immediately
up_osds[osd_num] = true;
}
tree[osd_num] = tree[osd_num] || {};
tree[osd_num].id = osd_num;
tree[osd_num].parent = tree[osd_num].parent || stat.host;
tree[osd_num].level = 'osd';
tree[osd_num].size = reweight * stat.size / 1024 / 1024 / 1024 / 1024; // terabytes
if (osd_cfg && osd_cfg.tags)
{
tree[osd_num].tags = (osd_cfg.tags instanceof Array ? [ ...osd_cfg.tags ] : [ osd_cfg.tags ])
.reduce((a, c) => { a[c] = true; return a; }, {});
}
delete tree[osd_num].children;
if (!tree[stat.host])
{
tree[stat.host] = {
id: stat.host,
level: 'host',
parent: null,
children: [],
};
}
}
}
for (const node_id in state.config.node_placement||{})
{
const node_cfg = state.config.node_placement[node_id];
if (/^\d+$/.exec(node_id))
{
node_cfg.level = 'osd';
}
if (!node_id || !node_cfg.level || !levels[node_cfg.level] ||
node_cfg.level === 'osd' && !tree[node_id])
{
// All nodes must have non-empty IDs and valid levels
// OSDs have to actually exist
continue;
}
tree[node_id] = tree[node_id] || {};
tree[node_id].id = node_id;
tree[node_id].level = node_cfg.level;
tree[node_id].parent = node_cfg.parent;
if (node_cfg.level !== 'osd')
{
tree[node_id].children = [];
}
}
return { up_osds, levels, osd_tree: tree };
}
function make_hier_tree(global_config, tree)
{
const levels = global_config.placement_levels||{};
levels.host = levels.host || 100;
levels.osd = levels.osd || 101;
tree = { ...tree };
for (const node_id in tree)
{
tree[node_id] = { ...tree[node_id], children: [] };
}
tree[''] = { children: [] };
for (const node_id in tree)
{
if (node_id === '' || tree[node_id].level === 'osd' && (!tree[node_id].size || tree[node_id].size <= 0))
{
continue;
}
const node_cfg = tree[node_id];
const node_level = levels[node_cfg.level] || node_cfg.level;
let parent_level = node_cfg.parent && tree[node_cfg.parent] && tree[node_cfg.parent].children
&& tree[node_cfg.parent].level;
parent_level = parent_level ? (levels[parent_level] || parent_level) : null;
// Parent's level must be less than child's; OSDs must be leaves
const parent = parent_level && parent_level < node_level ? node_cfg.parent : '';
tree[parent].children.push(tree[node_id]);
}
// Delete empty nodes
let deleted = 0;
do
{
deleted = 0;
for (const node_id in tree)
{
if (tree[node_id].level !== 'osd' && (!tree[node_id].children || !tree[node_id].children.length))
{
const parent = tree[node_id].parent;
if (parent)
{
tree[parent].children = tree[parent].children.filter(c => c != tree[node_id]);
}
deleted++;
delete tree[node_id];
}
}
} while (deleted > 0);
return tree;
}
function filter_osds_by_root_node(global_config, pool_tree, root_node)
{
if (!root_node)
{
return;
}
let hier_tree = make_hier_tree(global_config, pool_tree);
let included = [ ...(hier_tree[root_node] || {}).children||[] ];
for (let i = 0; i < included.length; i++)
{
if (included[i].children)
{
included.splice(i+1, 0, ...included[i].children);
}
}
let cur = pool_tree[root_node] || {};
while (cur && cur.id)
{
included.unshift(cur);
cur = pool_tree[cur.parent||''];
}
included = included.reduce((a, c) => { a[c.id||''] = true; return a; }, {});
for (const item in pool_tree)
{
if (!included[item])
{
delete pool_tree[item];
}
}
}
function filter_osds_by_tags(orig_tree, tags)
{
if (!tags)
{
return;
}
for (const tag of (tags instanceof Array ? tags : [ tags ]))
{
for (const osd in orig_tree)
{
if (orig_tree[osd].level === 'osd' &&
(!orig_tree[osd].tags || !orig_tree[osd].tags[tag]))
{
delete orig_tree[osd];
}
}
}
}
function filter_osds_by_block_layout(orig_tree, osd_stats, block_size, bitmap_granularity, immediate_commit)
{
for (const osd in orig_tree)
{
if (orig_tree[osd].level === 'osd')
{
const osd_stat = osd_stats[osd];
if (osd_stat && (osd_stat.bs_block_size && osd_stat.bs_block_size != block_size ||
osd_stat.bitmap_granularity && osd_stat.bitmap_granularity != bitmap_granularity ||
osd_stat.immediate_commit == 'small' && immediate_commit == 'all' ||
osd_stat.immediate_commit == 'none' && immediate_commit != 'none'))
{
delete orig_tree[osd];
}
}
}
}
function get_affinity_osds(pool_cfg, up_osds, osd_tree)
{
let aff_osds = up_osds;
if (pool_cfg.primary_affinity_tags)
{
aff_osds = Object.keys(up_osds).reduce((a, c) => { a[c] = osd_tree[c]; return a; }, {});
filter_osds_by_tags(aff_osds, pool_cfg.primary_affinity_tags);
for (const osd in aff_osds)
{
aff_osds[osd] = true;
}
}
return aff_osds;
}
module.exports = {
get_osd_tree,
make_hier_tree,
filter_osds_by_root_node,
filter_osds_by_tags,
filter_osds_by_block_layout,
get_affinity_osds,
};

View File

@ -4,7 +4,7 @@
"description": "Vitastor SDS monitor service", "description": "Vitastor SDS monitor service",
"main": "mon-main.js", "main": "mon-main.js",
"scripts": { "scripts": {
"test": "echo \"Error: no test specified\" && exit 1" "lint": "eslint *.js lp_optimizer/*.js scripts/*.js"
}, },
"author": "Vitaliy Filippov", "author": "Vitaliy Filippov",
"license": "UNLICENSED", "license": "UNLICENSED",
@ -14,12 +14,10 @@
}, },
"devDependencies": { "devDependencies": {
"eslint": "^8.0.0", "eslint": "^8.0.0",
"eslint-plugin-import": "^2.29.1",
"eslint-plugin-node": "^11.1.0" "eslint-plugin-node": "^11.1.0"
}, },
"engines": { "engines": {
"node": ">=12.0.0" "node": ">=12.0.0"
},
"scripts": {
"lint": "eslint *.js"
} }
} }

267
mon/pg_gen.js Normal file
View File

@ -0,0 +1,267 @@
// Copyright (c) Vitaliy Filippov, 2019+
// License: VNPL-1.1 (see README.md for details)
const { RuleCombinator } = require('./lp_optimizer/dsl_pgs.js');
const { SimpleCombinator, flatten_tree } = require('./lp_optimizer/simple_pgs.js');
const { validate_pool_cfg, get_pg_rules } = require('./pool_config.js');
const LPOptimizer = require('./lp_optimizer/lp_optimizer.js');
const { scale_pg_count } = require('./pg_utils.js');
const { make_hier_tree, filter_osds_by_root_node,
filter_osds_by_tags, filter_osds_by_block_layout, get_affinity_osds } = require('./osd_tree.js');
let seed;
function reset_rng()
{
seed = 0x5f020e43;
}
function rng()
{
seed ^= seed << 13;
seed ^= seed >> 17;
seed ^= seed << 5;
return seed + 2147483648;
}
function pick_primary(pool_config, osd_set, up_osds, aff_osds)
{
let alive_set;
if (pool_config.scheme === 'replicated')
{
// Prefer "affinity" OSDs
alive_set = osd_set.filter(osd_num => osd_num && aff_osds[osd_num]);
if (!alive_set.length)
alive_set = osd_set.filter(osd_num => osd_num && up_osds[osd_num]);
}
else
{
// Prefer data OSDs for EC because they can actually read something without an additional network hop
const pg_data_size = (pool_config.pg_size||0) - (pool_config.parity_chunks||0);
alive_set = osd_set.slice(0, pg_data_size).filter(osd_num => osd_num && aff_osds[osd_num]);
if (!alive_set.length)
alive_set = osd_set.filter(osd_num => osd_num && aff_osds[osd_num]);
if (!alive_set.length)
{
alive_set = osd_set.slice(0, pg_data_size).filter(osd_num => osd_num && up_osds[osd_num]);
if (!alive_set.length)
alive_set = osd_set.filter(osd_num => osd_num && up_osds[osd_num]);
}
}
if (!alive_set.length)
{
return 0;
}
return alive_set[rng() % alive_set.length];
}
function recheck_primary(state, global_config, up_osds, osd_tree)
{
let new_config_pgs;
for (const pool_id in state.config.pools)
{
const pool_cfg = state.config.pools[pool_id];
if (!validate_pool_cfg(pool_id, pool_cfg, global_config.placement_levels, false))
{
continue;
}
const aff_osds = get_affinity_osds(pool_cfg, up_osds, osd_tree);
reset_rng();
for (let pg_num = 1; pg_num <= pool_cfg.pg_count; pg_num++)
{
if (!state.config.pgs.items[pool_id])
{
continue;
}
const pg_cfg = state.config.pgs.items[pool_id][pg_num];
if (pg_cfg)
{
const new_primary = pick_primary(state.config.pools[pool_id], pg_cfg.osd_set, up_osds, aff_osds);
if (pg_cfg.primary != new_primary)
{
if (!new_config_pgs)
{
new_config_pgs = JSON.parse(JSON.stringify(state.config.pgs));
}
console.log(
`Moving pool ${pool_id} (${pool_cfg.name || 'unnamed'}) PG ${pg_num}`+
` primary OSD from ${pg_cfg.primary} to ${new_primary}`
);
new_config_pgs.items[pool_id][pg_num].primary = new_primary;
}
}
}
}
return new_config_pgs;
}
function save_new_pgs_txn(save_to, request, state, etcd_prefix, etcd_watch_revision, pool_id, up_osds, osd_tree, prev_pgs, new_pgs, pg_history)
{
const aff_osds = get_affinity_osds(state.config.pools[pool_id] || {}, up_osds, osd_tree);
const pg_items = {};
reset_rng();
new_pgs.map((osd_set, i) =>
{
osd_set = osd_set.map(osd_num => osd_num === LPOptimizer.NO_OSD ? 0 : osd_num);
pg_items[i+1] = {
osd_set,
primary: pick_primary(state.config.pools[pool_id], osd_set, up_osds, aff_osds),
};
if (prev_pgs[i] && prev_pgs[i].join(' ') != osd_set.join(' ') &&
prev_pgs[i].filter(osd_num => osd_num).length > 0)
{
pg_history[i] = pg_history[i] || {};
pg_history[i].osd_sets = pg_history[i].osd_sets || [];
pg_history[i].osd_sets.push(prev_pgs[i]);
}
if (pg_history[i] && pg_history[i].osd_sets)
{
pg_history[i].osd_sets = Object.values(pg_history[i].osd_sets
.reduce((a, c) => { a[c.join(' ')] = c; return a; }, {}));
}
});
for (let i = 0; i < new_pgs.length || i < prev_pgs.length; i++)
{
// FIXME: etcd has max_txn_ops limit, and it's 128 by default
// Sooo we probably want to change our storage scheme for PG histories...
request.compare.push({
key: b64(etcd_prefix+'/pg/history/'+pool_id+'/'+(i+1)),
target: 'MOD',
mod_revision: ''+etcd_watch_revision,
result: 'LESS',
});
if (pg_history[i])
{
request.success.push({
requestPut: {
key: b64(etcd_prefix+'/pg/history/'+pool_id+'/'+(i+1)),
value: b64(JSON.stringify(pg_history[i])),
},
});
}
else
{
request.success.push({
requestDeleteRange: {
key: b64(etcd_prefix+'/pg/history/'+pool_id+'/'+(i+1)),
},
});
}
}
save_to.items = save_to.items || {};
if (!new_pgs.length)
{
delete save_to.items[pool_id];
}
else
{
save_to.items[pool_id] = pg_items;
}
}
async function generate_pool_pgs(state, global_config, pool_id, osd_tree, levels)
{
const pool_cfg = state.config.pools[pool_id];
if (!validate_pool_cfg(pool_id, pool_cfg, global_config.placement_levels, false))
{
return null;
}
let pool_tree = { ...osd_tree };
filter_osds_by_root_node(global_config, pool_tree, pool_cfg.root_node);
filter_osds_by_tags(pool_tree, pool_cfg.osd_tags);
filter_osds_by_block_layout(
pool_tree,
state.osd.stats,
pool_cfg.block_size || global_config.block_size || 131072,
pool_cfg.bitmap_granularity || global_config.bitmap_granularity || 4096,
pool_cfg.immediate_commit || global_config.immediate_commit || 'none'
);
pool_tree = make_hier_tree(global_config, pool_tree);
// First try last_clean_pgs to minimize data movement
let prev_pgs = [];
for (const pg in ((state.history.last_clean_pgs.items||{})[pool_id]||{}))
{
prev_pgs[pg-1] = [ ...state.history.last_clean_pgs.items[pool_id][pg].osd_set ];
}
if (!prev_pgs.length)
{
// Fall back to config/pgs if it's empty
for (const pg in ((state.config.pgs.items||{})[pool_id]||{}))
{
prev_pgs[pg-1] = [ ...state.config.pgs.items[pool_id][pg].osd_set ];
}
}
const old_pg_count = prev_pgs.length;
const optimize_cfg = {
osd_weights: Object.values(pool_tree).filter(item => item.level === 'osd').reduce((a, c) => { a[c.id] = c.size; return a; }, {}),
combinator: !global_config.use_old_pg_combinator || pool_cfg.level_placement || pool_cfg.raw_placement
// new algorithm:
? new RuleCombinator(pool_tree, get_pg_rules(pool_id, pool_cfg, global_config.placement_levels), pool_cfg.max_osd_combinations)
// old algorithm:
: new SimpleCombinator(flatten_tree(pool_tree[''].children, levels, pool_cfg.failure_domain, 'osd'), pool_cfg.pg_size, pool_cfg.max_osd_combinations),
pg_count: pool_cfg.pg_count,
pg_size: pool_cfg.pg_size,
pg_minsize: pool_cfg.pg_minsize,
ordered: pool_cfg.scheme != 'replicated',
};
let optimize_result;
// Re-shuffle PGs if config/pgs.hash is empty
if (old_pg_count > 0 && state.config.pgs.hash)
{
if (prev_pgs.length != pool_cfg.pg_count)
{
// Scale PG count
// Do it even if old_pg_count is already equal to pool_cfg.pg_count,
// because last_clean_pgs may still contain the old number of PGs
scale_pg_count(prev_pgs, pool_cfg.pg_count);
}
for (const pg of prev_pgs)
{
while (pg.length < pool_cfg.pg_size)
{
pg.push(0);
}
}
optimize_result = await LPOptimizer.optimize_change({
prev_pgs,
...optimize_cfg,
});
}
else
{
optimize_result = await LPOptimizer.optimize_initial(optimize_cfg);
}
console.log(`Pool ${pool_id} (${pool_cfg.name || 'unnamed'}):`);
LPOptimizer.print_change_stats(optimize_result);
let pg_effsize = pool_cfg.pg_size;
for (const pg of optimize_result.int_pgs)
{
const this_pg_size = pg.filter(osd => osd != LPOptimizer.NO_OSD).length;
if (this_pg_size && this_pg_size < pg_effsize)
{
pg_effsize = this_pg_size;
}
}
return {
pool_id,
pgs: optimize_result.int_pgs,
stats: {
total_raw_tb: optimize_result.space,
pg_real_size: pg_effsize || pool_cfg.pg_size,
raw_to_usable: (pg_effsize || pool_cfg.pg_size) / (pool_cfg.scheme === 'replicated'
? 1 : (pool_cfg.pg_size - (pool_cfg.parity_chunks||0))),
space_efficiency: optimize_result.space/(optimize_result.total_space||1),
},
};
}
function b64(str)
{
return Buffer.from(str).toString('base64');
}
module.exports = {
recheck_primary,
save_new_pgs_txn,
generate_pool_pgs,
};

169
mon/pool_config.js Normal file
View File

@ -0,0 +1,169 @@
// Copyright (c) Vitaliy Filippov, 2019+
// License: VNPL-1.1 (see README.md for details)
const { parse_level_indexes, parse_pg_dsl } = require('./lp_optimizer/dsl_pgs.js');
function validate_pool_cfg(pool_id, pool_cfg, placement_levels, warn)
{
pool_cfg.pg_size = Math.floor(pool_cfg.pg_size);
pool_cfg.pg_minsize = Math.floor(pool_cfg.pg_minsize);
pool_cfg.parity_chunks = Math.floor(pool_cfg.parity_chunks) || undefined;
pool_cfg.pg_count = Math.floor(pool_cfg.pg_count);
pool_cfg.max_osd_combinations = Math.floor(pool_cfg.max_osd_combinations) || 10000;
if (!/^[1-9]\d*$/.exec(''+pool_id))
{
if (warn)
console.log('Pool ID '+pool_id+' is invalid');
return false;
}
if (pool_cfg.scheme !== 'xor' && pool_cfg.scheme !== 'replicated' &&
pool_cfg.scheme !== 'ec' && pool_cfg.scheme !== 'jerasure')
{
if (warn)
console.log('Pool '+pool_id+' has invalid coding scheme (one of "xor", "replicated", "ec" and "jerasure" required)');
return false;
}
if (!pool_cfg.pg_size || pool_cfg.pg_size < 1 || pool_cfg.pg_size > 256 ||
pool_cfg.scheme !== 'replicated' && pool_cfg.pg_size < 3)
{
if (warn)
console.log('Pool '+pool_id+' has invalid pg_size');
return false;
}
if (!pool_cfg.pg_minsize || pool_cfg.pg_minsize < 1 || pool_cfg.pg_minsize > pool_cfg.pg_size ||
pool_cfg.scheme === 'xor' && pool_cfg.pg_minsize < (pool_cfg.pg_size - 1))
{
if (warn)
console.log('Pool '+pool_id+' has invalid pg_minsize');
return false;
}
if (pool_cfg.scheme === 'xor' && pool_cfg.parity_chunks != 0 && pool_cfg.parity_chunks != 1)
{
if (warn)
console.log('Pool '+pool_id+' has invalid parity_chunks (must be 1)');
return false;
}
if ((pool_cfg.scheme === 'ec' || pool_cfg.scheme === 'jerasure') &&
(pool_cfg.parity_chunks < 1 || pool_cfg.parity_chunks > pool_cfg.pg_size-2))
{
if (warn)
console.log('Pool '+pool_id+' has invalid parity_chunks (must be between 1 and pg_size-2)');
return false;
}
if (!pool_cfg.pg_count || pool_cfg.pg_count < 1)
{
if (warn)
console.log('Pool '+pool_id+' has invalid pg_count');
return false;
}
if (!pool_cfg.name)
{
if (warn)
console.log('Pool '+pool_id+' has empty name');
return false;
}
if (pool_cfg.max_osd_combinations < 100)
{
if (warn)
console.log('Pool '+pool_id+' has invalid max_osd_combinations (must be at least 100)');
return false;
}
if (pool_cfg.root_node && typeof(pool_cfg.root_node) != 'string')
{
if (warn)
console.log('Pool '+pool_id+' has invalid root_node (must be a string)');
return false;
}
if (pool_cfg.osd_tags && typeof(pool_cfg.osd_tags) != 'string' &&
(!(pool_cfg.osd_tags instanceof Array) || pool_cfg.osd_tags.filter(t => typeof t != 'string').length > 0))
{
if (warn)
console.log('Pool '+pool_id+' has invalid osd_tags (must be a string or array of strings)');
return false;
}
if (pool_cfg.primary_affinity_tags && typeof(pool_cfg.primary_affinity_tags) != 'string' &&
(!(pool_cfg.primary_affinity_tags instanceof Array) || pool_cfg.primary_affinity_tags.filter(t => typeof t != 'string').length > 0))
{
if (warn)
console.log('Pool '+pool_id+' has invalid primary_affinity_tags (must be a string or array of strings)');
return false;
}
if (!get_pg_rules(pool_id, pool_cfg, placement_levels, true))
{
return false;
}
return true;
}
function get_pg_rules(pool_id, pool_cfg, placement_levels, warn)
{
if (pool_cfg.level_placement)
{
const pg_size = (0|pool_cfg.pg_size);
let rules = pool_cfg.level_placement;
if (typeof rules === 'string')
{
rules = rules.split(/\s+/).map(s => s.split(/=/, 2)).reduce((a, c) => { a[c[0]] = c[1]; return a; }, {});
}
else
{
rules = { ...rules };
}
// Always add failure_domain to prevent rules from being totally incorrect
const all_diff = [];
for (let i = 1; i <= pg_size; i++)
{
all_diff.push(i);
}
rules[pool_cfg.failure_domain || 'host'] = all_diff;
placement_levels = placement_levels||{};
placement_levels.host = placement_levels.host || 100;
placement_levels.osd = placement_levels.osd || 101;
for (const k in rules)
{
if (!placement_levels[k] || typeof rules[k] !== 'string' &&
(!(rules[k] instanceof Array) ||
rules[k].filter(s => typeof s !== 'string' && typeof s !== 'number').length > 0))
{
if (warn)
console.log('Pool '+pool_id+' configuration is invalid: level_placement should be { [level]: string | (string|number)[] }');
return null;
}
else if (rules[k].length != pg_size)
{
if (warn)
console.log('Pool '+pool_id+' configuration is invalid: values in level_placement should contain exactly pg_size ('+pg_size+') items');
return null;
}
}
return parse_level_indexes(rules);
}
else if (typeof pool_cfg.raw_placement === 'string')
{
try
{
return parse_pg_dsl(pool_cfg.raw_placement);
}
catch (e)
{
if (warn)
console.log('Pool '+pool_id+' configuration is invalid: invalid raw_placement: '+e.message);
}
}
else
{
let rules = [ [] ];
let prev = [ 1 ];
for (let i = 1; i < pool_cfg.pg_size; i++)
{
rules.push([ [ pool_cfg.failure_domain||'host', '!=', prev ] ]);
prev = [ ...prev, i+1 ];
}
return rules;
}
}
module.exports = {
validate_pool_cfg,
get_pg_rules,
};

286
mon/stats.js Normal file
View File

@ -0,0 +1,286 @@
// Copyright (c) Vitaliy Filippov, 2019+
// License: VNPL-1.1 (see README.md for details)
function derive_osd_stats(st, prev, prev_diff)
{
const diff = { op_stats: {}, subop_stats: {}, recovery_stats: {}, inode_stats: {} };
if (!st || !st.time || !prev || !prev.time || prev.time >= st.time)
{
return prev_diff || diff;
}
const timediff = BigInt(st.time*1000 - prev.time*1000);
for (const op in st.op_stats||{})
{
const pr = prev && prev.op_stats && prev.op_stats[op];
let c = st.op_stats[op];
c = { bytes: BigInt(c.bytes||0), usec: BigInt(c.usec||0), count: BigInt(c.count||0) };
const b = c.bytes - BigInt(pr && pr.bytes||0);
const us = c.usec - BigInt(pr && pr.usec||0);
const n = c.count - BigInt(pr && pr.count||0);
if (n > 0)
diff.op_stats[op] = { ...c, bps: b*1000n/timediff, iops: n*1000n/timediff, lat: us/n };
}
for (const op in st.subop_stats||{})
{
const pr = prev && prev.subop_stats && prev.subop_stats[op];
let c = st.subop_stats[op];
c = { usec: BigInt(c.usec||0), count: BigInt(c.count||0) };
const us = c.usec - BigInt(pr && pr.usec||0);
const n = c.count - BigInt(pr && pr.count||0);
if (n > 0)
diff.subop_stats[op] = { ...c, iops: n*1000n/timediff, lat: us/n };
}
for (const op in st.recovery_stats||{})
{
const pr = prev && prev.recovery_stats && prev.recovery_stats[op];
let c = st.recovery_stats[op];
c = { bytes: BigInt(c.bytes||0), count: BigInt(c.count||0) };
const b = c.bytes - BigInt(pr && pr.bytes||0);
const n = c.count - BigInt(pr && pr.count||0);
if (n > 0)
diff.recovery_stats[op] = { ...c, bps: b*1000n/timediff, iops: n*1000n/timediff };
}
for (const pool_id in st.inode_stats||{})
{
diff.inode_stats[pool_id] = {};
for (const inode_num in st.inode_stats[pool_id])
{
const inode_diff = diff.inode_stats[pool_id][inode_num] = {};
for (const op of [ 'read', 'write', 'delete' ])
{
const c = st.inode_stats[pool_id][inode_num][op];
const pr = prev && prev.inode_stats && prev.inode_stats[pool_id] &&
prev.inode_stats[pool_id][inode_num] && prev.inode_stats[pool_id][inode_num][op];
const n = BigInt(c.count||0) - BigInt(pr && pr.count||0);
inode_diff[op] = {
bps: (BigInt(c.bytes||0) - BigInt(pr && pr.bytes||0))*1000n/timediff,
iops: n*1000n/timediff,
lat: (BigInt(c.usec||0) - BigInt(pr && pr.usec||0))/(n || 1n),
};
}
}
}
return diff;
}
// sum_op_stats(this.state.osd, this.prev_stats)
function sum_op_stats(all_osd, prev_stats)
{
for (const osd in all_osd.stats)
{
const cur = { ...all_osd.stats[osd], inode_stats: all_osd.inodestats[osd]||{} };
prev_stats.osd_diff[osd] = derive_osd_stats(
cur, prev_stats.osd_stats[osd], prev_stats.osd_diff[osd]
);
prev_stats.osd_stats[osd] = cur;
}
const sum_diff = { op_stats: {}, subop_stats: {}, recovery_stats: {} };
// Sum derived values instead of deriving summed
for (const osd in all_osd.state)
{
const derived = prev_stats.osd_diff[osd];
if (!all_osd.state[osd] || !derived)
{
continue;
}
for (const type in sum_diff)
{
for (const op in derived[type]||{})
{
for (const k in derived[type][op])
{
sum_diff[type][op] = sum_diff[type][op] || {};
sum_diff[type][op][k] = (sum_diff[type][op][k] || 0n) + derived[type][op][k];
}
}
}
}
return sum_diff;
}
// sum_object_counts(this.state, this.config)
function sum_object_counts(state, global_config)
{
const object_counts = { object: 0n, clean: 0n, misplaced: 0n, degraded: 0n, incomplete: 0n };
const object_bytes = { object: 0n, clean: 0n, misplaced: 0n, degraded: 0n, incomplete: 0n };
for (const pool_id in state.pg.stats)
{
let object_size = 0;
for (const osd_num of state.pg.stats[pool_id].write_osd_set||[])
{
if (osd_num && state.osd.stats[osd_num] && state.osd.stats[osd_num].block_size)
{
object_size = state.osd.stats[osd_num].block_size;
break;
}
}
const pool_cfg = (state.config.pools[pool_id]||{});
if (!object_size)
{
object_size = pool_cfg.block_size || global_config.block_size || 131072;
}
if (pool_cfg.scheme !== 'replicated')
{
object_size *= ((pool_cfg.pg_size||0) - (pool_cfg.parity_chunks||0));
}
object_size = BigInt(object_size);
for (const pg_num in state.pg.stats[pool_id])
{
const st = state.pg.stats[pool_id][pg_num];
if (st)
{
for (const k in object_counts)
{
if (st[k+'_count'])
{
object_counts[k] += BigInt(st[k+'_count']);
object_bytes[k] += BigInt(st[k+'_count']) * object_size;
}
}
}
}
}
return { object_counts, object_bytes };
}
// sum_inode_stats(this.state, this.prev_stats)
function sum_inode_stats(state, prev_stats)
{
const inode_stats = {};
const inode_stub = () => ({
raw_used: 0n,
read: { count: 0n, usec: 0n, bytes: 0n, bps: 0n, iops: 0n, lat: 0n },
write: { count: 0n, usec: 0n, bytes: 0n, bps: 0n, iops: 0n, lat: 0n },
delete: { count: 0n, usec: 0n, bytes: 0n, bps: 0n, iops: 0n, lat: 0n },
});
const seen_pools = {};
for (const pool_id in state.config.pools)
{
seen_pools[pool_id] = true;
state.pool.stats[pool_id] = state.pool.stats[pool_id] || {};
state.pool.stats[pool_id].used_raw_tb = 0n;
}
for (const osd_num in state.osd.space)
{
for (const pool_id in state.osd.space[osd_num])
{
state.pool.stats[pool_id] = state.pool.stats[pool_id] || {};
if (!seen_pools[pool_id])
{
state.pool.stats[pool_id].used_raw_tb = 0n;
seen_pools[pool_id] = true;
}
inode_stats[pool_id] = inode_stats[pool_id] || {};
for (const inode_num in state.osd.space[osd_num][pool_id])
{
const u = BigInt(state.osd.space[osd_num][pool_id][inode_num]||0);
if (inode_num)
{
inode_stats[pool_id][inode_num] = inode_stats[pool_id][inode_num] || inode_stub();
inode_stats[pool_id][inode_num].raw_used += u;
}
state.pool.stats[pool_id].used_raw_tb += u;
}
}
}
for (const pool_id in seen_pools)
{
const used = state.pool.stats[pool_id].used_raw_tb;
state.pool.stats[pool_id].used_raw_tb = Number(used)/1024/1024/1024/1024;
}
for (const osd_num in state.osd.state)
{
const ist = state.osd.inodestats[osd_num];
if (!ist || !state.osd.state[osd_num])
{
continue;
}
for (const pool_id in ist)
{
inode_stats[pool_id] = inode_stats[pool_id] || {};
for (const inode_num in ist[pool_id])
{
inode_stats[pool_id][inode_num] = inode_stats[pool_id][inode_num] || inode_stub();
for (const op of [ 'read', 'write', 'delete' ])
{
inode_stats[pool_id][inode_num][op].count += BigInt(ist[pool_id][inode_num][op].count||0);
inode_stats[pool_id][inode_num][op].usec += BigInt(ist[pool_id][inode_num][op].usec||0);
inode_stats[pool_id][inode_num][op].bytes += BigInt(ist[pool_id][inode_num][op].bytes||0);
}
}
}
}
for (const osd in state.osd.state)
{
const osd_diff = prev_stats.osd_diff[osd];
if (!osd_diff || !state.osd.state[osd])
{
continue;
}
for (const pool_id in osd_diff.inode_stats)
{
for (const inode_num in prev_stats.osd_diff[osd].inode_stats[pool_id])
{
inode_stats[pool_id][inode_num] = inode_stats[pool_id][inode_num] || inode_stub();
for (const op of [ 'read', 'write', 'delete' ])
{
const op_diff = prev_stats.osd_diff[osd].inode_stats[pool_id][inode_num][op] || {};
const op_st = inode_stats[pool_id][inode_num][op];
op_st.bps += op_diff.bps;
op_st.iops += op_diff.iops;
op_st.lat += op_diff.lat;
op_st.n_osd = (op_st.n_osd || 0) + 1;
}
}
}
}
for (const pool_id in inode_stats)
{
for (const inode_num in inode_stats[pool_id])
{
let nonzero = inode_stats[pool_id][inode_num].raw_used > 0;
for (const op of [ 'read', 'write', 'delete' ])
{
const op_st = inode_stats[pool_id][inode_num][op];
if (op_st.n_osd)
{
op_st.lat /= BigInt(op_st.n_osd);
delete op_st.n_osd;
}
if (op_st.bps > 0 || op_st.iops > 0)
nonzero = true;
}
if (!nonzero && (!state.config.inode[pool_id] || !state.config.inode[pool_id][inode_num]))
{
// Deleted inode (no data, no I/O, no config)
delete inode_stats[pool_id][inode_num];
}
}
}
return { inode_stats, seen_pools };
}
function serialize_bigints(obj)
{
obj = { ...obj };
for (const k in obj)
{
if (typeof obj[k] == 'bigint')
{
obj[k] = ''+obj[k];
}
else if (typeof obj[k] == 'object')
{
obj[k] = serialize_bigints(obj[k]);
}
}
return obj;
}
module.exports = {
derive_osd_stats,
sum_op_stats,
sum_object_counts,
sum_inode_stats,
serialize_bigints,
};

View File

@ -9,7 +9,7 @@ for i in "$DIR"/qemu-*-vitastor.patch "$DIR"/pve-qemu-*-vitastor.patch; do
echo '===================================================================' >> $i echo '===================================================================' >> $i
echo '--- /dev/null' >> $i echo '--- /dev/null' >> $i
echo '+++ a/block/vitastor.c' >> $i echo '+++ a/block/vitastor.c' >> $i
echo '@@ -0,0 +1,'$(wc -l "$DIR"/../src/qemu_driver.c | cut -d ' ' -f 1)' @@' >> $i echo '@@ -0,0 +1,'$(wc -l "$DIR"/../src/client/qemu_driver.c | cut -d ' ' -f 1)' @@' >> $i
cat "$DIR"/../src/qemu_driver.c | sed 's/^/+/' >> $i cat "$DIR"/../src/client/qemu_driver.c | sed 's/^/+/' >> $i
fi fi
done done

View File

@ -1,5 +1,5 @@
# This is an attempt to automatically build patched RPM specs # This is an attempt to automatically build patched RPM specs
# More or less broken, better use *.spec.patch for now (and copy src/qemu_driver.c to SOURCES/qemu-vitastor.c) # More or less broken, better use *.spec.patch for now (and copy src/client/qemu_driver.c to SOURCES/qemu-vitastor.c)
# Build packages for CentOS 8 inside a container # Build packages for CentOS 8 inside a container
# cd ..; podman build -t qemu-el8 -v `pwd`/packages:/root/packages -f rpm/qemu-el8.Dockerfile . # cd ..; podman build -t qemu-el8 -v `pwd`/packages:/root/packages -f rpm/qemu-el8.Dockerfile .

View File

@ -108,10 +108,11 @@ npm install --production
cd .. cd ..
mkdir -p %buildroot/usr/lib/vitastor mkdir -p %buildroot/usr/lib/vitastor
cp -r mon %buildroot/usr/lib/vitastor cp -r mon %buildroot/usr/lib/vitastor
mv %buildroot/usr/lib/vitastor/mon/scripts/make-etcd %buildroot/usr/lib/vitastor/mon/
mkdir -p %buildroot/lib/systemd/system mkdir -p %buildroot/lib/systemd/system
cp mon/vitastor.target mon/vitastor-mon.service mon/vitastor-osd@.service %buildroot/lib/systemd/system cp mon/scripts/vitastor.target mon/scripts/vitastor-mon.service mon/scripts/vitastor-osd@.service %buildroot/lib/systemd/system
mkdir -p %buildroot/lib/udev/rules.d mkdir -p %buildroot/lib/udev/rules.d
cp mon/90-vitastor.rules %buildroot/lib/udev/rules.d cp mon/scripts/90-vitastor.rules %buildroot/lib/udev/rules.d
%files %files

View File

@ -105,10 +105,11 @@ npm install --production
cd .. cd ..
mkdir -p %buildroot/usr/lib/vitastor mkdir -p %buildroot/usr/lib/vitastor
cp -r mon %buildroot/usr/lib/vitastor cp -r mon %buildroot/usr/lib/vitastor
mv %buildroot/usr/lib/vitastor/mon/scripts/make-etcd %buildroot/usr/lib/vitastor/mon/
mkdir -p %buildroot/lib/systemd/system mkdir -p %buildroot/lib/systemd/system
cp mon/vitastor.target mon/vitastor-mon.service mon/vitastor-osd@.service %buildroot/lib/systemd/system cp mon/scripts/vitastor.target mon/scripts/vitastor-mon.service mon/scripts/vitastor-osd@.service %buildroot/lib/systemd/system
mkdir -p %buildroot/lib/udev/rules.d mkdir -p %buildroot/lib/udev/rules.d
cp mon/90-vitastor.rules %buildroot/lib/udev/rules.d cp mon/scripts/90-vitastor.rules %buildroot/lib/udev/rules.d
%files %files

View File

@ -98,10 +98,11 @@ npm install --production
cd .. cd ..
mkdir -p %buildroot/usr/lib/vitastor mkdir -p %buildroot/usr/lib/vitastor
cp -r mon %buildroot/usr/lib/vitastor cp -r mon %buildroot/usr/lib/vitastor
mv %buildroot/usr/lib/vitastor/mon/scripts/make-etcd %buildroot/usr/lib/vitastor/mon/
mkdir -p %buildroot/lib/systemd/system mkdir -p %buildroot/lib/systemd/system
cp mon/vitastor.target mon/vitastor-mon.service mon/vitastor-osd@.service %buildroot/lib/systemd/system cp mon/scripts/vitastor.target mon/scripts/vitastor-mon.service mon/scripts/vitastor-osd@.service %buildroot/lib/systemd/system
mkdir -p %buildroot/lib/udev/rules.d mkdir -p %buildroot/lib/udev/rules.d
cp mon/90-vitastor.rules %buildroot/lib/udev/rules.d cp mon/scripts/90-vitastor.rules %buildroot/lib/udev/rules.d
%files %files

View File

@ -72,300 +72,28 @@ add_dependencies(test build_tests)
include_directories( include_directories(
../ ../
${CMAKE_SOURCE_DIR}/src/blockstore
${CMAKE_SOURCE_DIR}/src/cmd
${CMAKE_SOURCE_DIR}/src/client
${CMAKE_SOURCE_DIR}/src/disk_tool
${CMAKE_SOURCE_DIR}/src/kv
${CMAKE_SOURCE_DIR}/src/nfs
${CMAKE_SOURCE_DIR}/src/osd
${CMAKE_SOURCE_DIR}/src/test
${CMAKE_SOURCE_DIR}/src/util
/usr/include/jerasure /usr/include/jerasure
${LIBURING_INCLUDE_DIRS} ${LIBURING_INCLUDE_DIRS}
${IBVERBS_INCLUDE_DIRS} ${IBVERBS_INCLUDE_DIRS}
) )
# libvitastor_blk.so add_subdirectory(blockstore)
add_library(vitastor_blk SHARED add_subdirectory(cmd)
allocator.cpp blockstore.cpp blockstore_impl.cpp blockstore_disk.cpp blockstore_init.cpp blockstore_open.cpp blockstore_journal.cpp blockstore_read.cpp add_subdirectory(client)
blockstore_write.cpp blockstore_sync.cpp blockstore_stable.cpp blockstore_rollback.cpp blockstore_flush.cpp crc32c.c ringloop.cpp add_subdirectory(disk_tool)
) add_subdirectory(kv)
target_link_libraries(vitastor_blk add_subdirectory(nfs)
${LIBURING_LIBRARIES} add_subdirectory(osd)
tcmalloc_minimal add_subdirectory(test)
# for timerfd_manager
vitastor_common
)
set_target_properties(vitastor_blk PROPERTIES VERSION ${VERSION} SOVERSION 0)
if (${WITH_FIO})
# libfio_vitastor_blk.so
add_library(fio_vitastor_blk SHARED
fio_engine.cpp
../json11/json11.cpp
)
target_link_libraries(fio_vitastor_blk
vitastor_blk
)
endif (${WITH_FIO})
# libvitastor_common.a
set(MSGR_RDMA "")
if (IBVERBS_LIBRARIES)
set(MSGR_RDMA "msgr_rdma.cpp")
endif (IBVERBS_LIBRARIES)
add_library(vitastor_common STATIC
epoll_manager.cpp etcd_state_client.cpp messenger.cpp addr_util.cpp
msgr_stop.cpp msgr_op.cpp msgr_send.cpp msgr_receive.cpp ringloop.cpp ../json11/json11.cpp
http_client.cpp osd_ops.cpp pg_states.cpp timerfd_manager.cpp str_util.cpp ${MSGR_RDMA}
)
target_compile_options(vitastor_common PUBLIC -fPIC)
# vitastor-osd
add_executable(vitastor-osd
osd_main.cpp osd.cpp osd_secondary.cpp osd_peering.cpp osd_flush.cpp osd_peering_pg.cpp
osd_primary.cpp osd_primary_chain.cpp osd_primary_sync.cpp osd_primary_write.cpp osd_primary_subops.cpp
osd_cluster.cpp osd_rmw.cpp osd_scrub.cpp osd_primary_describe.cpp
)
target_link_libraries(vitastor-osd
vitastor_common
vitastor_blk
Jerasure
${ISAL_LIBRARIES}
${IBVERBS_LIBRARIES}
)
if (${WITH_FIO})
# libfio_vitastor_sec.so
add_library(fio_vitastor_sec SHARED
fio_sec_osd.cpp
rw_blocking.cpp
addr_util.cpp
)
target_link_libraries(fio_vitastor_sec
tcmalloc_minimal
)
endif (${WITH_FIO})
# libvitastor_client.so
add_library(vitastor_client SHARED
cluster_client.cpp
cluster_client_list.cpp
cluster_client_wb.cpp
vitastor_c.cpp
cli_common.cpp
cli_alloc_osd.cpp
cli_status.cpp
cli_describe.cpp
cli_fix.cpp
cli_ls.cpp
cli_create.cpp
cli_modify.cpp
cli_flatten.cpp
cli_merge.cpp
cli_rm_data.cpp
cli_rm.cpp
cli_rm_osd.cpp
cli_pool_cfg.cpp
cli_pool_create.cpp
cli_pool_ls.cpp
cli_pool_modify.cpp
cli_pool_rm.cpp
)
set_target_properties(vitastor_client PROPERTIES PUBLIC_HEADER "vitastor_c.h")
target_link_libraries(vitastor_client
vitastor_common
${LIBURING_LIBRARIES}
${IBVERBS_LIBRARIES}
)
set_target_properties(vitastor_client PROPERTIES VERSION ${VERSION} SOVERSION 0)
if (${WITH_FIO})
# libfio_vitastor.so
add_library(fio_vitastor SHARED
fio_cluster.cpp
)
target_link_libraries(fio_vitastor
vitastor_client
)
endif (${WITH_FIO})
# vitastor-nbd
pkg_check_modules(NL3 libnl-3.0 libnl-genl-3.0)
add_executable(vitastor-nbd
nbd_proxy.cpp
)
target_include_directories(vitastor-nbd PUBLIC ${NL3_INCLUDE_DIRS})
target_link_libraries(vitastor-nbd vitastor_client ${NL3_LIBRARIES})
if (HAVE_NBD_NETLINK_H AND NL3_LIBRARIES)
target_compile_definitions(vitastor-nbd PUBLIC HAVE_NBD_NETLINK_H)
endif (HAVE_NBD_NETLINK_H AND NL3_LIBRARIES)
# libvitastor_kv.so
add_library(vitastor_kv SHARED
kv_db.cpp
kv_db.h
)
target_link_libraries(vitastor_kv
vitastor_client
)
set_target_properties(vitastor_kv PROPERTIES VERSION ${VERSION} SOVERSION 0)
# vitastor-kv
add_executable(vitastor-kv
kv_cli.cpp
)
target_link_libraries(vitastor-kv
vitastor_kv
)
add_executable(vitastor-kv-stress
kv_stress.cpp
)
target_link_libraries(vitastor-kv-stress
vitastor_kv
)
# vitastor-nfs
add_executable(vitastor-nfs
nfs_proxy.cpp
nfs_block.cpp
nfs_kv.cpp
nfs_kv_create.cpp
nfs_kv_getattr.cpp
nfs_kv_link.cpp
nfs_kv_lookup.cpp
nfs_kv_read.cpp
nfs_kv_readdir.cpp
nfs_kv_remove.cpp
nfs_kv_rename.cpp
nfs_kv_setattr.cpp
nfs_kv_write.cpp
nfs_fsstat.cpp
nfs_mount.cpp
nfs_portmap.cpp
sha256.c
nfs/xdr_impl.cpp
nfs/rpc_xdr.cpp
nfs/portmap_xdr.cpp
nfs/nfs_xdr.cpp
)
target_link_libraries(vitastor-nfs
vitastor_client
vitastor_kv
)
# vitastor-cli
add_executable(vitastor-cli
cli.cpp
)
target_link_libraries(vitastor-cli
vitastor_client
)
configure_file(vitastor.pc.in vitastor.pc @ONLY)
# vitastor-disk
add_executable(vitastor-disk
disk_tool.cpp disk_simple_offsets.cpp
disk_tool_journal.cpp disk_tool_meta.cpp disk_tool_prepare.cpp disk_tool_resize.cpp disk_tool_udev.cpp disk_tool_utils.cpp disk_tool_upgrade.cpp
crc32c.c str_util.cpp ../json11/json11.cpp rw_blocking.cpp allocator.cpp ringloop.cpp blockstore_disk.cpp
)
target_link_libraries(vitastor-disk
tcmalloc_minimal
${LIBURING_LIBRARIES}
)
if (${WITH_QEMU})
# qemu_driver.so
add_library(qemu_vitastor SHARED
qemu_driver.c
)
target_compile_options(qemu_vitastor PUBLIC -DVITASTOR_SOURCE_TREE)
target_include_directories(qemu_vitastor PUBLIC
../qemu/b/qemu
../qemu/include
${GLIB_INCLUDE_DIRS}
)
target_link_libraries(qemu_vitastor
vitastor_client
)
set_target_properties(qemu_vitastor PROPERTIES
PREFIX ""
OUTPUT_NAME "block-vitastor"
)
endif (${WITH_QEMU})
### Test stubs
# stub_osd, stub_bench, osd_test
add_executable(stub_osd stub_osd.cpp rw_blocking.cpp addr_util.cpp)
target_link_libraries(stub_osd tcmalloc_minimal)
add_executable(stub_bench stub_bench.cpp rw_blocking.cpp addr_util.cpp)
target_link_libraries(stub_bench tcmalloc_minimal)
add_executable(osd_test osd_test.cpp rw_blocking.cpp addr_util.cpp)
target_link_libraries(osd_test tcmalloc_minimal)
# osd_rmw_test
add_executable(osd_rmw_test EXCLUDE_FROM_ALL osd_rmw_test.cpp allocator.cpp)
target_link_libraries(osd_rmw_test Jerasure ${ISAL_LIBRARIES} tcmalloc_minimal)
add_dependencies(build_tests osd_rmw_test)
add_test(NAME osd_rmw_test COMMAND osd_rmw_test)
if (ISAL_LIBRARIES)
add_executable(osd_rmw_test_je EXCLUDE_FROM_ALL osd_rmw_test.cpp allocator.cpp)
target_compile_definitions(osd_rmw_test_je PUBLIC -DNO_ISAL)
target_link_libraries(osd_rmw_test_je Jerasure tcmalloc_minimal)
add_dependencies(build_tests osd_rmw_test_je)
add_test(NAME osd_rmw_test_jerasure COMMAND osd_rmw_test_je)
endif (ISAL_LIBRARIES)
# stub_uring_osd
add_executable(stub_uring_osd
stub_uring_osd.cpp
)
target_link_libraries(stub_uring_osd
vitastor_common
${LIBURING_LIBRARIES}
${IBVERBS_LIBRARIES}
tcmalloc_minimal
)
# osd_peering_pg_test
add_executable(osd_peering_pg_test EXCLUDE_FROM_ALL osd_peering_pg_test.cpp osd_peering_pg.cpp)
target_link_libraries(osd_peering_pg_test tcmalloc_minimal)
add_dependencies(build_tests osd_peering_pg_test)
add_test(NAME osd_peering_pg_test COMMAND osd_peering_pg_test)
# test_allocator
add_executable(test_allocator EXCLUDE_FROM_ALL test_allocator.cpp allocator.cpp)
add_dependencies(build_tests test_allocator)
add_test(NAME test_allocator COMMAND test_allocator)
# test_cas
add_executable(test_cas
test_cas.cpp
)
target_link_libraries(test_cas
vitastor_client
)
# test_crc32
add_executable(test_crc32
test_crc32.cpp
)
target_link_libraries(test_crc32
vitastor_blk
)
# test_cluster_client
add_executable(test_cluster_client
EXCLUDE_FROM_ALL
test_cluster_client.cpp
pg_states.cpp osd_ops.cpp cluster_client.cpp cluster_client_list.cpp cluster_client_wb.cpp msgr_op.cpp mock/messenger.cpp msgr_stop.cpp
etcd_state_client.cpp timerfd_manager.cpp str_util.cpp ../json11/json11.cpp
)
target_compile_definitions(test_cluster_client PUBLIC -D__MOCK__)
target_include_directories(test_cluster_client PUBLIC ${CMAKE_SOURCE_DIR}/src/mock)
add_dependencies(build_tests test_cluster_client)
add_test(NAME test_cluster_client COMMAND test_cluster_client)
## test_blockstore, test_shit
#add_executable(test_blockstore test_blockstore.cpp)
#target_link_libraries(test_blockstore blockstore)
#add_executable(test_shit test_shit.cpp osd_peering_pg.cpp)
#target_link_libraries(test_shit ${LIBURING_LIBRARIES} m)
### Install ### Install
@ -378,7 +106,7 @@ install(
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
) )
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/vitastor.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig) install(FILES ${CMAKE_CURRENT_BINARY_DIR}/client/vitastor.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig)
if (${WITH_FIO}) if (${WITH_FIO})
install(TARGETS fio_vitastor fio_vitastor_blk fio_vitastor_sec LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}) install(TARGETS fio_vitastor fio_vitastor_blk fio_vitastor_sec LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR})
endif (${WITH_FIO}) endif (${WITH_FIO})

View File

@ -0,0 +1,27 @@
cmake_minimum_required(VERSION 2.8.12)
project(vitastor)
# libvitastor_blk.so
add_library(vitastor_blk SHARED
../util/allocator.cpp blockstore.cpp blockstore_impl.cpp blockstore_disk.cpp blockstore_init.cpp blockstore_open.cpp blockstore_journal.cpp blockstore_read.cpp
blockstore_write.cpp blockstore_sync.cpp blockstore_stable.cpp blockstore_rollback.cpp blockstore_flush.cpp ../util/crc32c.c ../util/ringloop.cpp
)
target_link_libraries(vitastor_blk
${LIBURING_LIBRARIES}
tcmalloc_minimal
# for timerfd_manager
vitastor_common
)
set_target_properties(vitastor_blk PROPERTIES VERSION ${VERSION} SOVERSION 0)
if (${WITH_FIO})
# libfio_vitastor_blk.so
add_library(fio_vitastor_blk SHARED
fio_engine.cpp
../../json11/json11.cpp
)
target_link_libraries(fio_vitastor_blk
vitastor_blk
)
endif (${WITH_FIO})

95
src/client/CMakeLists.txt Normal file
View File

@ -0,0 +1,95 @@
cmake_minimum_required(VERSION 2.8.12)
project(vitastor)
# libvitastor_common.a
set(MSGR_RDMA "")
if (IBVERBS_LIBRARIES)
set(MSGR_RDMA "msgr_rdma.cpp")
endif (IBVERBS_LIBRARIES)
add_library(vitastor_common STATIC
../util/epoll_manager.cpp etcd_state_client.cpp messenger.cpp ../util/addr_util.cpp
msgr_stop.cpp msgr_op.cpp msgr_send.cpp msgr_receive.cpp ../util/ringloop.cpp ../../json11/json11.cpp
http_client.cpp osd_ops.cpp pg_states.cpp ../util/timerfd_manager.cpp ../util/str_util.cpp ${MSGR_RDMA}
)
target_compile_options(vitastor_common PUBLIC -fPIC)
# libvitastor_client.so
add_library(vitastor_client SHARED
cluster_client.cpp
cluster_client_list.cpp
cluster_client_wb.cpp
vitastor_c.cpp
)
set_target_properties(vitastor_client PROPERTIES PUBLIC_HEADER "client/vitastor_c.h")
target_link_libraries(vitastor_client
vitastor_common
vitastor_cli
${LIBURING_LIBRARIES}
${IBVERBS_LIBRARIES}
)
set_target_properties(vitastor_client PROPERTIES VERSION ${VERSION} SOVERSION 0)
configure_file(vitastor.pc.in vitastor.pc @ONLY)
if (${WITH_FIO})
# libfio_vitastor.so
add_library(fio_vitastor SHARED
fio_cluster.cpp
)
target_link_libraries(fio_vitastor
vitastor_client
)
# libfio_vitastor_sec.so
add_library(fio_vitastor_sec SHARED
fio_sec_osd.cpp
../util/rw_blocking.cpp
../util/addr_util.cpp
)
target_link_libraries(fio_vitastor_sec
tcmalloc_minimal
)
endif (${WITH_FIO})
# vitastor-nbd
pkg_check_modules(NL3 libnl-3.0 libnl-genl-3.0)
add_executable(vitastor-nbd
nbd_proxy.cpp
)
target_include_directories(vitastor-nbd PUBLIC ${NL3_INCLUDE_DIRS})
target_link_libraries(vitastor-nbd vitastor_client ${NL3_LIBRARIES})
if (HAVE_NBD_NETLINK_H AND NL3_LIBRARIES)
target_compile_definitions(vitastor-nbd PUBLIC HAVE_NBD_NETLINK_H)
endif (HAVE_NBD_NETLINK_H AND NL3_LIBRARIES)
if (${WITH_QEMU})
# qemu_driver.so
add_library(qemu_vitastor SHARED
qemu_driver.c
)
target_compile_options(qemu_vitastor PUBLIC -DVITASTOR_SOURCE_TREE)
target_include_directories(qemu_vitastor PUBLIC
../../qemu/b/qemu
../../qemu/include
${GLIB_INCLUDE_DIRS}
)
target_link_libraries(qemu_vitastor
vitastor_client
)
set_target_properties(qemu_vitastor PROPERTIES
PREFIX ""
OUTPUT_NAME "block-vitastor"
)
endif (${WITH_QEMU})
# test_cluster_client
add_executable(test_cluster_client
EXCLUDE_FROM_ALL
../test/test_cluster_client.cpp
pg_states.cpp osd_ops.cpp cluster_client.cpp cluster_client_list.cpp cluster_client_wb.cpp msgr_op.cpp ../test/mock/messenger.cpp msgr_stop.cpp
etcd_state_client.cpp ../util/timerfd_manager.cpp ../util/str_util.cpp ../../json11/json11.cpp
)
target_compile_definitions(test_cluster_client PUBLIC -D__MOCK__)
target_include_directories(test_cluster_client BEFORE PUBLIC ${CMAKE_SOURCE_DIR}/src/test/mock)
add_dependencies(build_tests test_cluster_client)
add_test(NAME test_cluster_client COMMAND test_cluster_client)

View File

@ -20,9 +20,11 @@
#define MAX_DATA_BLOCK_SIZE 128*1024*1024 #define MAX_DATA_BLOCK_SIZE 128*1024*1024
#define DEFAULT_BITMAP_GRANULARITY 4096 #define DEFAULT_BITMAP_GRANULARITY 4096
#ifndef IMMEDIATE_NONE
#define IMMEDIATE_NONE 0 #define IMMEDIATE_NONE 0
#define IMMEDIATE_SMALL 1 #define IMMEDIATE_SMALL 1
#define IMMEDIATE_ALL 2 #define IMMEDIATE_ALL 2
#endif
struct etcd_kv_t struct etcd_kv_t
{ {

View File

@ -200,6 +200,11 @@ vitastor_c *vitastor_c_create_epoll_json(const char **options, int options_len)
return self; return self;
} }
void* vitastor_c_get_internal_client(vitastor_c *client)
{
return client->cli;
}
void vitastor_c_destroy(vitastor_c *client) void vitastor_c_destroy(vitastor_c *client)
{ {
delete client->cli; delete client->cli;
@ -379,4 +384,12 @@ int vitastor_c_inode_get_readonly(void *handle)
return watch->cfg.readonly; return watch->cfg.readonly;
} }
uint32_t vitastor_c_inode_get_immediate_commit(vitastor_c *client, uint64_t inode_num)
{
auto pool_it = client->cli->st_cli.pool_config.find(INODE_POOL(inode_num));
if (pool_it == client->cli->st_cli.pool_config.end())
return 0;
return pool_it->second.immediate_commit;
}
} }

View File

@ -7,11 +7,18 @@
#define VITASTOR_QEMU_PROXY_H #define VITASTOR_QEMU_PROXY_H
// C API wrapper version // C API wrapper version
#define VITASTOR_C_API_VERSION 3 #define VITASTOR_C_API_VERSION 4
#ifndef POOL_ID_BITS #ifndef POOL_ID_BITS
#define POOL_ID_BITS 16 #define POOL_ID_BITS 16
#endif #endif
#ifndef IMMEDIATE_NONE
#define IMMEDIATE_NONE 0
#define IMMEDIATE_SMALL 1
#define IMMEDIATE_ALL 2
#endif
#include <stdint.h> #include <stdint.h>
#include <sys/uio.h> #include <sys/uio.h>
@ -41,6 +48,7 @@ vitastor_c *vitastor_c_create_uring(const char *config_path, const char *etcd_ho
int use_rdma, const char *rdma_device, int rdma_port_num, int rdma_gid_index, int rdma_mtu, int log_level); int use_rdma, const char *rdma_device, int rdma_port_num, int rdma_gid_index, int rdma_mtu, int log_level);
vitastor_c *vitastor_c_create_uring_json(const char **options, int options_len); vitastor_c *vitastor_c_create_uring_json(const char **options, int options_len);
vitastor_c *vitastor_c_create_epoll_json(const char **options, int options_len); vitastor_c *vitastor_c_create_epoll_json(const char **options, int options_len);
void* vitastor_c_get_internal_client(vitastor_c *client);
void vitastor_c_destroy(vitastor_c *client); void vitastor_c_destroy(vitastor_c *client);
int vitastor_c_is_ready(vitastor_c *client); int vitastor_c_is_ready(vitastor_c *client);
int vitastor_c_uring_register_eventfd(vitastor_c *client); int vitastor_c_uring_register_eventfd(vitastor_c *client);
@ -64,6 +72,8 @@ uint64_t vitastor_c_inode_get_num(void *handle);
uint32_t vitastor_c_inode_get_block_size(vitastor_c *client, uint64_t inode_num); uint32_t vitastor_c_inode_get_block_size(vitastor_c *client, uint64_t inode_num);
uint32_t vitastor_c_inode_get_bitmap_granularity(vitastor_c *client, uint64_t inode_num); uint32_t vitastor_c_inode_get_bitmap_granularity(vitastor_c *client, uint64_t inode_num);
int vitastor_c_inode_get_readonly(void *handle); int vitastor_c_inode_get_readonly(void *handle);
// Returns IMMEDIATE_ALL, IMMEDIATE_SMALL or IMMEDIATE_NONE
uint32_t vitastor_c_inode_get_immediate_commit(vitastor_c *client, uint64_t inode_num);
#ifdef __cplusplus #ifdef __cplusplus
} }

34
src/cmd/CMakeLists.txt Normal file
View File

@ -0,0 +1,34 @@
cmake_minimum_required(VERSION 2.8.12)
project(vitastor)
# libvitastor_cli.a
add_library(vitastor_cli STATIC
cli_common.cpp
cli_alloc_osd.cpp
cli_status.cpp
cli_describe.cpp
cli_fix.cpp
cli_ls.cpp
cli_create.cpp
cli_modify.cpp
cli_flatten.cpp
cli_merge.cpp
cli_rm_data.cpp
cli_rm.cpp
cli_rm_osd.cpp
cli_pool_cfg.cpp
cli_pool_create.cpp
cli_pool_ls.cpp
cli_pool_modify.cpp
cli_pool_rm.cpp
)
target_compile_options(vitastor_cli PUBLIC -fPIC)
# vitastor-cli
add_executable(vitastor-cli
cli.cpp
)
target_link_libraries(vitastor-cli
vitastor_client
)

Some files were not shown because too many files have changed in this diff Show More