Compare commits

..

241 Commits

Author SHA1 Message Date
c3ec6dd3b9 Add postponed send loop to QEMU driver
Some checks failed
Test / test_interrupted_rebalance (push) Successful in 7m24s
Test / test_interrupted_rebalance_imm (push) Failing after 10m5s
Test / test_interrupted_rebalance_ec (push) Failing after 10m6s
Test / test_interrupted_rebalance_ec_imm (push) Failing after 10m15s
Test / test_failure_domain (push) Successful in 7s
Test / test_snapshot (push) Successful in 18s
Test / test_snapshot_ec (push) Successful in 18s
Test / test_minsize_1 (push) Successful in 11s
Test / test_move_reappear (push) Successful in 17s
Test / test_rm (push) Successful in 11s
Test / test_snapshot_chain (push) Successful in 1m2s
Test / test_snapshot_chain_ec (push) Successful in 2m14s
Test / test_snapshot_down (push) Successful in 20s
Test / test_snapshot_down_ec (push) Successful in 20s
Test / test_splitbrain (push) Successful in 13s
Test / test_rebalance_verify (push) Successful in 3m10s
Test / test_rebalance_verify_imm (push) Successful in 3m3s
Test / test_rebalance_verify_ec (push) Successful in 3m26s
Test / test_rebalance_verify_ec_imm (push) Successful in 4m12s
Test / test_write (push) Successful in 40s
Test / test_write_xor (push) Successful in 55s
Test / test_write_no_same (push) Successful in 14s
Test / test_heal_pg_size_2 (push) Successful in 3m51s
Test / test_heal_ec (push) Failing after 10m14s
Test / test_scrub (push) Successful in 59s
Test / test_scrub_zero_osd_2 (push) Successful in 45s
Test / test_scrub_xor (push) Successful in 42s
Test / test_scrub_pg_size_3 (push) Successful in 1m2s
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 50s
Test / test_scrub_ec (push) Successful in 34s
2023-07-03 02:29:01 +03:00
c8d61568b5 Fix primary_read bitmap buffers being freed too early (use-after-free)
All checks were successful
Test / test_etcd_fail (push) Successful in 1m43s
Test / test_interrupted_rebalance (push) Successful in 1m23s
Test / test_interrupted_rebalance_imm (push) Successful in 1m33s
Test / test_interrupted_rebalance_ec_imm (push) Successful in 1m39s
Test / test_failure_domain (push) Successful in 10s
Test / test_snapshot (push) Successful in 19s
Test / test_snapshot_ec (push) Successful in 17s
Test / test_minsize_1 (push) Successful in 13s
Test / test_move_reappear (push) Successful in 17s
Test / test_rm (push) Successful in 11s
Test / test_snapshot_chain (push) Successful in 1m7s
Test / test_snapshot_chain_ec (push) Successful in 2m1s
Test / test_snapshot_down (push) Successful in 22s
Test / test_snapshot_down_ec (push) Successful in 20s
Test / test_splitbrain (push) Successful in 12s
Test / test_rebalance_verify (push) Successful in 2m51s
Test / test_rebalance_verify_imm (push) Successful in 2m49s
Test / test_rebalance_verify_ec (push) Successful in 4m6s
Test / test_rebalance_verify_ec_imm (push) Successful in 5m3s
Test / test_write (push) Successful in 32s
Test / test_write_xor (push) Successful in 34s
Test / test_write_no_same (push) Successful in 12s
Test / test_heal_pg_size_2 (push) Successful in 3m11s
Test / test_heal_ec (push) Successful in 3m54s
Test / test_scrub (push) Successful in 44s
Test / test_scrub_zero_osd_2 (push) Successful in 30s
Test / test_scrub_xor (push) Successful in 31s
Test / test_scrub_pg_size_3 (push) Successful in 37s
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 32s
Test / test_scrub_ec (push) Successful in 1m31s
2023-06-30 12:47:45 +03:00
84ed3c6395 Fix CAS retries during snapshot merge
Some checks failed
Test / test_create_nomaxid (push) Successful in 7s
Test / test_etcd_fail (push) Failing after 10m10s
Test / test_interrupted_rebalance (push) Successful in 2m46s
Test / test_interrupted_rebalance_imm (push) Successful in 1m35s
Test / test_interrupted_rebalance_ec (push) Successful in 1m45s
Test / test_interrupted_rebalance_ec_imm (push) Successful in 1m53s
Test / test_failure_domain (push) Successful in 7s
Test / test_snapshot (push) Successful in 24s
Test / test_snapshot_ec (push) Successful in 29s
Test / test_minsize_1 (push) Successful in 13s
Test / test_move_reappear (push) Failing after 49s
Test / test_rm (push) Successful in 12s
Test / test_snapshot_chain (push) Successful in 1m35s
Test / test_snapshot_chain_ec (push) Successful in 2m9s
Test / test_splitbrain (push) Successful in 14s
Test / test_rebalance_verify (push) Successful in 3m0s
Test / test_rebalance_verify_imm (push) Successful in 3m5s
Test / test_rebalance_verify_ec (push) Successful in 3m20s
Test / test_rebalance_verify_ec_imm (push) Successful in 5m24s
Test / test_write (push) Successful in 34s
Test / test_write_xor (push) Successful in 38s
Test / test_write_no_same (push) Successful in 14s
Test / test_heal_pg_size_2 (push) Successful in 4m0s
Test / test_heal_ec (push) Successful in 3m58s
Test / test_scrub (push) Successful in 58s
Test / test_scrub_zero_osd_2 (push) Successful in 38s
Test / test_scrub_xor (push) Successful in 30s
Test / test_scrub_pg_size_3 (push) Successful in 35s
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 30s
Test / test_scrub_ec (push) Successful in 23s
2023-06-30 02:30:23 +03:00
a7b57386c0 Do not print last subcommand result twice during "inverse" snapshot merge
Some checks failed
Test / test_create_nomaxid (push) Successful in 7s
Test / test_etcd_fail (push) Successful in 1m28s
Test / test_interrupted_rebalance (push) Successful in 1m45s
Test / test_interrupted_rebalance_imm (push) Successful in 2m46s
Test / test_interrupted_rebalance_ec (push) Successful in 1m44s
Test / test_interrupted_rebalance_ec_imm (push) Successful in 1m30s
Test / test_failure_domain (push) Successful in 8s
Test / test_snapshot (push) Successful in 30s
Test / test_snapshot_ec (push) Successful in 29s
Test / test_minsize_1 (push) Failing after 19s
Test / test_move_reappear (push) Successful in 17s
Test / test_rm (push) Successful in 14s
Test / test_snapshot_chain (push) Successful in 1m57s
Test / test_snapshot_chain_ec (push) Successful in 2m42s
Test / test_splitbrain (push) Successful in 16s
Test / test_rebalance_verify (push) Successful in 3m23s
Test / test_rebalance_verify_imm (push) Successful in 3m17s
Test / test_rebalance_verify_ec (push) Successful in 4m5s
Test / test_rebalance_verify_ec_imm (push) Successful in 4m50s
Test / test_write (push) Successful in 41s
Test / test_write_xor (push) Failing after 3m17s
Test / test_write_no_same (push) Successful in 13s
Test / test_heal_pg_size_2 (push) Successful in 3m12s
Test / test_heal_ec (push) Successful in 4m13s
Test / test_scrub (push) Successful in 38s
Test / test_scrub_zero_osd_2 (push) Successful in 36s
Test / test_scrub_xor (push) Successful in 33s
Test / test_scrub_pg_size_3 (push) Successful in 46s
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 35s
Test / test_scrub_ec (push) Successful in 33s
2023-06-30 02:07:10 +03:00
9d4ea5f764 Fix inverse parent selection which prevented the use of optimized merge in case of only 1 snapshot 2023-06-30 01:39:11 +03:00
000e4944ec Remove "inverse parent" image name index key from etcd during snapshot merge 2023-06-30 01:23:30 +03:00
8426616d89 Warn about unfinished deletions in rm-data
Some checks failed
Test / test_create_nomaxid (push) Successful in 7s
Test / test_etcd_fail (push) Successful in 1m46s
Test / test_interrupted_rebalance (push) Successful in 2m34s
Test / test_interrupted_rebalance_imm (push) Successful in 2m50s
Test / test_interrupted_rebalance_ec (push) Successful in 1m35s
Test / test_interrupted_rebalance_ec_imm (push) Failing after 10m6s
Test / test_failure_domain (push) Successful in 12s
Test / test_snapshot (push) Successful in 21s
Test / test_snapshot_ec (push) Successful in 27s
Test / test_minsize_1 (push) Successful in 12s
Test / test_move_reappear (push) Failing after 48s
Test / test_rm (push) Successful in 13s
Test / test_snapshot_chain (push) Successful in 1m35s
Test / test_snapshot_chain_ec (push) Successful in 2m11s
Test / test_splitbrain (push) Successful in 15s
Test / test_rebalance_verify (push) Successful in 3m4s
Test / test_rebalance_verify_imm (push) Successful in 3m0s
Test / test_rebalance_verify_ec (push) Successful in 3m25s
Test / test_rebalance_verify_ec_imm (push) Successful in 4m58s
Test / test_write (push) Successful in 35s
Test / test_write_xor (push) Successful in 34s
Test / test_write_no_same (push) Successful in 12s
Test / test_heal_pg_size_2 (push) Successful in 4m3s
Test / test_heal_ec (push) Successful in 3m53s
Test / test_scrub (push) Successful in 26s
Test / test_scrub_zero_osd_2 (push) Successful in 35s
Test / test_scrub_xor (push) Successful in 35s
Test / test_scrub_pg_size_3 (push) Successful in 45s
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 32s
Test / test_scrub_ec (push) Successful in 25s
2023-06-30 01:18:25 +03:00
1a841344ec Print progress of all operations during snapshot merge 2023-06-30 01:13:47 +03:00
8603b5cb1d Do not hang on inactive OSDs during delete, report and skip them instead
All checks were successful
Test / test_create_nomaxid (push) Successful in 8s
Test / test_etcd_fail (push) Successful in 46s
Test / test_interrupted_rebalance (push) Successful in 3m13s
Test / test_interrupted_rebalance_imm (push) Successful in 1m30s
Test / test_interrupted_rebalance_ec (push) Successful in 2m18s
Test / test_interrupted_rebalance_ec_imm (push) Successful in 1m34s
Test / test_failure_domain (push) Successful in 7s
Test / test_snapshot (push) Successful in 29s
Test / test_snapshot_ec (push) Successful in 32s
Test / test_minsize_1 (push) Successful in 12s
Test / test_move_reappear (push) Successful in 17s
Test / test_rm (push) Successful in 12s
Test / test_snapshot_chain (push) Successful in 1m37s
Test / test_snapshot_chain_ec (push) Successful in 2m21s
Test / test_splitbrain (push) Successful in 13s
Test / test_rebalance_verify (push) Successful in 3m14s
Test / test_rebalance_verify_imm (push) Successful in 3m8s
Test / test_rebalance_verify_ec (push) Successful in 3m18s
Test / test_rebalance_verify_ec_imm (push) Successful in 5m20s
Test / test_write (push) Successful in 32s
Test / test_write_xor (push) Successful in 43s
Test / test_write_no_same (push) Successful in 13s
Test / test_heal_pg_size_2 (push) Successful in 3m55s
Test / test_heal_ec (push) Successful in 4m2s
Test / test_scrub (push) Successful in 56s
Test / test_scrub_zero_osd_2 (push) Successful in 51s
Test / test_scrub_xor (push) Successful in 29s
Test / test_scrub_pg_size_3 (push) Successful in 1m2s
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 37s
Test / test_scrub_ec (push) Successful in 32s
2023-06-30 00:15:16 +03:00
f12b8e45a9 Remove /usr/local/bin path from make-etcd
Some checks failed
Test / test_create_nomaxid (push) Successful in 6s
Test / test_etcd_fail (push) Successful in 51s
Test / test_interrupted_rebalance (push) Successful in 7m28s
Test / test_interrupted_rebalance_imm (push) Failing after 10m10s
Test / test_interrupted_rebalance_ec (push) Successful in 7m21s
Test / test_interrupted_rebalance_ec_imm (push) Failing after 10m7s
Test / test_failure_domain (push) Successful in 7s
Test / test_snapshot (push) Successful in 17s
Test / test_snapshot_ec (push) Successful in 18s
Test / test_minsize_1 (push) Successful in 11s
Test / test_move_reappear (push) Successful in 17s
Test / test_rm (push) Successful in 10s
Test / test_snapshot_chain (push) Successful in 1m19s
Test / test_snapshot_chain_ec (push) Successful in 1m43s
Test / test_splitbrain (push) Successful in 12s
Test / test_rebalance_verify (push) Successful in 3m9s
Test / test_rebalance_verify_imm (push) Successful in 3m3s
Test / test_rebalance_verify_ec (push) Successful in 3m31s
Test / test_rebalance_verify_ec_imm (push) Successful in 5m33s
Test / test_write (push) Successful in 49s
Test / test_write_xor (push) Successful in 58s
Test / test_write_no_same (push) Successful in 16s
Test / test_heal_pg_size_2 (push) Successful in 3m47s
Test / test_heal_ec (push) Successful in 3m59s
Test / test_scrub (push) Successful in 58s
Test / test_scrub_zero_osd_2 (push) Successful in 42s
Test / test_scrub_xor (push) Successful in 30s
Test / test_scrub_pg_size_3 (push) Successful in 39s
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 31s
Test / test_scrub_ec (push) Successful in 31s
2023-06-29 23:49:31 +03:00
878ccbb6ea Fix snapshot chain "down-merge" ("up-merge" worked well...)
All checks were successful
Test / test_change_pg_size (push) Successful in 8s
Test / test_create_nomaxid (push) Successful in 7s
Test / test_interrupted_rebalance (push) Successful in 7m46s
Test / test_interrupted_rebalance_imm (push) Successful in 1m31s
Test / test_interrupted_rebalance_ec (push) Successful in 1m42s
Test / test_interrupted_rebalance_ec_imm (push) Successful in 1m22s
Test / test_failure_domain (push) Successful in 7s
Test / test_snapshot (push) Successful in 18s
Test / test_snapshot_ec (push) Successful in 20s
Test / test_minsize_1 (push) Successful in 11s
Test / test_move_reappear (push) Successful in 16s
Test / test_rm (push) Successful in 11s
Test / test_snapshot_chain (push) Successful in 1m11s
Test / test_snapshot_chain_ec (push) Successful in 2m13s
Test / test_splitbrain (push) Successful in 13s
Test / test_rebalance_verify (push) Successful in 3m26s
Test / test_rebalance_verify_imm (push) Successful in 3m27s
Test / test_rebalance_verify_ec (push) Successful in 4m23s
Test / test_rebalance_verify_ec_imm (push) Successful in 4m32s
Test / test_write (push) Successful in 31s
Test / test_write_xor (push) Successful in 33s
Test / test_write_no_same (push) Successful in 13s
Test / test_heal_pg_size_2 (push) Successful in 3m37s
Test / test_heal_ec (push) Successful in 4m43s
Test / test_scrub (push) Successful in 45s
Test / test_scrub_zero_osd_2 (push) Successful in 37s
Test / test_scrub_xor (push) Successful in 39s
Test / test_scrub_pg_size_3 (push) Successful in 55s
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 33s
Test / test_scrub_ec (push) Successful in 32s
2023-06-29 00:47:21 +03:00
b14220b4d0 Add a test for snapshot chain 2023-06-29 00:47:21 +03:00
181d6ba407 Move npm install to CI build and remove it from tests 2023-06-28 23:41:46 +03:00
63c2b9832c Fix chained (snapshot) reads often not working at all with chain size > 2
All checks were successful
Test / test_change_pg_count (push) Successful in 42s
Test / test_change_pg_count_ec (push) Successful in 36s
Test / test_change_pg_size (push) Successful in 9s
Test / test_create_nomaxid (push) Successful in 8s
Test / test_etcd_fail (push) Successful in 1m6s
Test / test_failure_domain (push) Successful in 10s
Test / test_interrupted_rebalance (push) Successful in 1m52s
Test / test_interrupted_rebalance_imm (push) Successful in 1m49s
Test / test_interrupted_rebalance_ec (push) Successful in 2m2s
Test / test_interrupted_rebalance_ec_imm (push) Successful in 1m19s
Test / test_minsize_1 (push) Successful in 13s
Test / test_rebalance_verify (push) Successful in 3m10s
Test / test_rebalance_verify_imm (push) Successful in 2m56s
Test / test_rebalance_verify_ec (push) Successful in 3m6s
Test / test_rebalance_verify_ec_imm (push) Successful in 3m9s
Test / test_rm (push) Successful in 14s
Test / test_snapshot (push) Successful in 24s
Test / test_snapshot_ec (push) Successful in 27s
Test / test_splitbrain (push) Successful in 25s
Test / test_write (push) Successful in 1m30s
Test / test_write_xor (push) Successful in 2m14s
Test / test_write_no_same (push) Successful in 20s
Test / test_heal_pg_size_2 (push) Successful in 4m5s
Test / test_heal_ec (push) Successful in 4m11s
Test / test_scrub (push) Successful in 48s
Test / test_scrub_zero_osd_2 (push) Successful in 32s
Test / test_scrub_xor (push) Successful in 30s
Test / test_scrub_pg_size_3 (push) Successful in 53s
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 37s
Test / test_scrub_ec (push) Successful in 41s
2023-06-28 18:54:03 +03:00
10e2e6a7c8 Add a patch for pve-qemu 8.0 2023-06-24 01:33:52 +03:00
a598428992 Add a note about PVE 8.0 2023-06-24 01:29:28 +03:00
08a677b684 Notes about rebuilding pve-qemu 2023-06-24 01:00:21 +03:00
7c8fbdad16 Fix typo 2023-06-22 02:07:33 +03:00
2f9353df60 Markdown preprocessor to compile multiple .md files into one 2023-06-22 01:27:38 +03:00
57c744f288 Fix some cross-references in docs 2023-06-22 01:16:03 +03:00
a11ca56fb1 Fix compatibility of the QEMU driver with iothread
All checks were successful
Test / test_change_pg_count_ec (push) Successful in 55s
Test / test_change_pg_size (push) Successful in 8s
Test / test_create_nomaxid (push) Successful in 7s
Test / test_etcd_fail (push) Successful in 57s
Test / test_failure_domain (push) Successful in 10s
Test / test_interrupted_rebalance (push) Successful in 1m53s
Test / test_interrupted_rebalance_imm (push) Successful in 1m31s
Test / test_interrupted_rebalance_ec (push) Successful in 2m39s
Test / test_interrupted_rebalance_ec_imm (push) Successful in 1m46s
Test / test_minsize_1 (push) Successful in 55s
Test / test_move_reappear (push) Successful in 16s
Test / test_rebalance_verify (push) Successful in 3m13s
Test / test_rebalance_verify_imm (push) Successful in 2m52s
Test / test_rebalance_verify_ec (push) Successful in 3m10s
Test / test_rebalance_verify_ec_imm (push) Successful in 4m25s
Test / test_rm (push) Successful in 18s
Test / test_snapshot (push) Successful in 36s
Test / test_snapshot_ec (push) Successful in 26s
Test / test_splitbrain (push) Successful in 13s
Test / test_write (push) Successful in 38s
Test / test_write_xor (push) Successful in 1m50s
Test / test_write_no_same (push) Successful in 14s
Test / test_heal_pg_size_2 (push) Successful in 4m17s
Test / test_heal_ec (push) Successful in 4m18s
Test / test_scrub (push) Successful in 50s
Test / test_scrub_zero_osd_2 (push) Successful in 51s
Test / test_scrub_xor (push) Successful in 39s
Test / test_scrub_pg_size_3 (push) Successful in 53s
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 42s
Test / test_scrub_ec (push) Successful in 28s
2023-06-21 02:11:28 +03:00
b84927b340 Fix \n in nbd_proxy
All checks were successful
Test / test_change_pg_count (push) Successful in 41s
Test / test_change_pg_count_ec (push) Successful in 37s
Test / test_change_pg_size (push) Successful in 8s
Test / test_create_nomaxid (push) Successful in 9s
Test / test_etcd_fail (push) Successful in 1m9s
Test / test_failure_domain (push) Successful in 9s
Test / test_interrupted_rebalance (push) Successful in 1m54s
Test / test_interrupted_rebalance_imm (push) Successful in 1m47s
Test / test_interrupted_rebalance_ec (push) Successful in 1m17s
Test / test_interrupted_rebalance_ec_imm (push) Successful in 1m26s
Test / test_minsize_1 (push) Successful in 19s
Test / test_move_reappear (push) Successful in 50s
Test / test_rebalance_verify (push) Successful in 3m6s
Test / test_rebalance_verify_imm (push) Successful in 3m3s
Test / test_rebalance_verify_ec (push) Successful in 3m10s
Test / test_rebalance_verify_ec_imm (push) Successful in 2m55s
Test / test_rm (push) Successful in 14s
Test / test_snapshot (push) Successful in 24s
Test / test_snapshot_ec (push) Successful in 24s
Test / test_splitbrain (push) Successful in 14s
Test / test_write (push) Successful in 1m38s
Test / test_write_xor (push) Successful in 2m13s
Test / test_write_no_same (push) Successful in 23s
Test / test_heal_pg_size_2 (push) Successful in 4m5s
Test / test_scrub (push) Successful in 42s
Test / test_scrub_zero_osd_2 (push) Successful in 33s
Test / test_scrub_xor (push) Successful in 31s
Test / test_scrub_pg_size_3 (push) Successful in 40s
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 34s
Test / test_scrub_ec (push) Successful in 30s
2023-06-19 01:48:58 +03:00
83cacba226 Fix patched-qemu build 2023-06-19 01:47:55 +03:00
2c8f0bc6d5 Add a note about Debian 12 2023-06-19 01:08:41 +03:00
7ae5b0e368 Add patch for libvirt 9.0 2023-06-19 01:07:08 +03:00
926be372fd Release 0.9.2
All checks were successful
Test / test_change_pg_count (push) Successful in 35s
Test / test_change_pg_count_ec (push) Successful in 34s
Test / test_change_pg_size (push) Successful in 9s
Test / test_create_nomaxid (push) Successful in 7s
Test / test_etcd_fail (push) Successful in 1m1s
Test / test_failure_domain (push) Successful in 9s
Test / test_interrupted_rebalance (push) Successful in 1m26s
Test / test_interrupted_rebalance_imm (push) Successful in 2m15s
Test / test_interrupted_rebalance_ec (push) Successful in 1m31s
Test / test_interrupted_rebalance_ec_imm (push) Successful in 1m32s
Test / test_minsize_1 (push) Successful in 13s
Test / test_rebalance_verify (push) Successful in 2m49s
Test / test_rebalance_verify_imm (push) Successful in 2m45s
Test / test_rebalance_verify_ec (push) Successful in 3m9s
Test / test_rebalance_verify_ec_imm (push) Successful in 3m3s
Test / test_rm (push) Successful in 12s
Test / test_snapshot (push) Successful in 24s
Test / test_snapshot_ec (push) Successful in 35s
Test / test_splitbrain (push) Successful in 21s
Test / test_write (push) Successful in 1m10s
Test / test_write_xor (push) Successful in 2m23s
Test / test_write_no_same (push) Successful in 19s
Test / test_heal_pg_size_2 (push) Successful in 3m58s
Test / test_heal_ec (push) Successful in 4m8s
Test / test_scrub (push) Successful in 1m3s
Test / test_scrub_zero_osd_2 (push) Successful in 36s
Test / test_scrub_xor (push) Successful in 34s
Test / test_scrub_pg_size_3 (push) Successful in 51s
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 32s
Test / test_scrub_ec (push) Successful in 43s
- Measure and report scrub I/O statistics in vitastor-cli status
- Make aggregated statistics in vitastor-cli status much smoother
  (first derive, then sum instead of first summing and then deriving)
- Fix an old rare bug leading to journal corruption
  (try to use scrub if you think you're affected...)
- Do not start EC PGs without at least <data chunks> OSDs in each old set
  (prevents spurious read errors with EC during reconnections/restarts)
- Fix failed assert(!scrub_list_op) on OSD restart with pending scrubs
- Fix future planned scrubs not starting because of incorrect time comparison
- Build packages for Debian 12 (Bookworm)
2023-06-18 19:44:33 +03:00
6222779b52 Support debian bookworm (12) build 2023-06-18 19:44:33 +03:00
a4186e20aa First derive, then sum per-OSD statistics instead of first summing and then deriving
All checks were successful
Test / test_change_pg_count (push) Successful in 43s
Test / test_change_pg_count_ec (push) Successful in 37s
Test / test_change_pg_size (push) Successful in 8s
Test / test_create_nomaxid (push) Successful in 8s
Test / test_failure_domain (push) Successful in 16s
Test / test_interrupted_rebalance (push) Successful in 1m49s
Test / test_interrupted_rebalance_imm (push) Successful in 1m38s
Test / test_interrupted_rebalance_ec (push) Successful in 1m49s
Test / test_interrupted_rebalance_ec_imm (push) Successful in 1m23s
Test / test_minsize_1 (push) Successful in 13s
Test / test_move_reappear (push) Successful in 16s
Test / test_rebalance_verify (push) Successful in 3m2s
Test / test_rebalance_verify_imm (push) Successful in 2m53s
Test / test_rebalance_verify_ec (push) Successful in 3m9s
Test / test_rebalance_verify_ec_imm (push) Successful in 5m27s
Test / test_rm (push) Successful in 17s
Test / test_snapshot (push) Successful in 34s
Test / test_snapshot_ec (push) Successful in 29s
Test / test_splitbrain (push) Successful in 22s
Test / test_write (push) Successful in 37s
Test / test_write_xor (push) Successful in 44s
Test / test_write_no_same (push) Successful in 16s
Test / test_heal_pg_size_2 (push) Successful in 3m31s
Test / test_heal_ec (push) Successful in 4m20s
Test / test_scrub (push) Successful in 38s
Test / test_scrub_zero_osd_2 (push) Successful in 30s
Test / test_scrub_xor (push) Successful in 32s
Test / test_scrub_pg_size_3 (push) Successful in 42s
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 37s
Test / test_scrub_ec (push) Successful in 34s
This makes statistics reported by vitastor-cli status much smoother
2023-06-18 01:32:24 +03:00
c74a424930 Report scrub I/O in vitastor-cli status 2023-06-17 21:11:21 +03:00
32f2c4dd27 Measure scrub statistics 2023-06-17 20:56:26 +03:00
3ad16b9a1a Fix auto_scrubs not starting because of < vs <= =))
All checks were successful
Test / test_change_pg_count (push) Successful in 41s
Test / test_change_pg_count_ec (push) Successful in 36s
Test / test_change_pg_size (push) Successful in 9s
Test / test_create_nomaxid (push) Successful in 9s
Test / test_etcd_fail (push) Successful in 1m20s
Test / test_failure_domain (push) Successful in 12s
Test / test_interrupted_rebalance (push) Successful in 2m1s
Test / test_interrupted_rebalance_imm (push) Successful in 1m55s
Test / test_interrupted_rebalance_ec (push) Successful in 1m48s
Test / test_interrupted_rebalance_ec_imm (push) Successful in 1m32s
Test / test_move_reappear (push) Successful in 51s
Test / test_rebalance_verify (push) Successful in 3m19s
Test / test_rebalance_verify_imm (push) Successful in 3m9s
Test / test_rebalance_verify_ec (push) Successful in 3m21s
Test / test_rebalance_verify_ec_imm (push) Successful in 3m4s
Test / test_rm (push) Successful in 17s
Test / test_snapshot (push) Successful in 23s
Test / test_snapshot_ec (push) Successful in 26s
Test / test_splitbrain (push) Successful in 14s
Test / test_write (push) Successful in 1m35s
Test / test_write_xor (push) Successful in 2m29s
Test / test_write_no_same (push) Successful in 29s
Test / test_heal_pg_size_2 (push) Successful in 4m11s
Test / test_heal_ec (push) Successful in 5m4s
Test / test_scrub (push) Successful in 55s
Test / test_scrub_zero_osd_2 (push) Successful in 41s
Test / test_scrub_xor (push) Successful in 37s
Test / test_scrub_pg_size_3 (push) Successful in 57s
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 46s
Test / test_scrub_ec (push) Successful in 31s
2023-06-17 17:32:21 +03:00
1c2df841c2 Fix failed assert(!scrub_list_op) on OSD restart with pending scrubs
All checks were successful
Test / test_change_pg_count (push) Successful in 38s
Test / test_change_pg_count_ec (push) Successful in 38s
Test / test_change_pg_size (push) Successful in 9s
Test / test_create_nomaxid (push) Successful in 8s
Test / test_etcd_fail (push) Successful in 53s
Test / test_failure_domain (push) Successful in 15s
Test / test_interrupted_rebalance (push) Successful in 2m3s
Test / test_interrupted_rebalance_imm (push) Successful in 2m55s
Test / test_interrupted_rebalance_ec (push) Successful in 2m43s
Test / test_interrupted_rebalance_ec_imm (push) Successful in 1m31s
Test / test_minsize_1 (push) Successful in 14s
Test / test_move_reappear (push) Successful in 17s
Test / test_rebalance_verify (push) Successful in 3m9s
Test / test_rebalance_verify_imm (push) Successful in 3m10s
Test / test_rebalance_verify_ec (push) Successful in 3m15s
Test / test_rebalance_verify_ec_imm (push) Successful in 4m45s
Test / test_rm (push) Successful in 18s
Test / test_snapshot (push) Successful in 35s
Test / test_snapshot_ec (push) Successful in 18s
Test / test_splitbrain (push) Successful in 15s
Test / test_write (push) Successful in 35s
Test / test_write_xor (push) Successful in 1m12s
Test / test_write_no_same (push) Successful in 15s
Test / test_heal_pg_size_2 (push) Successful in 3m41s
Test / test_scrub (push) Successful in 33s
Test / test_scrub_zero_osd_2 (push) Successful in 29s
Test / test_scrub_xor (push) Successful in 30s
Test / test_scrub_pg_size_3 (push) Successful in 51s
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 31s
Test / test_scrub_ec (push) Successful in 26s
2023-06-17 17:02:54 +03:00
aa5dacc7a9 Do not start EC PGs without at least pg_data_size connections to old OSDs from each set
All checks were successful
Test / test_change_pg_count (push) Successful in 36s
Test / test_change_pg_count_ec (push) Successful in 38s
Test / test_change_pg_size (push) Successful in 8s
Test / test_create_nomaxid (push) Successful in 9s
Test / test_etcd_fail (push) Successful in 1m13s
Test / test_failure_domain (push) Successful in 11s
Test / test_interrupted_rebalance (push) Successful in 1m51s
Test / test_interrupted_rebalance_imm (push) Successful in 1m43s
Test / test_interrupted_rebalance_ec (push) Successful in 1m47s
Test / test_minsize_1 (push) Successful in 43s
Test / test_move_reappear (push) Successful in 43s
Test / test_rebalance_verify (push) Successful in 3m16s
Test / test_rebalance_verify_imm (push) Successful in 3m9s
Test / test_rebalance_verify_ec (push) Successful in 3m8s
Test / test_rebalance_verify_ec_imm (push) Successful in 3m10s
Test / test_rm (push) Successful in 14s
Test / test_snapshot (push) Successful in 22s
Test / test_snapshot_ec (push) Successful in 25s
Test / test_splitbrain (push) Successful in 15s
Test / test_write (push) Successful in 1m44s
Test / test_write_xor (push) Successful in 2m29s
Test / test_write_no_same (push) Successful in 22s
Test / test_heal_pg_size_2 (push) Successful in 4m37s
Test / test_heal_ec (push) Successful in 4m4s
Test / test_scrub (push) Successful in 48s
Test / test_scrub_zero_osd_2 (push) Successful in 41s
Test / test_scrub_xor (push) Successful in 39s
Test / test_scrub_pg_size_3 (push) Successful in 47s
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 41s
Test / test_scrub_ec (push) Successful in 34s
2023-06-17 02:16:30 +03:00
affe8fc270 Raise timeout also for add_osd and rebalance_verify 2023-06-17 00:29:03 +03:00
4fdc49bdc7 Add another assert-type check (it does not fire, just as a safety measure for the future) 2023-06-17 00:07:22 +03:00
86b4682975 Put get_trim_pos into the "critical section". Fixes rare journal corruption issue
The consequence of this issue was that in some very rare cases (only reproduced
under load in CI when running 4+ tests in parallel) small write data written to
journal could overwrite journal entries.

Also add an assert-type safety check to be able to catch this issue in the
future again in case of a regression.
2023-06-17 00:06:42 +03:00
bdd48e4cf1 Release 0.9.1
All checks were successful
Test / test_change_pg_count (push) Successful in 35s
Test / test_change_pg_count_ec (push) Successful in 32s
Test / test_change_pg_size (push) Successful in 8s
Test / test_create_nomaxid (push) Successful in 6s
Test / test_etcd_fail (push) Successful in 49s
Test / test_failure_domain (push) Successful in 9s
Test / test_interrupted_rebalance (push) Successful in 1m4s
Test / test_interrupted_rebalance_imm (push) Successful in 57s
Test / test_interrupted_rebalance_ec (push) Successful in 1m2s
Test / test_interrupted_rebalance_ec_imm (push) Successful in 53s
Test / test_minsize_1 (push) Successful in 14s
Test / test_move_reappear (push) Successful in 16s
Test / test_rebalance_verify (push) Successful in 1m45s
Test / test_rebalance_verify_imm (push) Successful in 1m41s
Test / test_rebalance_verify_ec (push) Successful in 1m53s
Test / test_rebalance_verify_ec_imm (push) Successful in 1m48s
Test / test_rm (push) Successful in 10s
Test / test_snapshot (push) Successful in 14s
Test / test_snapshot_ec (push) Successful in 16s
Test / test_splitbrain (push) Successful in 14s
Test / test_write (push) Successful in 49s
Test / test_write_xor (push) Successful in 1m1s
Test / test_write_no_same (push) Successful in 11s
Test / test_heal_pg_size_2 (push) Successful in 3m12s
Test / test_scrub (push) Successful in 28s
Test / test_scrub_zero_osd_2 (push) Successful in 22s
Test / test_scrub_xor (push) Successful in 34s
Test / test_scrub_pg_size_3 (push) Successful in 25s
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 26s
Test / test_scrub_ec (push) Successful in 24s
- Fix "Client XX command out of sync" messages sometimes happening on OSD reconnections
- Fix a bug where EC reads parallel with writes to the same object failed with -ERANGE error
- Slightly reduce the amount of metadata writes during journal flushing
- Correctly unmap NBD volumes when Proxmox forces map_volume use (with SWTPM and maybe some other cases)
2023-06-10 11:42:49 +03:00
af8c3411cd Correctly unmap NBD when Proxmox forces map_volume use (with SWTPM and maybe something else) 2023-06-08 01:31:49 +03:00
9c405009f3 Use randrw in test_heal
All checks were successful
Test / test_change_pg_count (push) Successful in 33s
Test / test_change_pg_count_ec (push) Successful in 32s
Test / test_change_pg_size (push) Successful in 8s
Test / test_create_nomaxid (push) Successful in 9s
Test / test_etcd_fail (push) Successful in 51s
Test / test_failure_domain (push) Successful in 12s
Test / test_interrupted_rebalance (push) Successful in 1m4s
Test / test_interrupted_rebalance_imm (push) Successful in 1m0s
Test / test_interrupted_rebalance_ec (push) Successful in 1m20s
Test / test_interrupted_rebalance_ec_imm (push) Successful in 55s
Test / test_minsize_1 (push) Successful in 22s
Test / test_move_reappear (push) Successful in 22s
Test / test_rebalance_verify (push) Successful in 1m47s
Test / test_rebalance_verify_imm (push) Successful in 1m45s
Test / test_rebalance_verify_ec (push) Successful in 2m3s
Test / test_rebalance_verify_ec_imm (push) Successful in 1m52s
Test / test_rm (push) Successful in 11s
Test / test_snapshot (push) Successful in 15s
Test / test_snapshot_ec (push) Successful in 16s
Test / test_splitbrain (push) Successful in 12s
Test / test_write (push) Successful in 31s
Test / test_write_xor (push) Successful in 47s
Test / test_write_no_same (push) Successful in 11s
Test / test_heal_pg_size_2 (push) Successful in 3m12s
Test / test_scrub (push) Successful in 24s
Test / test_scrub_zero_osd_2 (push) Successful in 26s
Test / test_scrub_xor (push) Successful in 26s
Test / test_scrub_pg_size_3 (push) Successful in 25s
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 42s
Test / test_scrub_ec (push) Successful in 33s
2023-06-03 00:49:53 +03:00
f9fbea25a4 Remove double write when old and new locations are in the same metadata block
Also add another metadata entry fool-safety check which, ideally, will never fire %)
2023-06-03 00:47:10 +03:00
2c9a10d081 Fix an idiotic bug leading to failed reads with -ERANGE with EC :D 2023-06-03 00:44:52 +03:00
150968070f Slightly improve some debug prints
All checks were successful
Test / test_change_pg_count (push) Successful in 30s
Test / test_change_pg_count_ec (push) Successful in 31s
Test / test_change_pg_size (push) Successful in 7s
Test / test_create_nomaxid (push) Successful in 7s
Test / test_etcd_fail (push) Successful in 45s
Test / test_failure_domain (push) Successful in 8s
Test / test_interrupted_rebalance (push) Successful in 1m3s
Test / test_interrupted_rebalance_imm (push) Successful in 55s
Test / test_interrupted_rebalance_ec (push) Successful in 1m30s
Test / test_interrupted_rebalance_ec_imm (push) Successful in 57s
Test / test_minsize_1 (push) Successful in 20s
Test / test_move_reappear (push) Successful in 16s
Test / test_rebalance_verify (push) Successful in 1m49s
Test / test_rebalance_verify_imm (push) Successful in 1m40s
Test / test_rebalance_verify_ec (push) Successful in 2m4s
Test / test_rebalance_verify_ec_imm (push) Successful in 1m51s
Test / test_rm (push) Successful in 14s
Test / test_snapshot (push) Successful in 16s
Test / test_snapshot_ec (push) Successful in 17s
Test / test_splitbrain (push) Successful in 14s
Test / test_write (push) Successful in 41s
Test / test_write_xor (push) Successful in 49s
Test / test_write_no_same (push) Successful in 10s
Test / test_heal_pg_size_2 (push) Successful in 2m58s
Test / test_scrub (push) Successful in 23s
Test / test_scrub_zero_osd_2 (push) Successful in 19s
Test / test_scrub_xor (push) Successful in 17s
Test / test_scrub_pg_size_3 (push) Successful in 24s
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 28s
Test / test_scrub_ec (push) Successful in 25s
2023-05-29 01:04:16 +03:00
cdfc74665b Close client FDs only when destroying the client, after handling all async reads/writes
All checks were successful
Test / test_change_pg_count (push) Successful in 50s
Test / test_change_pg_count_ec (push) Successful in 58s
Test / test_change_pg_size (push) Successful in 17s
Test / test_create_nomaxid (push) Successful in 19s
Test / test_etcd_fail (push) Successful in 58s
Test / test_failure_domain (push) Successful in 14s
Test / test_interrupted_rebalance (push) Successful in 1m31s
Test / test_interrupted_rebalance_imm (push) Successful in 1m0s
Test / test_interrupted_rebalance_ec (push) Successful in 1m23s
Test / test_interrupted_rebalance_ec_imm (push) Successful in 1m16s
Test / test_minsize_1 (push) Successful in 38s
Test / test_move_reappear (push) Successful in 54s
Test / test_rebalance_verify (push) Successful in 2m26s
Test / test_rebalance_verify_imm (push) Successful in 2m7s
Test / test_rebalance_verify_ec (push) Successful in 2m51s
Test / test_rebalance_verify_ec_imm (push) Successful in 2m15s
Test / test_rm (push) Successful in 22s
Test / test_snapshot (push) Successful in 40s
Test / test_snapshot_ec (push) Successful in 34s
Test / test_splitbrain (push) Successful in 23s
Test / test_write (push) Successful in 1m7s
Test / test_write_xor (push) Successful in 2m13s
Test / test_write_no_same (push) Successful in 18s
Test / test_heal_pg_size_2 (push) Successful in 5m14s
Test / test_scrub (push) Successful in 36s
Test / test_scrub_zero_osd_2 (push) Successful in 40s
Test / test_scrub_xor (push) Successful in 54s
Test / test_scrub_pg_size_3 (push) Successful in 54s
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 1m12s
Test / test_scrub_ec (push) Successful in 1m10s
Fixes "Client XX command out of sync" sometimes happening on reconnections
2023-05-25 00:52:43 +03:00
3f60fecd7c Fix typo 2023-05-21 18:37:01 +03:00
3b4cf29e65 Release 0.9.0
New features:
- Scrubbing! Check documentation: [auto_scrub](src/branch/master/docs/config/osd.en.md#auto_scrub)
- Document online-updatable configuration parameters

Bug fixes:
- Fix NaN during PG optimisation if there are nonexisting OSDs in node_placement
- Fix monitor crash on pool deletion
- Clear journal_device and meta_device before initialising the next OSD in automatic mode
- Sync unsynced deletes before overwriting them with a lower version
  (reproducted mostly/only after scrubbing)
2023-05-21 15:07:14 +03:00
eeaba11ebd Use fio 3.27-8 for alma9 2023-05-21 14:48:26 +03:00
aea567cfbd Slightly improve scrub docs
Some checks failed
Test / test_cas (push) Successful in 9s
Test / test_change_pg_count (push) Successful in 52s
Test / test_change_pg_count_ec (push) Successful in 1m0s
Test / test_change_pg_size (push) Successful in 16s
Test / test_create_nomaxid (push) Successful in 16s
Test / test_etcd_fail (push) Successful in 56s
Test / test_failure_domain (push) Successful in 13s
Test / test_interrupted_rebalance (push) Successful in 1m24s
Test / test_interrupted_rebalance_imm (push) Successful in 1m10s
Test / test_interrupted_rebalance_ec (push) Successful in 1m9s
Test / test_interrupted_rebalance_ec_imm (push) Successful in 1m6s
Test / test_minsize_1 (push) Failing after 19s
Test / test_move_reappear (push) Successful in 28s
Test / test_rebalance_verify (push) Successful in 2m25s
Test / test_rebalance_verify_imm (push) Successful in 2m19s
Test / test_rebalance_verify_ec (push) Successful in 3m3s
Test / test_rebalance_verify_ec_imm (push) Successful in 2m20s
Test / test_rm (push) Successful in 16s
Test / test_snapshot (push) Successful in 21s
Test / test_snapshot_ec (push) Successful in 28s
Test / test_splitbrain (push) Successful in 20s
Test / test_write_xor (push) Has started running
Test / test_heal_pg_size_2 (push) Has started running
Test / test_write (push) Has started running
Test / test_scrub (push) Has been cancelled
Test / test_scrub_zero_osd_2 (push) Has been cancelled
Test / test_scrub_xor (push) Has been cancelled
Test / test_scrub_pg_size_3 (push) Has been cancelled
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Has been cancelled
Test / test_scrub_ec (push) Has been cancelled
2023-05-21 12:52:30 +03:00
ce02f47de6 Allow to disable scrub_find_best 2023-05-21 12:33:38 +03:00
5fd3208616 Add version archive link to docs 2023-05-21 11:47:33 +03:00
5997b76535 Remove -runtime=10 from fio params in test_scrub, it was breaking the test in CI :D
All checks were successful
Test / test_change_pg_count (push) Successful in 49s
Test / test_change_pg_count_ec (push) Successful in 2m53s
Test / test_change_pg_size (push) Successful in 17s
Test / test_create_nomaxid (push) Successful in 13s
Test / test_etcd_fail (push) Successful in 1m0s
Test / test_failure_domain (push) Successful in 13s
Test / test_interrupted_rebalance (push) Successful in 1m18s
Test / test_interrupted_rebalance_imm (push) Successful in 1m3s
Test / test_interrupted_rebalance_ec (push) Successful in 1m40s
Test / test_interrupted_rebalance_ec_imm (push) Successful in 52s
Test / test_minsize_1 (push) Successful in 22s
Test / test_move_reappear (push) Successful in 23s
Test / test_rebalance_verify (push) Successful in 2m32s
Test / test_rebalance_verify_imm (push) Successful in 2m29s
Test / test_rebalance_verify_ec (push) Successful in 2m55s
Test / test_rebalance_verify_ec_imm (push) Successful in 2m30s
Test / test_rm (push) Successful in 22s
Test / test_snapshot (push) Successful in 26s
Test / test_snapshot_ec (push) Successful in 38s
Test / test_splitbrain (push) Successful in 28s
Test / test_write (push) Successful in 1m5s
Test / test_write_xor (push) Successful in 2m13s
Test / test_write_no_same (push) Successful in 18s
Test / test_heal_ec (push) Successful in 5m27s
Test / test_scrub (push) Successful in 36s
Test / test_scrub_zero_osd_2 (push) Successful in 39s
Test / test_scrub_xor (push) Successful in 1m2s
Test / test_scrub_pg_size_3 (push) Successful in 50s
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 29s
Test / test_scrub_ec (push) Successful in 1m25s
2023-05-21 11:03:59 +03:00
f1961157f0 Fix brute-force error locator for EC n+k with k > 2
Some checks failed
Test / test_change_pg_count_ec (push) Successful in 2m23s
Test / test_change_pg_size (push) Successful in 20s
Test / test_create_nomaxid (push) Successful in 16s
Test / test_etcd_fail (push) Successful in 55s
Test / test_failure_domain (push) Successful in 12s
Test / test_interrupted_rebalance (push) Successful in 1m18s
Test / test_interrupted_rebalance_imm (push) Successful in 1m9s
Test / test_interrupted_rebalance_ec (push) Successful in 1m23s
Test / test_interrupted_rebalance_ec_imm (push) Successful in 1m8s
Test / test_minsize_1 (push) Successful in 22s
Test / test_move_reappear (push) Successful in 28s
Test / test_rebalance_verify (push) Successful in 2m17s
Test / test_rebalance_verify_imm (push) Successful in 2m19s
Test / test_rebalance_verify_ec (push) Successful in 3m4s
Test / test_rebalance_verify_ec_imm (push) Successful in 2m22s
Test / test_rm (push) Successful in 23s
Test / test_snapshot (push) Successful in 20s
Test / test_snapshot_ec (push) Successful in 34s
Test / test_splitbrain (push) Successful in 33s
Test / test_write (push) Successful in 1m15s
Test / test_write_xor (push) Successful in 2m6s
Test / test_write_no_same (push) Successful in 16s
Test / test_heal_pg_size_2 (push) Successful in 5m22s
Test / test_heal_ec (push) Successful in 5m31s
Test / test_scrub (push) Successful in 29s
Test / test_scrub_zero_osd_2 (push) Successful in 27s
Test / test_scrub_xor (push) Successful in 22s
Test / test_scrub_pg_size_3 (push) Failing after 37s
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 29s
Test / test_scrub_ec (push) Failing after 33s
2023-05-21 00:57:14 +03:00
88c1ba0790 Fix compile errors with gcc 10
Some checks reported warnings
Test / build (push) Has started running
Test / buildenv (push) Successful in 11s
Test / make_test (push) Has been cancelled
Test / test_add_osd (push) Has been cancelled
Test / test_cas (push) Has been cancelled
Test / test_change_pg_count (push) Has been cancelled
Test / test_change_pg_count_ec (push) Has been cancelled
Test / test_change_pg_size (push) Has been cancelled
Test / test_create_nomaxid (push) Has been cancelled
Test / test_etcd_fail (push) Has been cancelled
Test / test_failure_domain (push) Has been cancelled
Test / test_interrupted_rebalance (push) Has been cancelled
Test / test_interrupted_rebalance_imm (push) Has been cancelled
Test / test_interrupted_rebalance_ec (push) Has been cancelled
Test / test_interrupted_rebalance_ec_imm (push) Has been cancelled
Test / test_minsize_1 (push) Has been cancelled
Test / test_move_reappear (push) Has been cancelled
Test / test_rebalance_verify (push) Has been cancelled
Test / test_rebalance_verify_imm (push) Has been cancelled
Test / test_rebalance_verify_ec (push) Has been cancelled
Test / test_rebalance_verify_ec_imm (push) Has been cancelled
Test / test_rm (push) Has been cancelled
Test / test_snapshot (push) Has been cancelled
Test / test_snapshot_ec (push) Has been cancelled
Test / test_splitbrain (push) Has been cancelled
Test / test_write (push) Has been cancelled
Test / test_write_xor (push) Has been cancelled
Test / test_write_no_same (push) Has been cancelled
Test / test_heal_pg_size_2 (push) Has been cancelled
Test / test_heal_ec (push) Has been cancelled
2023-05-20 23:20:09 +03:00
b5bd611683 Add scrub tests to CI 2023-05-20 23:20:09 +03:00
fa90b5a4e7 Schedule automatic scrubs correctly (not just after previous scrub) 2023-05-20 23:20:09 +03:00
8d40ad99a6 Add scrub documentation 2023-05-20 23:19:39 +03:00
3475772b07 Add configuration online update documentation 2023-05-20 23:19:39 +03:00
25fcedf6e7 Enable vitastor-cli fix in test 2023-05-20 23:19:39 +03:00
6ca20aa194 Allow scrub to fix corrupted object states 2023-05-20 23:19:39 +03:00
4bfd994341 Sync unsynced deletes before overwriting them with a lower version 2023-05-20 23:19:39 +03:00
59e959dcbb Do not die when "different versions are returned from subops" 2023-05-20 23:19:39 +03:00
a9581f0739 Handle dirty deletes during read correctly O_o 2023-05-20 23:19:39 +03:00
105a405b0a Implement vitastor-cli fix 2023-05-20 23:19:39 +03:00
d55d7d5326 Add scrub test 2023-05-20 23:19:39 +03:00
0e5d0e02a9 Add "vitastor-cli describe" command 2023-05-20 23:19:39 +03:00
0439981a66 Implement "describe object(s)" operation
Required to implement fixing inconsistent objects in vitastor-cli
2023-05-20 23:19:39 +03:00
6648f6bb6e Implement ambiguity detection during scrub 2023-05-20 23:19:39 +03:00
281be547eb Implement brute-force error locator for EC 2023-05-20 23:19:39 +03:00
0c78dd7178 Add no_scrub flag 2023-05-20 23:19:39 +03:00
3c924397e7 Store next scrub timestamp instead of last scrub timestamp 2023-05-20 23:19:39 +03:00
c3bd26193d Implement PG scrub runner 2023-05-20 23:19:39 +03:00
43b77d7619 Implement scrubbing "data path" - OSD_OP_SCRUB 2023-05-20 23:19:39 +03:00
a6d846863b Add min/max stripe and limit to OP_LIST 2023-05-20 23:19:39 +03:00
8dc427b43c Retry failed reads (including chained and RMW) from other replicas 2023-05-20 23:19:39 +03:00
bf2112653b Refcount object_states 2023-05-20 23:19:39 +03:00
0538a484b3 Add corrupted object state 2023-05-20 23:19:39 +03:00
97720fa6b4 Remove unused capture
Some checks reported warnings
Test / buildenv (push) Successful in 12s
Test / build (push) Has started running
Test / make_test (push) Has been cancelled
Test / test_add_osd (push) Has been cancelled
Test / test_cas (push) Has been cancelled
Test / test_change_pg_count (push) Has been cancelled
Test / test_change_pg_count_ec (push) Has been cancelled
Test / test_change_pg_size (push) Has been cancelled
Test / test_create_nomaxid (push) Has been cancelled
Test / test_etcd_fail (push) Has been cancelled
Test / test_failure_domain (push) Has been cancelled
Test / test_interrupted_rebalance (push) Has been cancelled
Test / test_interrupted_rebalance_imm (push) Has been cancelled
Test / test_interrupted_rebalance_ec (push) Has been cancelled
Test / test_interrupted_rebalance_ec_imm (push) Has been cancelled
Test / test_minsize_1 (push) Has been cancelled
Test / test_move_reappear (push) Has been cancelled
Test / test_rebalance_verify (push) Has been cancelled
Test / test_rebalance_verify_imm (push) Has been cancelled
Test / test_rebalance_verify_ec (push) Has been cancelled
Test / test_rebalance_verify_ec_imm (push) Has been cancelled
Test / test_rm (push) Has been cancelled
Test / test_snapshot (push) Has been cancelled
Test / test_snapshot_ec (push) Has been cancelled
Test / test_splitbrain (push) Has been cancelled
Test / test_write (push) Has been cancelled
Test / test_write_xor (push) Has been cancelled
Test / test_write_no_same (push) Has been cancelled
Test / test_heal_pg_size_2 (push) Has been cancelled
Test / test_heal_ec (push) Has been cancelled
2023-05-20 22:58:51 +03:00
e60e352df6 Improve vitastor-nbd documentation 2023-05-20 22:58:51 +03:00
98077a1712 Remove unused dependencies from CSI 2023-05-18 11:54:47 +03:00
1c7d53996d Reweight only 2 OSDs to zero in test_rebalance_verify, otherwise the test does not pass with EC 3+2
All checks were successful
Test / buildenv (push) Successful in 9s
Test / build (push) Successful in 2m20s
Test / test_cas (push) Successful in 11s
Test / make_test (push) Successful in 35s
Test / test_change_pg_size (push) Successful in 22s
Test / test_change_pg_count (push) Successful in 52s
Test / test_create_nomaxid (push) Successful in 19s
Test / test_change_pg_count_ec (push) Successful in 1m3s
Test / test_failure_domain (push) Successful in 13s
Test / test_etcd_fail (push) Successful in 1m0s
Test / test_interrupted_rebalance_imm (push) Successful in 1m3s
Test / test_interrupted_rebalance (push) Successful in 1m14s
Test / test_minsize_1 (push) Successful in 22s
Test / test_move_reappear (push) Successful in 18s
Test / test_interrupted_rebalance_ec_imm (push) Successful in 1m1s
Test / test_interrupted_rebalance_ec (push) Successful in 1m38s
Test / test_rebalance_verify (push) Successful in 2m20s
Test / test_rebalance_verify_imm (push) Successful in 2m1s
Test / test_rm (push) Successful in 26s
Test / test_rebalance_verify_ec (push) Successful in 2m30s
Test / test_snapshot (push) Successful in 22s
Test / test_snapshot_ec (push) Successful in 28s
Test / test_splitbrain (push) Successful in 20s
Test / test_write (push) Successful in 48s
Test / test_write_no_same (push) Successful in 15s
Test / test_rebalance_verify_ec_imm (push) Successful in 2m11s
Test / test_write_xor (push) Successful in 1m28s
Test / test_heal_pg_size_2 (push) Successful in 4m48s
Test / test_heal_ec (push) Successful in 5m12s
Test / test_add_osd (push) Successful in 1m20s
2023-05-18 00:42:40 +03:00
2ca07b1ea7 Raise timeout in test_rebalance_verify
Some checks failed
Test / buildenv (push) Successful in 10s
Test / build (push) Successful in 2m27s
Test / test_cas (push) Successful in 11s
Test / make_test (push) Successful in 34s
Test / test_change_pg_size (push) Successful in 22s
Test / test_change_pg_count (push) Successful in 52s
Test / test_create_nomaxid (push) Successful in 8s
Test / test_failure_domain (push) Successful in 12s
Test / test_etcd_fail (push) Successful in 1m0s
Test / test_interrupted_rebalance (push) Successful in 1m15s
Test / test_add_osd (push) Successful in 2m33s
Test / test_interrupted_rebalance_imm (push) Successful in 1m4s
Test / test_change_pg_count_ec (push) Successful in 2m52s
Test / test_minsize_1 (push) Successful in 19s
Test / test_interrupted_rebalance_ec_imm (push) Successful in 53s
Test / test_move_reappear (push) Successful in 21s
Test / test_interrupted_rebalance_ec (push) Successful in 1m36s
Test / test_rebalance_verify (push) Successful in 2m22s
Test / test_rebalance_verify_imm (push) Successful in 2m22s
Test / test_rm (push) Successful in 15s
Test / test_snapshot (push) Successful in 19s
Test / test_snapshot_ec (push) Successful in 27s
Test / test_rebalance_verify_ec (push) Failing after 3m6s
Test / test_splitbrain (push) Successful in 17s
Test / test_write_no_same (push) Successful in 20s
Test / test_rebalance_verify_ec_imm (push) Failing after 3m9s
Test / test_write (push) Successful in 49s
Test / test_write_xor (push) Successful in 1m17s
Test / test_heal_ec (push) Successful in 4m53s
Test / test_heal_pg_size_2 (push) Failing after 10m10s
2023-05-17 01:58:01 +03:00
022176aa98 Fix NaN during PG optimisation if there are nonexisting OSDs in node_placement
Some checks failed
Test / buildenv (push) Successful in 11s
Test / build (push) Successful in 2m28s
Test / test_cas (push) Successful in 12s
Test / make_test (push) Successful in 40s
Test / test_change_pg_size (push) Successful in 23s
Test / test_change_pg_count (push) Successful in 1m1s
Test / test_create_nomaxid (push) Successful in 7s
Test / test_failure_domain (push) Successful in 11s
Test / test_change_pg_count_ec (push) Successful in 1m35s
Test / test_etcd_fail (push) Successful in 51s
Test / test_add_osd (push) Successful in 2m27s
Test / test_interrupted_rebalance (push) Successful in 1m14s
Test / test_interrupted_rebalance_imm (push) Successful in 1m3s
Test / test_minsize_1 (push) Successful in 28s
Test / test_move_reappear (push) Successful in 41s
Test / test_interrupted_rebalance_ec_imm (push) Successful in 1m13s
Test / test_interrupted_rebalance_ec (push) Successful in 1m49s
Test / test_rebalance_verify (push) Successful in 2m21s
Test / test_rm (push) Successful in 15s
Test / test_rebalance_verify_imm (push) Successful in 2m12s
Test / test_snapshot (push) Successful in 20s
Test / test_snapshot_ec (push) Successful in 28s
Test / test_splitbrain (push) Successful in 23s
Test / test_write_no_same (push) Successful in 17s
Test / test_write (push) Successful in 1m6s
Test / test_write_xor (push) Successful in 1m42s
Test / test_heal_pg_size_2 (push) Successful in 4m57s
Test / test_heal_ec (push) Successful in 4m42s
Test / test_rebalance_verify_ec_imm (push) Failing after 2m19s
Test / test_rebalance_verify_ec (push) Failing after 2m25s
2023-05-17 01:20:30 +03:00
120e3fa7bc Fix pool deletion
Some checks failed
Test / buildenv (push) Successful in 10s
Test / build (push) Successful in 2m32s
Test / test_cas (push) Successful in 13s
Test / make_test (push) Successful in 35s
Test / test_change_pg_size (push) Successful in 21s
Test / test_change_pg_count (push) Successful in 53s
Test / test_create_nomaxid (push) Successful in 17s
Test / test_change_pg_count_ec (push) Successful in 1m3s
Test / test_failure_domain (push) Successful in 16s
Test / test_etcd_fail (push) Successful in 1m3s
Test / test_add_osd (push) Successful in 2m36s
Test / test_interrupted_rebalance_imm (push) Successful in 1m10s
Test / test_interrupted_rebalance (push) Successful in 1m24s
Test / test_minsize_1 (push) Failing after 28s
Test / test_interrupted_rebalance_ec (push) Successful in 1m8s
Test / test_move_reappear (push) Failing after 1m2s
Test / test_interrupted_rebalance_ec_imm (push) Successful in 1m8s
Test / test_rebalance_verify_imm (push) Successful in 2m12s
Test / test_rebalance_verify (push) Successful in 2m22s
Test / test_rm (push) Successful in 21s
Test / test_snapshot (push) Successful in 24s
Test / test_rebalance_verify_ec_imm (push) Successful in 2m19s
Test / test_snapshot_ec (push) Successful in 27s
Test / test_splitbrain (push) Successful in 20s
Test / test_rebalance_verify_ec (push) Successful in 2m33s
Test / test_write_no_same (push) Successful in 15s
Test / test_write (push) Successful in 1m14s
Test / test_write_xor (push) Successful in 2m9s
Test / test_heal_ec (push) Successful in 4m25s
Test / test_heal_pg_size_2 (push) Successful in 4m59s
2023-05-17 00:45:59 +03:00
629999f789 Clear journal_device and meta_device before initialising the next OSD in automatic mode 2023-05-15 23:58:55 +03:00
93eca11ba2 Fix rhel 9 installation docs 2023-05-15 13:09:18 +03:00
5a9e1ede52 Release 0.8.9
All checks were successful
Test / buildenv (push) Successful in 9s
Test / build (push) Successful in 2m31s
Test / test_cas (push) Successful in 12s
Test / make_test (push) Successful in 33s
Test / test_change_pg_size (push) Successful in 19s
Test / test_change_pg_count (push) Successful in 55s
Test / test_create_nomaxid (push) Successful in 21s
Test / test_change_pg_count_ec (push) Successful in 58s
Test / test_failure_domain (push) Successful in 13s
Test / test_etcd_fail (push) Successful in 1m4s
Test / test_interrupted_rebalance (push) Successful in 1m13s
Test / test_interrupted_rebalance_imm (push) Successful in 1m7s
Test / test_add_osd (push) Successful in 2m59s
Test / test_move_reappear (push) Successful in 24s
Test / test_interrupted_rebalance_ec (push) Successful in 1m22s
Test / test_interrupted_rebalance_ec_imm (push) Successful in 1m1s
Test / test_rebalance_verify (push) Successful in 2m12s
Test / test_minsize_1 (push) Successful in 15s
Test / test_rebalance_verify_imm (push) Successful in 2m4s
Test / test_rebalance_verify_ec_imm (push) Successful in 2m9s
Test / test_rm (push) Successful in 17s
Test / test_snapshot (push) Successful in 23s
Test / test_rebalance_verify_ec (push) Successful in 2m31s
Test / test_splitbrain (push) Successful in 23s
Test / test_snapshot_ec (push) Successful in 30s
Test / test_write_no_same (push) Successful in 16s
Test / test_write (push) Successful in 53s
Test / test_write_xor (push) Successful in 1m19s
Test / test_heal_pg_size_2 (push) Successful in 4m30s
Test / test_heal_ec (push) Successful in 4m32s
- The tests are now stable and run in a CI system based on Gitea CI
- The release includes final bug fixes for EC:
  - Implement missing EC recovery of allocation bitmap when built with ISA-L
  - Fix broken snapshot export with EC (allocation bitmap reads were giving incorrect results previously)
- Also fixed bugs manifesting under heavy load:
  - Fix monitor possibly applying incorrect PG history on retries
  - Fix monitor incorrectly changing PG count when last_clean_pgs contains less PGs than the new number
  - Allow writes to wait for free space again, but now correctly (previously dropped in 0.8.2)
  - Fix a rare segfault in client (handle client stop during incoming stream handling in 1 more place)
  - Make monitor correctly handle etcd connection errors - it could die instead of connecting to another etcd
  - Fix OSD rarely being unable to report PG states after a PG was taken over by another OSD
- Fixed return code for incomplete EC objects (now EIO) and made cluster client retry this error
- Made other small changes for tests: timeouts, nice/ionice for etcd, waiting conditions, NBD device checks and so on
2023-05-14 01:25:09 +03:00
1c9a188600 Add tests to CI
All checks were successful
Test / buildenv (push) Successful in 10s
Test / build (push) Successful in 10s
Test / test_cas (push) Successful in 12s
Test / make_test (push) Successful in 34s
Test / test_change_pg_size (push) Successful in 17s
Test / test_create_nomaxid (push) Successful in 9s
Test / test_change_pg_count (push) Successful in 1m29s
Test / test_failure_domain (push) Successful in 11s
Test / test_change_pg_count_ec (push) Successful in 1m35s
Test / test_etcd_fail (push) Successful in 52s
Test / test_add_osd (push) Successful in 2m13s
Test / test_interrupted_rebalance_imm (push) Successful in 1m4s
Test / test_interrupted_rebalance (push) Successful in 1m28s
Test / test_minsize_1 (push) Successful in 21s
Test / test_interrupted_rebalance_ec_imm (push) Successful in 1m4s
Test / test_move_reappear (push) Successful in 30s
Test / test_interrupted_rebalance_ec (push) Successful in 1m53s
Test / test_rebalance_verify_imm (push) Successful in 2m14s
Test / test_rebalance_verify (push) Successful in 2m16s
Test / test_rebalance_verify_ec_imm (push) Successful in 2m4s
Test / test_rm (push) Successful in 22s
Test / test_snapshot (push) Successful in 28s
Test / test_rebalance_verify_ec (push) Successful in 2m27s
Test / test_splitbrain (push) Successful in 24s
Test / test_snapshot_ec (push) Successful in 34s
Test / test_write_no_same (push) Successful in 19s
Test / test_write (push) Successful in 1m19s
Test / test_write_xor (push) Successful in 1m36s
Test / test_heal_pg_size_2 (push) Successful in 4m34s
Test / test_heal_ec (push) Successful in 4m21s
2023-05-14 00:06:09 +03:00
de3e609166 Add a FIXME about QEMU driver thread safety 2023-05-14 00:06:09 +03:00
11481170f5 Add a FIXME about ENOSPC 2023-05-13 23:59:44 +03:00
e69d459d43 Allow rebalance to start in test_interrupted_rebalance, raise etcd start timeout 2023-05-13 15:16:28 +03:00
da82754baa Wait for conditions in test_move_reappear instead of waiting a fixed amount of time 2023-05-12 23:18:07 +03:00
d356aca030 Add missing $NO_SAME OSD argument to test_splitbrain 2023-05-12 23:18:07 +03:00
04a273d213 Raise NBD timeout in tests 2023-05-12 23:18:07 +03:00
6442010f93 Skip offline PGs during state reporting when the state is already deleted or taken over by another OSD
This fixes OSDs being unable to report PG states in rare conditions
2023-05-12 23:17:45 +03:00
6f4dc16c59 Handle etcd connection errors correctly in mon (unhandled error events) 2023-05-11 11:02:44 +03:00
ce4a8067b5 Handle client stop during incoming stream handling in 1 more place 2023-05-11 01:53:41 +03:00
e431ecb715 Make tests more stable in CI 2023-05-11 01:53:41 +03:00
8cac795445 Return EIO instead of EINVAL for incomplete EC objects 2023-05-11 01:15:23 +03:00
a409598b16 Wait for free space again, but count on big_write flushes instead of just flusher activity 2023-05-10 01:51:02 +03:00
f4c6765522 Ignore ENOENT in epoll_ctl 2023-05-08 20:39:20 +03:00
ad2916068a Fix test_add_osd rebalance timeout check 2023-05-08 20:39:20 +03:00
321cb435a6 Fix monitor incorrectly changing PG count when last_clean_pgs contains less PGs than the new number 2023-05-08 20:39:20 +03:00
cfcf4f4355 Support checking /dev/nbdX nodes in Docker 2023-05-08 20:39:20 +03:00
e0fb17bfee Make etcd more stable in tests (add ionice and raise timeout) 2023-05-08 20:36:00 +03:00
5b9031fecc Fix monitor possibly applying incorrect PG history under heavy load
Monitor could deceive itself by immediately saving PG configuration changes
which weren't applied to etcd yet in memory, and apply incorrect PG history
changes next time if the first update fails.

This usually only happened under heavy load and was caught in CI. :-)
2023-05-07 23:23:00 +03:00
5da1d8e1b5 Fix EC just-bitmap reads (len=0) (fixes SCHEME=ec test_snapshot.sh) 2023-05-07 14:00:08 +03:00
44f86f1999 Add a basic EC 2+2 recovery test (not really required, but let it be there) 2023-05-07 11:26:27 +03:00
2d9a80c6f6 Implement missing bitmap recovery with ISA-L \(°□°)/ 2023-05-07 11:25:51 +03:00
5e295e346e Do not make vitastor-mon part of vitastor.target 2023-04-29 00:17:47 +03:00
d9c0898b7c Notes about config and vitastor-disk cache status 2023-04-29 00:08:24 +03:00
04cfb48361 Add a note about PVE 7.4 2023-04-28 11:37:11 +03:00
ab615849d6 Release 0.8.8
- Fix vitastor-cli rm/rm-data broken in 0.8.6 (missing messenger initialization)
- Prepare OSD read handler for upcoming version with scrub - allow "secondary reads" to return errors
- Fix OSDs re-peering PGs infinitely with a big number of PGs (reproduced in test_add_osd)
- Fix another variant of flusher sync-waiting stall (reproduced in test_write)
- Fix other tests in tests/ (will add them to Gitea CI soon)
- Add patches for QEMU 6.2-8.0
- Fix QEMU driver compatibility with QEMU 8.0
- Build packages for RHEL 9 clones (based on AlmaLinux 9)
2023-04-28 11:22:00 +03:00
38be9a49c0 Add AlmaLinux 9 build to documentation 2023-04-28 02:00:52 +03:00
7d6bf84a3e Add scripts/meson-buildoptions.sh to QEMU patches 2023-04-28 01:43:22 +03:00
41a40a4123 Add QEMU spec patch for Alma/Rocky/RH 9 2023-04-28 01:32:06 +03:00
b94587ef0e Fix some build warnings 2023-04-28 00:44:27 +03:00
2a2f4f6738 Add Almalinux 9 build 2023-04-28 00:40:50 +03:00
c768a9015f Fix QEMU driver compatibility with QEMU 8.0 2023-04-25 11:20:21 +03:00
0d9e10cf96 Add patches for QEMU 6.2-8.0 2023-04-25 11:20:21 +03:00
b74ccb613c Fix another variant of flusher sync-waiting stall 2023-04-24 00:44:41 +03:00
5052174918 Fix test_write_no_same (too large image) 2023-04-24 00:44:41 +03:00
eec9cf5575 Fix test_snapshot.sh - qemu-img requires explicit backing_fmt 2023-04-24 00:44:41 +03:00
a04dab0840 Initialize messenger in cluster_client listings 2023-04-24 00:44:41 +03:00
160863f707 Print op pointer values in slow log 2023-04-23 17:54:00 +03:00
2f16c32eb4 Fix test_minsize_1 (left_on_dead) 2023-04-23 17:54:00 +03:00
2877cd0adb Allow OP_SEC_READ to return errors (do not hang the connection) 2023-04-23 17:54:00 +03:00
480509f5b9 Fix pg_data_size > 1 for replicas (harmless bug) 2023-04-23 01:50:42 +03:00
46462da45e Preload own PG history updates to fix PG state loop possibly applying the old metadata version 2023-04-23 01:50:30 +03:00
024c8658f6 Fix missing } in quick start documentation 2023-04-12 18:26:38 +03:00
7e958afeda Release 0.8.7
This release includes a bunch of important bugfixes for erasure-coded setups
with disabled immediate_commit. After these fixes, "test_heal" OSD killing test
now passes fine with EC:

- Fix cluster write stalls with "Error while doing flush on OSD xx: -16 (Device or resource busy)"
  in OSD logs possible in EC setups with disabled immediate_commit by selectively
  syncing nonsynced objects on STABILIZE/ROLLBACK (https://github.com/vitalif/vitastor/issues/51)
- Fix other EC + disabled immediate_commit problems:
  - Fix "opcode=5 retval=-2" errors happening on SYNC retries
  - Fix non-working "pagination" during PG dirty object flushing
  - Fix write operations not continued correctly after dirty object flushing
- Fix incorrect parity read-modify-write calculation when writing into a lost chunk
- Fix OSDs losing left_on_dead PG state of non-clean PGs and thus not removing junk data in the cluster
- Fix a small memory leak caused by bad indexing of EC recovery matrices
- Fix a rare use-after-free in cluster_client caused by a reenterability issue
- Fix vitastor-cli create command syntax in the CSI driver
- Allow to start OSDs without local store for tests
- Fix memory allocation error in disk_tool_meta for non-standard metadata block sizes
- Fix delete operations received before loading pool metadata crashing OSDs with "null pointer exception"
- Improve "theoretical performance" Russian documentation

New features:

- Implement online configuration update for some parameters. Documentation is coming soon :)
2023-04-11 02:11:57 +03:00
2f5e769a29 Fix a small memory leak caused by bad indexing of EC recovery matrices 2023-04-11 00:30:36 +03:00
28d5e53c6c Add test_heal to run_tests 2023-04-09 02:10:42 +03:00
d9f55f11d8 More logs (log_level 10), append to log instead of overwriting on restart in tests 2023-04-09 02:06:10 +03:00
3237014608 Fix incorrect parity read-modify-write calculation when writing into a lost chunk 2023-04-09 02:06:10 +03:00
baaf8f6f44 Fix write operations not continued correctly after flush 2023-04-09 02:06:10 +03:00
1d83fdcd17 Add debug logs to osd_flush 2023-04-09 02:06:10 +03:00
0ddd787c38 Fix non-working "pagination" during PG dirty object flushing 2023-04-08 02:44:02 +03:00
6eff3a60a5 Do not lose left_on_dead PG state of non-clean PGs 2023-04-08 02:44:02 +03:00
888a6975ab Fix a rare use-after-free in cluster_client caused by a reenterability issue 2023-04-08 02:44:02 +03:00
cd1e890bd4 Fix "opcode=5 retval=-2" errors sometimes possible with EC 2023-04-08 02:44:02 +03:00
0fbf4c6a08 Selectively sync nonsynced objects on STABILIZE/ROLLBACK (fix for github issue #51) 2023-04-08 02:44:02 +03:00
d06ed2b0e7 Implement online config update 2023-03-26 19:21:50 +03:00
3bbc46543d Fix vitastor-cli create syntax 2023-03-17 11:12:58 +03:00
2fb0c85618 Allow to start OSDs without local store (only for tests) 2023-03-15 01:13:59 +03:00
d81a6c04fc Update cmake min version so it does not complain about deprecation 2023-03-15 01:08:23 +03:00
7b35801647 Fix possible bad realloc in disk_tool_meta for non-standard metadata block sizes 2023-03-15 01:08:23 +03:00
f3228d5c07 Fix typo (did not affect execution though) 2023-03-15 01:08:23 +03:00
18366f5055 Fix read/write return type in rw_blocking 2023-03-15 01:08:14 +03:00
851507c147 Add missing close() in test stubs 2023-03-15 00:23:56 +03:00
9aaad28488 Fix "null pointer exception" for unhandled OSD_OP_DELETEs (when pool is not loaded yet) 2023-03-02 11:16:39 +03:00
dd57d086fe Add a missing part of the "theoretical performance" to the Russian version 2023-03-01 00:24:54 +03:00
8810eae8fb Release 0.8.6
Important fixes:

- Fix possibly incorrect EC parity chunk updates with EC n+k, k > 1 and when
  the first parity chunk is missing

Minor fixes and improvements:

- Fix incorrect EC free space statistics in vitastor-cli df output
- Speedup vitastor-cli startup in clusters with RDMA
- Remove unused PG "peered" state (previously used to update PG epoch)
- Use sfdisk with just --json in vitastor-disk (--dump --json isn't needed)
- Allow trailing comma in sfdisk output (fixes sfdisk 2.36 compatibility)
- Slightly improve RDMA send/receive code
- Reduce RDMA memory consumption by default (rdma_max_recv/send = 16/8)
- Use vitastor-cli instead of direct etcd interaction in the CSI driver
2023-02-28 11:18:48 +03:00
c1365f46c9 Use vitastor-cli instead of direct etcd interaction in the CSI driver 2023-02-28 11:02:50 +03:00
14d6acbcba Set default rdma_max_recv/send to 16/8, fix documentation 2023-02-28 11:00:56 +03:00
1e307069bc Fix missing parity chunk calculation for EC n+k, k > 1 and first parity chunk missing 2023-02-28 02:40:19 +03:00
c3e80abad7 Allow to send more than 1 operation at a time 2023-02-26 02:01:04 +03:00
138ffe4032 Reuse incoming RDMA buffers 2023-02-26 00:55:01 +03:00
8139a34e97 Fix json11: allow trailing comma 2023-02-23 01:16:01 +03:00
4ab630b44d Use just sfdisk --json, --dump is not needed 2023-02-23 00:55:47 +03:00
2c8241b7db Remove PG "peered" state 2023-02-21 01:30:42 +03:00
36a7dd3671 Move tests to "make test" 2023-02-21 01:30:42 +03:00
936122bbcf Initialize msgr lazily in client to speedup vitastor-cli with RDMA enabled 2023-02-19 18:59:07 +03:00
1a1ba0d1e7 Add set_immediate to ringloop and use it for bs/osd ops to prevent reenterability issues 2023-02-09 17:37:26 +03:00
3d09c9cec7 Remove unused wait_sqe() from ringloop 2023-02-09 17:37:26 +03:00
3d08a1ad6c Fix cluster_client test after last reenterability fixes 2023-02-05 01:47:32 +03:00
499881d81c Fix typo 2023-01-27 01:52:02 +03:00
aba93b951b Fix incorrect EC free space statistics in vitastor-cli df output 2023-01-26 02:04:29 +03:00
d125fb1f30 Release 0.8.5
- Fix a possible "double free" bug in the client library happening on OSD restart
- Fix a possible write hang on PG history update when only epoch is changed
- Fix incorrect systemd target "local.target" in mon/make-etcd
- Allow "content" option in PVE storage plugin to allow to enable containers
- Build client library without tcmalloc which fixes "attempt to free invalid pointer"
  errors when, for example, trying to run QEMU with both Vitastor and Ceph RBD disks
2023-01-25 01:43:49 +03:00
9d3fd72298 Require liburing < 2 in rpm specs 2023-01-25 01:43:49 +03:00
8b552a01f9 Do not retry successful operation parts in client (could lead to "double free" bugs) 2023-01-25 01:30:36 +03:00
0385b2f9e8 Fix write hangs on PG epoch update - always set pg.history_changed to true 2023-01-25 01:30:15 +03:00
749c837045 Replace non-existing local.target with multi-user.target 2023-01-25 01:29:31 +03:00
98001d845b Remove version from vitastor-release.rpm links 2023-01-23 14:03:33 +03:00
c96bcae74b Allow "content" option in PVE storage plugin to allow to enable containers 2023-01-16 18:14:45 +03:00
9f4e34a8cc Build client library without tcmalloc
Fixes "[src/tcmalloc.cc:332] Attempt to free invalid pointer ..." when trying
to run QEMU with both Vitastor and Ceph RBD disks and other possible allocator
collisions.
2023-01-15 00:01:11 +03:00
81fc8bb94c Release 0.8.4
New features:
- Implement QCOW2 image/snapshot export via qemu-img (bdrv_co_block_status in the driver)
- Remove OSDs from PG history during `vitastor-cli rm-osd` to prevent `left_on_dead` PG states after deletion
- Add a new recovery_pg_switch setting to mix all PGs during recovery, to almost
  fully reduce the probability of ENOSPC during rebalance
- Introduce partial ENOSPC ("OSD is full") handling - now ENOSPC doesn't turn
  into cascades of crashes
- Add migration support to Proxmox VE Vitastor driver
- Track last_clean_pgs on a per-pool basis thus reducing data movement in a cluster
  with pools remaining unclean/degraded for a long time

Bug fixes:
- Fix a bug where monitor could generate degraded PGs if one of the hosts had no OSDs
- Fix a bug where monitor could skip PG redistribution with a lot of OSDs in cluster
- Report PG history synchronously on the first write, which improves PG consistency
  and availability at the same time, because history now gets reported correctly
  and doesn't get reported without the need for it
- Fix possible write and recovery stalls which could happen in a cluster with both EC and replicated pools
- Make OSD and monitors sanitize & deduplicate PG history items in etcd
- Fix non-working OSD peer config safety check
- Fix a rare journal flush stall where flushing wasn't activated with full journal, but with empty flush queue
- Fix builds without ISA-L (jerasure-only) crashing with EC N+K, K>=2 due to the lack of 16-byte buffer alignment
- Fix a possible crash for EC N+K, K>=2 when calculating a parity chunk with previous parity chunk missing
- Fix a bug where vitastor-disk purge with suppressed warnings didn't work
2023-01-13 23:59:54 +03:00
bc465c16de Fix arithmetic on void* for clang 2023-01-13 23:58:42 +03:00
8763e9211c Fix qemu driver compilation warning/error 2023-01-13 23:44:39 +03:00
9e1a80bd17 Replace apt-key with trusted.gpg.d 2023-01-13 19:51:47 +03:00
3e280f2f08 Mark vitastor as shared storage in PVE driver 2023-01-13 01:36:30 +03:00
fe87b4076b Fix backwards compatibility in cluster_client 2023-01-12 02:37:31 +03:00
a38957c1a7 Skip empty hosts in lp-optimizer 2023-01-09 16:26:16 +03:00
137309cf29 Implement bdrv_co_block_status for snapshot export support 2023-01-07 17:06:58 +03:00
373f9d0387 Try to re-peer PGs on history change 2023-01-06 12:46:44 +03:00
c4516ea971 Also remove deleted OSD from PG configuration and last_clean_pgs 2023-01-06 12:46:44 +03:00
91065c80fc Try to prevent left_on_dead when deleting OSDs by removing them from PG history 2023-01-06 12:46:43 +03:00
0f6b946add Time changes with every stat change, do not schedule checks based on it 2023-01-05 13:54:16 +03:00
465cbf0b2f Do not re-schedule recheck indefinitely, run it after mon_change_timeout in any case 2023-01-05 13:48:06 +03:00
41add50e4e Track last_clean_pgs on a per-pool basis 2023-01-03 02:20:50 +03:00
02e7be7dc9 Prevent reenterability side effects during PG history operation resume 2023-01-03 02:20:50 +03:00
73940adf07 Prioritize EC (non-instantly-stable) operations under journal pressure
This reduces the probability of hitting OSD stalls with EC due to "deadlocks"
where two parallel write operations wait for each other to complete
2023-01-03 00:05:45 +03:00
e950c024d3 Do not sync peer OSDs before listing
Sync before listing was added to wait for all PG writes possibly left in queue
from the previous master to finish before listing it

But in fact it may block the cluster when EC is used and some unstable writes
are left in the queue - they block journal flushing, rollback/stabilize is
required to unblock them, but rollback/stabilize may only happen after PG is
peered. But peering needs listings, listings are requested only after sync, and
sync itself waits for currently blocked writes waiting in the queue
2023-01-03 00:05:45 +03:00
71d6d9f868 Fix possible crash on ENOSPC during operation cancel in blockstore 2023-01-03 00:05:45 +03:00
a4dfa519af Report PG history synchronously during write
This has 2 effects:
1) OSD sets aren't added into PG history until actual write attempts anymore
   which removes unneeded extra osd_sets in PG history
2) New OSD sets are reported synchronously and can't be lost on PG restarts
   happening at the same time with reconfiguration
2023-01-01 23:41:05 +03:00
37a6aff2fa Write OSD numbers always as numbers in mon 2023-01-01 23:17:42 +03:00
67019f5b02 Make OSD sort & sanitize PG history items 2023-01-01 23:17:42 +03:00
0593e5c21c Fix OSD peer config safety check 2022-12-31 02:24:42 +03:00
998e24adf8 Add a new recovery_pg_switch setting to mix all PGs during recovery 2022-12-30 02:03:33 +03:00
d7bd36dc32 Fix another rare journal flush stall 2022-12-30 02:03:33 +03:00
cf5c562800 Log all object locations when peering PGs 2022-12-30 02:03:33 +03:00
629200b0cc Return ENOSPC as the primary OSD 2022-12-30 02:03:33 +03:00
3589ccec22 Do not disconnect peer on ENOSPC during write 2022-12-30 01:54:25 +03:00
8d55a1e780 Build osd_rmw_test both with and without ISA-L 2022-12-29 19:13:57 +03:00
65f6b3a4eb Fix jerasure crashing on bitmap calculation/restoration due to the lack of 16-byte alignment 2022-12-29 19:13:57 +03:00
fd216eac77 Add a test for missing parity chunk calculation 2022-12-29 19:13:57 +03:00
61fca7c426 Fix crash when calculating a parity chunk with previous parity chunk missing (test coming shortly) 2022-12-29 19:13:57 +03:00
1c29ed80b9 Fix quote in docs :) 2022-12-28 18:08:53 +03:00
68f3fb795e Suppress warnings in vitastor-disk purge correctly 2022-12-27 11:09:19 +03:00
fa90f287da Release 0.8.3
- Implement a new "vitastor-disk purge" command to remove OSDs with safety checks
- Implement a new "vitastor-cli rm-osd" command to only remove OSD metadata from etcd
- Fix a bug where the monitor could ignore OSD removal and other /osd/stats key changes
- Fix a bug where garbage could be returned when reading objects being written at the same time
- Fix a rare write stall where journal space could be not reclaimed where there
  were no new operations in the flush queue
- Fix a rare peering stall caused by a previous long listing operations queues limiting attempt
- Fix total object count statistic in OSD on object creation
- Add missing offset&len into vitastor-disk dump-journal for big_writes, fix JSON format
- Make vitastor-cli print help on missing command
- Make vitastor-cli translate all '-' to '_' in CLI options
2022-12-27 02:40:55 +03:00
795020674d Loop journal flusher when the queue is empty but there is a trim request 2022-12-27 02:28:20 +03:00
8e12285629 Fix vitastor-disk purge (now it works) 2022-12-27 02:28:20 +03:00
b9b50ab4cc Implement vitastor-disk purge command 2022-12-26 02:48:48 +03:00
0d8625f92d Make vitastor-cli print help on missing command 2022-12-26 02:48:48 +03:00
2f3c2c5140 Implement safety check for OSD removal, translate all '-' to '_' in cli options
'-' to '_' translation fixes a bug with create --image_meta
2022-12-26 02:48:48 +03:00
4ebdd02b0f Remove LIST op limiter
It doesn't prevent OSD slow ops but may itself lead to stalls :)
2022-12-26 02:48:48 +03:00
bf6fdc4141 Check add/rm osd with 2048 PGs 2022-12-26 02:48:48 +03:00
c2244331e6 Add vitastor-cli rm-osd command 2022-12-26 02:48:48 +03:00
3de57e87b1 Recheck OSD tree in monitor on /osd/stats changes 2022-12-26 02:48:48 +03:00
2d4cc688b2 Add a remove-osd test 2022-12-26 02:48:48 +03:00
31bd1ec145 Fix object creation check for statistics 2022-12-21 02:51:11 +03:00
c08d1f2dfe Add missing offset&len into big_writes journal dump, fix commas again 2022-12-21 02:51:11 +03:00
1d80bcc8d0 Fix blockstore returning garbage for unstable reads if there is an in-flight version
"In-flight" versions are added into dirty_db when writes are enqueued. And they
weren't ignored by subsequent reads even though they didn't have data location yet.
This bug was leading to test_heal.sh not passing sometimes with replicated setups.
2022-12-21 02:48:24 +03:00
5ef8bed75f Release 0.8.2
- Fix QEMU driver compatibility with QEMU 7.0 and < 2.9
- Add patches for pve-qemu-kvm 7.1 (PVE 7.3) and pve-qemu-kvm 6.2 (PVE 7.2)
- Fix Proxmox driver location in the pve-storage-vitastor package
- Disable HDD autodetection in non-hybrid mode
- Explicitly warn about a buggy kernels on -EAGAIN in io_uring
- Final fix for the lack of zeroing out of old metadata entries
  (do not crash with "big_write journal_entry was allocated over another object"
  in some cases after an unclean OSD shutdown)
- Wait for data writes before fsyncing data if data fsync is enabled
- Never try to wait for free space inside blockstore thus stalling OSDs
- Fix a rare crash in osd_peering due to callback ordering
- Fix a rare duplication of ping & op message IDs
- Fix a rare use-after-free during pings
- Add --force to vitastor-disk read-sb
- Make vitastor-disk dump metadata object IDs in hex, add forgotten commas
- Fix vitastor-disk SCSI disk cache check
2022-12-17 17:54:13 +03:00
8669998e5e Fix discard_list_subop() for local ops 2022-12-17 17:54:13 +03:00
b457327e77 Oops. Fix metadata read after fixes :-) 2022-12-17 17:31:57 +03:00
f7fa9d5e34 Fix SCSI device cache type check 2022-12-17 17:31:57 +03:00
49b88b01f9 Fix clang build 2022-12-17 16:25:26 +03:00
71688bcb59 Disable HDD autodetection in non-hybrid mode 2022-12-17 16:12:15 +03:00
552e207d2b Explicitly print errors about -EAGAIN in io_uring 2022-12-17 15:49:49 +03:00
5464821fa5 Final fix for the lack of zeroing out of old metadata entries
If a crash occurs during flushing a redirect-write it may happen so that
the disk contains both old and new metadata entries. This is OK, but prior
to 0.8.0 after this situation OSDs started without problem, but then they
crashed after some more overwrites with a "tried to overwrite non-zero
metadata entry" error. 0.8.0 introduced a change that was intended to fix
this situation, but rather than fixing it it prevented OSDs from starting,
now because of a "big_write journal_entry was allocated over another object"
error... :-)

This change finally fixes the original issue.

Followup to 54ef2c389f
2022-12-17 14:50:31 +03:00
6917a32ca8 Add --force to vitastor-disk read-sb 2022-12-17 02:47:15 +03:00
f8722a8bd5 Dump meta in hex 2022-12-17 01:50:38 +03:00
9c2f69c9fa Add forgotten commas to vitastor-disk dump-journal 2022-12-17 01:22:58 +03:00
1a93e3f33a Wait for data writes before fsyncing data if data fsync is enabled 2022-12-16 20:46:55 +03:00
3f35744052 Fix compatibility with QEMU aio_set_fd_handler signatures in 7.0 and < 2.9 2022-12-15 19:17:17 +03:00
66f14ac019 Update notes about Proxmox 7.1-7.3 2022-12-15 18:57:28 +03:00
1364009931 Add patches for pve-qemu-kvm 7.1 (PVE 7.3) and pve-qemu-kvm 6.2 (PVE 7.2) 2022-12-14 19:01:36 +03:00
d7e30b8353 Fix pve-storage-vitastor filename 2022-12-14 16:41:35 +03:00
cb437913d3 Never try to wait for free space inside blockstore 2022-12-12 00:27:05 +03:00
472bce58ab Fix rare crash in osd_peering due to callback ordering 2022-12-12 00:27:05 +03:00
7a71e7ef01 Fix possible duplication of ping & op message IDs 2022-12-04 00:16:47 +03:00
c71e5e7bbd Fix possible use-after-free during pings 2022-12-04 00:16:47 +03:00
204 changed files with 11819 additions and 1984 deletions

View File

@@ -0,0 +1,36 @@
FROM node:16-bullseye
WORKDIR /root
ADD ./docker/vitastor.gpg /etc/apt/trusted.gpg.d
RUN echo 'deb http://deb.debian.org/debian bullseye-backports main' >> /etc/apt/sources.list; \
echo 'deb http://vitastor.io/debian bullseye main' >> /etc/apt/sources.list; \
echo >> /etc/apt/preferences; \
echo 'Package: *' >> /etc/apt/preferences; \
echo 'Pin: release a=bullseye-backports' >> /etc/apt/preferences; \
echo 'Pin-Priority: 500' >> /etc/apt/preferences; \
echo >> /etc/apt/preferences; \
echo 'Package: *' >> /etc/apt/preferences; \
echo 'Pin: origin "vitastor.io"' >> /etc/apt/preferences; \
echo 'Pin-Priority: 1000' >> /etc/apt/preferences; \
grep '^deb ' /etc/apt/sources.list | perl -pe 's/^deb/deb-src/' >> /etc/apt/sources.list; \
echo 'APT::Install-Recommends false;' >> /etc/apt/apt.conf; \
echo 'APT::Install-Suggests false;' >> /etc/apt/apt.conf
RUN apt-get update
RUN apt-get -y install etcd qemu-system-x86 qemu-block-extra qemu-utils fio libasan5 \
liburing1 liburing-dev libgoogle-perftools-dev devscripts libjerasure-dev cmake libibverbs-dev libisal-dev
RUN apt-get -y build-dep fio qemu=`dpkg -s qemu-system-x86|grep ^Version:|awk '{print $2}'`
RUN apt-get -y install jq lp-solve sudo
RUN apt-get --download-only source fio qemu=`dpkg -s qemu-system-x86|grep ^Version:|awk '{print $2}'`
RUN set -ex; \
mkdir qemu-build; \
cd qemu-build; \
dpkg-source -x /root/qemu*.dsc; \
cd qemu*/; \
debian/rules configure-qemu || debian/rules b/configure-stamp; \
cd b/qemu; \
make -j8 config-poison.h || true; \
make -j8 qapi/qapi-builtin-types.h

View File

@@ -0,0 +1,19 @@
FROM git.yourcmc.ru/vitalif/vitastor/buildenv
ADD . /root/vitastor
RUN set -e -x; \
mkdir -p /root/fio-build/; \
cd /root/fio-build/; \
dpkg-source -x /root/fio*.dsc; \
cd /root/vitastor; \
ln -s /root/fio-build/fio-*/ ./fio; \
ln -s /root/qemu-build/qemu-*/ ./qemu; \
ls /usr/include/linux/raw.h || cp ./debian/raw.h /usr/include/linux/raw.h; \
cd mon; \
npm install; \
cd ..; \
mkdir build; \
cd build; \
cmake .. -DWITH_ASAN=yes -DWITH_QEMU=yes; \
make -j16

732
.gitea/workflows/test.yml Normal file
View File

@@ -0,0 +1,732 @@
name: Test
on:
push:
branches:
- '*'
paths:
- '.gitea/**'
- 'src/**'
- 'mon/**'
- 'json11'
- 'cpp-btree'
- 'tests/**'
env:
BUILDENV_IMAGE: git.yourcmc.ru/vitalif/vitastor/buildenv
TEST_IMAGE: git.yourcmc.ru/vitalif/vitastor/test
OSD_ARGS: '--etcd_quick_timeout 2000'
concurrency:
group: ci-${{ github.ref }}
cancel-in-progress: true
jobs:
buildenv:
runs-on: ubuntu-latest
container: git.yourcmc.ru/vitalif/gitea-ci-dind
steps:
- uses: actions/checkout@v3
- name: Build and push
run: |
set -ex
if ! docker manifest inspect $BUILDENV_IMAGE >/dev/null; then
docker build -t $BUILDENV_IMAGE -f .gitea/workflows/buildenv.Dockerfile .
docker login git.yourcmc.ru -u vitalif -p "${{secrets.TOKEN}}"
docker push $BUILDENV_IMAGE
fi
build:
runs-on: ubuntu-latest
needs: buildenv
container: git.yourcmc.ru/vitalif/gitea-ci-dind
steps:
- uses: actions/checkout@v3
with:
submodules: true
- name: Build and push
run: |
set -ex
if ! docker manifest inspect $TEST_IMAGE:$GITHUB_SHA >/dev/null; then
docker build -t $TEST_IMAGE:$GITHUB_SHA -f .gitea/workflows/test.Dockerfile .
docker login git.yourcmc.ru -u vitalif -p "${{secrets.TOKEN}}"
docker push $TEST_IMAGE:$GITHUB_SHA
fi
make_test:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
# leak sanitizer sometimes crashes
- run: cd /root/vitastor/build && ASAN_OPTIONS=detect_leaks=0 make -j16 test
test_add_osd:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 10
run: /root/vitastor/tests/test_add_osd.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_cas:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_cas.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_change_pg_count:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_change_pg_count.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_change_pg_count_ec:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: SCHEME=ec /root/vitastor/tests/test_change_pg_count.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_change_pg_size:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_change_pg_size.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_create_nomaxid:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_create_nomaxid.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_etcd_fail:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 10
run: /root/vitastor/tests/test_etcd_fail.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_interrupted_rebalance:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 10
run: /root/vitastor/tests/test_interrupted_rebalance.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_interrupted_rebalance_imm:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 10
run: IMMEDIATE_COMMIT=1 /root/vitastor/tests/test_interrupted_rebalance.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_interrupted_rebalance_ec:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 10
run: SCHEME=ec /root/vitastor/tests/test_interrupted_rebalance.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_interrupted_rebalance_ec_imm:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 10
run: SCHEME=ec IMMEDIATE_COMMIT=1 /root/vitastor/tests/test_interrupted_rebalance.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_failure_domain:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_failure_domain.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_snapshot:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_snapshot.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_snapshot_ec:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: SCHEME=ec /root/vitastor/tests/test_snapshot.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_minsize_1:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_minsize_1.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_move_reappear:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_move_reappear.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_rm:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_rm.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_snapshot_chain:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_snapshot_chain.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_snapshot_chain_ec:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: SCHEME=ec /root/vitastor/tests/test_snapshot_chain.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_snapshot_down:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_snapshot_down.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_snapshot_down_ec:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: SCHEME=ec /root/vitastor/tests/test_snapshot_down.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_splitbrain:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_splitbrain.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_rebalance_verify:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 10
run: /root/vitastor/tests/test_rebalance_verify.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_rebalance_verify_imm:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 10
run: IMMEDIATE_COMMIT=1 /root/vitastor/tests/test_rebalance_verify.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_rebalance_verify_ec:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 10
run: SCHEME=ec /root/vitastor/tests/test_rebalance_verify.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_rebalance_verify_ec_imm:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 10
run: SCHEME=ec IMMEDIATE_COMMIT=1 /root/vitastor/tests/test_rebalance_verify.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_write:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_write.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_write_xor:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: SCHEME=xor /root/vitastor/tests/test_write.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_write_no_same:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_write_no_same.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_heal_pg_size_2:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 10
run: PG_SIZE=2 /root/vitastor/tests/test_heal.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_heal_ec:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 10
run: SCHEME=ec /root/vitastor/tests/test_heal.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_scrub:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_scrub.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_scrub_zero_osd_2:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: ZERO_OSD=2 /root/vitastor/tests/test_scrub.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_scrub_xor:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: SCHEME=xor /root/vitastor/tests/test_scrub.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_scrub_pg_size_3:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: PG_SIZE=3 /root/vitastor/tests/test_scrub.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: PG_SIZE=6 PG_MINSIZE=4 OSD_COUNT=6 SCHEME=ec /root/vitastor/tests/test_scrub.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_scrub_ec:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: SCHEME=ec /root/vitastor/tests/test_scrub.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done

View File

@@ -0,0 +1,69 @@
#!/usr/bin/perl
use strict;
for my $line (<>)
{
if ($line =~ /\.\/(test_[^\.]+)/s)
{
chomp $line;
my $test_name = $1;
my $timeout = 3;
if ($test_name eq 'test_etcd_fail' || $test_name eq 'test_heal' || $test_name eq 'test_add_osd' ||
$test_name eq 'test_interrupted_rebalance' || $test_name eq 'test_rebalance_verify')
{
$timeout = 10;
}
while ($line =~ /([^\s=]+)=(\S+)/gs)
{
if ($1 eq 'SCHEME' && $2 eq 'ec')
{
$test_name .= '_ec';
}
elsif ($1 eq 'SCHEME' && $2 eq 'xor')
{
$test_name .= '_xor';
}
elsif ($1 eq 'IMMEDIATE_COMMIT')
{
$test_name .= '_imm';
}
else
{
$test_name .= '_'.lc($1).'_'.$2;
}
}
$line =~ s!\./test_!/root/vitastor/tests/test_!;
# Gitea CI doesn't support artifacts yet, lol
#- name: Upload results
# uses: actions/upload-artifact\@v3
# if: always()
# with:
# name: ${test_name}_result
# path: |
# /root/vitastor/testdata
# !/root/vitastor/testdata/*.bin
# retention-days: 5
print <<"EOF"
$test_name:
runs-on: ubuntu-latest
needs: build
container: \${{env.TEST_IMAGE}}:\${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: $timeout
run: $line
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- \$i --------"
cat \$i
echo ""
done
EOF
;
}
}

View File

@@ -1,7 +1,7 @@
cmake_minimum_required(VERSION 2.8)
cmake_minimum_required(VERSION 2.8.12)
project(vitastor)
set(VERSION "0.8.1")
set(VERSION "0.9.2")
add_subdirectory(src)

View File

@@ -48,9 +48,9 @@ Vitastor, составлены для того, чтобы убедиться,
интерфейс (прокси), опять же, без открытия в свободный публичный доступ как
самой программы, так и прокси.
Сетевая Публичная Лицензия Vitastor разработана специально чтобы
Сетевая Публичная Лицензия Vitastor разработана специально, чтобы
гарантировать, что в таких случаях и модифицированная версия программы, и
прокси оставались доступными сообществу. Для этого лицензия требует от
прокси останутся доступными сообществу. Для этого лицензия требует от
операторов сетевых серверов предоставлять исходный код оригинальной программы,
а также всех других программ, взаимодействующих с ней на их серверах,
пользователям этих серверов, на условиях свободных лицензий. Таким образом,

View File

@@ -1,4 +1,4 @@
VERSION ?= v0.8.1
VERSION ?= v0.9.2
all: build push

View File

@@ -49,7 +49,7 @@ spec:
capabilities:
add: ["SYS_ADMIN"]
allowPrivilegeEscalation: true
image: vitalif/vitastor-csi:v0.8.1
image: vitalif/vitastor-csi:v0.9.2
args:
- "--node=$(NODE_ID)"
- "--endpoint=$(CSI_ENDPOINT)"

View File

@@ -116,7 +116,7 @@ spec:
privileged: true
capabilities:
add: ["SYS_ADMIN"]
image: vitalif/vitastor-csi:v0.8.1
image: vitalif/vitastor-csi:v0.9.2
args:
- "--node=$(NODE_ID)"
- "--endpoint=$(CSI_ENDPOINT)"

View File

@@ -4,25 +4,10 @@ go 1.15
require (
github.com/container-storage-interface/spec v1.4.0
github.com/coreos/bbolt v0.0.0-00010101000000-000000000000 // indirect
github.com/coreos/etcd v3.3.25+incompatible // indirect
github.com/coreos/go-semver v0.3.0 // indirect
github.com/coreos/go-systemd v0.0.0-20191104093116-d3cd4ed1dbcf // indirect
github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f // indirect
github.com/dustin/go-humanize v1.0.0 // indirect
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b
github.com/gorilla/websocket v1.4.2 // indirect
github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 // indirect
github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 // indirect
github.com/grpc-ecosystem/grpc-gateway v1.16.0 // indirect
github.com/jonboulle/clockwork v0.2.2 // indirect
github.com/kubernetes-csi/csi-lib-utils v0.9.1
github.com/soheilhy/cmux v0.1.5 // indirect
github.com/tmc/grpc-websocket-proxy v0.0.0-20201229170055-e5319fda7802 // indirect
github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2 // indirect
go.etcd.io/bbolt v0.0.0-00010101000000-000000000000 // indirect
go.etcd.io/etcd v3.3.25+incompatible
golang.org/x/net v0.0.0-20201202161906-c7110b5ffcbb
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect
google.golang.org/grpc v1.33.1
k8s.io/klog v1.0.0
k8s.io/utils v0.0.0-20210305010621-2afb4311ab10

View File

@@ -31,14 +31,11 @@ github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuy
github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY=
github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q=
github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8=
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
github.com/blang/semver v3.5.0+incompatible/go.mod h1:kRBLl5iJ+tD4TcOOxsy/0fnwebNt5EWlYSAyrTnjyyk=
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
github.com/cespare/xxhash/v2 v2.1.1 h1:6MnRN8NT7+YBpUIWxHtefFZOKTAPgGjpQSxqLNn0+qY=
github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
@@ -46,25 +43,12 @@ github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMn
github.com/container-storage-interface/spec v1.2.0/go.mod h1:6URME8mwIBbpVyZV93Ce5St17xBiQJQY67NDsuohiy4=
github.com/container-storage-interface/spec v1.4.0 h1:ozAshSKxpJnYUfmkpZCTYyF/4MYeYlhdXbAvPvfGmkg=
github.com/container-storage-interface/spec v1.4.0/go.mod h1:6URME8mwIBbpVyZV93Ce5St17xBiQJQY67NDsuohiy4=
github.com/coreos/bbolt v1.3.5 h1:XFv7xaq7701j8ZSEzR28VohFYSlyakMyqNMU5FQH6Ac=
github.com/coreos/bbolt v1.3.5/go.mod h1:G5EMThwa9y8QZGBClrRx5EY+Yw9kAhnjy3bSjsnlVTQ=
github.com/coreos/etcd v3.3.25+incompatible h1:0GQEw6h3YnuOVdtwygkIfJ+Omx0tZ8/QkVyXI4LkbeY=
github.com/coreos/etcd v3.3.25+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE=
github.com/coreos/go-semver v0.3.0 h1:wkHLiw0WNATZnSG7epLsujiMCgPAc9xhjJ4tgnAxmfM=
github.com/coreos/go-semver v0.3.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk=
github.com/coreos/go-systemd v0.0.0-20191104093116-d3cd4ed1dbcf h1:iW4rZ826su+pqaw19uhpSCzhj44qo35pNgKFGqzDKkU=
github.com/coreos/go-systemd v0.0.0-20191104093116-d3cd4ed1dbcf/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4=
github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f h1:lBNOc5arjvs8E5mO2tbpBpLoyyu8B6e44T7hJy6potg=
github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f/go.mod h1:E3G3o1h8I7cfcXa63jLwjI0eiQQMgzzUDFVpN/nH/eA=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/dgrijalva/jwt-go v3.2.0+incompatible h1:7qlOGliEKZXTDg6OTjfoBKDXWrumCAMpl/TFQ4/5kLM=
github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ=
github.com/docker/spdystream v0.0.0-20160310174837-449fdfce4d96/go.mod h1:Qh8CwZgvJUkLughtfhJv5dyTYa91l1fOUCrgjqmcifM=
github.com/docopt/docopt-go v0.0.0-20180111231733-ee0de3bc6815/go.mod h1:WwZ+bS3ebgob9U8Nd0kOddGdZWjyMGR8Wziv+TBNwSE=
github.com/dustin/go-humanize v1.0.0 h1:VSnTsYCnlFHaM2/igO1h6X3HA71jcobQuxemgkq4zYo=
github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
github.com/elazarl/goproxy v0.0.0-20180725130230-947c36da3153/go.mod h1:/Zj4wYkgs4iZTTu3o/KG3Itv/qCCa8VVMlb3i9OVuzc=
github.com/emicklei/go-restful v0.0.0-20170410110728-ff4f55a20633/go.mod h1:otzb+WCGbkyDHkqmQmT5YD2WR4BBwUdeQoFo8l/7tVs=
github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
@@ -73,7 +57,6 @@ github.com/evanphx/json-patch v4.9.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLi
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ=
github.com/ghodss/yaml v0.0.0-20150909031657-73d445a93680/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8=
github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as=
github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as=
@@ -88,14 +71,10 @@ github.com/go-openapi/spec v0.0.0-20160808142527-6aced65f8501/go.mod h1:J8+jY1nA
github.com/go-openapi/swag v0.0.0-20160704191624-1d0bd113de87/go.mod h1:DXUve3Dpr1UfpPtxFw+EFuQ41HhCWZfha5jSVRG7C7I=
github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY=
github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ=
github.com/gogo/protobuf v1.3.1 h1:DqDEcV5aeaTmdFBePNpYsp3FlcVH/2ISVVM9Qf8PSls=
github.com/gogo/protobuf v1.3.1/go.mod h1:SlYgWuQ5SjCEi6WLHjHCa1yvBfUnHcTbrrZtXPKa29o=
github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b h1:VKtxabqXZkF25pY9ekfRL6a582T4P37/31XEstQ5p58=
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
github.com/golang/groupcache v0.0.0-20191227052852-215e87163ea7 h1:5ZkaAPbicIKTF2I64qf5Fh8Aa83Q/dnOafMYV0OMwjA=
github.com/golang/groupcache v0.0.0-20191227052852-215e87163ea7/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
github.com/golang/mock v1.2.0/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
@@ -113,7 +92,6 @@ github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QD
github.com/golang/protobuf v1.4.2 h1:+Z5KGCizgyZCbGh1KZqA0fcLLkwbsjIzS4aV2v7wJX0=
github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI=
github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
github.com/google/btree v1.0.0 h1:0udJVsspx3VBr5FwtLhQQtuAsVc79tTq0ocGIPAU6qo=
github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
@@ -127,38 +105,24 @@ github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OI
github.com/google/pprof v0.0.0-20190515194954-54271f7e092f/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc=
github.com/google/pprof v0.0.0-20191218002539-d4f498aebedc/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM=
github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI=
github.com/google/uuid v1.1.1 h1:Gkbcsh/GbpXz7lPftLA3P6TYMwjCLYm83jiFQZF/3gY=
github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg=
github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk=
github.com/googleapis/gnostic v0.4.1/go.mod h1:LRhVm6pbyptWbWbuZ38d1eyptfvIytN3ir6b65WBswg=
github.com/gorilla/websocket v1.4.2 h1:+/TMaTYc4QFitKJxsQ7Yye35DkWvkdLcvGKqM+x0Ufc=
github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7/go.mod h1:FecbI9+v66THATjSRHfNgh1IVFe/9kFxbXtjV0ctIMA=
github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 h1:+9834+KizmvFV7pXQGSXQTsaWhq2GjuNUt0aUU0YBYw=
github.com/grpc-ecosystem/go-grpc-middleware v1.3.0/go.mod h1:z0ButlSOZa5vEBq9m2m2hlwIgKw+rp3sdCBRoJY+30Y=
github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 h1:Ovs26xHkKqVztRpIrF/92BcuyuQ/YW4NSIpoGtfXNho=
github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk=
github.com/grpc-ecosystem/grpc-gateway v1.16.0 h1:gmcG1KaJ57LophUzW0Hy8NmPhnMZb4M0+kPpLofRdBo=
github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw=
github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU=
github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
github.com/imdario/mergo v0.3.5/go.mod h1:2EnlNZ0deacrJVfApfmtdGgDfMuh/nq6Ok1EcJh5FfA=
github.com/jonboulle/clockwork v0.2.2 h1:UOGuzwb1PwsrDAObMuhUnj0p5ULPj8V/xJ7Kx9qUBdQ=
github.com/jonboulle/clockwork v0.2.2/go.mod h1:Pkfl5aHPm1nk2H9h0bjmnJD/BcgbGXUBGnn1kMkgxc8=
github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=
github.com/json-iterator/go v1.1.10 h1:Kz6Cvnvv2wGdaG/V8yMvfkmNiXq9Ya2KUv4rouJJr68=
github.com/json-iterator/go v1.1.10/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU=
github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk=
github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w=
github.com/kisielk/errcheck v1.2.0/go.mod h1:/BMXB+zMLi60iA8Vv6Ksmxu/1UDYcXs4uQLJ+jE2L00=
github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
github.com/konsorten/go-windows-terminal-sequences v1.0.3 h1:CE8S1cTafDpPvMhIxNJKvHsGVBgn1xWYf1NbHQhywc8=
github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
@@ -171,14 +135,11 @@ github.com/kubernetes-csi/csi-lib-utils v0.9.1 h1:sGq6ifVujfMSkfTsMZip44Ttv8SDXv
github.com/kubernetes-csi/csi-lib-utils v0.9.1/go.mod h1:8E2jVUX9j3QgspwHXa6LwyN7IHQDjW9jX3kwoWnSC+M=
github.com/mailru/easyjson v0.0.0-20160728113105-d5b7844b561a/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc=
github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0=
github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369 h1:I0XW9+e1XWDxdcEniV4rQAIOPUGDq67JSCiRCgGCZLI=
github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369/go.mod h1:BSXmuO+STAnVfrANrmjBb36TMTDstsz7MSK+HVaYKv4=
github.com/moby/term v0.0.0-20200312100748-672ec06f55cd/go.mod h1:DdlQx2hp0Ss5/fLikoLlEeIYiATotOjgB//nb973jeo=
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
github.com/modern-go/reflect2 v1.0.1 h1:9f412s+6RmYXLWZSEzVVgPGK7C2PphHj5RJrvfx9AWI=
github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
github.com/munnerz/goautoneg v0.0.0-20120707110453-a547fc61f48d/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
@@ -188,38 +149,28 @@ github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+W
github.com/onsi/ginkgo v1.11.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
github.com/onsi/gomega v0.0.0-20170829124025-dcabb60a477c/go.mod h1:C1qb7wdrVGGVU+Z6iS04AVkA3Q65CEZX59MT0QO5uiA=
github.com/onsi/gomega v1.7.0/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY=
github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o=
github.com/peterbourgon/diskv v2.0.1+incompatible/go.mod h1:uqqh8zWWbv1HBMNONnaR/tNboyR3/BZd58JJSHlUSCU=
github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw=
github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo=
github.com/prometheus/client_golang v1.7.1 h1:NTGy1Ja9pByO+xAeH/qiWnLrKtr3hJPNjaVUwnjpdpA=
github.com/prometheus/client_golang v1.7.1/go.mod h1:PY5Wy2awLA44sXw4AOSfFBetzPP4j5+D6mVACh+pe2M=
github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo=
github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
github.com/prometheus/client_model v0.2.0 h1:uq5h0d+GuxiXLJLNABMgp2qUWDPiLvgCzz2dUR+/W/M=
github.com/prometheus/client_model v0.2.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4=
github.com/prometheus/common v0.10.0 h1:RyRA7RzGXQZiW+tGMr7sxa85G1z0yOpM1qq5c8lNawc=
github.com/prometheus/common v0.10.0/go.mod h1:Tlit/dnDKsSWFlCLTWaA1cyBgKHSMdTB80sz/V91rCo=
github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk=
github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA=
github.com/prometheus/procfs v0.1.3 h1:F0+tqvhOksq22sc6iCHF5WGlWjdwj92p0udFh1VFBS8=
github.com/prometheus/procfs v0.1.3/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4OA4YeYWdaU=
github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ=
github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo=
github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE=
github.com/sirupsen/logrus v1.6.0 h1:UBcNElsrwanuuMsnGSlYmtmgbb23qDR5dG+6X6Oo89I=
github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88=
github.com/soheilhy/cmux v0.1.5 h1:jjzc5WVemNEDTLwv9tlmemhC73tI08BNOIGwBOo10Js=
github.com/soheilhy/cmux v0.1.5/go.mod h1:T7TcVDs9LWfQgPlPsdngu6I6QIoyIFZDDC6sNE1GqG0=
github.com/spf13/afero v1.2.2/go.mod h1:9ZxEEn6pIJ8Rxe320qSDBk6AsU0r9pR7Q4OcevTdifk=
github.com/spf13/pflag v0.0.0-20170130214245-9ff6c6923cff/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4=
github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4=
@@ -231,24 +182,11 @@ github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UV
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
github.com/stretchr/testify v1.5.1 h1:nOGnQDM7FYENwehXlg/kFVnos3rEvtKTjRvOWSzb6H4=
github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
github.com/tmc/grpc-websocket-proxy v0.0.0-20201229170055-e5319fda7802 h1:uruHq4dN7GR16kFc5fp3d1RIYzJW5onx8Ybykw2YQFA=
github.com/tmc/grpc-websocket-proxy v0.0.0-20201229170055-e5319fda7802/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U=
github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2 h1:eY9dn8+vbi4tKz5Qo6v2eYzo7kUS51QINcR5jNpbZS8=
github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU=
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
go.etcd.io/bbolt v1.3.5 h1:XAzx9gjCb0Rxj7EoqcClPD1d5ZBxZJk0jbuoPHenBt0=
go.etcd.io/bbolt v1.3.5/go.mod h1:G5EMThwa9y8QZGBClrRx5EY+Yw9kAhnjy3bSjsnlVTQ=
go.etcd.io/etcd v3.3.25+incompatible h1:V1RzkZJj9LqsJRy+TUBgpWSbZXITLB819lstuTFoZOY=
go.etcd.io/etcd v3.3.25+incompatible/go.mod h1:yaeTdrJi5lOmYerz05bd8+V7KubZs8YSFZfzsF9A6aI=
go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU=
go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8=
go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw=
go.uber.org/atomic v1.4.0 h1:cxzIVoETapQEqDhQu3QfnvXAV4AlzcvUCxkVUFw3+EU=
go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE=
go.uber.org/multierr v1.1.0 h1:HoEmRHQPVSqub6w2z2d2EOVs2fjyFRGyofhKuyDq0QI=
go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0=
go.uber.org/zap v1.10.0 h1:ORx85nbTijNz8ljznvCMR1ZBIPKFn3jQrag10X2AsuM=
go.uber.org/zap v1.10.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q=
golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
@@ -256,7 +194,6 @@ golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8U
golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20191206172530-e9b2fee46413/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9 h1:psW17arqaxU48Z5kZ0CQnkZWQJsqcURM6tKiBApRjXI=
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
@@ -276,8 +213,6 @@ golang.org/x/mobile v0.0.0-20190719004257-d2bd2a29d028/go.mod h1:E/iHnbuqvinMTCc
golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKGUJ2LatrhH/nqhxcFungHvyanc=
golang.org/x/mod v0.1.0/go.mod h1:0QHyrYULN0/3qlju5TqG8bIK38QM8yzMo5ekMj3DlcY=
golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg=
golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
@@ -291,26 +226,20 @@ golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR
golang.org/x/net v0.0.0-20190613194153-d28f0bde5980/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20191209160850-c0dbc17a3553/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/net v0.0.0-20200707034311-ab3426394381 h1:VXak5I6aEWmAXeQjA+QSZzlgNrpq9mjcfDemuexIKsU=
golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
golang.org/x/net v0.0.0-20201202161906-c7110b5ffcbb h1:eBmm0M9fYhWpKZLjQUUKka/LtIxf46G4fxeEz5KJr9U=
golang.org/x/net v0.0.0-20201202161906-c7110b5ffcbb/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
golang.org/x/oauth2 v0.0.0-20191202225959-858c2ad4c8b6/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
@@ -326,11 +255,9 @@ golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7w
golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191228213918-04cbcbbfeed8/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200106162015-b016eb3dc98e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200302150141-5c8b2ff67527/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200615200032-f1bc736245b1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200622214017-ed371f2e16b4 h1:5/PjkGUjvEU5Gl6BxmvKRPpqo2uNMv4rcHBMwzk/st8=
golang.org/x/sys v0.0.0-20200622214017-ed371f2e16b4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f h1:+Nyd8tzPX9R7BWHguqsrbFdRx3WQ/1ib8I44HXV5yTA=
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
@@ -341,7 +268,6 @@ golang.org/x/text v0.3.3 h1:cokOdA+Jmi5PJGXLlLllQSgYigAEfHXJAERHVMaCc2k=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20191024005414-555d28b269f0 h1:/5xXl8Y5W96D+TtHSlonuFqGHIWVuyCkGJLwGh9JJFs=
golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20181011042414-1f849cf54d09/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
@@ -360,14 +286,10 @@ golang.org/x/tools v0.0.0-20190628153133-6cdbf07be9d0/go.mod h1:/rFqwRUd4F7ZHNgw
golang.org/x/tools v0.0.0-20190816200558-6889da9d5479/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20190911174233-4f2ddba30aff/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20191012152004-8de300cfc20a/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20191125144606-a911d9008d1f/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20191227053925-7b8e75db28f4/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE=
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
@@ -388,8 +310,6 @@ google.golang.org/genproto v0.0.0-20190801165951-fa694d86fc64/go.mod h1:DMBHOl98
google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc=
google.golang.org/genproto v0.0.0-20190911173649-1774047e7e51/go.mod h1:IbNlFCBrqXvoKpeg0TB2l7cyZUmoaFKYIwrEpbDKLA8=
google.golang.org/genproto v0.0.0-20191230161307-f3c370f40bfb/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc=
google.golang.org/genproto v0.0.0-20200423170343-7949de9c1215/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
google.golang.org/genproto v0.0.0-20200513103714-09dca8ec2884/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013 h1:+kGHl1aib/qcwaRi1CbqBZ1rk19r85MNUf8HaBghugY=
google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo=
google.golang.org/grpc v1.25.1 h1:wdKvqQk7IttEw92GoRyKG2IDrUIpgpj6H6m81yfeMW0=
@@ -415,7 +335,6 @@ gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw=
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw=
gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.3/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.5/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.8 h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10=
@@ -444,5 +363,4 @@ k8s.io/utils v0.0.0-20210305010621-2afb4311ab10/go.mod h1:jPW/WVKK9YHAvNhRxK0md/
rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8=
sigs.k8s.io/structured-merge-diff/v4 v4.0.1/go.mod h1:bJZC9H9iH24zzfZ/41RGcq60oK1F7G282QMXDPYydCw=
sigs.k8s.io/yaml v1.1.0/go.mod h1:UJmg0vDUVViEyp3mgSv9WPwZCDxu4rQW1olrI1uml+o=
sigs.k8s.io/yaml v1.2.0 h1:kr/MCeFWJWTwyaHoR9c8EjH9OumOmoF9YGiZd7lFm/Q=
sigs.k8s.io/yaml v1.2.0/go.mod h1:yfXDCHCao9+ENCvLSE62v9VSji2MKu5jeNfTrofGhJc=

View File

@@ -5,7 +5,7 @@ package vitastor
const (
vitastorCSIDriverName = "csi.vitastor.io"
vitastorCSIDriverVersion = "0.8.1"
vitastorCSIDriverVersion = "0.9.2"
)
// Config struct fills the parameters of request or user input

View File

@@ -6,11 +6,11 @@ package vitastor
import (
"context"
"encoding/json"
"fmt"
"strings"
"bytes"
"strconv"
"time"
"fmt"
"os"
"os/exec"
"io/ioutil"
@@ -21,8 +21,6 @@ import (
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
"go.etcd.io/etcd/clientv3"
"github.com/container-storage-interface/spec/lib/go/csi"
)
@@ -114,6 +112,34 @@ func GetConnectionParams(params map[string]string) (map[string]string, []string,
return ctxVars, etcdUrl, etcdPrefix
}
func invokeCLI(ctxVars map[string]string, args []string) ([]byte, error)
{
if (ctxVars["etcdUrl"] != "")
{
args = append(args, "--etcd_address", ctxVars["etcdUrl"])
}
if (ctxVars["etcdPrefix"] != "")
{
args = append(args, "--etcd_prefix", ctxVars["etcdPrefix"])
}
if (ctxVars["configPath"] != "")
{
args = append(args, "--config_path", ctxVars["configPath"])
}
c := exec.Command("/usr/bin/vitastor-cli", args...)
var stdout, stderr bytes.Buffer
c.Stdout = &stdout
c.Stderr = &stderr
err := c.Run()
stderrStr := string(stderr.Bytes())
if (err != nil)
{
klog.Errorf("vitastor-cli %s failed: %s, status %s\n", strings.Join(args, " "), stderrStr, err)
return nil, status.Error(codes.Internal, stderrStr+" (status "+err.Error()+")")
}
return stdout.Bytes(), nil
}
// Create the volume
func (cs *ControllerServer) CreateVolume(ctx context.Context, req *csi.CreateVolumeRequest) (*csi.CreateVolumeResponse, error)
{
@@ -146,128 +172,41 @@ func (cs *ControllerServer) CreateVolume(ctx context.Context, req *csi.CreateVol
volSize = ((capRange.GetRequiredBytes() + MB - 1) / MB) * MB
}
// FIXME: The following should PROBABLY be implemented externally in a management tool
ctxVars, etcdUrl, etcdPrefix := GetConnectionParams(req.Parameters)
ctxVars, etcdUrl, _ := GetConnectionParams(req.Parameters)
if (len(etcdUrl) == 0)
{
return nil, status.Error(codes.InvalidArgument, "no etcdUrl in storage class configuration and no etcd_address in vitastor.conf")
}
// Connect to etcd
cli, err := clientv3.New(clientv3.Config{
DialTimeout: ETCD_TIMEOUT,
Endpoints: etcdUrl,
})
// Create image using vitastor-cli
_, err := invokeCLI(ctxVars, []string{ "create", volName, "-s", fmt.Sprintf("%v", volSize), "--pool", fmt.Sprintf("%v", poolId) })
if (err != nil)
{
return nil, status.Error(codes.Internal, "failed to connect to etcd at "+strings.Join(etcdUrl, ",")+": "+err.Error())
}
defer cli.Close()
var imageId uint64 = 0
for
{
// Check if the image exists
ctx, cancel := context.WithTimeout(context.Background(), ETCD_TIMEOUT)
resp, err := cli.Get(ctx, etcdPrefix+"/index/image/"+volName)
cancel()
if (err != nil)
if (strings.Index(err.Error(), "already exists") > 0)
{
return nil, status.Error(codes.Internal, "failed to read key from etcd: "+err.Error())
}
if (len(resp.Kvs) > 0)
{
kv := resp.Kvs[0]
var v InodeIndex
err := json.Unmarshal(kv.Value, &v)
stat, err := invokeCLI(ctxVars, []string{ "ls", "--json", volName })
if (err != nil)
{
return nil, status.Error(codes.Internal, "invalid /index/image/"+volName+" key in etcd: "+err.Error())
return nil, err
}
poolId = v.PoolId
imageId = v.Id
inodeCfgKey := fmt.Sprintf("/config/inode/%d/%d", poolId, imageId)
ctx, cancel := context.WithTimeout(context.Background(), ETCD_TIMEOUT)
resp, err := cli.Get(ctx, etcdPrefix+inodeCfgKey)
cancel()
var inodeCfg []InodeConfig
err = json.Unmarshal(stat, &inodeCfg)
if (err != nil)
{
return nil, status.Error(codes.Internal, "failed to read key from etcd: "+err.Error())
return nil, status.Error(codes.Internal, "Invalid JSON in vitastor-cli ls: "+err.Error())
}
if (len(resp.Kvs) == 0)
if (len(inodeCfg) == 0)
{
return nil, status.Error(codes.Internal, "missing "+inodeCfgKey+" key in etcd")
return nil, status.Error(codes.Internal, "vitastor-cli create said that image already exists, but ls can't find it")
}
var inodeCfg InodeConfig
err = json.Unmarshal(resp.Kvs[0].Value, &inodeCfg)
if (err != nil)
{
return nil, status.Error(codes.Internal, "invalid "+inodeCfgKey+" key in etcd: "+err.Error())
}
if (inodeCfg.Size < uint64(volSize))
if (inodeCfg[0].Size < uint64(volSize))
{
return nil, status.Error(codes.Internal, "image "+volName+" is already created, but size is less than expected")
}
}
else
{
// Find a free ID
// Create image metadata in a transaction verifying that the image doesn't exist yet AND ID is still free
maxIdKey := fmt.Sprintf("%s/index/maxid/%d", etcdPrefix, poolId)
ctx, cancel := context.WithTimeout(context.Background(), ETCD_TIMEOUT)
resp, err := cli.Get(ctx, maxIdKey)
cancel()
if (err != nil)
{
return nil, status.Error(codes.Internal, "failed to read key from etcd: "+err.Error())
}
var modRev int64
var nextId uint64
if (len(resp.Kvs) > 0)
{
var err error
nextId, err = strconv.ParseUint(string(resp.Kvs[0].Value), 10, 64)
if (err != nil)
{
return nil, status.Error(codes.Internal, maxIdKey+" contains invalid ID")
}
modRev = resp.Kvs[0].ModRevision
nextId++
}
else
{
nextId = 1
}
inodeIdxJson, _ := json.Marshal(InodeIndex{
Id: nextId,
PoolId: poolId,
})
inodeCfgJson, _ := json.Marshal(InodeConfig{
Name: volName,
Size: uint64(volSize),
})
ctx, cancel = context.WithTimeout(context.Background(), ETCD_TIMEOUT)
txnResp, err := cli.Txn(ctx).If(
clientv3.Compare(clientv3.ModRevision(fmt.Sprintf("%s/index/maxid/%d", etcdPrefix, poolId)), "=", modRev),
clientv3.Compare(clientv3.CreateRevision(fmt.Sprintf("%s/index/image/%s", etcdPrefix, volName)), "=", 0),
clientv3.Compare(clientv3.CreateRevision(fmt.Sprintf("%s/config/inode/%d/%d", etcdPrefix, poolId, nextId)), "=", 0),
).Then(
clientv3.OpPut(fmt.Sprintf("%s/index/maxid/%d", etcdPrefix, poolId), fmt.Sprintf("%d", nextId)),
clientv3.OpPut(fmt.Sprintf("%s/index/image/%s", etcdPrefix, volName), string(inodeIdxJson)),
clientv3.OpPut(fmt.Sprintf("%s/config/inode/%d/%d", etcdPrefix, poolId, nextId), string(inodeCfgJson)),
).Commit()
cancel()
if (err != nil)
{
return nil, status.Error(codes.Internal, "failed to commit transaction in etcd: "+err.Error())
}
if (txnResp.Succeeded)
{
imageId = nextId
break
}
// Start over if the transaction fails
return nil, err
}
}
@@ -299,97 +238,12 @@ func (cs *ControllerServer) DeleteVolume(ctx context.Context, req *csi.DeleteVol
}
volName := ctxVars["name"]
_, etcdUrl, etcdPrefix := GetConnectionParams(ctxVars)
if (len(etcdUrl) == 0)
{
return nil, status.Error(codes.InvalidArgument, "no etcdUrl in storage class configuration and no etcd_address in vitastor.conf")
}
ctxVars, _, _ = GetConnectionParams(ctxVars)
cli, err := clientv3.New(clientv3.Config{
DialTimeout: ETCD_TIMEOUT,
Endpoints: etcdUrl,
})
_, err = invokeCLI(ctxVars, []string{ "rm", volName })
if (err != nil)
{
return nil, status.Error(codes.Internal, "failed to connect to etcd at "+strings.Join(etcdUrl, ",")+": "+err.Error())
}
defer cli.Close()
// Find inode by name
ctx, cancel := context.WithTimeout(context.Background(), ETCD_TIMEOUT)
resp, err := cli.Get(ctx, etcdPrefix+"/index/image/"+volName)
cancel()
if (err != nil)
{
return nil, status.Error(codes.Internal, "failed to read key from etcd: "+err.Error())
}
if (len(resp.Kvs) == 0)
{
return nil, status.Error(codes.NotFound, "volume "+volName+" does not exist")
}
var idx InodeIndex
err = json.Unmarshal(resp.Kvs[0].Value, &idx)
if (err != nil)
{
return nil, status.Error(codes.Internal, "invalid /index/image/"+volName+" key in etcd: "+err.Error())
}
// Get inode config
inodeCfgKey := fmt.Sprintf("%s/config/inode/%d/%d", etcdPrefix, idx.PoolId, idx.Id)
ctx, cancel = context.WithTimeout(context.Background(), ETCD_TIMEOUT)
resp, err = cli.Get(ctx, inodeCfgKey)
cancel()
if (err != nil)
{
return nil, status.Error(codes.Internal, "failed to read key from etcd: "+err.Error())
}
if (len(resp.Kvs) == 0)
{
return nil, status.Error(codes.NotFound, "volume "+volName+" does not exist")
}
var inodeCfg InodeConfig
err = json.Unmarshal(resp.Kvs[0].Value, &inodeCfg)
if (err != nil)
{
return nil, status.Error(codes.Internal, "invalid "+inodeCfgKey+" key in etcd: "+err.Error())
}
// Delete inode data by invoking vitastor-cli
args := []string{
"rm-data", "--etcd_address", strings.Join(etcdUrl, ","),
"--pool", fmt.Sprintf("%d", idx.PoolId),
"--inode", fmt.Sprintf("%d", idx.Id),
}
if (ctxVars["configPath"] != "")
{
args = append(args, "--config_path", ctxVars["configPath"])
}
c := exec.Command("/usr/bin/vitastor-cli", args...)
var stderr bytes.Buffer
c.Stdout = nil
c.Stderr = &stderr
err = c.Run()
stderrStr := string(stderr.Bytes())
if (err != nil)
{
klog.Errorf("vitastor-cli rm-data failed: %s, status %s\n", stderrStr, err)
return nil, status.Error(codes.Internal, stderrStr+" (status "+err.Error()+")")
}
// Delete inode config in etcd
ctx, cancel = context.WithTimeout(context.Background(), ETCD_TIMEOUT)
txnResp, err := cli.Txn(ctx).Then(
clientv3.OpDelete(fmt.Sprintf("%s/index/image/%s", etcdPrefix, volName)),
clientv3.OpDelete(fmt.Sprintf("%s/config/inode/%d/%d", etcdPrefix, idx.PoolId, idx.Id)),
).Commit()
cancel()
if (err != nil)
{
return nil, status.Error(codes.Internal, "failed to delete keys in etcd: "+err.Error())
}
if (!txnResp.Succeeded)
{
return nil, status.Error(codes.Internal, "failed to delete keys in etcd: transaction failed")
return nil, err
}
return &csi.DeleteVolumeResponse{}, nil

58
debian/build-pve-qemu.sh vendored Normal file
View File

@@ -0,0 +1,58 @@
exit
git clone https://git.yourcmc.ru/vitalif/pve-qemu .
# bookworm
docker run -it -v `pwd`/pve-qemu:/root/pve-qemu --name pve-qemu-bullseye debian:bullseye bash
perl -i -pe 's/Types: deb$/Types: deb deb-src/' /etc/apt/sources.list.d/debian.sources
echo 'deb [arch=amd64] http://download.proxmox.com/debian/pve bookworm pve-no-subscription' >> /etc/apt/sources.list
echo 'deb https://vitastor.io/debian bookworm main' >> /etc/apt/sources.list
echo 'APT::Install-Recommends false;' >> /etc/apt/apt.conf
echo 'ru_RU UTF-8' >> /etc/locale.gen
echo 'en_US UTF-8' >> /etc/locale.gen
apt-get update
apt-get install wget ca-certificates
wget https://enterprise.proxmox.com/debian/proxmox-release-bookworm.gpg -O /etc/apt/trusted.gpg.d/proxmox-release-bookworm.gpg
wget https://vitastor.io/debian/pubkey.gpg -O /etc/apt/trusted.gpg.d/vitastor.gpg
apt-get update
apt-get install git devscripts equivs wget mc libjemalloc-dev vitastor-client-dev lintian locales
mk-build-deps --install ./control
# bullseye
docker run -it -v `pwd`/pve-qemu:/root/pve-qemu --name pve-qemu-bullseye debian:bullseye bash
grep '^deb ' /etc/apt/sources.list | perl -pe 's/^deb /deb-src /' >> /etc/apt/sources.list
echo 'deb [arch=amd64] http://download.proxmox.com/debian/pve bullseye pve-no-subscription' >> /etc/apt/sources.list
echo 'deb https://vitastor.io/debian bullseye main' >> /etc/apt/sources.list
echo 'APT::Install-Recommends false;' >> /etc/apt/apt.conf
echo 'ru_RU UTF-8' >> /etc/locale.gen
echo 'en_US UTF-8' >> /etc/locale.gen
apt-get update
apt-get install wget
wget https://enterprise.proxmox.com/debian/proxmox-release-bullseye.gpg -O /etc/apt/trusted.gpg.d/proxmox-release-bullseye.gpg
wget https://vitastor.io/debian/pubkey.gpg -O /etc/apt/trusted.gpg.d/vitastor.gpg
apt-get update
apt-get install git devscripts equivs wget mc libjemalloc-dev vitastor-client-dev lintian locales
mk-build-deps --install ./control
# buster
docker run -it -v `pwd`/pve-qemu:/root/pve-qemu --name pve-qemu-buster debian:buster bash
grep '^deb ' /etc/apt/sources.list | perl -pe 's/^deb /deb-src /' >> /etc/apt/sources.list
echo 'deb [arch=amd64] http://download.proxmox.com/debian/pve buster pve-no-subscription' >> /etc/apt/sources.list
echo 'deb https://vitastor.io/debian buster main' >> /etc/apt/sources.list
echo 'deb http://deb.debian.org/debian buster-backports main' >> /etc/apt/sources.list
echo 'APT::Install-Recommends false;' >> /etc/apt/apt.conf
echo 'ru_RU UTF-8' >> /etc/locale.gen
echo 'en_US UTF-8' >> /etc/locale.gen
apt-get update
apt-get install wget ca-certificates
wget http://download.proxmox.com/debian/proxmox-ve-release-6.x.gpg -O /etc/apt/trusted.gpg.d/proxmox-ve-release-6.x.gpg
wget https://vitastor.io/debian/pubkey.gpg -O /etc/apt/trusted.gpg.d/vitastor.gpg
apt-get update
apt-get install git devscripts equivs wget mc libjemalloc-dev vitastor-client-dev lintian locales
mk-build-deps --install ./control

7
debian/build-vitastor-bookworm.sh vendored Executable file
View File

@@ -0,0 +1,7 @@
#!/bin/bash
cat < vitastor.Dockerfile > ../Dockerfile
cd ..
mkdir -p packages
sudo podman build --build-arg REL=bookworm -v `pwd`/packages:/root/packages -f Dockerfile .
rm Dockerfile

4
debian/changelog vendored
View File

@@ -1,10 +1,10 @@
vitastor (0.8.1-1) unstable; urgency=medium
vitastor (0.9.2-1) unstable; urgency=medium
* Bugfixes
-- Vitaliy Filippov <vitalif@yourcmc.ru> Fri, 03 Jun 2022 02:09:44 +0300
vitastor (0.8.1-1) unstable; urgency=medium
vitastor (0.9.2-1) unstable; urgency=medium
* Implement NFS proxy
* Add documentation

View File

@@ -1,4 +1,4 @@
# Build patched QEMU for Debian Buster or Bullseye/Sid inside a container
# Build patched QEMU for Debian inside a container
# cd ..; podman build --build-arg REL=bullseye -v `pwd`/packages:/root/packages -f debian/patched-qemu.Dockerfile .
ARG REL=
@@ -15,17 +15,19 @@ RUN if [ "$REL" = "buster" -o "$REL" = "bullseye" ]; then \
echo 'Pin-Priority: 500' >> /etc/apt/preferences; \
fi; \
grep '^deb ' /etc/apt/sources.list | perl -pe 's/^deb/deb-src/' >> /etc/apt/sources.list; \
perl -i -pe 's/Types: deb$/Types: deb deb-src/' /etc/apt/sources.list.d/debian.sources || true; \
echo 'APT::Install-Recommends false;' >> /etc/apt/apt.conf; \
echo 'APT::Install-Suggests false;' >> /etc/apt/apt.conf
RUN apt-get update
RUN apt-get -y install qemu fio liburing1 liburing-dev libgoogle-perftools-dev devscripts
RUN apt-get -y install fio liburing-dev libgoogle-perftools-dev devscripts
RUN apt-get -y build-dep qemu
# To build a custom version
#RUN cp /root/packages/qemu-orig/* /root
RUN apt-get --download-only source qemu
ADD patches/qemu-5.0-vitastor.patch patches/qemu-5.1-vitastor.patch patches/qemu-6.1-vitastor.patch src/qemu_driver.c /root/vitastor/patches/
ADD patches /root/vitastor/patches
ADD src/qemu_driver.c /root/vitastor/src/qemu_driver.c
RUN set -e; \
apt-get install -y wget; \
wget -q -O /etc/apt/trusted.gpg.d/vitastor.gpg https://vitastor.io/debian/pubkey.gpg; \
@@ -37,23 +39,14 @@ RUN set -e; \
rm -rf /root/packages/qemu-$REL/*; \
cd /root/packages/qemu-$REL; \
dpkg-source -x /root/qemu*.dsc; \
if ls -d /root/packages/qemu-$REL/qemu-5.0*; then \
D=$(ls -d /root/packages/qemu-$REL/qemu-5.0*); \
cp /root/vitastor/patches/qemu-5.0-vitastor.patch $D/debian/patches; \
echo qemu-5.0-vitastor.patch >> $D/debian/patches/series; \
elif ls /root/packages/qemu-$REL/qemu-6.1*; then \
D=$(ls -d /root/packages/qemu-$REL/qemu-6.1*); \
cp /root/vitastor/patches/qemu-6.1-vitastor.patch $D/debian/patches; \
echo qemu-6.1-vitastor.patch >> $D/debian/patches/series; \
else \
cp /root/vitastor/patches/qemu-5.1-vitastor.patch /root/packages/qemu-$REL/qemu-*/debian/patches; \
P=`ls -d /root/packages/qemu-$REL/qemu-*/debian/patches`; \
echo qemu-5.1-vitastor.patch >> $P/series; \
fi; \
QEMU_VER=$(ls -d qemu*/ | perl -pe 's!^.*(\d+\.\d+).*!$1!'); \
D=$(ls -d qemu*/); \
cp /root/vitastor/patches/qemu-$QEMU_VER-vitastor.patch ./qemu-*/debian/patches; \
echo qemu-$QEMU_VER-vitastor.patch >> $D/debian/patches/series; \
cd /root/packages/qemu-$REL/qemu-*/; \
quilt push -a; \
quilt add block/vitastor.c; \
cp /root/vitastor/patches/qemu_driver.c block/vitastor.c; \
cp /root/vitastor/src/qemu_driver.c block/vitastor.c; \
quilt refresh; \
V=$(head -n1 debian/changelog | perl -pe 's/^.*\((.*?)(~bpo[\d\+]*)?\).*$/$1/')+vitastor1; \
DEBEMAIL="Vitaliy Filippov <vitalif@yourcmc.ru>" dch -D $REL -v $V 'Plug Vitastor block driver'; \

View File

@@ -1 +1 @@
patches/PVE_VitastorPlugin.pm usr/share/perl5/PVE/Storage/Custom/VitastorPlugin.pm
patches/VitastorPlugin.pm usr/share/perl5/PVE/Storage/Custom/

View File

@@ -1,4 +1,4 @@
# Build Vitastor packages for Debian Buster or Bullseye/Sid inside a container
# Build Vitastor packages for Debian inside a container
# cd ..; podman build --build-arg REL=bullseye -v `pwd`/packages:/root/packages -f debian/vitastor.Dockerfile .
ARG REL=
@@ -15,11 +15,12 @@ RUN if [ "$REL" = "buster" -o "$REL" = "bullseye" ]; then \
echo 'Pin-Priority: 500' >> /etc/apt/preferences; \
fi; \
grep '^deb ' /etc/apt/sources.list | perl -pe 's/^deb/deb-src/' >> /etc/apt/sources.list; \
perl -i -pe 's/Types: deb$/Types: deb deb-src/' /etc/apt/sources.list.d/debian.sources || true; \
echo 'APT::Install-Recommends false;' >> /etc/apt/apt.conf; \
echo 'APT::Install-Suggests false;' >> /etc/apt/apt.conf
RUN apt-get update
RUN apt-get -y install fio liburing1 liburing-dev libgoogle-perftools-dev devscripts
RUN apt-get -y install fio liburing-dev libgoogle-perftools-dev devscripts
RUN apt-get -y build-dep fio
RUN apt-get --download-only source fio
RUN apt-get update && apt-get -y install libjerasure-dev cmake libibverbs-dev libisal-dev
@@ -34,8 +35,8 @@ RUN set -e -x; \
mkdir -p /root/packages/vitastor-$REL; \
rm -rf /root/packages/vitastor-$REL/*; \
cd /root/packages/vitastor-$REL; \
cp -r /root/vitastor vitastor-0.8.1; \
cd vitastor-0.8.1; \
cp -r /root/vitastor vitastor-0.9.2; \
cd vitastor-0.9.2; \
ln -s /root/fio-build/fio-*/ ./fio; \
FIO=$(head -n1 fio/debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
ls /usr/include/linux/raw.h || cp ./debian/raw.h /usr/include/linux/raw.h; \
@@ -48,8 +49,8 @@ RUN set -e -x; \
rm -rf a b; \
echo "dep:fio=$FIO" > debian/fio_version; \
cd /root/packages/vitastor-$REL; \
tar --sort=name --mtime='2020-01-01' --owner=0 --group=0 --exclude=debian -cJf vitastor_0.8.1.orig.tar.xz vitastor-0.8.1; \
cd vitastor-0.8.1; \
tar --sort=name --mtime='2020-01-01' --owner=0 --group=0 --exclude=debian -cJf vitastor_0.9.2.orig.tar.xz vitastor-0.9.2; \
cd vitastor-0.9.2; \
V=$(head -n1 debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
DEBFULLNAME="Vitaliy Filippov <vitalif@yourcmc.ru>" dch -D $REL -v "$V""$REL" "Rebuild for $REL"; \
DEB_BUILD_OPTIONS=nocheck dpkg-buildpackage --jobs=auto -sa; \

Binary file not shown.

View File

@@ -17,14 +17,16 @@ Configuration parameters can be set in 3 places:
- Configuration file (`/etc/vitastor/vitastor.conf` or other path)
- etcd key `/vitastor/config/global`. Most variables can be set there, but etcd
connection parameters should obviously be set in the configuration file.
- Command line of Vitastor components: OSD, mon, fio and QEMU options,
OpenStack/Proxmox/etc configuration. The latter doesn't allow to set all
variables directly, but it allows to override the configuration file and
set everything you need inside it.
- Command line of Vitastor components: OSD (when you run it without vitastor-disk),
mon, fio and QEMU options, OpenStack/Proxmox/etc configuration. The latter
doesn't allow to set all variables directly, but it allows to override the
configuration file and set everything you need inside it.
- OSD superblocks created by [vitastor-disk](usage/disk.en.md) contain
primarily disk layout parameters of specific OSDs. In fact, these parameters
are automatically passed into the command line of vitastor-osd process, so
they have the same "status" as command-line parameters.
In the future, additional configuration methods may be added:
- OSD superblock which will, by design, contain parameters related to the disk
layout and to one specific OSD.
- OSD-specific keys in etcd like `/vitastor/config/osd/<number>`.
## Parameter Reference

View File

@@ -19,14 +19,17 @@
- Ключе в etcd `/vitastor/config/global`. Большая часть параметров может
задаваться там, кроме, естественно, самих параметров соединения с etcd,
которые должны задаваться в файле конфигурации
- В командной строке компонентов Vitastor: OSD, монитора, опциях fio и QEMU,
настроек OpenStack, Proxmox и т.п. Последние, как правило, не включают полный
набор параметров напрямую, но разрешают определить путь к файлу конфигурации
и задать любые параметры в нём.
- В командной строке компонентов Vitastor: OSD (при ручном запуске без vitastor-disk),
монитора, опциях fio и QEMU, настроек OpenStack, Proxmox и т.п. Последние,
как правило, не включают полный набор параметров напрямую, но позволяют
определить путь к файлу конфигурации и задать любые параметры в нём.
- В суперблоке OSD, записываемом [vitastor-disk](usage/disk.ru.md) - параметры,
связанные с дисковым форматом и с этим конкретным OSD. На самом деле,
при запуске OSD эти параметры автоматически передаются в командную строку
процесса vitastor-osd, то есть по "статусу" они эквивалентны параметрам
командной строки OSD.
В будущем также могут быть добавлены другие способы конфигурации:
- Суперблок OSD, в котором будут храниться параметры OSD, связанные с дисковым
форматом и с этим конкретным OSD.
- OSD-специфичные ключи в etcd типа `/vitastor/config/osd/<номер>`.
## Список параметров

View File

@@ -25,11 +25,16 @@ running if required parameters are specified.
## etcd_address
- Type: string or array of strings
- Can be changed online: yes
etcd connection endpoint(s). Multiple endpoints may be delimited by "," or
specified in a JSON array `["10.0.115.10:2379/v3","10.0.115.11:2379/v3"]`.
Note that https is not supported for etcd connections yet.
etcd connection endpoints can be changed online by updating global
configuration in etcd itself - this allows to switch the cluster to new
etcd addresses without downtime.
## etcd_prefix
- Type: string
@@ -42,5 +47,6 @@ example, use a single etcd cluster for multiple Vitastor clusters.
- Type: integer
- Default: 0
- Can be changed online: yes
Log level. Raise if you want more verbose output.

View File

@@ -24,10 +24,14 @@
## etcd_address
- Тип: строка или массив строк
- Можно менять на лету: да
Адрес(а) подключения к etcd. Несколько адресов могут разделяться запятой
или указываться в виде JSON-массива `["10.0.115.10:2379/v3","10.0.115.11:2379/v3"]`.
Адреса подключения к etcd можно поменять на лету, обновив конфигурацию в
самом etcd - это позволяет переключить кластер на новые etcd без остановки.
## etcd_prefix
- Тип: строка
@@ -41,5 +45,6 @@
- Тип: целое число
- Значение по умолчанию: 0
- Можно менять на лету: да
Уровень логгирования. Повысьте, если хотите более подробный вывод.

View File

@@ -19,6 +19,7 @@ between clients, OSDs and etcd.
- [rdma_max_sge](#rdma_max_sge)
- [rdma_max_msg](#rdma_max_msg)
- [rdma_max_recv](#rdma_max_recv)
- [rdma_max_send](#rdma_max_send)
- [peer_connect_interval](#peer_connect_interval)
- [peer_connect_timeout](#peer_connect_timeout)
- [osd_idle_timeout](#osd_idle_timeout)
@@ -74,6 +75,12 @@ to work. For example, Mellanox ConnectX-3 and older adapters don't have
Implicit ODP, so they're unsupported by Vitastor. Run `ibv_devinfo -v` as
root to list available RDMA devices and their features.
Remember that you also have to configure your network switches if you use
RoCE/RoCEv2, otherwise you may experience unstable performance. Refer to
the manual of your network vendor for details about setting up the switch
for RoCEv2 correctly. Usually it means setting up Lossless Ethernet with
PFC (Priority Flow Control) and ECN (Explicit Congestion Notification).
## rdma_port_num
- Type: integer
@@ -116,26 +123,37 @@ required to change this parameter.
## rdma_max_msg
- Type: integer
- Default: 1048576
- Default: 132096
Maximum size of a single RDMA send or receive operation in bytes.
## rdma_max_recv
- Type: integer
- Default: 16
Maximum number of RDMA receive buffers per connection (RDMA requires
preallocated buffers to receive data). Each buffer is `rdma_max_msg` bytes
in size. So this setting directly affects memory usage: a single Vitastor
RDMA client uses `rdma_max_recv * rdma_max_msg * OSD_COUNT` bytes of memory.
Default is roughly 2 MB * number of OSDs.
## rdma_max_send
- Type: integer
- Default: 8
Maximum number of parallel RDMA receive operations. Note that this number
of receive buffers `rdma_max_msg` in size are allocated for each client,
so this setting actually affects memory usage. This is because RDMA receive
operations are (sadly) still not zero-copy in Vitastor. It may be fixed in
later versions.
Maximum number of outstanding RDMA send operations per connection. Should be
less than `rdma_max_recv` so the receiving side doesn't run out of buffers.
Doesn't affect memory usage - additional memory isn't allocated for send
operations.
## peer_connect_interval
- Type: seconds
- Default: 5
- Minimum: 1
- Can be changed online: yes
Interval before attempting to reconnect to an unavailable OSD.
@@ -144,6 +162,7 @@ Interval before attempting to reconnect to an unavailable OSD.
- Type: seconds
- Default: 5
- Minimum: 1
- Can be changed online: yes
Timeout for OSD connection attempts.
@@ -152,6 +171,7 @@ Timeout for OSD connection attempts.
- Type: seconds
- Default: 5
- Minimum: 1
- Can be changed online: yes
OSD connection inactivity time after which clients and other OSDs send
keepalive requests to check state of the connection.
@@ -161,6 +181,7 @@ keepalive requests to check state of the connection.
- Type: seconds
- Default: 5
- Minimum: 1
- Can be changed online: yes
Maximum time to wait for OSD keepalive responses. If an OSD doesn't respond
within this time, the connection to it is dropped and a reconnection attempt
@@ -171,6 +192,7 @@ is scheduled.
- Type: milliseconds
- Default: 500
- Minimum: 50
- Can be changed online: yes
OSDs respond to clients with a special error code when they receive I/O
requests for a PG that's not synchronized and started. This parameter sets
@@ -180,6 +202,7 @@ the time for the clients to wait before re-attempting such I/O requests.
- Type: integer
- Default: 5
- Can be changed online: yes
Maximum number of attempts for etcd requests which can't be retried
indefinitely.
@@ -188,6 +211,7 @@ indefinitely.
- Type: milliseconds
- Default: 1000
- Can be changed online: yes
Timeout for etcd requests which should complete quickly, like lease refresh.
@@ -195,6 +219,7 @@ Timeout for etcd requests which should complete quickly, like lease refresh.
- Type: milliseconds
- Default: 5000
- Can be changed online: yes
Timeout for etcd requests which are allowed to wait for some time.
@@ -202,6 +227,7 @@ Timeout for etcd requests which are allowed to wait for some time.
- Type: seconds
- Default: max(30, etcd_report_interval*2)
- Can be changed online: yes
Timeout for etcd connection HTTP Keep-Alive. Should be higher than
etcd_report_interval to guarantee that keepalive actually works.
@@ -210,6 +236,7 @@ etcd_report_interval to guarantee that keepalive actually works.
- Type: seconds
- Default: 30
- Can be changed online: yes
etcd websocket ping interval required to keep the connection alive and
detect disconnections quickly.
@@ -218,6 +245,7 @@ detect disconnections quickly.
- Type: integer
- Default: 33554432
- Can be changed online: yes
Without immediate_commit=all this parameter sets the limit of "dirty"
(not committed by fsync) data allowed by the client before forcing an

View File

@@ -19,6 +19,7 @@
- [rdma_max_sge](#rdma_max_sge)
- [rdma_max_msg](#rdma_max_msg)
- [rdma_max_recv](#rdma_max_recv)
- [rdma_max_send](#rdma_max_send)
- [peer_connect_interval](#peer_connect_interval)
- [peer_connect_timeout](#peer_connect_timeout)
- [osd_idle_timeout](#osd_idle_timeout)
@@ -78,6 +79,13 @@ Implicit On-Demand Paging (Implicit ODP) и Scatter/Gather (SG). Наприме
суперпользователя, чтобы посмотреть список доступных RDMA-устройств, их
параметры и возможности.
Обратите внимание, что если вы используете RoCE/RoCEv2, вам также необходимо
правильно настроить для него коммутаторы, иначе вы можете столкнуться с
нестабильной производительностью. Подробную информацию о настройке
коммутатора для RoCEv2 ищите в документации производителя. Обычно это
подразумевает настройку сети без потерь на основе PFC (Priority Flow
Control) и ECN (Explicit Congestion Notification).
## rdma_port_num
- Тип: целое число
@@ -121,28 +129,39 @@ OSD в любом случае согласовывают реальное зн
## rdma_max_msg
- Тип: целое число
- Значение по умолчанию: 1048576
- Значение по умолчанию: 132096
Максимальный размер одной RDMA-операции отправки или приёма.
## rdma_max_recv
- Тип: целое число
- Значение по умолчанию: 16
Максимальное число буферов для RDMA-приёма данных на одно соединение
(RDMA требует заранее выделенных буферов для приёма данных). Каждый буфер
имеет размер `rdma_max_msg` байт. Таким образом, настройка прямо влияет на
потребление памяти - один Vitastor-клиент с RDMA использует
`rdma_max_recv * rdma_max_msg * ЧИСЛО_OSD` байт памяти, по умолчанию -
примерно 2 МБ * число OSD.
## rdma_max_send
- Тип: целое число
- Значение по умолчанию: 8
Максимальное число параллельных RDMA-операций получения данных. Следует
иметь в виду, что данное число буферов размером `rdma_max_msg` выделяется
для каждого подключённого клиентского соединения, так что данная настройка
влияет на потребление памяти. Это так потому, что RDMA-приём данных в
Vitastor, увы, всё равно не является zero-copy, т.е. всё равно 1 раз
копирует данные в памяти. Данная особенность, возможно, будет исправлена в
более новых версиях Vitastor.
Максимальное число RDMA-операций отправки, отправляемых в очередь одного
соединения. Желательно, чтобы оно было меньше `rdma_max_recv`, чтобы
у принимающей стороны в процессе работы не заканчивались буферы на приём.
Не влияет на потребление памяти - дополнительная память на операции отправки
не выделяется.
## peer_connect_interval
- Тип: секунды
- Значение по умолчанию: 5
- Минимальное значение: 1
- Можно менять на лету: да
Время ожидания перед повторной попыткой соединиться с недоступным OSD.
@@ -151,6 +170,7 @@ Vitastor, увы, всё равно не является zero-copy, т.е. вс
- Тип: секунды
- Значение по умолчанию: 5
- Минимальное значение: 1
- Можно менять на лету: да
Максимальное время ожидания попытки соединения с OSD.
@@ -159,6 +179,7 @@ Vitastor, увы, всё равно не является zero-copy, т.е. вс
- Тип: секунды
- Значение по умолчанию: 5
- Минимальное значение: 1
- Можно менять на лету: да
Время неактивности соединения с OSD, после которого клиенты или другие OSD
посылают запрос проверки состояния соединения.
@@ -168,6 +189,7 @@ Vitastor, увы, всё равно не является zero-copy, т.е. вс
- Тип: секунды
- Значение по умолчанию: 5
- Минимальное значение: 1
- Можно менять на лету: да
Максимальное время ожидания ответа на запрос проверки состояния соединения.
Если OSD не отвечает за это время, соединение отключается и производится
@@ -178,6 +200,7 @@ Vitastor, увы, всё равно не является zero-copy, т.е. вс
- Тип: миллисекунды
- Значение по умолчанию: 500
- Минимальное значение: 50
- Можно менять на лету: да
Когда OSD получают от клиентов запросы ввода-вывода, относящиеся к не
поднятым на данный момент на них PG, либо к PG в процессе синхронизации,
@@ -189,6 +212,7 @@ Vitastor, увы, всё равно не является zero-copy, т.е. вс
- Тип: целое число
- Значение по умолчанию: 5
- Можно менять на лету: да
Максимальное число попыток выполнения запросов к etcd для тех запросов,
которые нельзя повторять бесконечно.
@@ -197,6 +221,7 @@ Vitastor, увы, всё равно не является zero-copy, т.е. вс
- Тип: миллисекунды
- Значение по умолчанию: 1000
- Можно менять на лету: да
Максимальное время выполнения запросов к etcd, которые должны завершаться
быстро, таких, как обновление резервации (lease).
@@ -205,6 +230,7 @@ Vitastor, увы, всё равно не является zero-copy, т.е. вс
- Тип: миллисекунды
- Значение по умолчанию: 5000
- Можно менять на лету: да
Максимальное время выполнения запросов к etcd, для которых не обязательно
гарантировать быстрое выполнение.
@@ -213,6 +239,7 @@ Vitastor, увы, всё равно не является zero-copy, т.е. вс
- Тип: секунды
- Значение по умолчанию: max(30, etcd_report_interval*2)
- Можно менять на лету: да
Таймаут для HTTP Keep-Alive в соединениях к etcd. Должен быть больше, чем
etcd_report_interval, чтобы keepalive гарантированно работал.
@@ -221,6 +248,7 @@ etcd_report_interval, чтобы keepalive гарантированно рабо
- Тип: секунды
- Значение по умолчанию: 30
- Можно менять на лету: да
Интервал проверки живости вебсокет-подключений к etcd.
@@ -228,6 +256,7 @@ etcd_report_interval, чтобы keepalive гарантированно рабо
- Тип: целое число
- Значение по умолчанию: 33554432
- Можно менять на лету: да
При работе без immediate_commit=all - это лимит объёма "грязных" (не
зафиксированных fsync-ом) данных, при достижении которого клиент будет

View File

@@ -7,7 +7,8 @@
# Runtime OSD Parameters
These parameters only apply to OSDs, are not fixed at the moment of OSD drive
initialization and can be changed with an OSD restart.
initialization and can be changed - either with an OSD restart or, for some of
them, even without restarting by updating configuration in etcd.
- [etcd_report_interval](#etcd_report_interval)
- [run_primary](#run_primary)
@@ -17,6 +18,7 @@ initialization and can be changed with an OSD restart.
- [autosync_interval](#autosync_interval)
- [autosync_writes](#autosync_writes)
- [recovery_queue_depth](#recovery_queue_depth)
- [recovery_pg_switch](#recovery_pg_switch)
- [recovery_sync_batch](#recovery_sync_batch)
- [readonly](#readonly)
- [no_recovery](#no_recovery)
@@ -37,6 +39,14 @@ initialization and can be changed with an OSD restart.
- [throttle_target_parallelism](#throttle_target_parallelism)
- [throttle_threshold_us](#throttle_threshold_us)
- [osd_memlock](#osd_memlock)
- [auto_scrub](#auto_scrub)
- [no_scrub](#no_scrub)
- [scrub_interval](#scrub_interval)
- [scrub_queue_depth](#scrub_queue_depth)
- [scrub_sleep](#scrub_sleep)
- [scrub_list_limit](#scrub_list_limit)
- [scrub_find_best](#scrub_find_best)
- [scrub_ec_max_bruteforce](#scrub_ec_max_bruteforce)
## etcd_report_interval
@@ -90,6 +100,7 @@ OSD by hand.
- Type: seconds
- Default: 5
- Can be changed online: yes
Time interval at which automatic fsyncs/flushes are issued by each OSD when
the immediate_commit mode if disabled. fsyncs are required because without
@@ -102,6 +113,7 @@ issue fsyncs at all.
- Type: integer
- Default: 128
- Can be changed online: yes
Same as autosync_interval, but sets the maximum number of uncommitted write
operations before issuing an fsync operation internally.
@@ -110,15 +122,28 @@ operations before issuing an fsync operation internally.
- Type: integer
- Default: 4
- Can be changed online: yes
Maximum recovery operations per one primary OSD at any given moment of time.
Currently it's the only parameter available to tune the speed or recovery
and rebalancing, but it's planned to implement more.
## recovery_pg_switch
- Type: integer
- Default: 128
- Can be changed online: yes
Number of recovery operations before switching to recovery of the next PG.
The idea is to mix all PGs during recovery for more even space and load
distribution but still benefit from recovery queue depth greater than 1.
Degraded PGs are anyway scanned first.
## recovery_sync_batch
- Type: integer
- Default: 16
- Can be changed online: yes
Maximum number of recovery operations before issuing an additional fsync.
@@ -134,6 +159,7 @@ the underlying device. This may be useful for recovery purposes.
- Type: boolean
- Default: false
- Can be changed online: yes
Disable automatic background recovery of objects. Note that it doesn't
affect implicit recovery of objects happening during writes - a write is
@@ -143,6 +169,7 @@ always made to a full set of at least pg_minsize OSDs.
- Type: boolean
- Default: false
- Can be changed online: yes
Disable background movement of data between different OSDs. Disabling it
means that PGs in the `has_misplaced` state will be left in it indefinitely.
@@ -151,6 +178,7 @@ means that PGs in the `has_misplaced` state will be left in it indefinitely.
- Type: seconds
- Default: 3
- Can be changed online: yes
Time interval at which OSDs print simple human-readable operation
statistics on stdout.
@@ -159,6 +187,7 @@ statistics on stdout.
- Type: seconds
- Default: 10
- Can be changed online: yes
Time interval at which OSDs dump slow or stuck operations on stdout, if
they're any. Also it's the time after which an operation is considered
@@ -168,6 +197,7 @@ they're any. Also it's the time after which an operation is considered
- Type: seconds
- Default: 60
- Can be changed online: yes
Number of seconds after which a deleted inode is removed from OSD statistics.
@@ -175,6 +205,7 @@ Number of seconds after which a deleted inode is removed from OSD statistics.
- Type: integer
- Default: 128
- Can be changed online: yes
Parallel client write operation limit per one OSD. Operations that exceed
this limit are pushed to a temporary queue instead of being executed
@@ -184,6 +215,7 @@ immediately.
- Type: integer
- Default: 1
- Can be changed online: yes
Flusher is a micro-thread that moves data from the journal to the data
area of the device. Their number is auto-tuned between minimum and maximum.
@@ -193,6 +225,7 @@ Minimum number is set by this parameter.
- Type: integer
- Default: 256
- Can be changed online: yes
Maximum number of journal flushers (see above min_flusher_count).
@@ -249,6 +282,7 @@ Most (99%) other SSDs don't need this option.
- Type: boolean
- Default: false
- Can be changed online: yes
Enable soft throttling of small journaled writes. Useful for hybrid OSDs
with fast journal/metadata devices and slow data devices. The idea is that
@@ -266,6 +300,7 @@ fills up.
- Type: integer
- Default: 100
- Can be changed online: yes
Target maximum number of throttled operations per second under the condition
of full journal. Set it to approximate random write iops of your data devices
@@ -275,6 +310,7 @@ of full journal. Set it to approximate random write iops of your data devices
- Type: integer
- Default: 100
- Can be changed online: yes
Target maximum bandwidth in MB/s of throttled operations per second under
the condition of full journal. Set it to approximate linear write
@@ -284,6 +320,7 @@ performance of your data devices (HDDs).
- Type: integer
- Default: 1
- Can be changed online: yes
Target maximum parallelism of throttled operations under the condition of
full journal. Set it to approximate internal parallelism of your data
@@ -293,6 +330,7 @@ devices (1 for HDDs, 4-8 for SSDs).
- Type: microseconds
- Default: 50
- Can be changed online: yes
Minimal computed delay to be applied to throttled operations. Usually
doesn't need to be changed.
@@ -302,4 +340,103 @@ doesn't need to be changed.
- Type: boolean
- Default: false
Lock all OSD memory to prevent it from being unloaded into swap with mlockall(). Requires sufficient ulimit -l (max locked memory).
Lock all OSD memory to prevent it from being unloaded into swap with
mlockall(). Requires sufficient ulimit -l (max locked memory).
## auto_scrub
- Type: boolean
- Default: false
- Can be changed online: yes
Data scrubbing is the process of background verification of copies to find
and repair corrupted blocks. It's not run automatically by default since
it's a new feature. Set this parameter to true to enable automatic scrubs.
This parameter makes OSDs automatically schedule data scrubbing of clean PGs
every `scrub_interval` (see below). You can also start/schedule scrubbing
manually by setting `next_scrub` JSON key to the desired UNIX time of the
next scrub in `/pg/history/...` values in etcd.
## no_scrub
- Type: boolean
- Default: false
- Can be changed online: yes
Temporarily disable scrubbing and stop running scrubs.
## scrub_interval
- Type: string
- Default: 30d
- Can be changed online: yes
Default automatic scrubbing interval for all pools. Numbers without suffix
are treated as seconds, possible unit suffixes include 's' (seconds),
'm' (minutes), 'h' (hours), 'd' (days), 'M' (months) and 'y' (years).
## scrub_queue_depth
- Type: integer
- Default: 1
- Can be changed online: yes
Number of parallel scrubbing operations per one OSD.
## scrub_sleep
- Type: milliseconds
- Default: 0
- Can be changed online: yes
Additional interval between two consecutive scrubbing operations on one OSD.
Can be used to slow down scrubbing if it affects user load too much.
## scrub_list_limit
- Type: integer
- Default: 1000
- Can be changed online: yes
Number of objects to list in one listing operation during scrub.
## scrub_find_best
- Type: boolean
- Default: true
- Can be changed online: yes
Find and automatically restore best versions of objects with unmatched
copies. In replicated setups, the best version is the version with most
matching replicas. In EC setups, the best version is the subset of data
and parity chunks without mismatches.
The hypothetical situation where you might want to disable it is when
you have 3 replicas and you are paranoid that 2 HDDs out of 3 may silently
corrupt an object in the same way (for example, zero it out) and only
1 HDD will remain good. In this case disabling scrub_find_best may help
you to recover the data! See also scrub_ec_max_bruteforce below.
## scrub_ec_max_bruteforce
- Type: integer
- Default: 100
- Can be changed online: yes
Vitastor can locate corrupted chunks in EC setups with more than 1 parity
chunk by brute-forcing all possible error locations. This configuration
value limits the maximum number of checked combinations. You can try to
increase it if you have EC N+K setup with N and K large enough for
combination count `C(N+K-1, K-1) = (N+K-1)! / (K-1)! / N!` to be greater
than the default 100.
If there are too many possible combinations or if multiple combinations give
correct results then objects are marked inconsistent and aren't recovered
automatically.
In replicated setups bruteforcing isn't needed, Vitastor just assumes that
the variant with most available equal copies is correct. For example, if
you have 3 replicas and 1 of them differs, this one is considered to be
corrupted. But if there is no "best" version with more copies than all
others have then the object is also marked as inconsistent.

View File

@@ -8,7 +8,8 @@
Данные параметры используются только OSD, но, в отличие от дисковых параметров,
не фиксируются в момент инициализации дисков OSD и могут быть изменены в любой
момент с перезапуском OSD.
момент с помощью перезапуска OSD, а некоторые и без перезапуска, с помощью
изменения конфигурации в etcd.
- [etcd_report_interval](#etcd_report_interval)
- [run_primary](#run_primary)
@@ -18,6 +19,7 @@
- [autosync_interval](#autosync_interval)
- [autosync_writes](#autosync_writes)
- [recovery_queue_depth](#recovery_queue_depth)
- [recovery_pg_switch](#recovery_pg_switch)
- [recovery_sync_batch](#recovery_sync_batch)
- [readonly](#readonly)
- [no_recovery](#no_recovery)
@@ -38,6 +40,14 @@
- [throttle_target_parallelism](#throttle_target_parallelism)
- [throttle_threshold_us](#throttle_threshold_us)
- [osd_memlock](#osd_memlock)
- [auto_scrub](#auto_scrub)
- [no_scrub](#no_scrub)
- [scrub_interval](#scrub_interval)
- [scrub_queue_depth](#scrub_queue_depth)
- [scrub_sleep](#scrub_sleep)
- [scrub_list_limit](#scrub_list_limit)
- [scrub_find_best](#scrub_find_best)
- [scrub_ec_max_bruteforce](#scrub_ec_max_bruteforce)
## etcd_report_interval
@@ -92,6 +102,7 @@ RUNNING), подходящий под заданную маску. Также н
- Тип: секунды
- Значение по умолчанию: 5
- Можно менять на лету: да
Временной интервал отправки автоматических fsync-ов (операций очистки кэша)
каждым OSD для случая, когда режим immediate_commit отключён. fsync-и нужны
@@ -104,6 +115,7 @@ OSD, чтобы успевать очищать журнал - без них OSD
- Тип: целое число
- Значение по умолчанию: 128
- Можно менять на лету: да
Аналогично autosync_interval, но задаёт не временной интервал, а
максимальное количество незафиксированных операций записи перед
@@ -113,16 +125,30 @@ OSD, чтобы успевать очищать журнал - без них OSD
- Тип: целое число
- Значение по умолчанию: 4
- Можно менять на лету: да
Максимальное число операций восстановления на одном первичном OSD в любой
момент времени. На данный момент единственный параметр, который можно менять
для ускорения или замедления восстановления и перебалансировки данных, но
в планах реализация других параметров.
## recovery_pg_switch
- Тип: целое число
- Значение по умолчанию: 128
- Можно менять на лету: да
Число операций восстановления перед переключением на восстановление другой PG.
Идея заключается в том, чтобы восстанавливать все PG одновременно для более
равномерного распределения места и нагрузки, но при этом всё равно выигрывать
от глубины очереди восстановления, большей, чем 1. Деградированные PG в любом
случае сканируются первыми.
## recovery_sync_batch
- Тип: целое число
- Значение по умолчанию: 16
- Можно менять на лету: да
Максимальное число операций восстановления перед дополнительным fsync.
@@ -138,6 +164,7 @@ OSD, чтобы успевать очищать журнал - без них OSD
- Тип: булево (да/нет)
- Значение по умолчанию: false
- Можно менять на лету: да
Отключить автоматическое фоновое восстановление объектов. Обратите внимание,
что эта опция не отключает восстановление объектов, происходящее при
@@ -148,6 +175,7 @@ OSD.
- Тип: булево (да/нет)
- Значение по умолчанию: false
- Можно менять на лету: да
Отключить фоновое перемещение объектов между разными OSD. Отключение
означает, что PG, находящиеся в состоянии `has_misplaced`, будут оставлены
@@ -157,6 +185,7 @@ OSD.
- Тип: секунды
- Значение по умолчанию: 3
- Можно менять на лету: да
Временной интервал, с которым OSD печатают простую человекочитаемую
статистику выполнения операций в стандартный вывод.
@@ -165,6 +194,7 @@ OSD.
- Тип: секунды
- Значение по умолчанию: 10
- Можно менять на лету: да
Временной интервал, с которым OSD выводят в стандартный вывод список
медленных или зависших операций, если таковые имеются. Также время, при
@@ -174,6 +204,7 @@ OSD.
- Тип: секунды
- Значение по умолчанию: 60
- Можно менять на лету: да
Число секунд, через которое удалённые инод удаляется и из статистики OSD.
@@ -181,6 +212,7 @@ OSD.
- Тип: целое число
- Значение по умолчанию: 128
- Можно менять на лету: да
Максимальное число одновременных клиентских операций записи на один OSD.
Операции, превышающие этот лимит, не исполняются сразу, а сохраняются во
@@ -190,6 +222,7 @@ OSD.
- Тип: целое число
- Значение по умолчанию: 1
- Можно менять на лету: да
Flusher - это микро-поток (корутина), которая копирует данные из журнала в
основную область устройства данных. Их число настраивается динамически между
@@ -199,6 +232,7 @@ Flusher - это микро-поток (корутина), которая коп
- Тип: целое число
- Значение по умолчанию: 256
- Можно менять на лету: да
Максимальное число микро-потоков очистки журнала (см. выше min_flusher_count).
@@ -258,6 +292,7 @@ Flusher - это микро-поток (корутина), которая коп
- Тип: булево (да/нет)
- Значение по умолчанию: false
- Можно менять на лету: да
Разрешить мягкое ограничение скорости журналируемой записи. Полезно для
гибридных OSD с быстрыми устройствами метаданных и медленными устройствами
@@ -276,6 +311,7 @@ Flusher - это микро-поток (корутина), которая коп
- Тип: целое число
- Значение по умолчанию: 100
- Можно менять на лету: да
Расчётное максимальное число ограничиваемых операций в секунду при условии
отсутствия свободного места в журнале. Устанавливайте приблизительно равным
@@ -286,6 +322,7 @@ Flusher - это микро-поток (корутина), которая коп
- Тип: целое число
- Значение по умолчанию: 100
- Можно менять на лету: да
Расчётный максимальный размер в МБ/с ограничиваемых операций в секунду при
условии отсутствия свободного места в журнале. Устанавливайте приблизительно
@@ -296,6 +333,7 @@ Flusher - это микро-поток (корутина), которая коп
- Тип: целое число
- Значение по умолчанию: 1
- Можно менять на лету: да
Расчётный максимальный параллелизм ограничиваемых операций в секунду при
условии отсутствия свободного места в журнале. Устанавливайте приблизительно
@@ -306,6 +344,7 @@ Flusher - это микро-поток (корутина), которая коп
- Тип: микросекунды
- Значение по умолчанию: 50
- Можно менять на лету: да
Минимальная применимая к ограничиваемым операциям задержка. Обычно не
требует изменений.
@@ -315,4 +354,113 @@ Flusher - это микро-поток (корутина), которая коп
- Тип: булево (да/нет)
- Значение по умолчанию: false
Блокировать всю память OSD с помощью mlockall, чтобы запретить её выгрузку в пространство подкачки. Требует достаточного значения ulimit -l (лимита заблокированной памяти).
Блокировать всю память OSD с помощью mlockall, чтобы запретить её выгрузку
в пространство подкачки. Требует достаточного значения ulimit -l (лимита
заблокированной памяти).
## auto_scrub
- Тип: булево (да/нет)
- Значение по умолчанию: false
- Можно менять на лету: да
Скраб - процесс фоновой проверки копий данных, предназначенный, чтобы
находить и исправлять повреждённые блоки. По умолчанию эти проверки ещё не
запускаются автоматически, так как являются новой функцией. Чтобы включить
автоматическое планирование скрабов, установите данный параметр в true.
Включённый параметр заставляет OSD автоматически планировать фоновую
проверку чистых PG раз в `scrub_interval` (см. ниже). Вы также можете
запустить или запланировать проверку вручную, установив значение ключа JSON
`next_scrub` внутри ключей etcd `/pg/history/...` в UNIX-время следующей
желаемой проверки.
## no_scrub
- Тип: булево (да/нет)
- Значение по умолчанию: false
- Можно менять на лету: да
Временно отключить и остановить запущенные скрабы.
## scrub_interval
- Тип: строка
- Значение по умолчанию: 30d
- Можно менять на лету: да
Интервал автоматической фоновой проверки по умолчанию для всех пулов.
Значения без указанной единицы измерения считаются в секундах, допустимые
символы единиц измерения в конце: 's' (секунды),
'm' (минуты), 'h' (часы), 'd' (дни), 'M' (месяца) или 'y' (годы).
## scrub_queue_depth
- Тип: целое число
- Значение по умолчанию: 1
- Можно менять на лету: да
Число параллельных операций фоновой проверки на один OSD.
## scrub_sleep
- Тип: миллисекунды
- Значение по умолчанию: 0
- Можно менять на лету: да
Дополнительный интервал ожидания после фоновой проверки каждого объекта на
одном OSD. Может использоваться для замедления скраба, если он слишком
сильно влияет на пользовательскую нагрузку.
## scrub_list_limit
- Тип: целое число
- Значение по умолчанию: 1000
- Можно менять на лету: да
Размер загружаемых за одну операцию списков объектов в процессе фоновой
проверки.
## scrub_find_best
- Тип: булево (да/нет)
- Значение по умолчанию: true
- Можно менять на лету: да
Находить и автоматически восстанавливать "лучшие версии" объектов с
несовпадающими копиями/частями. При использовании репликации "лучшая"
версия - версия, доступная в большем числе экземпляров, чем другие. При
использовании кодов коррекции ошибок "лучшая" версия - это подмножество
частей данных и чётности, полностью соответствующих друг другу.
Гипотетическая ситуация, в которой вы можете захотеть отключить этот
поиск - это если у вас 3 реплики и вы боитесь, что 2 диска из 3 могут
незаметно и одинаково повредить данные одного и того же объекта, например,
занулив его, и только 1 диск останется неповреждённым. В этой ситуации
отключение этого параметра поможет вам восстановить данные! Смотрите также
описание следующего параметра - scrub_ec_max_bruteforce.
## scrub_ec_max_bruteforce
- Тип: целое число
- Значение по умолчанию: 100
- Можно менять на лету: да
Vitastor старается определить повреждённые части объектов при использовании
EC (кодов коррекции ошибок) с более, чем 1 диском чётности, путём перебора
всех возможных комбинаций ошибочных частей. Данное значение конфигурации
ограничивает число перебираемых комбинаций. Вы можете попробовать поднять
его, если используете схему кодирования EC N+K с N и K, достаточно большими
для того, чтобы число сочетаний `C(N+K-1, K-1) = (N+K-1)! / (K-1)! / N!`
было больше, чем стандартное значение 100.
Если возможных комбинаций слишком много или если корректная комбинаций не
определяется однозначно, объекты помечаются неконсистентными (inconsistent)
и не восстанавливаются автоматически.
При использовании репликации перебор не нужен, Vitastor просто предполагает,
что вариант объекта с наибольшим количеством одинаковых копий корректен.
Например, если вы используете 3 реплики и 1 из них отличается, эта 1 копия
считается некорректной. Однако, если "лучшую" версию с числом доступных
копий большим, чем у всех других версий, найти невозможно, то объект тоже
маркируется неконсистентным.

View File

@@ -40,6 +40,7 @@ Parameters:
- [root_node](#root_node)
- [osd_tags](#osd_tags)
- [primary_affinity_tags](#primary_affinity_tags)
- [scrub_interval](#scrub_interval)
Examples:
@@ -272,6 +273,13 @@ Specifies OSD tags to prefer putting primary OSDs in this pool to.
Note that for EC/XOR pools Vitastor always prefers to put primary OSD on one
of the OSDs containing a data chunk for a PG.
## scrub_interval
- Type: time interval (number + unit s/m/h/d/M/y)
Automatic scrubbing interval for this pool. Overrides
[global scrub_interval setting](osd.en.md#scrub_interval).
# Examples
## Replicated pool

View File

@@ -39,6 +39,7 @@
- [root_node](#root_node)
- [osd_tags](#osd_tags)
- [primary_affinity_tags](#primary_affinity_tags)
- [scrub_interval](#scrub_interval)
Примеры:
@@ -276,6 +277,13 @@ PG в Vitastor эферемерны, то есть вы можете менят
для PG этого пула. Имейте в виду, что для EC-пулов Vitastor также всегда
предпочитает помещать первичный OSD на один из OSD с данными, а не с чётностью.
## scrub_interval
- Тип: временной интервал (число + единица измерения s/m/h/d/M/y)
Интервал скраба, то есть, автоматической фоновой проверки данных для данного пула.
Переопределяет [глобальную настройку scrub_interval](osd.ru.md#scrub_interval).
# Примеры
## Реплицированный пул

View File

@@ -11,13 +11,21 @@
- name: etcd_address
type: string or array of strings
type_ru: строка или массив строк
online: true
info: |
etcd connection endpoint(s). Multiple endpoints may be delimited by "," or
specified in a JSON array `["10.0.115.10:2379/v3","10.0.115.11:2379/v3"]`.
Note that https is not supported for etcd connections yet.
etcd connection endpoints can be changed online by updating global
configuration in etcd itself - this allows to switch the cluster to new
etcd addresses without downtime.
info_ru: |
Адрес(а) подключения к etcd. Несколько адресов могут разделяться запятой
или указываться в виде JSON-массива `["10.0.115.10:2379/v3","10.0.115.11:2379/v3"]`.
Адреса подключения к etcd можно поменять на лету, обновив конфигурацию в
самом etcd - это позволяет переключить кластер на новые etcd без остановки.
- name: etcd_prefix
type: string
default: "/vitastor"
@@ -31,5 +39,6 @@
- name: log_level
type: int
default: 0
online: true
info: Log level. Raise if you want more verbose output.
info_ru: Уровень логгирования. Повысьте, если хотите более подробный вывод.

145
docs/config/src/include.js Executable file
View File

@@ -0,0 +1,145 @@
#!/usr/bin/nodejs
const fsp = require('fs').promises;
run(process.argv).catch(console.error);
async function run(argv)
{
if (argv.length < 3)
{
console.log('Markdown preprocessor\nUSAGE: ./include.js file.md');
return;
}
const index_file = await fsp.realpath(argv[2]);
const re = /(\{\{[\s\S]*?\}\}|\[[^\]]+\]\([^\)]+\)|(?:^|\n)#[^\n]+)/;
let text = await fsp.readFile(index_file, { encoding: 'utf-8' });
text = text.split(re);
let included = {};
let heading = 0, heading_name = '', m;
for (let i = 0; i < text.length; i++)
{
if (text[i].substr(0, 2) == '{{')
{
// Inclusion
let incfile = text[i].substr(2, text[i].length-4);
let section = null;
let indent = heading;
incfile = incfile.replace(/\s*\|\s*indent\s*=\s*(-?\d+)\s*$/, (m, m1) => { indent = parseInt(m1); return ''; });
incfile = incfile.replace(/\s*#\s*([^#]+)$/, (m, m1) => { section = m1; return ''; });
let inc_heading = section;
incfile = rel2abs(index_file, incfile);
let inc = await fsp.readFile(incfile, { encoding: 'utf-8' });
inc = inc.trim().replace(/^[\s\S]+?\n#/, '#'); // remove until the first header
inc = inc.split(re);
const indent_str = new Array(indent+1).join('#');
let section_start = -1, section_end = -1;
for (let j = 0; j < inc.length; j++)
{
if ((m = /^(\n?)(#+\s*)([\s\S]+)$/.exec(inc[j])))
{
if (!inc_heading)
{
inc_heading = m[3].trim();
}
if (section)
{
if (m[3].trim() == section)
section_start = j;
else if (section_start >= 0)
{
section_end = j;
break;
}
}
inc[j] = m[1] + indent_str + m[2] + m[3];
}
else if ((m = /^(\[[^\]]+\]\()([^\)]+)(\))$/.exec(inc[j])) && !/^https?:(\/\/)|^#/.exec(m[2]))
{
const abs_m2 = rel2abs(incfile, m[2]);
const rel_m = abs2rel(__filename, abs_m2);
if (rel_m.substr(0, 9) == '../../../') // outside docs
inc[j] = m[1] + 'https://git.yourcmc.ru/vitalif/vitastor/src/branch/master/'+rel2abs('docs/config/src/include.js', rel_m) + m[3];
else
inc[j] = m[1] + abs_m2 + m[3];
}
}
if (section)
{
inc = section_start >= 0 ? inc.slice(section_start, section_end < 0 ? inc.length : section_end) : [];
}
if (inc.length)
{
if (!inc_heading)
inc_heading = heading_name||'';
included[incfile+(section ? '#'+section : '')] = '#'+inc_heading.toLowerCase().replace(/\P{L}+/ug, '-').replace(/^-|-$/g, '');
inc[0] = inc[0].replace(/^\s+/, '');
inc[inc.length-1] = inc[inc.length-1].replace(/\s+$/, '');
}
text.splice(i, 1, ...inc);
i = i + inc.length - 1;
}
else if ((m = /^\n?(#+)\s*([\s\S]+)$/.exec(text[i])))
{
// Heading
heading = m[1].length;
heading_name = m[2].trim();
}
}
for (let i = 0; i < text.length; i++)
{
if ((m = /^(\[[^\]]+\]\()([^\)]+)(\))$/.exec(text[i])) && !/^https?:(\/\/)|^#/.exec(m[2]))
{
const p = m[2].indexOf('#');
if (included[m[2]])
{
text[i] = m[1]+included[m[2]]+m[3];
}
else if (p >= 0 && included[m[2].substr(0, p)])
{
text[i] = m[1]+m[2].substr(p)+m[3];
}
}
}
console.log(text.join(''));
}
function rel2abs(ref, rel)
{
rel = [ ...ref.replace(/^(.*)\/[^\/]+$/, '$1').split(/\/+/), ...rel.split(/\/+/) ];
return killdots(rel).join('/');
}
function abs2rel(ref, abs)
{
ref = ref.split(/\/+/);
abs = abs.split(/\/+/);
while (ref.length > 1 && ref[0] == abs[0])
{
ref.shift();
abs.shift();
}
for (let i = 1; i < ref.length; i++)
{
abs.unshift('..');
}
return killdots(abs).join('/');
}
function killdots(rel)
{
for (let i = 0; i < rel.length; i++)
{
if (rel[i] == '.')
{
rel.splice(i, 1);
i--;
}
else if (i >= 1 && rel[i] == '..' && rel[i-1] != '..')
{
rel.splice(i-1, 2);
i -= 2;
}
}
return rel;
}

View File

@@ -0,0 +1,65 @@
# Vitastor
{{../../../README.md#The Idea}}
{{../../../README.md#Talks and presentations}}
{{../../intro/features.en.md}}
{{../../intro/quickstart.en.md}}
{{../../intro/architecture.en.md}}
## Installation
{{../../installation/packages.en.md}}
{{../../installation/proxmox.en.md}}
{{../../installation/openstack.en.md}}
{{../../installation/kubernetes.en.md}}
{{../../installation/source.en.md}}
{{../../config.en.md|indent=1}}
{{../../config/common.en.md|indent=2}}
{{../../config/network.en.md|indent=2}}
{{../../config/layout-cluster.en.md|indent=2}}
{{../../config/layout-osd.en.md|indent=2}}
{{../../config/osd.en.md|indent=2}}
{{../../config/monitor.en.md|indent=2}}
{{../../config/pool.en.md|indent=2}}
{{../../config/inode.en.md|indent=2}}
## Usage
{{../../usage/cli.en.md}}
{{../../usage/disk.en.md}}
{{../../usage/fio.en.md}}
{{../../usage/nbd.en.md}}
{{../../usage/qemu.en.md}}
{{../../usage/nfs.en.md}}
## Performance
{{../../performance/understanding.en.md}}
{{../../performance/theoretical.en.md}}
{{../../performance/comparison1.en.md}}
{{../../intro/author.en.md|indent=1}}

View File

@@ -0,0 +1,65 @@
# Vitastor
{{../../../README-ru.md#Идея|indent=0}}
{{../../../README-ru.md#Презентации и записи докладов|indent=0}}
{{../../intro/features.ru.md}}
{{../../intro/quickstart.ru.md}}
{{../../intro/architecture.ru.md}}
## Установка
{{../../installation/packages.ru.md}}
{{../../installation/proxmox.ru.md}}
{{../../installation/openstack.ru.md}}
{{../../installation/kubernetes.ru.md}}
{{../../installation/source.ru.md}}
{{../../config.ru.md|indent=1}}
{{../../config/common.ru.md|indent=2}}
{{../../config/network.ru.md|indent=2}}
{{../../config/layout-cluster.ru.md|indent=2}}
{{../../config/layout-osd.ru.md|indent=2}}
{{../../config/osd.ru.md|indent=2}}
{{../../config/monitor.ru.md|indent=2}}
{{../../config/pool.ru.md|indent=2}}
{{../../config/inode.ru.md|indent=2}}
## Использование
{{../../usage/cli.ru.md}}
{{../../usage/disk.ru.md}}
{{../../usage/fio.ru.md}}
{{../../usage/nbd.ru.md}}
{{../../usage/qemu.ru.md}}
{{../../usage/nfs.ru.md}}
## Производительность
{{../../performance/understanding.ru.md}}
{{../../performance/theoretical.ru.md}}
{{../../performance/comparison1.ru.md}}
{{../../intro/author.ru.md|indent=1}}

View File

@@ -14,6 +14,7 @@ const L = {
toc_config: '[Configuration](../config.en.md)',
toc_usage: 'Usage',
toc_performance: 'Performance',
online: 'Can be changed online: yes',
},
ru: {
Documentation: 'Документация',
@@ -28,6 +29,7 @@ const L = {
toc_config: '[Конфигурация](../config.ru.md)',
toc_usage: 'Использование',
toc_performance: 'Производительность',
online: 'Можно менять на лету: да',
},
};
const types = {
@@ -70,6 +72,8 @@ for (const file of params_files)
out += `- ${L[lang]['Default'] || 'Default'}: ${c.default}\n`;
if (c.min !== undefined)
out += `- ${L[lang]['Minimum'] || 'Minimum'}: ${c.min}\n`;
if (c.online)
out += `- ${L[lang]['online'] || 'Can be changed online: yes'}\n`;
out += `\n`+(c["info_"+lang] || c["info"]).replace(/\s+$/, '');
}
const head = fs.readFileSync(__dirname+'/'+file+'.'+lang+'.md', { encoding: 'utf-8' });

View File

@@ -53,6 +53,12 @@
to work. For example, Mellanox ConnectX-3 and older adapters don't have
Implicit ODP, so they're unsupported by Vitastor. Run `ibv_devinfo -v` as
root to list available RDMA devices and their features.
Remember that you also have to configure your network switches if you use
RoCE/RoCEv2, otherwise you may experience unstable performance. Refer to
the manual of your network vendor for details about setting up the switch
for RoCEv2 correctly. Usually it means setting up Lossless Ethernet with
PFC (Priority Flow Control) and ECN (Explicit Congestion Notification).
info_ru: |
Название RDMA-устройства для связи с Vitastor OSD (например, "rocep5s0f0").
Имейте в виду, что поддержка RDMA в Vitastor требует функций устройства
@@ -61,6 +67,13 @@
потому не поддерживаются в Vitastor. Запустите `ibv_devinfo -v` от имени
суперпользователя, чтобы посмотреть список доступных RDMA-устройств, их
параметры и возможности.
Обратите внимание, что если вы используете RoCE/RoCEv2, вам также необходимо
правильно настроить для него коммутаторы, иначе вы можете столкнуться с
нестабильной производительностью. Подробную информацию о настройке
коммутатора для RoCEv2 ищите в документации производителя. Обычно это
подразумевает настройку сети без потерь на основе PFC (Priority Flow
Control) и ECN (Explicit Congestion Notification).
- name: rdma_port_num
type: int
default: 1
@@ -114,42 +127,58 @@
так что менять этот параметр обычно не нужно.
- name: rdma_max_msg
type: int
default: 1048576
default: 132096
info: Maximum size of a single RDMA send or receive operation in bytes.
info_ru: Максимальный размер одной RDMA-операции отправки или приёма.
- name: rdma_max_recv
type: int
default: 16
info: |
Maximum number of RDMA receive buffers per connection (RDMA requires
preallocated buffers to receive data). Each buffer is `rdma_max_msg` bytes
in size. So this setting directly affects memory usage: a single Vitastor
RDMA client uses `rdma_max_recv * rdma_max_msg * OSD_COUNT` bytes of memory.
Default is roughly 2 MB * number of OSDs.
info_ru: |
Максимальное число буферов для RDMA-приёма данных на одно соединение
(RDMA требует заранее выделенных буферов для приёма данных). Каждый буфер
имеет размер `rdma_max_msg` байт. Таким образом, настройка прямо влияет на
потребление памяти - один Vitastor-клиент с RDMA использует
`rdma_max_recv * rdma_max_msg * ЧИСЛО_OSD` байт памяти, по умолчанию -
примерно 2 МБ * число OSD.
- name: rdma_max_send
type: int
default: 8
info: |
Maximum number of parallel RDMA receive operations. Note that this number
of receive buffers `rdma_max_msg` in size are allocated for each client,
so this setting actually affects memory usage. This is because RDMA receive
operations are (sadly) still not zero-copy in Vitastor. It may be fixed in
later versions.
Maximum number of outstanding RDMA send operations per connection. Should be
less than `rdma_max_recv` so the receiving side doesn't run out of buffers.
Doesn't affect memory usage - additional memory isn't allocated for send
operations.
info_ru: |
Максимальное число параллельных RDMA-операций получения данных. Следует
иметь в виду, что данное число буферов размером `rdma_max_msg` выделяется
для каждого подключённого клиентского соединения, так что данная настройка
влияет на потребление памяти. Это так потому, что RDMA-приём данных в
Vitastor, увы, всё равно не является zero-copy, т.е. всё равно 1 раз
копирует данные в памяти. Данная особенность, возможно, будет исправлена в
более новых версиях Vitastor.
Максимальное число RDMA-операций отправки, отправляемых в очередь одного
соединения. Желательно, чтобы оно было меньше `rdma_max_recv`, чтобы
у принимающей стороны в процессе работы не заканчивались буферы на приём.
Не влияет на потребление памяти - дополнительная память на операции отправки
не выделяется.
- name: peer_connect_interval
type: sec
min: 1
default: 5
online: true
info: Interval before attempting to reconnect to an unavailable OSD.
info_ru: Время ожидания перед повторной попыткой соединиться с недоступным OSD.
- name: peer_connect_timeout
type: sec
min: 1
default: 5
online: true
info: Timeout for OSD connection attempts.
info_ru: Максимальное время ожидания попытки соединения с OSD.
- name: osd_idle_timeout
type: sec
min: 1
default: 5
online: true
info: |
OSD connection inactivity time after which clients and other OSDs send
keepalive requests to check state of the connection.
@@ -160,6 +189,7 @@
type: sec
min: 1
default: 5
online: true
info: |
Maximum time to wait for OSD keepalive responses. If an OSD doesn't respond
within this time, the connection to it is dropped and a reconnection attempt
@@ -172,6 +202,7 @@
type: ms
min: 50
default: 500
online: true
info: |
OSDs respond to clients with a special error code when they receive I/O
requests for a PG that's not synchronized and started. This parameter sets
@@ -185,6 +216,7 @@
- name: max_etcd_attempts
type: int
default: 5
online: true
info: |
Maximum number of attempts for etcd requests which can't be retried
indefinitely.
@@ -194,6 +226,7 @@
- name: etcd_quick_timeout
type: ms
default: 1000
online: true
info: |
Timeout for etcd requests which should complete quickly, like lease refresh.
info_ru: |
@@ -202,6 +235,7 @@
- name: etcd_slow_timeout
type: ms
default: 5000
online: true
info: Timeout for etcd requests which are allowed to wait for some time.
info_ru: |
Максимальное время выполнения запросов к etcd, для которых не обязательно
@@ -209,6 +243,7 @@
- name: etcd_keepalive_timeout
type: sec
default: max(30, etcd_report_interval*2)
online: true
info: |
Timeout for etcd connection HTTP Keep-Alive. Should be higher than
etcd_report_interval to guarantee that keepalive actually works.
@@ -218,6 +253,7 @@
- name: etcd_ws_keepalive_timeout
type: sec
default: 30
online: true
info: |
etcd websocket ping interval required to keep the connection alive and
detect disconnections quickly.
@@ -226,6 +262,7 @@
- name: client_dirty_limit
type: int
default: 33554432
online: true
info: |
Without immediate_commit=all this parameter sets the limit of "dirty"
(not committed by fsync) data allowed by the client before forcing an

View File

@@ -1,4 +1,5 @@
# Runtime OSD Parameters
These parameters only apply to OSDs, are not fixed at the moment of OSD drive
initialization and can be changed with an OSD restart.
initialization and can be changed - either with an OSD restart or, for some of
them, even without restarting by updating configuration in etcd.

View File

@@ -2,4 +2,5 @@
Данные параметры используются только OSD, но, в отличие от дисковых параметров,
не фиксируются в момент инициализации дисков OSD и могут быть изменены в любой
момент с перезапуском OSD.
момент с помощью перезапуска OSD, а некоторые и без перезапуска, с помощью
изменения конфигурации в etcd.

View File

@@ -66,6 +66,7 @@
- name: autosync_interval
type: sec
default: 5
online: true
info: |
Time interval at which automatic fsyncs/flushes are issued by each OSD when
the immediate_commit mode if disabled. fsyncs are required because without
@@ -83,6 +84,7 @@
- name: autosync_writes
type: int
default: 128
online: true
info: |
Same as autosync_interval, but sets the maximum number of uncommitted write
operations before issuing an fsync operation internally.
@@ -93,6 +95,7 @@
- name: recovery_queue_depth
type: int
default: 4
online: true
info: |
Maximum recovery operations per one primary OSD at any given moment of time.
Currently it's the only parameter available to tune the speed or recovery
@@ -102,9 +105,25 @@
момент времени. На данный момент единственный параметр, который можно менять
для ускорения или замедления восстановления и перебалансировки данных, но
в планах реализация других параметров.
- name: recovery_pg_switch
type: int
default: 128
online: true
info: |
Number of recovery operations before switching to recovery of the next PG.
The idea is to mix all PGs during recovery for more even space and load
distribution but still benefit from recovery queue depth greater than 1.
Degraded PGs are anyway scanned first.
info_ru: |
Число операций восстановления перед переключением на восстановление другой PG.
Идея заключается в том, чтобы восстанавливать все PG одновременно для более
равномерного распределения места и нагрузки, но при этом всё равно выигрывать
от глубины очереди восстановления, большей, чем 1. Деградированные PG в любом
случае сканируются первыми.
- name: recovery_sync_batch
type: int
default: 16
online: true
info: Maximum number of recovery operations before issuing an additional fsync.
info_ru: Максимальное число операций восстановления перед дополнительным fsync.
- name: readonly
@@ -119,6 +138,7 @@
- name: no_recovery
type: bool
default: false
online: true
info: |
Disable automatic background recovery of objects. Note that it doesn't
affect implicit recovery of objects happening during writes - a write is
@@ -131,6 +151,7 @@
- name: no_rebalance
type: bool
default: false
online: true
info: |
Disable background movement of data between different OSDs. Disabling it
means that PGs in the `has_misplaced` state will be left in it indefinitely.
@@ -141,6 +162,7 @@
- name: print_stats_interval
type: sec
default: 3
online: true
info: |
Time interval at which OSDs print simple human-readable operation
statistics on stdout.
@@ -150,6 +172,7 @@
- name: slow_log_interval
type: sec
default: 10
online: true
info: |
Time interval at which OSDs dump slow or stuck operations on stdout, if
they're any. Also it's the time after which an operation is considered
@@ -161,6 +184,7 @@
- name: inode_vanish_time
type: sec
default: 60
online: true
info: |
Number of seconds after which a deleted inode is removed from OSD statistics.
info_ru: |
@@ -168,6 +192,7 @@
- name: max_write_iodepth
type: int
default: 128
online: true
info: |
Parallel client write operation limit per one OSD. Operations that exceed
this limit are pushed to a temporary queue instead of being executed
@@ -179,6 +204,7 @@
- name: min_flusher_count
type: int
default: 1
online: true
info: |
Flusher is a micro-thread that moves data from the journal to the data
area of the device. Their number is auto-tuned between minimum and maximum.
@@ -190,6 +216,7 @@
- name: max_flusher_count
type: int
default: 256
online: true
info: |
Maximum number of journal flushers (see above min_flusher_count).
info_ru: |
@@ -270,6 +297,7 @@
- name: throttle_small_writes
type: bool
default: false
online: true
info: |
Enable soft throttling of small journaled writes. Useful for hybrid OSDs
with fast journal/metadata devices and slow data devices. The idea is that
@@ -298,6 +326,7 @@
- name: throttle_target_iops
type: int
default: 100
online: true
info: |
Target maximum number of throttled operations per second under the condition
of full journal. Set it to approximate random write iops of your data devices
@@ -310,6 +339,7 @@
- name: throttle_target_mbs
type: int
default: 100
online: true
info: |
Target maximum bandwidth in MB/s of throttled operations per second under
the condition of full journal. Set it to approximate linear write
@@ -322,6 +352,7 @@
- name: throttle_target_parallelism
type: int
default: 1
online: true
info: |
Target maximum parallelism of throttled operations under the condition of
full journal. Set it to approximate internal parallelism of your data
@@ -334,6 +365,7 @@
- name: throttle_threshold_us
type: us
default: 50
online: true
info: |
Minimal computed delay to be applied to throttled operations. Usually
doesn't need to be changed.
@@ -343,10 +375,151 @@
- name: osd_memlock
type: bool
default: false
info: >
info: |
Lock all OSD memory to prevent it from being unloaded into swap with
mlockall(). Requires sufficient ulimit -l (max locked memory).
info_ru: >
info_ru: |
Блокировать всю память OSD с помощью mlockall, чтобы запретить её выгрузку
в пространство подкачки. Требует достаточного значения ulimit -l (лимита
заблокированной памяти).
- name: auto_scrub
type: bool
default: false
online: true
info: |
Data scrubbing is the process of background verification of copies to find
and repair corrupted blocks. It's not run automatically by default since
it's a new feature. Set this parameter to true to enable automatic scrubs.
This parameter makes OSDs automatically schedule data scrubbing of clean PGs
every `scrub_interval` (see below). You can also start/schedule scrubbing
manually by setting `next_scrub` JSON key to the desired UNIX time of the
next scrub in `/pg/history/...` values in etcd.
info_ru: |
Скраб - процесс фоновой проверки копий данных, предназначенный, чтобы
находить и исправлять повреждённые блоки. По умолчанию эти проверки ещё не
запускаются автоматически, так как являются новой функцией. Чтобы включить
автоматическое планирование скрабов, установите данный параметр в true.
Включённый параметр заставляет OSD автоматически планировать фоновую
проверку чистых PG раз в `scrub_interval` (см. ниже). Вы также можете
запустить или запланировать проверку вручную, установив значение ключа JSON
`next_scrub` внутри ключей etcd `/pg/history/...` в UNIX-время следующей
желаемой проверки.
- name: no_scrub
type: bool
default: false
online: true
info: |
Temporarily disable scrubbing and stop running scrubs.
info_ru: |
Временно отключить и остановить запущенные скрабы.
- name: scrub_interval
type: string
default: 30d
online: true
info: |
Default automatic scrubbing interval for all pools. Numbers without suffix
are treated as seconds, possible unit suffixes include 's' (seconds),
'm' (minutes), 'h' (hours), 'd' (days), 'M' (months) and 'y' (years).
info_ru: |
Интервал автоматической фоновой проверки по умолчанию для всех пулов.
Значения без указанной единицы измерения считаются в секундах, допустимые
символы единиц измерения в конце: 's' (секунды),
'm' (минуты), 'h' (часы), 'd' (дни), 'M' (месяца) или 'y' (годы).
- name: scrub_queue_depth
type: int
default: 1
online: true
info: |
Number of parallel scrubbing operations per one OSD.
info_ru: |
Число параллельных операций фоновой проверки на один OSD.
- name: scrub_sleep
type: ms
default: 0
online: true
info: |
Additional interval between two consecutive scrubbing operations on one OSD.
Can be used to slow down scrubbing if it affects user load too much.
info_ru: |
Дополнительный интервал ожидания после фоновой проверки каждого объекта на
одном OSD. Может использоваться для замедления скраба, если он слишком
сильно влияет на пользовательскую нагрузку.
- name: scrub_list_limit
type: int
default: 1000
online: true
info: |
Number of objects to list in one listing operation during scrub.
info_ru: |
Размер загружаемых за одну операцию списков объектов в процессе фоновой
проверки.
- name: scrub_find_best
type: bool
default: true
online: true
info: |
Find and automatically restore best versions of objects with unmatched
copies. In replicated setups, the best version is the version with most
matching replicas. In EC setups, the best version is the subset of data
and parity chunks without mismatches.
The hypothetical situation where you might want to disable it is when
you have 3 replicas and you are paranoid that 2 HDDs out of 3 may silently
corrupt an object in the same way (for example, zero it out) and only
1 HDD will remain good. In this case disabling scrub_find_best may help
you to recover the data! See also scrub_ec_max_bruteforce below.
info_ru: |
Находить и автоматически восстанавливать "лучшие версии" объектов с
несовпадающими копиями/частями. При использовании репликации "лучшая"
версия - версия, доступная в большем числе экземпляров, чем другие. При
использовании кодов коррекции ошибок "лучшая" версия - это подмножество
частей данных и чётности, полностью соответствующих друг другу.
Гипотетическая ситуация, в которой вы можете захотеть отключить этот
поиск - это если у вас 3 реплики и вы боитесь, что 2 диска из 3 могут
незаметно и одинаково повредить данные одного и того же объекта, например,
занулив его, и только 1 диск останется неповреждённым. В этой ситуации
отключение этого параметра поможет вам восстановить данные! Смотрите также
описание следующего параметра - scrub_ec_max_bruteforce.
- name: scrub_ec_max_bruteforce
type: int
default: 100
online: true
info: |
Vitastor can locate corrupted chunks in EC setups with more than 1 parity
chunk by brute-forcing all possible error locations. This configuration
value limits the maximum number of checked combinations. You can try to
increase it if you have EC N+K setup with N and K large enough for
combination count `C(N+K-1, K-1) = (N+K-1)! / (K-1)! / N!` to be greater
than the default 100.
If there are too many possible combinations or if multiple combinations give
correct results then objects are marked inconsistent and aren't recovered
automatically.
In replicated setups bruteforcing isn't needed, Vitastor just assumes that
the variant with most available equal copies is correct. For example, if
you have 3 replicas and 1 of them differs, this one is considered to be
corrupted. But if there is no "best" version with more copies than all
others have then the object is also marked as inconsistent.
info_ru: |
Vitastor старается определить повреждённые части объектов при использовании
EC (кодов коррекции ошибок) с более, чем 1 диском чётности, путём перебора
всех возможных комбинаций ошибочных частей. Данное значение конфигурации
ограничивает число перебираемых комбинаций. Вы можете попробовать поднять
его, если используете схему кодирования EC N+K с N и K, достаточно большими
для того, чтобы число сочетаний `C(N+K-1, K-1) = (N+K-1)! / (K-1)! / N!`
было больше, чем стандартное значение 100.
Если возможных комбинаций слишком много или если корректная комбинаций не
определяется однозначно, объекты помечаются неконсистентными (inconsistent)
и не восстанавливаются автоматически.
При использовании репликации перебор не нужен, Vitastor просто предполагает,
что вариант объекта с наибольшим количеством одинаковых копий корректен.
Например, если вы используете 3 реплики и 1 из них отличается, эта 1 копия
считается некорректной. Однако, если "лучшую" версию с числом доступных
копий большим, чем у всех других версий, найти невозможно, то объект тоже
маркируется неконсистентным.

View File

@@ -8,13 +8,13 @@
У Vitastor есть CSI-плагин для Kubernetes, поддерживающий RWO, а также блочные RWX, тома.
Для установки возьмите манифесты из директории [csi/deploy/](../csi/deploy/), поместите
вашу конфигурацию подключения к Vitastor в [csi/deploy/001-csi-config-map.yaml](../csi/deploy/001-csi-config-map.yaml),
настройте StorageClass в [csi/deploy/009-storage-class.yaml](../csi/deploy/009-storage-class.yaml)
Для установки возьмите манифесты из директории [csi/deploy/](../../csi/deploy/), поместите
вашу конфигурацию подключения к Vitastor в [csi/deploy/001-csi-config-map.yaml](../../csi/deploy/001-csi-config-map.yaml),
настройте StorageClass в [csi/deploy/009-storage-class.yaml](../../csi/deploy/009-storage-class.yaml)
и примените все `NNN-*.yaml` к вашей инсталляции Kubernetes.
```
for i in ./???-*.yaml; do kubectl apply -f $i; done
```
После этого вы сможете создавать PersistentVolume. Пример смотрите в файле [csi/deploy/example-pvc.yaml](../csi/deploy/example-pvc.yaml).
После этого вы сможете создавать PersistentVolume. Пример смотрите в файле [csi/deploy/example-pvc.yaml](../../csi/deploy/example-pvc.yaml).

View File

@@ -36,5 +36,5 @@ vitastor_pool_id = 1
image_upload_use_cinder_backend = True
```
To put Glance images in Vitastor, use [https://docs.openstack.org/cinder/pike/admin/blockstorage-volume-backed-image.html](volume-backed images),
To put Glance images in Vitastor, use [volume-backed images](https://docs.openstack.org/cinder/pike/admin/blockstorage-volume-backed-image.html),
although the support has not been verified yet.

View File

@@ -36,5 +36,5 @@ image_upload_use_cinder_backend = True
```
Чтобы помещать в Vitastor Glance-образы, нужно использовать
[https://docs.openstack.org/cinder/pike/admin/blockstorage-volume-backed-image.html](образы на основе томов Cinder),
[образы на основе томов Cinder](https://docs.openstack.org/cinder/pike/admin/blockstorage-volume-backed-image.html),
однако, поддержка этой функции ещё не проверялась.

View File

@@ -9,9 +9,10 @@
## Debian
- Trust Vitastor package signing key:
`wget -q -O - https://vitastor.io/debian/pubkey | sudo apt-key add -`
`wget https://vitastor.io/debian/pubkey.gpg -O /etc/apt/trusted.gpg.d/vitastor.gpg`
- Add Vitastor package repository to your /etc/apt/sources.list:
- Debian 11 (Bullseye/Sid): `deb https://vitastor.io/debian bullseye main`
- Debian 12 (Bookworm/Sid): `deb https://vitastor.io/debian bookworm main`
- Debian 11 (Bullseye): `deb https://vitastor.io/debian bullseye main`
- Debian 10 (Buster): `deb https://vitastor.io/debian buster main`
- For Debian 10 (Buster) also enable backports repository:
`deb http://deb.debian.org/debian buster-backports main`
@@ -20,15 +21,18 @@
## CentOS
- Add Vitastor package repository:
- CentOS 7: `yum install https://vitastor.io/rpms/centos/7/vitastor-release-1.0-1.el7.noarch.rpm`
- CentOS 8: `dnf install https://vitastor.io/rpms/centos/8/vitastor-release-1.0-1.el8.noarch.rpm`
- CentOS 7: `yum install https://vitastor.io/rpms/centos/7/vitastor-release.rpm`
- CentOS 8: `dnf install https://vitastor.io/rpms/centos/8/vitastor-release.rpm`
- AlmaLinux 9 and other RHEL 9 clones (Rocky, Oracle...): `dnf install https://vitastor.io/rpms/centos/9/vitastor-release.rpm`
- Enable EPEL: `yum/dnf install epel-release`
- Enable additional CentOS repositories:
- CentOS 7: `yum install centos-release-scl`
- CentOS 8: `dnf install centos-release-advanced-virtualization`
- RHEL 9 clones: not required
- Enable elrepo-kernel:
- CentOS 7: `yum install https://www.elrepo.org/elrepo-release-7.el7.elrepo.noarch.rpm`
- CentOS 8: `dnf install https://www.elrepo.org/elrepo-release-8.el8.elrepo.noarch.rpm`
- RHEL 9 clones: `dnf install https://www.elrepo.org/elrepo-release-9.el9.elrepo.noarch.rpm`
- Install packages: `yum/dnf install vitastor lpsolve etcd kernel-ml qemu-kvm`
## Installation requirements
@@ -42,3 +46,10 @@
- etcd 3.4.15 or newer. Earlier versions won't work because of various bugs,
for example [#12402](https://github.com/etcd-io/etcd/pull/12402).
- node.js 10 or newer
## Version archive
All previous Vitastor and other components (QEMU, etcd...) package builds
can be found here:
https://vitastor.io/archive/

View File

@@ -9,9 +9,10 @@
## Debian
- Добавьте ключ репозитория Vitastor:
`wget -q -O - https://vitastor.io/debian/pubkey | sudo apt-key add -`
`wget https://vitastor.io/debian/pubkey.gpg -O /etc/apt/trusted.gpg.d/vitastor.gpg`
- Добавьте репозиторий Vitastor в /etc/apt/sources.list:
- Debian 11 (Bullseye/Sid): `deb https://vitastor.io/debian bullseye main`
- Debian 12 (Bookworm/Sid): `deb https://vitastor.io/debian bookworm main`
- Debian 11 (Bullseye): `deb https://vitastor.io/debian bullseye main`
- Debian 10 (Buster): `deb https://vitastor.io/debian buster main`
- Для Debian 10 (Buster) также включите репозиторий backports:
`deb http://deb.debian.org/debian buster-backports main`
@@ -20,15 +21,18 @@
## CentOS
- Добавьте в систему репозиторий Vitastor:
- CentOS 7: `yum install https://vitastor.io/rpms/centos/7/vitastor-release-1.0-1.el7.noarch.rpm`
- CentOS 8: `dnf install https://vitastor.io/rpms/centos/8/vitastor-release-1.0-1.el8.noarch.rpm`
- CentOS 7: `yum install https://vitastor.io/rpms/centos/7/vitastor-release.rpm`
- CentOS 8: `dnf install https://vitastor.io/rpms/centos/8/vitastor-release.rpm`
- AlmaLinux 9 и другие клоны RHEL 9 (Rocky, Oracle...): `dnf install https://vitastor.io/rpms/centos/9/vitastor-release.rpm`
- Включите EPEL: `yum/dnf install epel-release`
- Включите дополнительные репозитории CentOS:
- CentOS 7: `yum install centos-release-scl`
- CentOS 8: `dnf install centos-release-advanced-virtualization`
- Клоны RHEL 9: не нужно
- Включите elrepo-kernel:
- CentOS 7: `yum install https://www.elrepo.org/elrepo-release-7.el7.elrepo.noarch.rpm`
- CentOS 8: `dnf install https://www.elrepo.org/elrepo-release-8.el8.elrepo.noarch.rpm`
- Клоны RHEL 9: `dnf install https://www.elrepo.org/elrepo-release-9.el9.elrepo.noarch.rpm`
- Установите пакеты: `yum/dnf install vitastor lpsolve etcd kernel-ml qemu-kvm`
## Установочные требования
@@ -41,3 +45,10 @@
- etcd 3.4.15 или новее. Более старые версии не будут работать из-за разных багов,
например, [#12402](https://github.com/etcd-io/etcd/pull/12402).
- node.js 10 или новее
## Архив предыдущих версий
Все предыдущие сборки пакетов Vitastor и других компонентов, таких, как QEMU
и etcd, можно скачать по следующей ссылке:
https://vitastor.io/archive/

View File

@@ -6,10 +6,10 @@
# Proxmox VE
To enable Vitastor support in Proxmox Virtual Environment (6.4 and 7.1 are supported):
To enable Vitastor support in Proxmox Virtual Environment (6.4-8.0 are supported):
- Add the corresponding Vitastor Debian repository into sources.list on Proxmox hosts
(buster for 6.4, bullseye for 7.1)
- Add the corresponding Vitastor Debian repository into sources.list on Proxmox hosts:
bookworm for 8.0, bullseye for 7.4, pve7.3 for 7.3, pve7.2 for 7.2, pve7.1 for 7.1, buster for 6.4
- Install vitastor-client, pve-qemu-kvm, pve-storage-vitastor (* or see note) packages from Vitastor repository
- Define storage in `/etc/pve/storage.cfg` (see below)
- Block network access from VMs to Vitastor network (to OSDs and etcd),
@@ -35,5 +35,5 @@ vitastor: vitastor
vitastor_nbd 0
```
\* Note: you can also manually copy [patches/PVE_VitastorPlugin.pm](patches/PVE_VitastorPlugin.pm) to Proxmox hosts
\* Note: you can also manually copy [patches/VitastorPlugin.pm](../../patches/VitastorPlugin.pm) to Proxmox hosts
as `/usr/share/perl5/PVE/Storage/Custom/VitastorPlugin.pm` instead of installing pve-storage-vitastor.

View File

@@ -4,12 +4,12 @@
[Read in English](proxmox.en.md)
# Proxmox
# Proxmox VE
Чтобы подключить Vitastor к Proxmox Virtual Environment (поддерживаются версии 6.4 и 7.1):
Чтобы подключить Vitastor к Proxmox Virtual Environment (поддерживаются версии 6.4-8.0):
- Добавьте соответствующий Debian-репозиторий Vitastor в sources.list на хостах Proxmox
(buster для 6.4, bullseye для 7.1)
- Добавьте соответствующий Debian-репозиторий Vitastor в sources.list на хостах Proxmox:
bookworm для 8.0, bullseye для 7.4, pve7.3 для 7.3, pve7.2 для 7.2, pve7.1 для 7.1, buster для 6.4
- Установите пакеты vitastor-client, pve-qemu-kvm, pve-storage-vitastor (* или см. сноску) из репозитория Vitastor
- Определите тип хранилища в `/etc/pve/storage.cfg` (см. ниже)
- Обязательно заблокируйте доступ от виртуальных машин к сети Vitastor (OSD и etcd), т.к. Vitastor (пока) не поддерживает аутентификацию
@@ -35,5 +35,5 @@ vitastor: vitastor
```
\* Примечание: вместо установки пакета pve-storage-vitastor вы можете вручную скопировать файл
[patches/PVE_VitastorPlugin.pm](patches/PVE_VitastorPlugin.pm) на хосты Proxmox как
[patches/VitastorPlugin.pm](../../patches/VitastorPlugin.pm) на хосты Proxmox как
`/usr/share/perl5/PVE/Storage/Custom/VitastorPlugin.pm`.

View File

@@ -44,7 +44,7 @@
depends linearly on drive capacity and data store block size which is 128 KB by default.
With 128 KB blocks metadata takes around 512 MB per 1 TB (which is still less than Ceph wants).
Journal is also kept in memory by default, but in SSD-only clusters it's only 32 MB, and in SSD+HDD
clusters, where it's beneficial to increase it, [inmemory_journal](docs/config/osd.en.md#inmemory_journal) can be disabled.
clusters, where it's beneficial to increase it, [inmemory_journal](../config/osd.en.md#inmemory_journal) can be disabled.
- Vitastor storage layer doesn't have internal copy-on-write or redirect-write. I know that maybe
it's possible to create a good copy-on-write storage, but it's much harder and makes performance
less deterministic, so CoW isn't used in Vitastor.

View File

@@ -156,7 +156,7 @@
блока хранилища (block_size, по умолчанию 128 КБ). С 128 КБ блоком потребление памяти
составляет примерно 512 МБ на 1 ТБ данных. Журналы по умолчанию тоже хранятся в памяти,
но в SSD-кластерах нужный размер журнала составляет всего 32 МБ, а в гибридных (SSD+HDD)
кластерах, в которых есть смысл делать журналы больше, можно отключить [inmemory_journal](../docs/config/osd.ru.md#inmemory_journal).
кластерах, в которых есть смысл делать журналы больше, можно отключить [inmemory_journal](../config/osd.ru.md#inmemory_journal).
- В Vitastor нет внутреннего copy-on-write. Я считаю, что реализация CoW-хранилища гораздо сложнее,
поэтому сложнее добиться устойчиво хороших результатов. Возможно, в один прекрасный день
я придумаю красивый алгоритм для CoW-хранилища, но пока нет — внутреннего CoW в Vitastor не будет.

View File

@@ -29,12 +29,13 @@
- Snapshots and copy-on-write image clones
- [Write throttling to smooth random write workloads in SSD+HDD configurations](../config/osd.en.md#throttle_small_writes)
- [RDMA/RoCEv2 support via libibverbs](../config/network.en.md#rdma_device)
- [Scrubbing without checksums](../config/osd.en.md#auto_scrub) (verification of copies)
## Plugins and tools
- [Debian and CentOS packages](../installation/packages.en.md)
- [Image management CLI (vitastor-cli)](../usage/cli.en.md)
- [Disk management CLI (vitastor-disk)](docs/usage/disk.en.md)
- [Disk management CLI (vitastor-disk)](../usage/disk.en.md)
- Generic user-space client library
- [Native QEMU driver](../usage/qemu.en.md)
- [Loadable fio engine for benchmarks](../usage/fio.en.md)
@@ -54,7 +55,6 @@ The following features are planned for the future:
- iSCSI proxy
- Multi-threaded client
- Faster failover
- Scrubbing without checksums (verification of replicas)
- Checksums
- Tiered storage (SSD caching)
- NVDIMM support

View File

@@ -13,7 +13,7 @@
## Серверные функции
- Базовая часть - надёжное кластерное блочное хранилище без единой точки отказа
- [Производительность](../comparison1.ru.md) ;-D
- [Производительность](../performance/comparison1.ru.md) ;-D
- [Несколько схем отказоустойчивости](../config/pool.ru.md#scheme): репликация, XOR n+1 (1 диск чётности), коды коррекции ошибок
Рида-Соломона на основе библиотек jerasure и ISA-L с любым числом дисков данных и чётности в группе
- Конфигурация через простые человекочитаемые JSON-структуры в etcd
@@ -31,12 +31,13 @@
- Снапшоты и copy-on-write клоны
- [Сглаживание производительности случайной записи в SSD+HDD конфигурациях](../config/osd.ru.md#throttle_small_writes)
- [Поддержка RDMA/RoCEv2 через libibverbs](../config/network.ru.md#rdma_device)
- [Фоновая проверка целостности без контрольных сумм](../config/osd.ru.md#auto_scrub) (сверка копий)
## Драйверы и инструменты
- [Пакеты для Debian и CentOS](../installation/packages.ru.md)
- [Консольный интерфейс управления образами (vitastor-cli)](../usage/cli.ru.md)
- [Инструмент управления дисками (vitastor-disk)](docs/usage/disk.ru.md)
- [Инструмент управления дисками (vitastor-disk)](../usage/disk.ru.md)
- Общая пользовательская клиентская библиотека для работы с кластером
- [Драйвер диска для QEMU](../usage/qemu.ru.md)
- [Драйвер диска для утилиты тестирования производительности fio](../usage/fio.ru.md)
@@ -54,7 +55,6 @@
- iSCSI-прокси
- Многопоточный клиент
- Более быстрое переключение при отказах
- Фоновая проверка целостности без контрольных сумм (сверка реплик)
- Контрольные суммы
- Поддержка SSD-кэширования (tiered storage)
- Поддержка NVDIMM

View File

@@ -45,7 +45,9 @@ On the monitor hosts:
}
```
- Initialize OSDs:
- SSD-only: `vitastor-disk prepare /dev/sdXXX [/dev/sdYYY ...]`
- SSD-only: `vitastor-disk prepare /dev/sdXXX [/dev/sdYYY ...]`. You can add
`--disable_data_fsync off` to leave disk cache enabled if you use desktop
SSDs without capacitors.
- Hybrid, SSD+HDD: `vitastor-disk prepare --hybrid /dev/sdXXX [/dev/sdYYY ...]`.
Pass all your devices (HDD and SSD) to this script &mdash; it will partition disks and initialize journals on its own.
This script skips HDDs which are already partitioned so if you want to use non-empty disks for
@@ -53,7 +55,9 @@ On the monitor hosts:
but some free unpartitioned space must be available because the script creates new partitions for journals.
- You can change OSD configuration in units or in `vitastor.conf`.
Check [Configuration Reference](../config.en.md) for parameter descriptions.
- If all your drives have capacitors, create global configuration in etcd: \
- If all your drives have capacitors, and even if not, but if you ran `vitastor-disk`
without `--disable_data_fsync off` at the first step, then put the following
setting into etcd: \
`etcdctl --endpoints=... put /vitastor/config/global '{"immediate_commit":"all"}'`
- Start all OSDs: `systemctl start vitastor.target`
@@ -70,11 +74,15 @@ For EC pools the configuration should look like the following:
```
etcdctl --endpoints=... put /vitastor/config/pools '{"2":{"name":"ecpool",
"scheme":"ec","pg_size":4,"parity_chunks":2,"pg_minsize":2,"pg_count":256,"failure_domain":"host"}`
"scheme":"ec","pg_size":4,"parity_chunks":2,"pg_minsize":2,"pg_count":256,"failure_domain":"host"}}'
```
After you do this, one of the monitors will configure PGs and OSDs will start them.
If you use HDDs you should also add `"block_size": 1048576` to pool configuration.
The other option is to add it into /vitastor/config/global, in this case it will
apply to all pools by default.
## Check cluster status
`vitastor-cli status`

View File

@@ -45,7 +45,9 @@
}
```
- Инициализуйте OSD:
- SSD: `vitastor-disk prepare /dev/sdXXX [/dev/sdYYY ...]`
- SSD: `vitastor-disk prepare /dev/sdXXX [/dev/sdYYY ...]`. Если вы используете
десктопные SSD без конденсаторов, можете оставить кэш включённым, добавив
опцию `--disable_data_fsync off`.
- Гибридные, SSD+HDD: `vitastor-disk prepare --hybrid /dev/sdXXX [/dev/sdYYY ...]`.
Передайте все ваши SSD и HDD скрипту в командной строке подряд, скрипт автоматически выделит
разделы под журналы на SSD и данные на HDD. Скрипт пропускает HDD, на которых уже есть разделы
@@ -54,8 +56,11 @@
для журналов, на SSD должно быть доступно свободное нераспределённое место.
- Вы можете менять параметры OSD в юнитах systemd или в `vitastor.conf`. Описания параметров
смотрите в [справке по конфигурации](../config.ru.md).
- Если все ваши диски - серверные с конденсаторами, пропишите это в глобальную конфигурацию в etcd: \
`etcdctl --endpoints=... put /vitastor/config/global '{"immediate_commit":"all"}'`
- Если все ваши диски - серверные с конденсаторами, и даже если нет, но при этом
вы не добавляли опцию `--disable_data_fsync off` на первом шаге, а `vitastor-disk`
не ругался на невозможность отключения кэша дисков, пропишите следующую настройку
в глобальную конфигурацию в etcd: \
`etcdctl --endpoints=... put /vitastor/config/global '{"immediate_commit":"all"}'`.
- Запустите все OSD: `systemctl start vitastor.target`
## Создайте пул
@@ -71,11 +76,15 @@ etcdctl --endpoints=... put /vitastor/config/pools '{"1":{"name":"testpool",
```
etcdctl --endpoints=... put /vitastor/config/pools '{"2":{"name":"ecpool",
"scheme":"ec","pg_size":4,"parity_chunks":2,"pg_minsize":2,"pg_count":256,"failure_domain":"host"}`
"scheme":"ec","pg_size":4,"parity_chunks":2,"pg_minsize":2,"pg_count":256,"failure_domain":"host"}}'
```
После этого один из мониторов должен сконфигурировать PG, а OSD должны запустить их.
Если вы используете HDD-диски, то добавьте в конфигурацию пулов опцию `"block_size": 1048576`.
Также эту опцию можно добавить в /vitastor/config/global, в этом случае она будет
применяться ко всем пулам по умолчанию.
## Проверьте состояние кластера
`vitastor-cli status`

View File

@@ -35,15 +35,24 @@ Write amplification for 4 KB blocks is usually 3-5 in Vitastor:
If you manage to get an SSD which handles 512 byte blocks well (Optane?) you may
lower 1, 3 and 4 to 512 bytes (1/8 of data size) and get WA as low as 2.375.
Implemented NVDIMM support can basically eliminate WA at all - all extra writes will
go to DRAM memory. But this requires a test cluster with NVDIMM - please contact me
if you want to provide me with such cluster for tests.
Lazy fsync also reduces WA for parallel workloads because journal blocks are only
written when they fill up or fsync is requested.
## In Practice
In practice, using tests from [Understanding Performance](understanding.en.md)
and good server-grade SSD/NVMe drives, you should head for:
In practice, using tests from [Understanding Performance](understanding.en.md), decent TCP network,
good server-grade SSD/NVMe drives and disabled CPU power saving, you should head for:
- At least 5000 T1Q1 replicated read and write iops (maximum 0.2ms latency)
- At least 5000 T1Q1 EC read IOPS and at least 2200 EC write IOPS (maximum 0.45ms latency)
- At least ~80k parallel read iops or ~30k write iops per 1 core (1 OSD)
- Disk-speed or wire-speed linear reads and writes, whichever is the bottleneck in your case
Lower results may mean that you have bad drives, bad network or some kind of misconfiguration.
Current latency records:
- 9668 T1Q1 replicated write iops (0.103 ms latency) with TCP and NVMe
- 9143 T1Q1 replicated read iops (0.109 ms latency) with TCP and NVMe

View File

@@ -36,6 +36,25 @@ WA (мультипликатор записи) для 4 КБ блоков в Vit
Если вы найдёте SSD, хорошо работающий с 512-байтными блоками данных (Optane?),
то 1, 3 и 4 можно снизить до 512 байт (1/8 от размера данных) и получить WA всего 2.375.
Если реализовать поддержку NVDIMM, то WA можно, условно говоря, ликвидировать вообще - все
дополнительные операции записи смогут обслуживаться DRAM памятью. Но для этого необходим
тестовый кластер с NVDIMM - пишите, если готовы предоставить такой для тестов.
Кроме того, WA снижается при использовании отложенного/ленивого сброса при параллельной
нагрузке, т.к. блоки журнала записываются на диск только когда они заполняются или явным
образом запрашивается fsync.
## На практике
На практике, используя тесты fio со страницы [Понимание сути производительности систем хранения](understanding.ru.md),
нормальную TCP-сеть, хорошие серверные SSD/NVMe, при отключённом энергосбережении процессоров вы можете рассчитывать на:
- От 5000 IOPS в 1 поток (T1Q1) и на чтение, и на запись при использовании репликации (задержка до 0.2мс)
- От 5000 IOPS в 1 поток (T1Q1) на чтение и 2200 IOPS в 1 поток на запись при использовании EC (задержка до 0.45мс)
- От 80000 IOPS на чтение в параллельном режиме на 1 ядро, от 30000 IOPS на запись на 1 ядро (на 1 OSD)
- Скорость параллельного линейного чтения и записи, равная меньшему значению из скорости дисков или сети
Худшие результаты означают, что у вас либо медленные диски, либо медленная сеть, либо что-то неправильно настроено.
Зафиксированный на данный момент рекорд задержки:
- 9668 IOPS (0.103 мс задержка) в 1 поток (T1Q1) на запись с TCP и NVMe при использовании репликации
- 9143 IOPS (0.109 мс задержка) в 1 поток (T1Q1) на чтение с TCP и NVMe при использовании репликации

View File

@@ -14,12 +14,16 @@ It supports the following commands:
- [df](#df)
- [ls](#ls)
- [create](#create)
- [snap-create](#create)
- [modify](#modify)
- [rm](#rm)
- [flatten](#flatten)
- [rm-data](#rm-data)
- [merge-data](#merge-data)
- [describe](#describe)
- [fix](#fix)
- [alloc-osd](#alloc-osd)
- [rm-osd](#rm-osd)
Global options:
@@ -122,6 +126,8 @@ vitastor-cli snap-create [-p|--pool <id|name>] <image>@<snapshot>
Create a snapshot of image `<name>` (either form can be used). May be used live if only a single writer is active.
See also about [how to export snapshots](qemu.en.md#exporting-snapshots).
## modify
`vitastor-cli modify <name> [--rename <new-name>] [--resize <size>] [--readonly | --readwrite] [-f|--force]`
@@ -170,8 +176,64 @@ Merge layer data without changing metadata. Merge `<from>`..`<to>` to `<target>`
`<to>` must be a child of `<from>` and `<target>` may be one of the layers between
`<from>` and `<to>`, including `<from>` and `<to>`.
## describe
`vitastor-cli describe [--osds <osds>] [--object-state <states>] [--pool <pool>]
[--inode <ino>] [--min-inode <ino>] [--max-inode <ino>]
[--min-offset <offset>] [--max-offset <offset>]`
Describe unclean object locations in the cluster.
```
--osds <osds>
Only list objects from primary OSD(s) <osds>.
--object-state <states>
Only list objects in given state(s). State(s) may include:
degraded, misplaced, incomplete, corrupted, inconsistent.
--pool <pool name or number>
Only list objects in the given pool.
--inode, --min-inode, --max-inode
Restrict listing to specific inode numbers.
--min-offset, --max-offset
Restrict listing to specific offsets inside inodes.
```
## fix
`vitastor-cli fix [--objects <objects>] [--bad-osds <osds>] [--part <part>] [--check no]`
Fix inconsistent objects in the cluster by deleting some copies.
```
--objects <objects>
Objects to fix, either in plain text or JSON format. If not specified,
object list will be read from STDIN in one of the same formats.
Plain text format: 0x<inode>:0x<stripe> <any delimiter> 0x<inode>:0x<stripe> ...
JSON format: [{"inode":"0x...","stripe":"0x..."},...]
--bad-osds <osds>
Remove inconsistent copies/parts of objects from these OSDs, effectively
marking them bad and allowing Vitastor to recover objects from other copies.
--part <number>
Only remove EC part <number> (from 0 to pg_size-1), required for extreme
edge cases where one OSD has multiple parts of a EC object.
--check no
Do not recheck that requested objects are actually inconsistent,
delete requested copies/parts anyway.
```
## alloc-osd
`vitastor-cli alloc-osd`
Allocate a new OSD number and reserve it by creating empty `/osd/stats/<n>` key.
## rm-osd
`vitastor-cli rm-osd [--force] [--allow-data-loss] [--dry-run] <osd_id> [osd_id...]`
Remove metadata and configuration for specified OSD(s) from etcd.
Refuses to remove OSDs with data without `--force` and `--allow-data-loss`.
With `--dry-run` only checks if deletion is possible without data loss and
redundancy degradation.

View File

@@ -15,12 +15,14 @@ vitastor-cli - интерфейс командной строки для адм
- [df](#df)
- [ls](#ls)
- [create](#create)
- [snap-create](#create)
- [modify](#modify)
- [rm](#rm)
- [flatten](#flatten)
- [rm-data](#rm-data)
- [merge-data](#merge-data)
- [alloc-osd](#alloc-osd)
- [rm-osd](#rm-osd)
Глобальные опции:
@@ -125,6 +127,8 @@ vitastor-cli snap-create [-p|--pool <id|name>] <image>@<snapshot>
Создать снимок образа `<name>` (можно использовать любую форму команды). Снимок можно создавать без остановки
клиентов, если пишущий клиент максимум 1.
Смотрите также информацию о том, [как экспортировать снимки](qemu.ru.md#экспорт-снимков).
## modify
`vitastor-cli modify <name> [--rename <new-name>] [--resize <size>] [--readonly | --readwrite] [-f|--force]`
@@ -180,9 +184,73 @@ vitastor-cli snap-create [-p|--pool <id|name>] <image>@<snapshot>
в целевой образ `<target>`. `<to>` должен быть дочерним образом `<from>`, а `<target>`
должен быть одним из слоёв между `<from>` и `<to>`, включая сами `<from>` и `<to>`.
## describe
`vitastor-cli describe [--osds <osds>] [--object-state <состояния>] [--pool <пул>]
[--inode <номер>] [--min-inode <номер>] [--max-inode <номер>]
[--min-offset <смещение>] [--max-offset <смещение>]`
Описать состояние "грязных" объектов в кластере, то есть таких объектов, копии
или части которых хранятся на наборе OSD, не равном целевому.
```
--osds <osds>
Перечислять только объекты с первичных OSD из списка <osds>.
--object-state <состояния>
Перечислять только объекты в указанных состояниях. Возможные состояния
объектов:
- degraded - деградированная избыточность
- misplaced - перемещённый
- incomplete - нечитаемый из-за потери большего числа частей, чем допустимо
- corrupted - с одной или более повреждённой частью
- inconsistent - неконсистентный, с неоднозначным расхождением копий/частей
--pool <имя или ID пула>
Перечислять только объекты из заданного пула.
--inode, --min-inode, --max-inode
Перечислять только объекты из указанных номеров инодов (образов).
--min-offset, --max-offset
Перечислять только объекты с заданных смещений внутри образов.
```
## fix
`vitastor-cli fix [--objects <объекты>] [--bad-osds <osds>] [--part <номер>] [--check no]`
Исправить неконсистентные (неоднозначные) объекты путём удаления части копий.
```
--objects <объекты>
Объекты для исправления - в простом текстовом или JSON формате. Если опция
не указана, список объектов читается со стандартного ввода в тех же форматах.
Простой формат: 0x<инод>:0x<смещение> <любой разделитель> 0x<инод>:0x<смещение> ...
Формат JSON: [{"inode":"0x<инод>","stripe":"0x<смещение>"},...]
--bad-osds <osds>
Удалить неконсистентные копии/части объектов с данных OSD, таким образом
признавая потерю этих копий и позволяя Vitastor-у восстановить объекты из
других копий.
--part <номер>
Удалить только части EC с заданным номером (от 0 до pg_size-1). Нужно только
в редких граничных случаях, когда один и тот же OSD содержит несколько частей
одного EC-объекта.
--check no
Не перепроверять, что заданные объекты действительно в неконсистентном
состоянии и просто удалять заданные части.
```
## alloc-osd
`vitastor-cli alloc-osd`
Атомарно выделить новый номер OSD и зарезервировать его, создав в etcd пустой
ключ `/osd/stats/<n>`.
## rm-osd
`vitastor-cli rm-osd [--force] [--allow-data-loss] [--dry-run] <osd_id> [osd_id...]`
Удалить метаданные и конфигурацию для заданных OSD из etcd.
Отказывается удалять OSD с данными без опций `--force` и `--allow-data-loss`.
С опцией `--dry-run` только проверяет, возможно ли удаление без потери данных и деградации
избыточности.

View File

@@ -1,4 +1,4 @@
[Documentation](../../README.md#documentation) → Usage → Disk Tool
[Documentation](../../README.md#documentation) → Usage → Disk management tool
-----
@@ -14,6 +14,7 @@ It supports the following commands:
- [upgrade-simple](#upgrade-simple)
- [resize](#resize)
- [start/stop/restart/enable/disable](#start/stop/restart/enable/disable)
- [purge](#purge)
- [read-sb](#read-sb)
- [write-sb](#write-sb)
- [udev](#udev)
@@ -155,11 +156,22 @@ Commands are passed to `systemctl` with `vitastor-osd@<num>` units as arguments.
When `--now` is added to enable/disable, OSDs are also immediately started/stopped.
## purge
`vitastor-disk purge [--force] [--allow-data-loss] <device> [device2 device3 ...]`
Purge Vitastor OSD(s) on specified device(s). Uses `vitastor-cli rm-osd` to check
if deletion is possible without data loss and to actually remove metadata from etcd.
`--force` and `--allow-data-loss` options may be used to ignore safety check results.
Requires `vitastor-cli`, `sfdisk` and `partprobe` (from parted) utilities.
## read-sb
`vitastor-disk read-sb <device>`
`vitastor-disk read-sb [--force] <device>`
Try to read Vitastor OSD superblock from `<device>` and print it in JSON format.
`--force` allows to ignore validation errors.
## write-sb

View File

@@ -1,4 +1,4 @@
[Документация](../../README-ru.md#документация) → Использование → Управление дисками
[Документация](../../README-ru.md#документация) → Использование → Инструмент управления дисками
-----
@@ -14,6 +14,7 @@ vitastor-disk - инструмент командной строки для уп
- [upgrade-simple](#upgrade-simple)
- [resize](#resize)
- [start/stop/restart/enable/disable](#start/stop/restart/enable/disable)
- [purge](#purge)
- [read-sb](#read-sb)
- [write-sb](#write-sb)
- [udev](#udev)
@@ -158,12 +159,25 @@ throttle_target_mbs, throttle_target_parallelism, throttle_threshold_us.
Когда к командам включения/выключения добавляется параметр `--now`, OSD также сразу
запускаются/останавливаются.
## purge
`vitastor-disk purge [--force] [--allow-data-loss] <device> [device2 device3 ...]`
Удалить OSD на заданном диске/дисках. Использует `vitastor-cli rm-osd` для проверки
возможности удаления без потери данных и для удаления OSD из etcd. Опции `--force`
и `--allow-data-loss` служат для обхода данной защиты в случае необходимости.
Команде требуются утилиты `vitastor-cli`, `sfdisk` и `partprobe` (из состава parted).
## read-sb
`vitastor-disk read-sb <device>`
`vitastor-disk read-sb [--force] <device>`
Прочитать суперблок OSD с диска `<device>` и вывести его в формате JSON.
Опция `--force` позволяет читать суперблок, даже если он считается некорректным
из-за ошибок валидации.
## write-sb
`vitastor-disk write-sb <device>`

View File

@@ -25,6 +25,23 @@ It will output a block device name like /dev/nbd0 which you can then use as a no
You can also use `--pool <POOL> --inode <INODE> --size <SIZE>` instead of `--image <IMAGE>` if you want.
Additional options for map command:
* `--nbd_timeout 30` \
Timeout for I/O operations in seconds after exceeding which the kernel stops
the device. You can set it to 0 to disable the timeout, but beware that you
won't be able to stop the device at all if vitastor-nbd process dies.
* `--nbd_max_devices 64 --nbd_max_part 3` \
Options for the `nbd` kernel module when modprobing it (`nbds_max` and `max_part`).
note that maximum allowed (nbds_max)*(1+max_part) is 256.
* `--logfile /path/to/log/file.txt` \
Write log messages to the specified file instead of dropping them (in background mode)
or printing them to the standard output (in foreground mode).
* `--dev_num N` \
Use the specified device /dev/nbdN instead of automatic selection.
* `--foreground 1` \
Stay in foreground, do not daemonize.
## Unmap image
To unmap the device run:
@@ -32,3 +49,27 @@ To unmap the device run:
```
vitastor-nbd unmap /dev/nbd0
```
## List mapped images
```
vitastor-nbd ls [--json]
```
Example output (normal format):
```
/dev/nbd0
image: bench
pid: 584536
/dev/nbd1
image: bench1
pid: 584546
```
Example output (JSON format):
```
{"/dev/nbd0": {"image": "bench", "pid": 584536}, "/dev/nbd1": {"image": "bench1", "pid": 584546}}
```

View File

@@ -30,6 +30,27 @@ vitastor-nbd map --etcd_address 10.115.0.10:2379/v3 --image testimg
Для обращения по номеру инода, аналогично другим командам, можно использовать опции
`--pool <POOL> --inode <INODE> --size <SIZE>` вместо `--image testimg`.
Дополнительные опции для команды подключения NBD-устройства:
* `--nbd_timeout 30` \
Максимальное время выполнения любой операции чтения/записи в секундах, при
превышении которого ядро остановит NBD-устройство. Вы можете установить опцию
в 0, чтобы отключить ограничение времени, но имейте в виду, что в этом случае
вы вообще не сможете отключить NBD-устройство при нештатном завершении процесса
vitastor-nbd.
* `--nbd_max_devices 64 --nbd_max_part 3` \
Опции, передаваемые модулю ядра nbd, если его загружает vitastor-nbd
(`nbds_max` и `max_part`). Имейте в виду, что (nbds_max)*(1+max_part)
обычно не должно превышать 256.
* `--logfile /path/to/log/file.txt` \
Писать сообщения о процессе работы в заданный файл, вместо пропуска их
при фоновом режиме запуска или печати на стандартный вывод при запуске
в консоли с `--foreground 1`.
* `--dev_num N` \
Использовать заданное устройство `/dev/nbdN` вместо автоматического подбора.
* `--foreground 1` \
Не уводить процесс в фоновый режим.
## Отключить устройство
Для отключения устройства выполните:
@@ -37,3 +58,27 @@ vitastor-nbd map --etcd_address 10.115.0.10:2379/v3 --image testimg
```
vitastor-nbd unmap /dev/nbd0
```
## Вывести подключённые устройства
```
vitastor-nbd ls [--json]
```
Пример вывода в обычном формате:
```
/dev/nbd0
image: bench
pid: 584536
/dev/nbd1
image: bench1
pid: 584546
```
Пример вывода в JSON-формате:
```
{"/dev/nbd0": {"image": "bench", "pid": 584536}, "/dev/nbd1": {"image": "bench1", "pid": 584546}}
```

View File

@@ -29,7 +29,7 @@ vitastor-nfs [--etcd_address ADDR] [ДРУГИЕ ОПЦИИ]
--bind <IP> принимать соединения по адресу <IP> (по умолчанию 0.0.0.0 - на всех)
--nfspath <PATH> установить путь NFS-экспорта в <PATH> (по умолчанию /)
--port <PORT> использовать порт <PORT> для NFS-сервисов (по умолчанию 2049)
--pool <POOL> исползовать пул <POOL> для новых образов (обязательно, если пул в кластере не один)
--pool <POOL> использовать пул <POOL> для новых образов (обязательно, если пул в кластере не один)
--foreground 1 не уходить в фон после запуска
```

View File

@@ -46,3 +46,40 @@ qemu-img convert -f qcow2 debian10.qcow2 -p -O raw 'vitastor:etcd_host=192.168.7
You can also specify `:pool=<POOL>:inode=<INODE>:size=<SIZE>` instead of `:image=<IMAGE>`
if you don't want to use inode metadata.
### Exporting snapshots
Starting with 0.8.4, you can also export individual layers (snapshot diffs) using `qemu-img`.
Suppose you have an image `testimg` and a snapshot `testimg@0` created with `vitastor-cli snap-create testimg@0`.
Then you can export the `testimg@0` snapshot and the data written to `testimg` after creating
the snapshot separately using the following commands (key points are using `skip-parents=1` and
`-B backing_file` option):
```
qemu-img convert -f raw 'vitastor:etcd_host=192.168.7.2\:2379/v3:image=testimg@0' \
-O qcow2 testimg_0.qcow2
qemu-img convert -f raw 'vitastor:etcd_host=192.168.7.2\:2379/v3:image=testimg:skip-parents=1' \
-O qcow2 -o 'cluster_size=4k' -B testimg_0.qcow2 testimg.qcow2
```
In fact, with `cluster_size=4k` any QCOW2 file can be used instead `-B testimg_0.qcow2`, even an empty one.
QCOW2 `cluster_size=4k` option is required if you want `testimg.qcow2` to contain only the data
overwritten **exactly** in the child layer. With the default 64 KB QCOW2 cluster size you'll
get a bit of extra data from parent layers, i.e. a 4 KB overwrite will result in `testimg.qcow2`
containing 64 KB of data. And this extra data will be taken by `qemu-img` from the file passed
in `-B` option, so you really need 4 KB cluster if you use an empty image in `-B`.
After this procedure you'll get two chained QCOW2 images. To detach `testimg.qcow2` from
its parent, run:
```
qemu-img rebase -u -b '' testimg.qcow2
```
This can be used for backups. Just note that exporting an image that is currently being written to
is of course unsafe and doesn't produce a consistent result, so only export snapshots if you do this
on a live VM.

View File

@@ -50,3 +50,40 @@ qemu-img convert -f qcow2 debian10.qcow2 -p -O raw 'vitastor:etcd_host=10.115.0.
Если вы не хотите обращаться к образу по имени, вместо `:image=<IMAGE>` можно указать номер пула, номер инода и размер:
`:pool=<POOL>:inode=<INODE>:size=<SIZE>`.
### Экспорт снимков
Начиная с 0.8.4 вы можете экспортировать отдельные слои (изменения в снимках) с помощью `qemu-img`.
Допустим, что у вас есть образ `testimg` и его снимок `testimg@0`, созданный с помощью `vitastor-cli snap-create testimg@0`.
Тогда вы можете выгрузить снимок `testimg@0` и данные, изменённые в `testimg` после создания снимка, отдельно,
с помощью следующих команд (ключевые моменты - использование `skip-parents=1` и опции `-B backing_file.qcow2`):
```
qemu-img convert -f raw 'vitastor:etcd_host=192.168.7.2\:2379/v3:image=testimg@0' \
-O qcow2 testimg_0.qcow2
qemu-img convert -f raw 'vitastor:etcd_host=192.168.7.2\:2379/v3:image=testimg:skip-parents=1' \
-O qcow2 -o 'cluster_size=4k' -B testimg_0.qcow2 testimg.qcow2
```
На самом деле, с `cluster_size=4k` вместо `-B testimg_0.qcow2` можно использовать любой qcow2-файл,
даже пустой.
Опция QCOW2 `cluster_size=4k` нужна, если вы хотите, чтобы `testimg.qcow2` содержал **в точности**
данные, перезаписанные в дочернем слое. С размером кластера QCOW2 по умолчанию, составляющим 64 КБ,
вы получите немного "лишних" данных из родительских слоёв - перезапись 4 КБ будет приводить к тому,
что в `testimg.qcow2` будет появляться 64 КБ данных. Причём "лишние" данные qemu-img будет брать
как раз из файла, указанного в опции `-B`, так что если там указан пустой образ, кластер обязан быть 4 КБ.
После данной процедуры вы получите два QCOW2-образа, связанных в цепочку. Чтобы "отцепить" образ
`testimg.qcow2` от базового, выполните:
```
qemu-img rebase -u -b '' testimg.qcow2
```
Это можно использовать для резервного копирования. Только помните, что экспортировать образ, в который
в то же время идёт запись, небезопасно - результат чтения не будет целостным. Так что если вы работаете
с активными виртуальными машинами, экспортируйте только их снимки, но не сам образ.

2
json11

Submodule json11 updated: 52a3af664f...fd37016cf8

View File

@@ -21,7 +21,7 @@ function add_pg_history(new_pg_history, new_pg, prev_pgs, prev_pg_history, old_p
{
for (const pg of oh.osd_sets)
{
nh.osd_sets[pg.join(' ')] = pg;
nh.osd_sets[pg.join(' ')] = pg.map(osd_num => Number(osd_num));
}
}
if (oh && oh.all_peers && oh.all_peers.length)
@@ -43,16 +43,16 @@ function finish_pg_history(merged_history)
merged_history.all_peers = Object.values(merged_history.all_peers);
}
function scale_pg_count(prev_pgs, prev_pg_history, new_pg_history, new_pg_count)
function scale_pg_count(prev_pgs, real_prev_pgs, prev_pg_history, new_pg_history, new_pg_count)
{
const old_pg_count = prev_pgs.length;
const old_pg_count = real_prev_pgs.length;
// Add all possibly intersecting PGs to the history of new PGs
if (!(new_pg_count % old_pg_count))
{
// New PG count is a multiple of old PG count
for (let i = 0; i < new_pg_count; i++)
{
add_pg_history(new_pg_history, i, prev_pgs, prev_pg_history, i % old_pg_count);
add_pg_history(new_pg_history, i, real_prev_pgs, prev_pg_history, i % old_pg_count);
finish_pg_history(new_pg_history[i]);
}
}
@@ -64,7 +64,7 @@ function scale_pg_count(prev_pgs, prev_pg_history, new_pg_history, new_pg_count)
{
for (let j = 0; j < mul; j++)
{
add_pg_history(new_pg_history, i, prev_pgs, prev_pg_history, i+j*new_pg_count);
add_pg_history(new_pg_history, i, real_prev_pgs, prev_pg_history, i+j*new_pg_count);
}
finish_pg_history(new_pg_history[i]);
}
@@ -76,7 +76,7 @@ function scale_pg_count(prev_pgs, prev_pg_history, new_pg_history, new_pg_count)
let merged_history = {};
for (let i = 0; i < old_pg_count; i++)
{
add_pg_history(merged_history, 1, prev_pgs, prev_pg_history, i);
add_pg_history(merged_history, 1, real_prev_pgs, prev_pg_history, i);
}
finish_pg_history(merged_history[1]);
for (let i = 0; i < new_pg_count; i++)
@@ -90,15 +90,15 @@ function scale_pg_count(prev_pgs, prev_pg_history, new_pg_history, new_pg_count)
new_pg_history[i] = null;
}
// Just for the lp_solve optimizer - pick a "previous" PG for each "new" one
if (old_pg_count < new_pg_count)
if (prev_pgs.length < new_pg_count)
{
for (let i = old_pg_count; i < new_pg_count; i++)
for (let i = prev_pgs.length; i < new_pg_count; i++)
{
prev_pgs[i] = prev_pgs[i % old_pg_count];
prev_pgs[i] = prev_pgs[i % prev_pgs.length];
}
}
else if (old_pg_count > new_pg_count)
else if (prev_pgs.length > new_pg_count)
{
prev_pgs.splice(new_pg_count, old_pg_count-new_pg_count);
prev_pgs.splice(new_pg_count, prev_pgs.length-new_pg_count);
}
}

View File

@@ -550,8 +550,8 @@ function random_combinations(osd_tree, pg_size, count, ordered)
seed ^= seed << 5;
return seed + 2147483648;
};
const hosts = Object.keys(osd_tree).sort();
const osds = Object.keys(osd_tree).reduce((a, c) => { a[c] = Object.keys(osd_tree[c]).sort(); return a; }, {});
const hosts = Object.keys(osd_tree).sort().filter(h => osds[h].length > 0);
const r = {};
// Generate random combinations including each OSD at least once
for (let h = 0; h < hosts.length; h++)

View File

@@ -63,7 +63,7 @@ Wants=network-online.target local-fs.target time-sync.target
[Service]
Restart=always
ExecStart=/usr/local/bin/etcd -name etcd${num} --data-dir /var/lib/etcd${num}.etcd \\
ExecStart=etcd -name etcd${num} --data-dir /var/lib/etcd${num}.etcd \\
--advertise-client-urls http://${etcds[num]}:2379 --listen-client-urls http://${etcds[num]}:2379 \\
--initial-advertise-peer-urls http://${etcds[num]}:2380 --listen-peer-urls http://${etcds[num]}:2380 \\
--initial-cluster-token vitastor-etcd-1 --initial-cluster ${etcd_cluster} \\
@@ -79,7 +79,7 @@ StartLimitInterval=0
RestartSec=10
[Install]
WantedBy=local.target
WantedBy=multi-user.target
`);
await system(`useradd etcd`);
await system(`systemctl daemon-reload`);

View File

@@ -51,8 +51,9 @@ const etcd_tree = {
// THIS IS JUST A POOR MAN'S CONFIG DOCUMENTATION
// etcd connection
config_path: "/etc/vitastor/vitastor.conf",
etcd_address: "10.0.115.10:2379/v3",
etcd_prefix: "/vitastor",
// etcd connection - configurable online
etcd_address: "10.0.115.10:2379/v3",
// mon
etcd_mon_ttl: 30, // min: 10
etcd_mon_timeout: 1000, // ms. min: 0
@@ -70,14 +71,15 @@ const etcd_tree = {
rdma_gid_index: 0,
rdma_mtu: 4096,
rdma_max_sge: 128,
rdma_max_send: 32,
rdma_max_recv: 8,
rdma_max_msg: 1048576,
log_level: 0,
rdma_max_send: 8,
rdma_max_recv: 16,
rdma_max_msg: 132096,
block_size: 131072,
disk_alignment: 4096,
bitmap_granularity: 4096,
immediate_commit: false, // 'all' or 'small'
// client and osd - configurable online
log_level: 0,
client_dirty_limit: 33554432,
peer_connect_interval: 5, // seconds. min: 1
peer_connect_timeout: 5, // seconds. min: 1
@@ -95,18 +97,28 @@ const etcd_tree = {
osd_network: null, // "192.168.7.0/24" or an array of masks
bind_address: "0.0.0.0",
bind_port: 0,
readonly: false,
osd_memlock: false,
// osd - configurable online
autosync_interval: 5,
autosync_writes: 128,
client_queue_depth: 128, // unused
recovery_queue_depth: 4,
recovery_pg_switch: 128,
recovery_sync_batch: 16,
readonly: false,
no_recovery: false,
no_rebalance: false,
print_stats_interval: 3,
slow_log_interval: 10,
inode_vanish_time: 60,
osd_memlock: false,
auto_scrub: false,
no_scrub: false,
scrub_interval: '30d', // 1s/1m/1h/1d
scrub_queue_depth: 1,
scrub_sleep: 0, // milliseconds
scrub_list_limit: 1000, // objects to list on one scrub iteration
scrub_find_best: true,
scrub_ec_max_bruteforce: 100, // maximum EC error locator brute-force iterators
// blockstore - fixed in superblock
block_size,
disk_alignment,
@@ -125,14 +137,15 @@ const etcd_tree = {
meta_offset,
disable_meta_fsync,
disable_device_lock,
// blockstore - configurable
max_write_iodepth,
min_flusher_count: 1,
max_flusher_count: 256,
// blockstore - configurable offline
inmemory_metadata,
inmemory_journal,
journal_sector_buffer_count,
journal_no_same_sector_overwrites,
// blockstore - configurable online
max_write_iodepth,
min_flusher_count: 1,
max_flusher_count: 256,
throttle_small_writes: false,
throttle_target_iops: 100,
throttle_target_mbs: 100,
@@ -168,6 +181,8 @@ const etcd_tree = {
osd_tags?: 'nvme' | [ 'nvme', ... ],
// prefer to put primary on OSD with these tags
primary_affinity_tags?: 'nvme' | [ 'nvme', ... ],
// scrub interval
scrub_interval?: '30d',
},
...
}, */
@@ -261,9 +276,9 @@ const etcd_tree = {
/* <pool_id>: {
<pg_id>: {
primary: osd_num_t,
state: ("starting"|"peering"|"peered"|"incomplete"|"active"|"repeering"|"stopping"|"offline"|
state: ("starting"|"peering"|"incomplete"|"active"|"repeering"|"stopping"|"offline"|
"degraded"|"has_incomplete"|"has_degraded"|"has_misplaced"|"has_unclean"|
"has_invalid"|"left_on_dead")[],
"has_invalid"|"has_inconsistent"|"has_corrupted"|"left_on_dead"|"scrubbing")[],
}
}, */
},
@@ -285,6 +300,7 @@ const etcd_tree = {
osd_sets: osd_num_t[][],
all_peers: osd_num_t[],
epoch: uint64_t,
next_scrub: uint64_t,
},
}, */
},
@@ -375,6 +391,7 @@ class Mon
this.etcd_start_timeout = (config.etcd_start_timeout || 5) * 1000;
this.state = JSON.parse(JSON.stringify(this.constructor.etcd_tree));
this.signals_set = false;
this.stat_time = Date.now();
this.ws = null;
this.ws_alive = false;
this.ws_keepalive_timer = null;
@@ -663,12 +680,15 @@ class Mon
async save_last_clean()
{
// last_clean_pgs is used to avoid extra data move when observing a series of changes in the cluster
const new_clean_pgs = { items: {} };
next_pool:
for (const pool_id in this.state.config.pools)
{
new_clean_pgs.items[pool_id] = (this.state.history.last_clean_pgs.items||{})[pool_id];
const pool_cfg = this.state.config.pools[pool_id];
if (!this.validate_pool_cfg(pool_id, pool_cfg, false))
{
continue;
continue next_pool;
}
for (let pg_num = 1; pg_num <= pool_cfg.pg_count; pg_num++)
{
@@ -677,17 +697,18 @@ class Mon
!(this.state.pg.state[pool_id][pg_num].state instanceof Array))
{
// Unclean
return;
continue next_pool;
}
let st = this.state.pg.state[pool_id][pg_num].state.join(',');
if (st != 'active' && st != 'active,left_on_dead' && st != 'left_on_dead,active')
{
// Unclean
return;
continue next_pool;
}
}
new_clean_pgs.items[pool_id] = this.state.config.pgs.items[pool_id];
}
this.state.history.last_clean_pgs = JSON.parse(JSON.stringify(this.state.config.pgs));
this.state.history.last_clean_pgs = new_clean_pgs;
await this.etcd_call('/kv/txn', {
success: [ { requestPut: {
key: b64(this.etcd_prefix+'/history/last_clean_pgs'),
@@ -840,7 +861,7 @@ class Mon
}
for (const node_id in tree)
{
if (node_id === '')
if (node_id === '' || tree[node_id].level === 'osd' && (!tree[node_id].size || tree[node_id].size <= 0))
{
continue;
}
@@ -948,9 +969,9 @@ class Mon
return alive_set[this.rng() % alive_set.length];
}
save_new_pgs_txn(request, pool_id, up_osds, osd_tree, prev_pgs, new_pgs, pg_history)
save_new_pgs_txn(save_to, request, pool_id, up_osds, osd_tree, prev_pgs, new_pgs, pg_history)
{
const aff_osds = this.get_affinity_osds(this.state.config.pools[pool_id], up_osds, osd_tree);
const aff_osds = this.get_affinity_osds(this.state.config.pools[pool_id] || {}, up_osds, osd_tree);
const pg_items = {};
this.reset_rng();
new_pgs.map((osd_set, i) =>
@@ -1001,14 +1022,14 @@ class Mon
});
}
}
this.state.config.pgs.items = this.state.config.pgs.items || {};
save_to.items = save_to.items || {};
if (!new_pgs.length)
{
delete this.state.config.pgs.items[pool_id];
delete save_to.items[pool_id];
}
else
{
this.state.config.pgs.items[pool_id] = pg_items;
save_to.items[pool_id] = pg_items;
}
}
@@ -1152,6 +1173,7 @@ class Mon
if (this.state.config.pgs.hash != tree_hash)
{
// Something has changed
const new_config_pgs = JSON.parse(JSON.stringify(this.state.config.pgs));
const etcd_request = { compare: [], success: [] };
for (const pool_id in (this.state.config.pgs||{}).items||{})
{
@@ -1172,7 +1194,7 @@ class Mon
etcd_request.success.push({ requestDeleteRange: {
key: b64(this.etcd_prefix+'/pool/stats/'+pool_id),
} });
this.save_new_pgs_txn(etcd_request, pool_id, up_osds, osd_tree, prev_pgs, [], []);
this.save_new_pgs_txn(new_config_pgs, etcd_request, pool_id, up_osds, osd_tree, prev_pgs, [], []);
}
}
for (const pool_id in this.state.config.pools)
@@ -1226,7 +1248,7 @@ class Mon
return;
}
const new_pg_history = [];
PGUtil.scale_pg_count(prev_pgs, pg_history, new_pg_history, pool_cfg.pg_count);
PGUtil.scale_pg_count(prev_pgs, real_prev_pgs, pg_history, new_pg_history, pool_cfg.pg_count);
pg_history = new_pg_history;
}
for (const pg of prev_pgs)
@@ -1279,14 +1301,15 @@ class Mon
key: b64(this.etcd_prefix+'/pool/stats/'+pool_id),
value: b64(JSON.stringify(this.state.pool.stats[pool_id])),
} });
this.save_new_pgs_txn(etcd_request, pool_id, up_osds, osd_tree, real_prev_pgs, optimize_result.int_pgs, pg_history);
this.save_new_pgs_txn(new_config_pgs, etcd_request, pool_id, up_osds, osd_tree, real_prev_pgs, optimize_result.int_pgs, pg_history);
}
this.state.config.pgs.hash = tree_hash;
await this.save_pg_config(etcd_request);
new_config_pgs.hash = tree_hash;
await this.save_pg_config(new_config_pgs, etcd_request);
}
else
{
// Nothing changed, but we still want to recheck the distribution of primaries
let new_config_pgs;
let changed = false;
for (const pool_id in this.state.config.pools)
{
@@ -1306,31 +1329,35 @@ class Mon
const new_primary = this.pick_primary(pool_id, pg_cfg.osd_set, up_osds, aff_osds);
if (pg_cfg.primary != new_primary)
{
if (!new_config_pgs)
{
new_config_pgs = JSON.parse(JSON.stringify(this.state.config.pgs));
}
console.log(
`Moving pool ${pool_id} (${pool_cfg.name || 'unnamed'}) PG ${pg_num}`+
` primary OSD from ${pg_cfg.primary} to ${new_primary}`
);
changed = true;
pg_cfg.primary = new_primary;
new_config_pgs.items[pool_id][pg_num].primary = new_primary;
}
}
}
}
if (changed)
{
await this.save_pg_config();
await this.save_pg_config(new_config_pgs);
}
}
}
async save_pg_config(etcd_request = { compare: [], success: [] })
async save_pg_config(new_config_pgs, etcd_request = { compare: [], success: [] })
{
etcd_request.compare.push(
{ key: b64(this.etcd_prefix+'/mon/master'), target: 'LEASE', lease: ''+this.etcd_lease_id },
{ key: b64(this.etcd_prefix+'/config/pgs'), target: 'MOD', mod_revision: ''+this.etcd_watch_revision, result: 'LESS' },
);
etcd_request.success.push(
{ requestPut: { key: b64(this.etcd_prefix+'/config/pgs'), value: b64(JSON.stringify(this.state.config.pgs)) } },
{ requestPut: { key: b64(this.etcd_prefix+'/config/pgs'), value: b64(JSON.stringify(new_config_pgs)) } },
);
const res = await this.etcd_call('/kv/txn', etcd_request, this.config.etcd_mon_timeout, 0);
if (!res.succeeded)
@@ -1374,77 +1401,85 @@ class Mon
// This is required for multiple change events to trigger at most 1 recheck in 1s
schedule_recheck()
{
if (this.recheck_timer)
if (!this.recheck_timer)
{
clearTimeout(this.recheck_timer);
this.recheck_timer = null;
this.recheck_timer = setTimeout(() =>
{
this.recheck_timer = null;
this.recheck_pgs().catch(this.die);
}, this.config.mon_change_timeout || 1000);
}
this.recheck_timer = setTimeout(() =>
}
derive_osd_stats(st, prev)
{
const zero_stats = { op: { bps: 0n, iops: 0n, lat: 0n }, subop: { iops: 0n, lat: 0n }, recovery: { bps: 0n, iops: 0n } };
const diff = { op_stats: {}, subop_stats: {}, recovery_stats: {} };
if (!st || !st.time || prev && (prev.time || this.stat_time/1000) >= st.time)
{
this.recheck_timer = null;
this.recheck_pgs().catch(this.die);
}, this.config.mon_change_timeout || 1000);
return diff;
}
const timediff = BigInt(st.time*1000 - (prev && prev.time*1000 || this.stat_time));
for (const op in st.op_stats||{})
{
const pr = prev && prev.op_stats && prev.op_stats[op];
let c = st.op_stats[op];
c = { bytes: BigInt(c.bytes||0), usec: BigInt(c.usec||0), count: BigInt(c.count||0) };
const b = c.bytes - BigInt(pr && pr.bytes||0);
const us = c.usec - BigInt(pr && pr.usec||0);
const n = c.count - BigInt(pr && pr.count||0);
if (n > 0)
diff.op_stats[op] = { ...c, bps: b*1000n/timediff, iops: n*1000n/timediff, lat: us/n };
}
for (const op in st.subop_stats||{})
{
const pr = prev && prev.subop_stats && prev.subop_stats[op];
let c = st.subop_stats[op];
c = { usec: BigInt(c.usec||0), count: BigInt(c.count||0) };
const us = c.usec - BigInt(pr && pr.usec||0);
const n = c.count - BigInt(pr && pr.count||0);
if (n > 0)
diff.subop_stats[op] = { ...c, iops: n*1000n/timediff, lat: us/n };
}
for (const op in st.recovery_stats||{})
{
const pr = prev && prev.recovery_stats && prev.recovery_stats[op];
let c = st.recovery_stats[op];
c = { bytes: BigInt(c.bytes||0), count: BigInt(c.count||0) };
const b = c.bytes - BigInt(pr && pr.bytes||0);
const n = c.count - BigInt(pr && pr.count||0);
if (n > 0)
diff.recovery_stats[op] = { ...c, bps: b*1000n/timediff, iops: n*1000n/timediff };
}
return diff;
}
sum_op_stats(timestamp, prev_stats)
{
const op_stats = {}, subop_stats = {}, recovery_stats = {};
const sum_diff = { op_stats: {}, subop_stats: {}, recovery_stats: {} };
if (!prev_stats || prev_stats.timestamp >= timestamp)
{
return sum_diff;
}
const tm = BigInt(timestamp - (prev_stats.timestamp || 0));
// Sum derived values instead of deriving summed
for (const osd in this.state.osd.stats)
{
const st = this.state.osd.stats[osd]||{};
for (const op in st.op_stats||{})
const derived = this.derive_osd_stats(this.state.osd.stats[osd],
this.prev_stats && this.prev_stats.osd_stats && this.prev_stats.osd_stats[osd]);
for (const type in derived)
{
op_stats[op] = op_stats[op] || { count: 0n, usec: 0n, bytes: 0n };
op_stats[op].count += BigInt(st.op_stats[op].count||0);
op_stats[op].usec += BigInt(st.op_stats[op].usec||0);
op_stats[op].bytes += BigInt(st.op_stats[op].bytes||0);
}
for (const op in st.subop_stats||{})
{
subop_stats[op] = subop_stats[op] || { count: 0n, usec: 0n };
subop_stats[op].count += BigInt(st.subop_stats[op].count||0);
subop_stats[op].usec += BigInt(st.subop_stats[op].usec||0);
}
for (const op in st.recovery_stats||{})
{
recovery_stats[op] = recovery_stats[op] || { count: 0n, bytes: 0n };
recovery_stats[op].count += BigInt(st.recovery_stats[op].count||0);
recovery_stats[op].bytes += BigInt(st.recovery_stats[op].bytes||0);
for (const op in derived[type])
{
for (const k in derived[type][op])
{
sum_diff[type][op] = sum_diff[type][op] || {};
sum_diff[type][op][k] = (sum_diff[type][op][k] || 0n) + derived[type][op][k];
}
}
}
}
if (prev_stats && prev_stats.timestamp >= timestamp)
{
prev_stats = null;
}
const tm = prev_stats ? BigInt(timestamp - prev_stats.timestamp) : 0;
for (const op in op_stats)
{
if (prev_stats && prev_stats.op_stats && prev_stats.op_stats[op])
{
op_stats[op].bps = (op_stats[op].bytes - prev_stats.op_stats[op].bytes) * 1000n / tm;
op_stats[op].iops = (op_stats[op].count - prev_stats.op_stats[op].count) * 1000n / tm;
op_stats[op].lat = (op_stats[op].usec - prev_stats.op_stats[op].usec)
/ ((op_stats[op].count - prev_stats.op_stats[op].count) || 1n);
}
}
for (const op in subop_stats)
{
if (prev_stats && prev_stats.subop_stats && prev_stats.subop_stats[op])
{
subop_stats[op].iops = (subop_stats[op].count - prev_stats.subop_stats[op].count) * 1000n / tm;
subop_stats[op].lat = (subop_stats[op].usec - prev_stats.subop_stats[op].usec)
/ ((subop_stats[op].count - prev_stats.subop_stats[op].count) || 1n);
}
}
for (const op in recovery_stats)
{
if (prev_stats && prev_stats.recovery_stats && prev_stats.recovery_stats[op])
{
recovery_stats[op].bps = (recovery_stats[op].bytes - prev_stats.recovery_stats[op].bytes) * 1000n / tm;
recovery_stats[op].iops = (recovery_stats[op].count - prev_stats.recovery_stats[op].count) * 1000n / tm;
}
}
return { op_stats, subop_stats, recovery_stats };
return sum_diff;
}
sum_object_counts()
@@ -1603,7 +1638,8 @@ class Mon
this.prev_stats ? this.prev_stats.inode_stats : null,
timestamp, this.prev_stats ? this.prev_stats.timestamp : null
);
this.prev_stats = { timestamp, ...stats, inode_stats };
this.prev_stats = { timestamp, inode_stats, osd_stats: { ...this.state.osd.stats } };
this.stat_time = Date.now();
stats.object_counts = object_counts;
stats.object_bytes = object_bytes;
stats = this.serialize_bigints(stats);
@@ -1693,6 +1729,7 @@ class Mon
// Do not clear these to null
kv.value = kv.value || {};
}
const old = cur[key_parts[key_parts.length-1]];
cur[key_parts[key_parts.length-1]] = kv.value;
if (key === 'config/global')
{
@@ -1717,9 +1754,15 @@ class Mon
}
else if (key_parts[0] === 'osd' && key_parts[1] === 'stats')
{
// Recheck PGs <osd_out_time> later
// Recheck OSD tree on OSD addition/deletion
const osd_num = key_parts[2];
if ((!old) != (!kv.value) || old && kv.value && old.size != kv.value.size)
{
this.schedule_recheck();
}
// Recheck PGs <osd_out_time> after last OSD statistics report
this.schedule_next_recheck_at(
!this.state.osd.stats[key[2]] ? 0 : this.state.osd.stats[key[2]].time+this.config.osd_out_time
!this.state.osd.stats[osd_num] ? 0 : this.state.osd.stats[osd_num].time+this.config.osd_out_time
);
}
}
@@ -1805,6 +1848,7 @@ function POST(url, body, timeout)
clearTimeout(timer_id);
let res_body = '';
res.setEncoding('utf8');
res.on('error', (error) => ok({ error }));
res.on('data', chunk => { res_body += chunk; });
res.on('end', () =>
{
@@ -1824,6 +1868,8 @@ function POST(url, body, timeout)
}
});
});
req.on('error', (error) => ok({ error }));
req.on('close', () => ok({ error: new Error('Connection closed prematurely') }));
req.write(body_text);
req.end();
});

View File

@@ -15,4 +15,4 @@ StartLimitInterval=0
RestartSec=10
[Install]
WantedBy=vitastor.target
WantedBy=multi-user.target

View File

@@ -16,6 +16,11 @@ use PVE::Tools qw(run_command);
use base qw(PVE::Storage::Plugin);
if (@PVE::Storage::Plugin::SHARED_STORAGE)
{
push @PVE::Storage::Plugin::SHARED_STORAGE, 'vitastor';
}
sub api
{
# Trick it :)
@@ -133,9 +138,11 @@ sub properties
sub options
{
return {
shared => { optional => 1 },
content => { optional => 1 },
nodes => { optional => 1 },
disable => { optional => 1 },
vitastor_etcd_address => { optional => 1},
vitastor_etcd_address => { optional => 1 },
vitastor_etcd_prefix => { optional => 1 },
vitastor_config_path => { optional => 1 },
vitastor_prefix => { optional => 1 },
@@ -381,8 +388,6 @@ sub unmap_volume
my ($class, $storeid, $scfg, $volname, $snapname) = @_;
my $prefix = defined $scfg->{vitastor_prefix} ? $scfg->{vitastor_prefix} : 'pve/';
return 1 if !$scfg->{vitastor_nbd};
my ($vtype, $name, $vmid) = $class->parse_volname($volname);
$name .= '@'.$snapname if $snapname;
@@ -406,7 +411,7 @@ sub activate_volume
sub deactivate_volume
{
my ($class, $storeid, $scfg, $volname, $snapname, $cache) = @_;
$class->unmap_volume($storeid, $scfg, $volname, $snapname);
$class->unmap_volume($storeid, $scfg, $volname, $snapname) if $scfg->{vitastor_nbd};
return 1;
}

View File

@@ -50,7 +50,7 @@ from cinder.volume import configuration
from cinder.volume import driver
from cinder.volume import volume_utils
VERSION = '0.8.1'
VERSION = '0.9.2'
LOG = logging.getLogger(__name__)

View File

@@ -0,0 +1,644 @@
commit e6f935157944279c2c0634915c3c00feeec748c9
Author: Vitaliy Filippov <vitalif@yourcmc.ru>
Date: Mon Jun 19 00:58:19 2023 +0300
Add Vitastor support
diff --git a/include/libvirt/libvirt-storage.h b/include/libvirt/libvirt-storage.h
index aaad4a3..5f5daa8 100644
--- a/include/libvirt/libvirt-storage.h
+++ b/include/libvirt/libvirt-storage.h
@@ -326,6 +326,7 @@ typedef enum {
VIR_CONNECT_LIST_STORAGE_POOLS_ZFS = 1 << 17, /* (Since: 1.2.8) */
VIR_CONNECT_LIST_STORAGE_POOLS_VSTORAGE = 1 << 18, /* (Since: 3.1.0) */
VIR_CONNECT_LIST_STORAGE_POOLS_ISCSI_DIRECT = 1 << 19, /* (Since: 5.6.0) */
+ VIR_CONNECT_LIST_STORAGE_POOLS_VITASTOR = 1 << 20, /* (Since: 5.0.0) */
} virConnectListAllStoragePoolsFlags;
int virConnectListAllStoragePools(virConnectPtr conn,
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
index 45965fa..b7c23d3 100644
--- a/src/conf/domain_conf.c
+++ b/src/conf/domain_conf.c
@@ -7103,7 +7103,8 @@ virDomainDiskSourceNetworkParse(xmlNodePtr node,
src->configFile = virXPathString("string(./config/@file)", ctxt);
if (src->protocol == VIR_STORAGE_NET_PROTOCOL_HTTP ||
- src->protocol == VIR_STORAGE_NET_PROTOCOL_HTTPS)
+ src->protocol == VIR_STORAGE_NET_PROTOCOL_HTTPS ||
+ src->protocol == VIR_STORAGE_NET_PROTOCOL_VITASTOR)
src->query = virXMLPropString(node, "query");
if (virDomainStorageNetworkParseHosts(node, ctxt, &src->hosts, &src->nhosts) < 0)
@@ -30121,6 +30122,7 @@ virDomainStorageSourceTranslateSourcePool(virStorageSource *src,
case VIR_STORAGE_POOL_MPATH:
case VIR_STORAGE_POOL_RBD:
+ case VIR_STORAGE_POOL_VITASTOR:
case VIR_STORAGE_POOL_SHEEPDOG:
case VIR_STORAGE_POOL_GLUSTER:
case VIR_STORAGE_POOL_LAST:
diff --git a/src/conf/domain_validate.c b/src/conf/domain_validate.c
index 5a9bf20..05058b8 100644
--- a/src/conf/domain_validate.c
+++ b/src/conf/domain_validate.c
@@ -494,6 +494,7 @@ virDomainDiskDefValidateSourceChainOne(const virStorageSource *src)
case VIR_STORAGE_NET_PROTOCOL_RBD:
break;
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
case VIR_STORAGE_NET_PROTOCOL_NBD:
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
case VIR_STORAGE_NET_PROTOCOL_GLUSTER:
@@ -541,7 +542,7 @@ virDomainDiskDefValidateSourceChainOne(const virStorageSource *src)
}
}
- /* internal snapshots and config files are currently supported only with rbd: */
+ /* internal snapshots are currently supported only with rbd: */
if (virStorageSourceGetActualType(src) != VIR_STORAGE_TYPE_NETWORK &&
src->protocol != VIR_STORAGE_NET_PROTOCOL_RBD) {
if (src->snapshot) {
@@ -550,11 +551,15 @@ virDomainDiskDefValidateSourceChainOne(const virStorageSource *src)
"only with 'rbd' disks"));
return -1;
}
-
+ }
+ /* config files are currently supported only with rbd and vitastor: */
+ if (virStorageSourceGetActualType(src) != VIR_STORAGE_TYPE_NETWORK &&
+ src->protocol != VIR_STORAGE_NET_PROTOCOL_RBD &&
+ src->protocol != VIR_STORAGE_NET_PROTOCOL_VITASTOR) {
if (src->configFile) {
virReportError(VIR_ERR_XML_ERROR, "%s",
_("<config> element is currently supported "
- "only with 'rbd' disks"));
+ "only with 'rbd' and 'vitastor' disks"));
return -1;
}
}
diff --git a/src/conf/schemas/domaincommon.rng b/src/conf/schemas/domaincommon.rng
index 6cb0a20..8bf7de9 100644
--- a/src/conf/schemas/domaincommon.rng
+++ b/src/conf/schemas/domaincommon.rng
@@ -1972,6 +1972,35 @@
</element>
</define>
+ <define name="diskSourceNetworkProtocolVitastor">
+ <element name="source">
+ <interleave>
+ <attribute name="protocol">
+ <value>vitastor</value>
+ </attribute>
+ <ref name="diskSourceCommon"/>
+ <optional>
+ <attribute name="name"/>
+ </optional>
+ <optional>
+ <attribute name="query"/>
+ </optional>
+ <zeroOrMore>
+ <ref name="diskSourceNetworkHost"/>
+ </zeroOrMore>
+ <optional>
+ <element name="config">
+ <attribute name="file">
+ <ref name="absFilePath"/>
+ </attribute>
+ <empty/>
+ </element>
+ </optional>
+ <empty/>
+ </interleave>
+ </element>
+ </define>
+
<define name="diskSourceNetworkProtocolISCSI">
<element name="source">
<attribute name="protocol">
@@ -2264,6 +2293,7 @@
<ref name="diskSourceNetworkProtocolSimple"/>
<ref name="diskSourceNetworkProtocolVxHS"/>
<ref name="diskSourceNetworkProtocolNFS"/>
+ <ref name="diskSourceNetworkProtocolVitastor"/>
</choice>
</define>
diff --git a/src/conf/storage_conf.c b/src/conf/storage_conf.c
index f5a9636..8339bc4 100644
--- a/src/conf/storage_conf.c
+++ b/src/conf/storage_conf.c
@@ -56,7 +56,7 @@ VIR_ENUM_IMPL(virStoragePool,
"logical", "disk", "iscsi",
"iscsi-direct", "scsi", "mpath",
"rbd", "sheepdog", "gluster",
- "zfs", "vstorage",
+ "zfs", "vstorage", "vitastor",
);
VIR_ENUM_IMPL(virStoragePoolFormatFileSystem,
@@ -242,6 +242,18 @@ static virStoragePoolTypeInfo poolTypeInfo[] = {
.formatToString = virStorageFileFormatTypeToString,
}
},
+ {.poolType = VIR_STORAGE_POOL_VITASTOR,
+ .poolOptions = {
+ .flags = (VIR_STORAGE_POOL_SOURCE_HOST |
+ VIR_STORAGE_POOL_SOURCE_NETWORK |
+ VIR_STORAGE_POOL_SOURCE_NAME),
+ },
+ .volOptions = {
+ .defaultFormat = VIR_STORAGE_FILE_RAW,
+ .formatFromString = virStorageVolumeFormatFromString,
+ .formatToString = virStorageFileFormatTypeToString,
+ }
+ },
{.poolType = VIR_STORAGE_POOL_SHEEPDOG,
.poolOptions = {
.flags = (VIR_STORAGE_POOL_SOURCE_HOST |
@@ -542,6 +554,11 @@ virStoragePoolDefParseSource(xmlXPathContextPtr ctxt,
_("element 'name' is mandatory for RBD pool"));
return -1;
}
+ if (pool_type == VIR_STORAGE_POOL_VITASTOR && source->name == NULL) {
+ virReportError(VIR_ERR_XML_ERROR, "%s",
+ _("element 'name' is mandatory for Vitastor pool"));
+ return -1;
+ }
if (options->formatFromString) {
g_autofree char *format = NULL;
@@ -1132,6 +1149,7 @@ virStoragePoolDefFormatBuf(virBuffer *buf,
/* RBD, Sheepdog, Gluster and Iscsi-direct devices are not local block devs nor
* files, so they don't have a target */
if (def->type != VIR_STORAGE_POOL_RBD &&
+ def->type != VIR_STORAGE_POOL_VITASTOR &&
def->type != VIR_STORAGE_POOL_SHEEPDOG &&
def->type != VIR_STORAGE_POOL_GLUSTER &&
def->type != VIR_STORAGE_POOL_ISCSI_DIRECT) {
diff --git a/src/conf/storage_conf.h b/src/conf/storage_conf.h
index fc67957..720c07e 100644
--- a/src/conf/storage_conf.h
+++ b/src/conf/storage_conf.h
@@ -103,6 +103,7 @@ typedef enum {
VIR_STORAGE_POOL_GLUSTER, /* Gluster device */
VIR_STORAGE_POOL_ZFS, /* ZFS */
VIR_STORAGE_POOL_VSTORAGE, /* Virtuozzo Storage */
+ VIR_STORAGE_POOL_VITASTOR, /* Vitastor */
VIR_STORAGE_POOL_LAST,
} virStoragePoolType;
@@ -454,6 +455,7 @@ VIR_ENUM_DECL(virStoragePartedFs);
VIR_CONNECT_LIST_STORAGE_POOLS_SCSI | \
VIR_CONNECT_LIST_STORAGE_POOLS_MPATH | \
VIR_CONNECT_LIST_STORAGE_POOLS_RBD | \
+ VIR_CONNECT_LIST_STORAGE_POOLS_VITASTOR | \
VIR_CONNECT_LIST_STORAGE_POOLS_SHEEPDOG | \
VIR_CONNECT_LIST_STORAGE_POOLS_GLUSTER | \
VIR_CONNECT_LIST_STORAGE_POOLS_ZFS | \
diff --git a/src/conf/storage_source_conf.c b/src/conf/storage_source_conf.c
index cecd7e8..d7b79a4 100644
--- a/src/conf/storage_source_conf.c
+++ b/src/conf/storage_source_conf.c
@@ -87,6 +87,7 @@ VIR_ENUM_IMPL(virStorageNetProtocol,
"ssh",
"vxhs",
"nfs",
+ "vitastor",
);
@@ -1286,6 +1287,7 @@ virStorageSourceNetworkDefaultPort(virStorageNetProtocol protocol)
case VIR_STORAGE_NET_PROTOCOL_GLUSTER:
return 24007;
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
case VIR_STORAGE_NET_PROTOCOL_RBD:
/* we don't provide a default for RBD */
return 0;
diff --git a/src/conf/storage_source_conf.h b/src/conf/storage_source_conf.h
index 14a6825..eb4acac 100644
--- a/src/conf/storage_source_conf.h
+++ b/src/conf/storage_source_conf.h
@@ -128,6 +128,7 @@ typedef enum {
VIR_STORAGE_NET_PROTOCOL_SSH,
VIR_STORAGE_NET_PROTOCOL_VXHS,
VIR_STORAGE_NET_PROTOCOL_NFS,
+ VIR_STORAGE_NET_PROTOCOL_VITASTOR,
VIR_STORAGE_NET_PROTOCOL_LAST
} virStorageNetProtocol;
diff --git a/src/conf/virstorageobj.c b/src/conf/virstorageobj.c
index e6c187e..035b423 100644
--- a/src/conf/virstorageobj.c
+++ b/src/conf/virstorageobj.c
@@ -1433,6 +1433,7 @@ virStoragePoolObjSourceFindDuplicateCb(const void *payload,
return 1;
break;
+ case VIR_STORAGE_POOL_VITASTOR:
case VIR_STORAGE_POOL_ISCSI_DIRECT:
case VIR_STORAGE_POOL_RBD:
case VIR_STORAGE_POOL_LAST:
@@ -1918,6 +1919,8 @@ virStoragePoolObjMatch(virStoragePoolObj *obj,
(obj->def->type == VIR_STORAGE_POOL_MPATH)) ||
(MATCH(VIR_CONNECT_LIST_STORAGE_POOLS_RBD) &&
(obj->def->type == VIR_STORAGE_POOL_RBD)) ||
+ (MATCH(VIR_CONNECT_LIST_STORAGE_POOLS_VITASTOR) &&
+ (obj->def->type == VIR_STORAGE_POOL_VITASTOR)) ||
(MATCH(VIR_CONNECT_LIST_STORAGE_POOLS_SHEEPDOG) &&
(obj->def->type == VIR_STORAGE_POOL_SHEEPDOG)) ||
(MATCH(VIR_CONNECT_LIST_STORAGE_POOLS_GLUSTER) &&
diff --git a/src/libvirt-storage.c b/src/libvirt-storage.c
index 8490034..ab2cdaa 100644
--- a/src/libvirt-storage.c
+++ b/src/libvirt-storage.c
@@ -94,6 +94,7 @@ virStoragePoolGetConnect(virStoragePoolPtr pool)
* VIR_CONNECT_LIST_STORAGE_POOLS_SCSI
* VIR_CONNECT_LIST_STORAGE_POOLS_MPATH
* VIR_CONNECT_LIST_STORAGE_POOLS_RBD
+ * VIR_CONNECT_LIST_STORAGE_POOLS_VITASTOR
* VIR_CONNECT_LIST_STORAGE_POOLS_SHEEPDOG
* VIR_CONNECT_LIST_STORAGE_POOLS_GLUSTER
* VIR_CONNECT_LIST_STORAGE_POOLS_ZFS
diff --git a/src/libxl/libxl_conf.c b/src/libxl/libxl_conf.c
index 17ac880..59711b5 100644
--- a/src/libxl/libxl_conf.c
+++ b/src/libxl/libxl_conf.c
@@ -970,6 +970,7 @@ libxlMakeNetworkDiskSrcStr(virStorageSource *src,
case VIR_STORAGE_NET_PROTOCOL_SSH:
case VIR_STORAGE_NET_PROTOCOL_VXHS:
case VIR_STORAGE_NET_PROTOCOL_NFS:
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
case VIR_STORAGE_NET_PROTOCOL_LAST:
case VIR_STORAGE_NET_PROTOCOL_NONE:
virReportError(VIR_ERR_NO_SUPPORT,
diff --git a/src/libxl/xen_xl.c b/src/libxl/xen_xl.c
index 6919325..55ffc32 100644
--- a/src/libxl/xen_xl.c
+++ b/src/libxl/xen_xl.c
@@ -1445,6 +1445,7 @@ xenFormatXLDiskSrcNet(virStorageSource *src)
case VIR_STORAGE_NET_PROTOCOL_SSH:
case VIR_STORAGE_NET_PROTOCOL_VXHS:
case VIR_STORAGE_NET_PROTOCOL_NFS:
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
case VIR_STORAGE_NET_PROTOCOL_LAST:
case VIR_STORAGE_NET_PROTOCOL_NONE:
virReportError(VIR_ERR_NO_SUPPORT,
diff --git a/src/qemu/qemu_block.c b/src/qemu/qemu_block.c
index e865aa1..40162af 100644
--- a/src/qemu/qemu_block.c
+++ b/src/qemu/qemu_block.c
@@ -604,6 +604,38 @@ qemuBlockStorageSourceGetRBDProps(virStorageSource *src,
}
+static virJSONValue *
+qemuBlockStorageSourceGetVitastorProps(virStorageSource *src)
+{
+ virJSONValue *ret = NULL;
+ virStorageNetHostDef *host;
+ size_t i;
+ g_auto(virBuffer) buf = VIR_BUFFER_INITIALIZER;
+ g_autofree char *etcd = NULL;
+
+ for (i = 0; i < src->nhosts; i++) {
+ host = src->hosts + i;
+ if ((virStorageNetHostTransport)host->transport != VIR_STORAGE_NET_HOST_TRANS_TCP) {
+ return NULL;
+ }
+ virBufferAsprintf(&buf, i > 0 ? ",%s:%u" : "%s:%u", host->name, host->port);
+ }
+ if (src->nhosts > 0) {
+ etcd = virBufferContentAndReset(&buf);
+ }
+
+ if (virJSONValueObjectAdd(&ret,
+ "S:etcd-host", etcd,
+ "S:etcd-prefix", src->query,
+ "S:config-path", src->configFile,
+ "s:image", src->path,
+ NULL) < 0)
+ return NULL;
+
+ return ret;
+}
+
+
static virJSONValue *
qemuBlockStorageSourceGetSheepdogProps(virStorageSource *src)
{
@@ -917,6 +949,12 @@ qemuBlockStorageSourceGetBackendProps(virStorageSource *src,
return NULL;
break;
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
+ driver = "vitastor";
+ if (!(fileprops = qemuBlockStorageSourceGetVitastorProps(src)))
+ return NULL;
+ break;
+
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
driver = "sheepdog";
if (!(fileprops = qemuBlockStorageSourceGetSheepdogProps(src)))
@@ -1860,6 +1898,7 @@ qemuBlockGetBackingStoreString(virStorageSource *src,
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
case VIR_STORAGE_NET_PROTOCOL_RBD:
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
case VIR_STORAGE_NET_PROTOCOL_VXHS:
case VIR_STORAGE_NET_PROTOCOL_NFS:
case VIR_STORAGE_NET_PROTOCOL_SSH:
@@ -2242,6 +2281,12 @@ qemuBlockStorageSourceCreateGetStorageProps(virStorageSource *src,
return -1;
break;
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
+ driver = "vitastor";
+ if (!(location = qemuBlockStorageSourceGetVitastorProps(src)))
+ return -1;
+ break;
+
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
driver = "sheepdog";
if (!(location = qemuBlockStorageSourceGetSheepdogProps(src)))
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
index 2eb5653..60ee82d 100644
--- a/src/qemu/qemu_domain.c
+++ b/src/qemu/qemu_domain.c
@@ -4958,7 +4958,8 @@ qemuDomainValidateStorageSource(virStorageSource *src,
if (src->query &&
(actualType != VIR_STORAGE_TYPE_NETWORK ||
(src->protocol != VIR_STORAGE_NET_PROTOCOL_HTTPS &&
- src->protocol != VIR_STORAGE_NET_PROTOCOL_HTTP))) {
+ src->protocol != VIR_STORAGE_NET_PROTOCOL_HTTP &&
+ src->protocol != VIR_STORAGE_NET_PROTOCOL_VITASTOR))) {
virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
_("query is supported only with HTTP(S) protocols"));
return -1;
@@ -10129,6 +10130,7 @@ qemuDomainPrepareStorageSourceTLS(virStorageSource *src,
break;
case VIR_STORAGE_NET_PROTOCOL_RBD:
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
case VIR_STORAGE_NET_PROTOCOL_GLUSTER:
case VIR_STORAGE_NET_PROTOCOL_ISCSI:
diff --git a/src/qemu/qemu_snapshot.c b/src/qemu/qemu_snapshot.c
index b841680..a6be771 100644
--- a/src/qemu/qemu_snapshot.c
+++ b/src/qemu/qemu_snapshot.c
@@ -373,6 +373,7 @@ qemuSnapshotPrepareDiskExternalInactive(virDomainSnapshotDiskDef *snapdisk,
case VIR_STORAGE_NET_PROTOCOL_NONE:
case VIR_STORAGE_NET_PROTOCOL_NBD:
case VIR_STORAGE_NET_PROTOCOL_RBD:
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
case VIR_STORAGE_NET_PROTOCOL_GLUSTER:
case VIR_STORAGE_NET_PROTOCOL_ISCSI:
@@ -578,6 +579,7 @@ qemuSnapshotPrepareDiskInternal(virDomainDiskDef *disk,
case VIR_STORAGE_NET_PROTOCOL_NONE:
case VIR_STORAGE_NET_PROTOCOL_NBD:
case VIR_STORAGE_NET_PROTOCOL_RBD:
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
case VIR_STORAGE_NET_PROTOCOL_GLUSTER:
case VIR_STORAGE_NET_PROTOCOL_ISCSI:
diff --git a/src/storage/storage_driver.c b/src/storage/storage_driver.c
index d90c1c9..e853457 100644
--- a/src/storage/storage_driver.c
+++ b/src/storage/storage_driver.c
@@ -1627,6 +1627,7 @@ storageVolLookupByPathCallback(virStoragePoolObj *obj,
case VIR_STORAGE_POOL_GLUSTER:
case VIR_STORAGE_POOL_RBD:
+ case VIR_STORAGE_POOL_VITASTOR:
case VIR_STORAGE_POOL_SHEEPDOG:
case VIR_STORAGE_POOL_ZFS:
case VIR_STORAGE_POOL_LAST:
diff --git a/src/storage_file/storage_source_backingstore.c b/src/storage_file/storage_source_backingstore.c
index e48ae72..2017ccc 100644
--- a/src/storage_file/storage_source_backingstore.c
+++ b/src/storage_file/storage_source_backingstore.c
@@ -284,6 +284,75 @@ virStorageSourceParseRBDColonString(const char *rbdstr,
}
+static int
+virStorageSourceParseVitastorColonString(const char *colonstr,
+ virStorageSource *src)
+{
+ char *p, *e, *next;
+ g_autofree char *options = NULL;
+
+ /* optionally skip the "vitastor:" prefix if provided */
+ if (STRPREFIX(colonstr, "vitastor:"))
+ colonstr += strlen("vitastor:");
+
+ options = g_strdup(colonstr);
+
+ p = options;
+ while (*p) {
+ /* find : delimiter or end of string */
+ for (e = p; *e && *e != ':'; ++e) {
+ if (*e == '\\') {
+ e++;
+ if (*e == '\0')
+ break;
+ }
+ }
+ if (*e == '\0') {
+ next = e; /* last kv pair */
+ } else {
+ next = e + 1;
+ *e = '\0';
+ }
+
+ if (STRPREFIX(p, "image=")) {
+ src->path = g_strdup(p + strlen("image="));
+ } else if (STRPREFIX(p, "etcd-prefix=")) {
+ src->query = g_strdup(p + strlen("etcd-prefix="));
+ } else if (STRPREFIX(p, "config-path=")) {
+ src->configFile = g_strdup(p + strlen("config-path="));
+ } else if (STRPREFIX(p, "etcd-host=")) {
+ char *h, *sep;
+
+ h = p + strlen("etcd-host=");
+ while (h < e) {
+ for (sep = h; sep < e; ++sep) {
+ if (*sep == '\\' && (sep[1] == ',' ||
+ sep[1] == ';' ||
+ sep[1] == ' ')) {
+ *sep = '\0';
+ sep += 2;
+ break;
+ }
+ }
+
+ if (virStorageSourceRBDAddHost(src, h) < 0)
+ return -1;
+
+ h = sep;
+ }
+ }
+
+ p = next;
+ }
+
+ if (!src->path) {
+ return -1;
+ }
+
+ return 0;
+}
+
+
static int
virStorageSourceParseNBDColonString(const char *nbdstr,
virStorageSource *src)
@@ -396,6 +465,11 @@ virStorageSourceParseBackingColon(virStorageSource *src,
return -1;
break;
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
+ if (virStorageSourceParseVitastorColonString(path, src) < 0)
+ return -1;
+ break;
+
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
case VIR_STORAGE_NET_PROTOCOL_LAST:
case VIR_STORAGE_NET_PROTOCOL_NONE:
@@ -984,6 +1058,54 @@ virStorageSourceParseBackingJSONRBD(virStorageSource *src,
return 0;
}
+static int
+virStorageSourceParseBackingJSONVitastor(virStorageSource *src,
+ virJSONValue *json,
+ const char *jsonstr G_GNUC_UNUSED,
+ int opaque G_GNUC_UNUSED)
+{
+ const char *filename;
+ const char *image = virJSONValueObjectGetString(json, "image");
+ const char *conf = virJSONValueObjectGetString(json, "config-path");
+ const char *etcd_prefix = virJSONValueObjectGetString(json, "etcd-prefix");
+ virJSONValue *servers = virJSONValueObjectGetArray(json, "server");
+ size_t nservers;
+ size_t i;
+
+ src->type = VIR_STORAGE_TYPE_NETWORK;
+ src->protocol = VIR_STORAGE_NET_PROTOCOL_VITASTOR;
+
+ /* legacy syntax passed via 'filename' option */
+ if ((filename = virJSONValueObjectGetString(json, "filename")))
+ return virStorageSourceParseVitastorColonString(filename, src);
+
+ if (!image) {
+ virReportError(VIR_ERR_INVALID_ARG, "%s",
+ _("missing image name in Vitastor backing volume "
+ "JSON specification"));
+ return -1;
+ }
+
+ src->path = g_strdup(image);
+ src->configFile = g_strdup(conf);
+ src->query = g_strdup(etcd_prefix);
+
+ if (servers) {
+ nservers = virJSONValueArraySize(servers);
+
+ src->hosts = g_new0(virStorageNetHostDef, nservers);
+ src->nhosts = nservers;
+
+ for (i = 0; i < nservers; i++) {
+ if (virStorageSourceParseBackingJSONInetSocketAddress(src->hosts + i,
+ virJSONValueArrayGet(servers, i)) < 0)
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
static int
virStorageSourceParseBackingJSONRaw(virStorageSource *src,
virJSONValue *json,
@@ -1162,6 +1284,7 @@ static const struct virStorageSourceJSONDriverParser jsonParsers[] = {
{"sheepdog", false, virStorageSourceParseBackingJSONSheepdog, 0},
{"ssh", false, virStorageSourceParseBackingJSONSSH, 0},
{"rbd", false, virStorageSourceParseBackingJSONRBD, 0},
+ {"vitastor", false, virStorageSourceParseBackingJSONVitastor, 0},
{"raw", true, virStorageSourceParseBackingJSONRaw, 0},
{"nfs", false, virStorageSourceParseBackingJSONNFS, 0},
{"vxhs", false, virStorageSourceParseBackingJSONVxHS, 0},
diff --git a/src/test/test_driver.c b/src/test/test_driver.c
index bd6f063..cce34e1 100644
--- a/src/test/test_driver.c
+++ b/src/test/test_driver.c
@@ -7338,6 +7338,7 @@ testStorageVolumeTypeForPool(int pooltype)
case VIR_STORAGE_POOL_ISCSI_DIRECT:
case VIR_STORAGE_POOL_GLUSTER:
case VIR_STORAGE_POOL_RBD:
+ case VIR_STORAGE_POOL_VITASTOR:
return VIR_STORAGE_VOL_NETWORK;
case VIR_STORAGE_POOL_LOGICAL:
case VIR_STORAGE_POOL_DISK:
diff --git a/tests/storagepoolcapsschemadata/poolcaps-fs.xml b/tests/storagepoolcapsschemadata/poolcaps-fs.xml
index eee75af..8bd0a57 100644
--- a/tests/storagepoolcapsschemadata/poolcaps-fs.xml
+++ b/tests/storagepoolcapsschemadata/poolcaps-fs.xml
@@ -204,4 +204,11 @@
</enum>
</volOptions>
</pool>
+ <pool type='vitastor' supported='no'>
+ <volOptions>
+ <defaultFormat type='raw'/>
+ <enum name='targetFormatType'>
+ </enum>
+ </volOptions>
+ </pool>
</storagepoolCapabilities>
diff --git a/tests/storagepoolcapsschemadata/poolcaps-full.xml b/tests/storagepoolcapsschemadata/poolcaps-full.xml
index 805950a..852df0d 100644
--- a/tests/storagepoolcapsschemadata/poolcaps-full.xml
+++ b/tests/storagepoolcapsschemadata/poolcaps-full.xml
@@ -204,4 +204,11 @@
</enum>
</volOptions>
</pool>
+ <pool type='vitastor' supported='yes'>
+ <volOptions>
+ <defaultFormat type='raw'/>
+ <enum name='targetFormatType'>
+ </enum>
+ </volOptions>
+ </pool>
</storagepoolCapabilities>
diff --git a/tests/storagepoolxml2argvtest.c b/tests/storagepoolxml2argvtest.c
index e8e40d6..db55fe5 100644
--- a/tests/storagepoolxml2argvtest.c
+++ b/tests/storagepoolxml2argvtest.c
@@ -65,6 +65,7 @@ testCompareXMLToArgvFiles(bool shouldFail,
case VIR_STORAGE_POOL_GLUSTER:
case VIR_STORAGE_POOL_ZFS:
case VIR_STORAGE_POOL_VSTORAGE:
+ case VIR_STORAGE_POOL_VITASTOR:
case VIR_STORAGE_POOL_LAST:
default:
VIR_TEST_DEBUG("pool type '%s' has no xml2argv test", defTypeStr);
diff --git a/tools/virsh-pool.c b/tools/virsh-pool.c
index 8a98c6a..4b1bbd4 100644
--- a/tools/virsh-pool.c
+++ b/tools/virsh-pool.c
@@ -1221,6 +1221,9 @@ cmdPoolList(vshControl *ctl, const vshCmd *cmd G_GNUC_UNUSED)
case VIR_STORAGE_POOL_VSTORAGE:
flags |= VIR_CONNECT_LIST_STORAGE_POOLS_VSTORAGE;
break;
+ case VIR_STORAGE_POOL_VITASTOR:
+ flags |= VIR_CONNECT_LIST_STORAGE_POOLS_VITASTOR;
+ break;
case VIR_STORAGE_POOL_LAST:
break;
}

View File

@@ -0,0 +1,169 @@
Index: qemu/block/meson.build
===================================================================
--- qemu.orig/block/meson.build
+++ qemu/block/meson.build
@@ -91,6 +91,7 @@ foreach m : [
[libnfs, 'nfs', files('nfs.c')],
[libssh, 'ssh', files('ssh.c')],
[rbd, 'rbd', files('rbd.c')],
+ [vitastor, 'vitastor', files('vitastor.c')],
]
if m[0].found()
module_ss = ss.source_set()
Index: qemu/meson.build
===================================================================
--- qemu.orig/meson.build
+++ qemu/meson.build
@@ -838,6 +838,26 @@ if not get_option('rbd').auto() or have_
endif
endif
+vitastor = not_found
+if not get_option('vitastor').auto() or have_block
+ libvitastor_client = cc.find_library('vitastor_client', has_headers: ['vitastor_c.h'],
+ required: get_option('vitastor'), kwargs: static_kwargs)
+ if libvitastor_client.found()
+ if cc.links('''
+ #include <vitastor_c.h>
+ int main(void) {
+ vitastor_c_create_qemu(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ return 0;
+ }''', dependencies: libvitastor_client)
+ vitastor = declare_dependency(dependencies: libvitastor_client)
+ elif get_option('vitastor').enabled()
+ error('could not link libvitastor_client')
+ else
+ warning('could not link libvitastor_client, disabling')
+ endif
+ endif
+endif
+
glusterfs = not_found
glusterfs_ftruncate_has_stat = false
glusterfs_iocb_has_stat = false
@@ -1459,6 +1479,7 @@ config_host_data.set('CONFIG_LINUX_AIO',
config_host_data.set('CONFIG_LINUX_IO_URING', linux_io_uring.found())
config_host_data.set('CONFIG_LIBPMEM', libpmem.found())
config_host_data.set('CONFIG_RBD', rbd.found())
+config_host_data.set('CONFIG_VITASTOR', vitastor.found())
config_host_data.set('CONFIG_SDL', sdl.found())
config_host_data.set('CONFIG_SDL_IMAGE', sdl_image.found())
config_host_data.set('CONFIG_SECCOMP', seccomp.found())
@@ -3424,6 +3445,7 @@ if spice_protocol.found()
summary_info += {' spice server support': spice}
endif
summary_info += {'rbd support': rbd}
+summary_info += {'vitastor support': vitastor}
summary_info += {'xfsctl support': config_host.has_key('CONFIG_XFS')}
summary_info += {'smartcard support': cacard}
summary_info += {'U2F support': u2f}
Index: qemu/meson_options.txt
===================================================================
--- qemu.orig/meson_options.txt
+++ qemu/meson_options.txt
@@ -121,6 +121,8 @@ option('lzo', type : 'feature', value :
description: 'lzo compression support')
option('rbd', type : 'feature', value : 'auto',
description: 'Ceph block device driver')
+option('vitastor', type : 'feature', value : 'auto',
+ description: 'Vitastor block device driver')
option('gtk', type : 'feature', value : 'auto',
description: 'GTK+ user interface')
option('sdl', type : 'feature', value : 'auto',
Index: qemu/qapi/block-core.json
===================================================================
--- qemu.orig/qapi/block-core.json
+++ qemu/qapi/block-core.json
@@ -3179,7 +3179,7 @@
'preallocate', 'qcow', 'qcow2', 'qed', 'quorum', 'raw', 'rbd',
{ 'name': 'replication', 'if': 'CONFIG_REPLICATION' },
'pbs',
- 'ssh', 'throttle', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat' ] }
+ 'ssh', 'throttle', 'vdi', 'vhdx', 'vitastor', 'vmdk', 'vpc', 'vvfat' ] }
##
# @BlockdevOptionsFile:
@@ -4125,6 +4125,28 @@
'*server': ['InetSocketAddressBase'] } }
##
+# @BlockdevOptionsVitastor:
+#
+# Driver specific block device options for vitastor
+#
+# @image: Image name
+# @inode: Inode number
+# @pool: Pool ID
+# @size: Desired image size in bytes
+# @config-path: Path to Vitastor configuration
+# @etcd-host: etcd connection address(es)
+# @etcd-prefix: etcd key/value prefix
+##
+{ 'struct': 'BlockdevOptionsVitastor',
+ 'data': { '*inode': 'uint64',
+ '*pool': 'uint64',
+ '*size': 'uint64',
+ '*image': 'str',
+ '*config-path': 'str',
+ '*etcd-host': 'str',
+ '*etcd-prefix': 'str' } }
+
+##
# @ReplicationMode:
#
# An enumeration of replication modes.
@@ -4520,6 +4542,7 @@
'throttle': 'BlockdevOptionsThrottle',
'vdi': 'BlockdevOptionsGenericFormat',
'vhdx': 'BlockdevOptionsGenericFormat',
+ 'vitastor': 'BlockdevOptionsVitastor',
'vmdk': 'BlockdevOptionsGenericCOWFormat',
'vpc': 'BlockdevOptionsGenericFormat',
'vvfat': 'BlockdevOptionsVVFAT'
@@ -4910,6 +4933,17 @@
'*encrypt' : 'RbdEncryptionCreateOptions' } }
##
+# @BlockdevCreateOptionsVitastor:
+#
+# Driver specific image creation options for Vitastor.
+#
+# @size: Size of the virtual disk in bytes
+##
+{ 'struct': 'BlockdevCreateOptionsVitastor',
+ 'data': { 'location': 'BlockdevOptionsVitastor',
+ 'size': 'size' } }
+
+##
# @BlockdevVmdkSubformat:
#
# Subformat options for VMDK images
@@ -5108,6 +5142,7 @@
'ssh': 'BlockdevCreateOptionsSsh',
'vdi': 'BlockdevCreateOptionsVdi',
'vhdx': 'BlockdevCreateOptionsVhdx',
+ 'vitastor': 'BlockdevCreateOptionsVitastor',
'vmdk': 'BlockdevCreateOptionsVmdk',
'vpc': 'BlockdevCreateOptionsVpc'
} }
Index: qemu/scripts/ci/org.centos/stream/8/x86_64/configure
===================================================================
--- qemu.orig/scripts/ci/org.centos/stream/8/x86_64/configure
+++ qemu/scripts/ci/org.centos/stream/8/x86_64/configure
@@ -31,7 +31,7 @@
--with-git=meson \
--with-git-submodules=update \
--target-list="x86_64-softmmu" \
---block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
+--block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,vitastor,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
--audio-drv-list="" \
--block-drv-ro-whitelist="vmdk,vhdx,vpc,https,ssh" \
--with-coroutine=ucontext \
@@ -183,6 +183,7 @@
--enable-opengl \
--enable-pie \
--enable-rbd \
+--enable-vitastor \
--enable-rdma \
--enable-seccomp \
--enable-snappy \

View File

@@ -0,0 +1,169 @@
Index: qemu/block/meson.build
===================================================================
--- qemu.orig/block/meson.build
+++ qemu/block/meson.build
@@ -111,6 +111,7 @@ foreach m : [
[libnfs, 'nfs', files('nfs.c')],
[libssh, 'ssh', files('ssh.c')],
[rbd, 'rbd', files('rbd.c')],
+ [vitastor, 'vitastor', files('vitastor.c')],
]
if m[0].found()
module_ss = ss.source_set()
Index: qemu/meson.build
===================================================================
--- qemu.orig/meson.build
+++ qemu/meson.build
@@ -967,6 +967,26 @@ if not get_option('rbd').auto() or have_
endif
endif
+vitastor = not_found
+if not get_option('vitastor').auto() or have_block
+ libvitastor_client = cc.find_library('vitastor_client', has_headers: ['vitastor_c.h'],
+ required: get_option('vitastor'), kwargs: static_kwargs)
+ if libvitastor_client.found()
+ if cc.links('''
+ #include <vitastor_c.h>
+ int main(void) {
+ vitastor_c_create_qemu(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ return 0;
+ }''', dependencies: libvitastor_client)
+ vitastor = declare_dependency(dependencies: libvitastor_client)
+ elif get_option('vitastor').enabled()
+ error('could not link libvitastor_client')
+ else
+ warning('could not link libvitastor_client, disabling')
+ endif
+ endif
+endif
+
glusterfs = not_found
glusterfs_ftruncate_has_stat = false
glusterfs_iocb_has_stat = false
@@ -1802,6 +1822,7 @@ config_host_data.set('CONFIG_NUMA', numa
config_host_data.set('CONFIG_OPENGL', opengl.found())
config_host_data.set('CONFIG_PROFILER', get_option('profiler'))
config_host_data.set('CONFIG_RBD', rbd.found())
+config_host_data.set('CONFIG_VITASTOR', vitastor.found())
config_host_data.set('CONFIG_RDMA', rdma.found())
config_host_data.set('CONFIG_SDL', sdl.found())
config_host_data.set('CONFIG_SDL_IMAGE', sdl_image.found())
@@ -3965,6 +3986,7 @@ if spice_protocol.found()
summary_info += {' spice server support': spice}
endif
summary_info += {'rbd support': rbd}
+summary_info += {'vitastor support': vitastor}
summary_info += {'smartcard support': cacard}
summary_info += {'U2F support': u2f}
summary_info += {'libusb': libusb}
Index: qemu/meson_options.txt
===================================================================
--- qemu.orig/meson_options.txt
+++ qemu/meson_options.txt
@@ -167,6 +167,8 @@ option('lzo', type : 'feature', value :
description: 'lzo compression support')
option('rbd', type : 'feature', value : 'auto',
description: 'Ceph block device driver')
+option('vitastor', type : 'feature', value : 'auto',
+ description: 'Vitastor block device driver')
option('opengl', type : 'feature', value : 'auto',
description: 'OpenGL support')
option('rdma', type : 'feature', value : 'auto',
Index: qemu/qapi/block-core.json
===================================================================
--- qemu.orig/qapi/block-core.json
+++ qemu/qapi/block-core.json
@@ -3209,7 +3209,7 @@
'preallocate', 'qcow', 'qcow2', 'qed', 'quorum', 'raw', 'rbd',
{ 'name': 'replication', 'if': 'CONFIG_REPLICATION' },
'pbs',
- 'ssh', 'throttle', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat' ] }
+ 'ssh', 'throttle', 'vdi', 'vhdx', 'vitastor', 'vmdk', 'vpc', 'vvfat' ] }
##
# @BlockdevOptionsFile:
@@ -4149,6 +4149,28 @@
'*server': ['InetSocketAddressBase'] } }
##
+# @BlockdevOptionsVitastor:
+#
+# Driver specific block device options for vitastor
+#
+# @image: Image name
+# @inode: Inode number
+# @pool: Pool ID
+# @size: Desired image size in bytes
+# @config-path: Path to Vitastor configuration
+# @etcd-host: etcd connection address(es)
+# @etcd-prefix: etcd key/value prefix
+##
+{ 'struct': 'BlockdevOptionsVitastor',
+ 'data': { '*inode': 'uint64',
+ '*pool': 'uint64',
+ '*size': 'uint64',
+ '*image': 'str',
+ '*config-path': 'str',
+ '*etcd-host': 'str',
+ '*etcd-prefix': 'str' } }
+
+##
# @ReplicationMode:
#
# An enumeration of replication modes.
@@ -4593,6 +4615,7 @@
'throttle': 'BlockdevOptionsThrottle',
'vdi': 'BlockdevOptionsGenericFormat',
'vhdx': 'BlockdevOptionsGenericFormat',
+ 'vitastor': 'BlockdevOptionsVitastor',
'vmdk': 'BlockdevOptionsGenericCOWFormat',
'vpc': 'BlockdevOptionsGenericFormat',
'vvfat': 'BlockdevOptionsVVFAT'
@@ -4985,6 +5008,17 @@
'*encrypt' : 'RbdEncryptionCreateOptions' } }
##
+# @BlockdevCreateOptionsVitastor:
+#
+# Driver specific image creation options for Vitastor.
+#
+# @size: Size of the virtual disk in bytes
+##
+{ 'struct': 'BlockdevCreateOptionsVitastor',
+ 'data': { 'location': 'BlockdevOptionsVitastor',
+ 'size': 'size' } }
+
+##
# @BlockdevVmdkSubformat:
#
# Subformat options for VMDK images
@@ -5182,6 +5216,7 @@
'ssh': 'BlockdevCreateOptionsSsh',
'vdi': 'BlockdevCreateOptionsVdi',
'vhdx': 'BlockdevCreateOptionsVhdx',
+ 'vitastor': 'BlockdevCreateOptionsVitastor',
'vmdk': 'BlockdevCreateOptionsVmdk',
'vpc': 'BlockdevCreateOptionsVpc'
} }
Index: qemu/scripts/ci/org.centos/stream/8/x86_64/configure
===================================================================
--- qemu.orig/scripts/ci/org.centos/stream/8/x86_64/configure
+++ qemu/scripts/ci/org.centos/stream/8/x86_64/configure
@@ -31,7 +31,7 @@
--with-git=meson \
--with-git-submodules=update \
--target-list="x86_64-softmmu" \
---block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
+--block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,vitastor,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
--audio-drv-list="" \
--block-drv-ro-whitelist="vmdk,vhdx,vpc,https,ssh" \
--with-coroutine=ucontext \
@@ -179,6 +179,7 @@
--enable-opengl \
--enable-pie \
--enable-rbd \
+--enable-vitastor \
--enable-rdma \
--enable-seccomp \
--enable-snappy \

View File

@@ -0,0 +1,169 @@
Index: pve-qemu-kvm-7.2.0/block/meson.build
===================================================================
--- pve-qemu-kvm-7.2.0.orig/block/meson.build
+++ pve-qemu-kvm-7.2.0/block/meson.build
@@ -113,6 +113,7 @@ foreach m : [
[libnfs, 'nfs', files('nfs.c')],
[libssh, 'ssh', files('ssh.c')],
[rbd, 'rbd', files('rbd.c')],
+ [vitastor, 'vitastor', files('vitastor.c')],
]
if m[0].found()
module_ss = ss.source_set()
Index: pve-qemu-kvm-7.2.0/meson.build
===================================================================
--- pve-qemu-kvm-7.2.0.orig/meson.build
+++ pve-qemu-kvm-7.2.0/meson.build
@@ -1026,6 +1026,26 @@ if not get_option('rbd').auto() or have_
endif
endif
+vitastor = not_found
+if not get_option('vitastor').auto() or have_block
+ libvitastor_client = cc.find_library('vitastor_client', has_headers: ['vitastor_c.h'],
+ required: get_option('vitastor'), kwargs: static_kwargs)
+ if libvitastor_client.found()
+ if cc.links('''
+ #include <vitastor_c.h>
+ int main(void) {
+ vitastor_c_create_qemu(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ return 0;
+ }''', dependencies: libvitastor_client)
+ vitastor = declare_dependency(dependencies: libvitastor_client)
+ elif get_option('vitastor').enabled()
+ error('could not link libvitastor_client')
+ else
+ warning('could not link libvitastor_client, disabling')
+ endif
+ endif
+endif
+
glusterfs = not_found
glusterfs_ftruncate_has_stat = false
glusterfs_iocb_has_stat = false
@@ -1865,6 +1885,7 @@ config_host_data.set('CONFIG_NUMA', numa
config_host_data.set('CONFIG_OPENGL', opengl.found())
config_host_data.set('CONFIG_PROFILER', get_option('profiler'))
config_host_data.set('CONFIG_RBD', rbd.found())
+config_host_data.set('CONFIG_VITASTOR', vitastor.found())
config_host_data.set('CONFIG_RDMA', rdma.found())
config_host_data.set('CONFIG_SDL', sdl.found())
config_host_data.set('CONFIG_SDL_IMAGE', sdl_image.found())
@@ -3957,6 +3978,7 @@ if spice_protocol.found()
summary_info += {' spice server support': spice}
endif
summary_info += {'rbd support': rbd}
+summary_info += {'vitastor support': vitastor}
summary_info += {'smartcard support': cacard}
summary_info += {'U2F support': u2f}
summary_info += {'libusb': libusb}
Index: pve-qemu-kvm-7.2.0/meson_options.txt
===================================================================
--- pve-qemu-kvm-7.2.0.orig/meson_options.txt
+++ pve-qemu-kvm-7.2.0/meson_options.txt
@@ -169,6 +169,8 @@ option('lzo', type : 'feature', value :
description: 'lzo compression support')
option('rbd', type : 'feature', value : 'auto',
description: 'Ceph block device driver')
+option('vitastor', type : 'feature', value : 'auto',
+ description: 'Vitastor block device driver')
option('opengl', type : 'feature', value : 'auto',
description: 'OpenGL support')
option('rdma', type : 'feature', value : 'auto',
Index: pve-qemu-kvm-7.2.0/qapi/block-core.json
===================================================================
--- pve-qemu-kvm-7.2.0.orig/qapi/block-core.json
+++ pve-qemu-kvm-7.2.0/qapi/block-core.json
@@ -3213,7 +3213,7 @@
'raw', 'rbd',
{ 'name': 'replication', 'if': 'CONFIG_REPLICATION' },
'pbs',
- 'ssh', 'throttle', 'vdi', 'vhdx',
+ 'ssh', 'throttle', 'vdi', 'vhdx', 'vitastor',
{ 'name': 'virtio-blk-vfio-pci', 'if': 'CONFIG_BLKIO' },
{ 'name': 'virtio-blk-vhost-user', 'if': 'CONFIG_BLKIO' },
{ 'name': 'virtio-blk-vhost-vdpa', 'if': 'CONFIG_BLKIO' },
@@ -4223,6 +4223,28 @@
'*server': ['InetSocketAddressBase'] } }
##
+# @BlockdevOptionsVitastor:
+#
+# Driver specific block device options for vitastor
+#
+# @image: Image name
+# @inode: Inode number
+# @pool: Pool ID
+# @size: Desired image size in bytes
+# @config-path: Path to Vitastor configuration
+# @etcd-host: etcd connection address(es)
+# @etcd-prefix: etcd key/value prefix
+##
+{ 'struct': 'BlockdevOptionsVitastor',
+ 'data': { '*inode': 'uint64',
+ '*pool': 'uint64',
+ '*size': 'uint64',
+ '*image': 'str',
+ '*config-path': 'str',
+ '*etcd-host': 'str',
+ '*etcd-prefix': 'str' } }
+
+##
# @ReplicationMode:
#
# An enumeration of replication modes.
@@ -4671,6 +4693,7 @@
'throttle': 'BlockdevOptionsThrottle',
'vdi': 'BlockdevOptionsGenericFormat',
'vhdx': 'BlockdevOptionsGenericFormat',
+ 'vitastor': 'BlockdevOptionsVitastor',
'virtio-blk-vfio-pci':
{ 'type': 'BlockdevOptionsVirtioBlkVfioPci',
'if': 'CONFIG_BLKIO' },
@@ -5072,6 +5095,17 @@
'*encrypt' : 'RbdEncryptionCreateOptions' } }
##
+# @BlockdevCreateOptionsVitastor:
+#
+# Driver specific image creation options for Vitastor.
+#
+# @size: Size of the virtual disk in bytes
+##
+{ 'struct': 'BlockdevCreateOptionsVitastor',
+ 'data': { 'location': 'BlockdevOptionsVitastor',
+ 'size': 'size' } }
+
+##
# @BlockdevVmdkSubformat:
#
# Subformat options for VMDK images
@@ -5269,6 +5303,7 @@
'ssh': 'BlockdevCreateOptionsSsh',
'vdi': 'BlockdevCreateOptionsVdi',
'vhdx': 'BlockdevCreateOptionsVhdx',
+ 'vitastor': 'BlockdevCreateOptionsVitastor',
'vmdk': 'BlockdevCreateOptionsVmdk',
'vpc': 'BlockdevCreateOptionsVpc'
} }
Index: pve-qemu-kvm-7.2.0/scripts/ci/org.centos/stream/8/x86_64/configure
===================================================================
--- pve-qemu-kvm-7.2.0.orig/scripts/ci/org.centos/stream/8/x86_64/configure
+++ pve-qemu-kvm-7.2.0/scripts/ci/org.centos/stream/8/x86_64/configure
@@ -31,7 +31,7 @@
--with-git=meson \
--with-git-submodules=update \
--target-list="x86_64-softmmu" \
---block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
+--block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,vitastor,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
--audio-drv-list="" \
--block-drv-ro-whitelist="vmdk,vhdx,vpc,https,ssh" \
--with-coroutine=ucontext \
@@ -179,6 +179,7 @@
--enable-opengl \
--enable-pie \
--enable-rbd \
+--enable-vitastor \
--enable-rdma \
--enable-seccomp \
--enable-snappy \

View File

@@ -0,0 +1,190 @@
diff --git a/block/meson.build b/block/meson.build
index 382bec0e7d..af6207dbce 100644
--- a/block/meson.build
+++ b/block/meson.build
@@ -114,6 +114,7 @@ foreach m : [
[libnfs, 'nfs', files('nfs.c')],
[libssh, 'ssh', files('ssh.c')],
[rbd, 'rbd', files('rbd.c')],
+ [vitastor, 'vitastor', files('vitastor.c')],
]
if m[0].found()
module_ss = ss.source_set()
diff --git a/meson.build b/meson.build
index c44d05a13f..ebedb42843 100644
--- a/meson.build
+++ b/meson.build
@@ -1028,6 +1028,26 @@ if not get_option('rbd').auto() or have_block
endif
endif
+vitastor = not_found
+if not get_option('vitastor').auto() or have_block
+ libvitastor_client = cc.find_library('vitastor_client', has_headers: ['vitastor_c.h'],
+ required: get_option('vitastor'), kwargs: static_kwargs)
+ if libvitastor_client.found()
+ if cc.links('''
+ #include <vitastor_c.h>
+ int main(void) {
+ vitastor_c_create_qemu(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ return 0;
+ }''', dependencies: libvitastor_client)
+ vitastor = declare_dependency(dependencies: libvitastor_client)
+ elif get_option('vitastor').enabled()
+ error('could not link libvitastor_client')
+ else
+ warning('could not link libvitastor_client, disabling')
+ endif
+ endif
+endif
+
glusterfs = not_found
glusterfs_ftruncate_has_stat = false
glusterfs_iocb_has_stat = false
@@ -1882,6 +1902,7 @@ endif
config_host_data.set('CONFIG_OPENGL', opengl.found())
config_host_data.set('CONFIG_PROFILER', get_option('profiler'))
config_host_data.set('CONFIG_RBD', rbd.found())
+config_host_data.set('CONFIG_VITASTOR', vitastor.found())
config_host_data.set('CONFIG_RDMA', rdma.found())
config_host_data.set('CONFIG_SDL', sdl.found())
config_host_data.set('CONFIG_SDL_IMAGE', sdl_image.found())
@@ -4020,6 +4041,7 @@ if spice_protocol.found()
summary_info += {' spice server support': spice}
endif
summary_info += {'rbd support': rbd}
+summary_info += {'vitastor support': vitastor}
summary_info += {'smartcard support': cacard}
summary_info += {'U2F support': u2f}
summary_info += {'libusb': libusb}
diff --git a/meson_options.txt b/meson_options.txt
index fc9447d267..c4ac55c283 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -173,6 +173,8 @@ option('lzo', type : 'feature', value : 'auto',
description: 'lzo compression support')
option('rbd', type : 'feature', value : 'auto',
description: 'Ceph block device driver')
+option('vitastor', type : 'feature', value : 'auto',
+ description: 'Vitastor block device driver')
option('opengl', type : 'feature', value : 'auto',
description: 'OpenGL support')
option('rdma', type : 'feature', value : 'auto',
diff --git a/qapi/block-core.json b/qapi/block-core.json
index c05ad0c07e..f5eb701604 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -3308,7 +3308,7 @@
'raw', 'rbd',
{ 'name': 'replication', 'if': 'CONFIG_REPLICATION' },
'pbs',
- 'ssh', 'throttle', 'vdi', 'vhdx',
+ 'ssh', 'throttle', 'vdi', 'vhdx', 'vitastor',
{ 'name': 'virtio-blk-vfio-pci', 'if': 'CONFIG_BLKIO' },
{ 'name': 'virtio-blk-vhost-user', 'if': 'CONFIG_BLKIO' },
{ 'name': 'virtio-blk-vhost-vdpa', 'if': 'CONFIG_BLKIO' },
@@ -4338,6 +4338,28 @@
'*key-secret': 'str',
'*server': ['InetSocketAddressBase'] } }
+##
+# @BlockdevOptionsVitastor:
+#
+# Driver specific block device options for vitastor
+#
+# @image: Image name
+# @inode: Inode number
+# @pool: Pool ID
+# @size: Desired image size in bytes
+# @config-path: Path to Vitastor configuration
+# @etcd-host: etcd connection address(es)
+# @etcd-prefix: etcd key/value prefix
+##
+{ 'struct': 'BlockdevOptionsVitastor',
+ 'data': { '*inode': 'uint64',
+ '*pool': 'uint64',
+ '*size': 'uint64',
+ '*image': 'str',
+ '*config-path': 'str',
+ '*etcd-host': 'str',
+ '*etcd-prefix': 'str' } }
+
##
# @ReplicationMode:
#
@@ -4787,6 +4809,7 @@
'throttle': 'BlockdevOptionsThrottle',
'vdi': 'BlockdevOptionsGenericFormat',
'vhdx': 'BlockdevOptionsGenericFormat',
+ 'vitastor': 'BlockdevOptionsVitastor',
'virtio-blk-vfio-pci':
{ 'type': 'BlockdevOptionsVirtioBlkVfioPci',
'if': 'CONFIG_BLKIO' },
@@ -5187,6 +5210,17 @@
'*cluster-size' : 'size',
'*encrypt' : 'RbdEncryptionCreateOptions' } }
+##
+# @BlockdevCreateOptionsVitastor:
+#
+# Driver specific image creation options for Vitastor.
+#
+# @size: Size of the virtual disk in bytes
+##
+{ 'struct': 'BlockdevCreateOptionsVitastor',
+ 'data': { 'location': 'BlockdevOptionsVitastor',
+ 'size': 'size' } }
+
##
# @BlockdevVmdkSubformat:
#
@@ -5385,6 +5419,7 @@
'ssh': 'BlockdevCreateOptionsSsh',
'vdi': 'BlockdevCreateOptionsVdi',
'vhdx': 'BlockdevCreateOptionsVhdx',
+ 'vitastor': 'BlockdevCreateOptionsVitastor',
'vmdk': 'BlockdevCreateOptionsVmdk',
'vpc': 'BlockdevCreateOptionsVpc'
} }
diff --git a/scripts/ci/org.centos/stream/8/x86_64/configure b/scripts/ci/org.centos/stream/8/x86_64/configure
index 6e8983f39c..1b0b9fcf3e 100755
--- a/scripts/ci/org.centos/stream/8/x86_64/configure
+++ b/scripts/ci/org.centos/stream/8/x86_64/configure
@@ -32,7 +32,7 @@
--with-git=meson \
--with-git-submodules=update \
--target-list="x86_64-softmmu" \
---block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
+--block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,vitastor,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
--audio-drv-list="" \
--block-drv-ro-whitelist="vmdk,vhdx,vpc,https,ssh" \
--with-coroutine=ucontext \
@@ -179,6 +179,7 @@
--enable-opengl \
--enable-pie \
--enable-rbd \
+--enable-vitastor \
--enable-rdma \
--enable-seccomp \
--enable-snappy \
diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh
index 009fab1515..95914e6ebc 100644
--- a/scripts/meson-buildoptions.sh
+++ b/scripts/meson-buildoptions.sh
@@ -144,6 +144,7 @@ meson_options_help() {
printf "%s\n" ' qed qed image format support'
printf "%s\n" ' qga-vss build QGA VSS support (broken with MinGW)'
printf "%s\n" ' rbd Ceph block device driver'
+ printf "%s\n" ' vitastor Vitastor block device driver'
printf "%s\n" ' rdma Enable RDMA-based migration'
printf "%s\n" ' replication replication support'
printf "%s\n" ' sdl SDL user interface'
@@ -392,6 +393,8 @@ _meson_option_parse() {
--disable-qom-cast-debug) printf "%s" -Dqom_cast_debug=false ;;
--enable-rbd) printf "%s" -Drbd=enabled ;;
--disable-rbd) printf "%s" -Drbd=disabled ;;
+ --enable-vitastor) printf "%s" -Dvitastor=enabled ;;
+ --disable-vitastor) printf "%s" -Dvitastor=disabled ;;
--enable-rdma) printf "%s" -Drdma=enabled ;;
--disable-rdma) printf "%s" -Drdma=disabled ;;
--enable-replication) printf "%s" -Dreplication=enabled ;;

View File

@@ -0,0 +1,169 @@
diff --git a/block/meson.build b/block/meson.build
index deb73ca389..e269f599a1 100644
--- a/block/meson.build
+++ b/block/meson.build
@@ -78,6 +78,7 @@ foreach m : [
[libnfs, 'nfs', files('nfs.c')],
[libssh, 'ssh', files('ssh.c')],
[rbd, 'rbd', files('rbd.c')],
+ [vitastor, 'vitastor', files('vitastor.c')],
]
if m[0].found()
module_ss = ss.source_set()
diff --git a/meson.build b/meson.build
index 96de1a6ef9..2e3994777d 100644
--- a/meson.build
+++ b/meson.build
@@ -838,6 +838,26 @@ if not get_option('rbd').auto() or have_block
endif
endif
+vitastor = not_found
+if not get_option('vitastor').auto() or have_block
+ libvitastor_client = cc.find_library('vitastor_client', has_headers: ['vitastor_c.h'],
+ required: get_option('vitastor'), kwargs: static_kwargs)
+ if libvitastor_client.found()
+ if cc.links('''
+ #include <vitastor_c.h>
+ int main(void) {
+ vitastor_c_create_qemu(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ return 0;
+ }''', dependencies: libvitastor_client)
+ vitastor = declare_dependency(dependencies: libvitastor_client)
+ elif get_option('vitastor').enabled()
+ error('could not link libvitastor_client')
+ else
+ warning('could not link libvitastor_client, disabling')
+ endif
+ endif
+endif
+
glusterfs = not_found
glusterfs_ftruncate_has_stat = false
glusterfs_iocb_has_stat = false
@@ -1455,6 +1475,7 @@ config_host_data.set('CONFIG_LINUX_AIO', libaio.found())
config_host_data.set('CONFIG_LINUX_IO_URING', linux_io_uring.found())
config_host_data.set('CONFIG_LIBPMEM', libpmem.found())
config_host_data.set('CONFIG_RBD', rbd.found())
+config_host_data.set('CONFIG_VITASTOR', vitastor.found())
config_host_data.set('CONFIG_SDL', sdl.found())
config_host_data.set('CONFIG_SDL_IMAGE', sdl_image.found())
config_host_data.set('CONFIG_SECCOMP', seccomp.found())
@@ -3412,6 +3433,7 @@ if spice_protocol.found()
summary_info += {' spice server support': spice}
endif
summary_info += {'rbd support': rbd}
+summary_info += {'vitastor support': vitastor}
summary_info += {'xfsctl support': config_host.has_key('CONFIG_XFS')}
summary_info += {'smartcard support': cacard}
summary_info += {'U2F support': u2f}
diff --git a/meson_options.txt b/meson_options.txt
index e392323732..5b56007475 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -121,6 +121,8 @@ option('lzo', type : 'feature', value : 'auto',
description: 'lzo compression support')
option('rbd', type : 'feature', value : 'auto',
description: 'Ceph block device driver')
+option('vitastor', type : 'feature', value : 'auto',
+ description: 'Vitastor block device driver')
option('gtk', type : 'feature', value : 'auto',
description: 'GTK+ user interface')
option('sdl', type : 'feature', value : 'auto',
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 1d3dd9cb48..88453405e5 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -2930,7 +2930,7 @@
'luks', 'nbd', 'nfs', 'null-aio', 'null-co', 'nvme', 'parallels',
'preallocate', 'qcow', 'qcow2', 'qed', 'quorum', 'raw', 'rbd',
{ 'name': 'replication', 'if': 'CONFIG_REPLICATION' },
- 'ssh', 'throttle', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat' ] }
+ 'ssh', 'throttle', 'vdi', 'vhdx', 'vitastor', 'vmdk', 'vpc', 'vvfat' ] }
##
# @BlockdevOptionsFile:
@@ -3864,6 +3864,28 @@
'*key-secret': 'str',
'*server': ['InetSocketAddressBase'] } }
+##
+# @BlockdevOptionsVitastor:
+#
+# Driver specific block device options for vitastor
+#
+# @image: Image name
+# @inode: Inode number
+# @pool: Pool ID
+# @size: Desired image size in bytes
+# @config-path: Path to Vitastor configuration
+# @etcd-host: etcd connection address(es)
+# @etcd-prefix: etcd key/value prefix
+##
+{ 'struct': 'BlockdevOptionsVitastor',
+ 'data': { '*inode': 'uint64',
+ '*pool': 'uint64',
+ '*size': 'uint64',
+ '*image': 'str',
+ '*config-path': 'str',
+ '*etcd-host': 'str',
+ '*etcd-prefix': 'str' } }
+
##
# @ReplicationMode:
#
@@ -4259,6 +4281,7 @@
'throttle': 'BlockdevOptionsThrottle',
'vdi': 'BlockdevOptionsGenericFormat',
'vhdx': 'BlockdevOptionsGenericFormat',
+ 'vitastor': 'BlockdevOptionsVitastor',
'vmdk': 'BlockdevOptionsGenericCOWFormat',
'vpc': 'BlockdevOptionsGenericFormat',
'vvfat': 'BlockdevOptionsVVFAT'
@@ -4647,6 +4670,17 @@
'*cluster-size' : 'size',
'*encrypt' : 'RbdEncryptionCreateOptions' } }
+##
+# @BlockdevCreateOptionsVitastor:
+#
+# Driver specific image creation options for Vitastor.
+#
+# @size: Size of the virtual disk in bytes
+##
+{ 'struct': 'BlockdevCreateOptionsVitastor',
+ 'data': { 'location': 'BlockdevOptionsVitastor',
+ 'size': 'size' } }
+
##
# @BlockdevVmdkSubformat:
#
@@ -4846,6 +4880,7 @@
'ssh': 'BlockdevCreateOptionsSsh',
'vdi': 'BlockdevCreateOptionsVdi',
'vhdx': 'BlockdevCreateOptionsVhdx',
+ 'vitastor': 'BlockdevCreateOptionsVitastor',
'vmdk': 'BlockdevCreateOptionsVmdk',
'vpc': 'BlockdevCreateOptionsVpc'
} }
diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh
index 7a17ff4218..cdddbf32aa 100644
--- a/scripts/meson-buildoptions.sh
+++ b/scripts/meson-buildoptions.sh
@@ -69,6 +69,7 @@ meson_options_help() {
printf "%s\n" ' oss OSS sound support'
printf "%s\n" ' pa PulseAudio sound support'
printf "%s\n" ' rbd Ceph block device driver'
+ printf "%s\n" ' vitastor Vitastor block device driver'
printf "%s\n" ' sdl SDL user interface'
printf "%s\n" ' sdl-image SDL Image support for icons'
printf "%s\n" ' seccomp seccomp support'
@@ -210,6 +211,8 @@ _meson_option_parse() {
--disable-pa) printf "%s" -Dpa=disabled ;;
--enable-rbd) printf "%s" -Drbd=enabled ;;
--disable-rbd) printf "%s" -Drbd=disabled ;;
+ --enable-vitastor) printf "%s" -Dvitastor=enabled ;;
+ --disable-vitastor) printf "%s" -Dvitastor=disabled ;;
--enable-sdl) printf "%s" -Dsdl=enabled ;;
--disable-sdl) printf "%s" -Dsdl=disabled ;;
--enable-sdl-image) printf "%s" -Dsdl_image=enabled ;;

View File

@@ -0,0 +1,190 @@
diff --git a/block/meson.build b/block/meson.build
index 0b2a60c99b..d923713804 100644
--- a/block/meson.build
+++ b/block/meson.build
@@ -98,6 +98,7 @@ foreach m : [
[libnfs, 'nfs', files('nfs.c')],
[libssh, 'ssh', files('ssh.c')],
[rbd, 'rbd', files('rbd.c')],
+ [vitastor, 'vitastor', files('vitastor.c')],
]
if m[0].found()
module_ss = ss.source_set()
diff --git a/meson.build b/meson.build
index 861de93c4f..272f72af11 100644
--- a/meson.build
+++ b/meson.build
@@ -884,6 +884,26 @@ if not get_option('rbd').auto() or have_block
endif
endif
+vitastor = not_found
+if not get_option('vitastor').auto() or have_block
+ libvitastor_client = cc.find_library('vitastor_client', has_headers: ['vitastor_c.h'],
+ required: get_option('vitastor'), kwargs: static_kwargs)
+ if libvitastor_client.found()
+ if cc.links('''
+ #include <vitastor_c.h>
+ int main(void) {
+ vitastor_c_create_qemu(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ return 0;
+ }''', dependencies: libvitastor_client)
+ vitastor = declare_dependency(dependencies: libvitastor_client)
+ elif get_option('vitastor').enabled()
+ error('could not link libvitastor_client')
+ else
+ warning('could not link libvitastor_client, disabling')
+ endif
+ endif
+endif
+
glusterfs = not_found
glusterfs_ftruncate_has_stat = false
glusterfs_iocb_has_stat = false
@@ -1546,6 +1566,7 @@ config_host_data.set('CONFIG_LIBPMEM', libpmem.found())
config_host_data.set('CONFIG_NUMA', numa.found())
config_host_data.set('CONFIG_PROFILER', get_option('profiler'))
config_host_data.set('CONFIG_RBD', rbd.found())
+config_host_data.set('CONFIG_VITASTOR', vitastor.found())
config_host_data.set('CONFIG_SDL', sdl.found())
config_host_data.set('CONFIG_SDL_IMAGE', sdl_image.found())
config_host_data.set('CONFIG_SECCOMP', seccomp.found())
@@ -3709,6 +3730,7 @@ if spice_protocol.found()
summary_info += {' spice server support': spice}
endif
summary_info += {'rbd support': rbd}
+summary_info += {'vitastor support': vitastor}
summary_info += {'smartcard support': cacard}
summary_info += {'U2F support': u2f}
summary_info += {'libusb': libusb}
diff --git a/meson_options.txt b/meson_options.txt
index 52b11cead4..d8d0868174 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -149,6 +149,8 @@ option('lzo', type : 'feature', value : 'auto',
description: 'lzo compression support')
option('rbd', type : 'feature', value : 'auto',
description: 'Ceph block device driver')
+option('vitastor', type : 'feature', value : 'auto',
+ description: 'Vitastor block device driver')
option('gtk', type : 'feature', value : 'auto',
description: 'GTK+ user interface')
option('sdl', type : 'feature', value : 'auto',
diff --git a/qapi/block-core.json b/qapi/block-core.json
index beeb91952a..1c98dc0e12 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -2929,7 +2929,7 @@
'luks', 'nbd', 'nfs', 'null-aio', 'null-co', 'nvme', 'parallels',
'preallocate', 'qcow', 'qcow2', 'qed', 'quorum', 'raw', 'rbd',
{ 'name': 'replication', 'if': 'CONFIG_REPLICATION' },
- 'ssh', 'throttle', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat' ] }
+ 'ssh', 'throttle', 'vdi', 'vhdx', 'vitastor', 'vmdk', 'vpc', 'vvfat' ] }
##
# @BlockdevOptionsFile:
@@ -3863,6 +3863,28 @@
'*key-secret': 'str',
'*server': ['InetSocketAddressBase'] } }
+##
+# @BlockdevOptionsVitastor:
+#
+# Driver specific block device options for vitastor
+#
+# @image: Image name
+# @inode: Inode number
+# @pool: Pool ID
+# @size: Desired image size in bytes
+# @config-path: Path to Vitastor configuration
+# @etcd-host: etcd connection address(es)
+# @etcd-prefix: etcd key/value prefix
+##
+{ 'struct': 'BlockdevOptionsVitastor',
+ 'data': { '*inode': 'uint64',
+ '*pool': 'uint64',
+ '*size': 'uint64',
+ '*image': 'str',
+ '*config-path': 'str',
+ '*etcd-host': 'str',
+ '*etcd-prefix': 'str' } }
+
##
# @ReplicationMode:
#
@@ -4277,6 +4299,7 @@
'throttle': 'BlockdevOptionsThrottle',
'vdi': 'BlockdevOptionsGenericFormat',
'vhdx': 'BlockdevOptionsGenericFormat',
+ 'vitastor': 'BlockdevOptionsVitastor',
'vmdk': 'BlockdevOptionsGenericCOWFormat',
'vpc': 'BlockdevOptionsGenericFormat',
'vvfat': 'BlockdevOptionsVVFAT'
@@ -4665,6 +4688,17 @@
'*cluster-size' : 'size',
'*encrypt' : 'RbdEncryptionCreateOptions' } }
+##
+# @BlockdevCreateOptionsVitastor:
+#
+# Driver specific image creation options for Vitastor.
+#
+# @size: Size of the virtual disk in bytes
+##
+{ 'struct': 'BlockdevCreateOptionsVitastor',
+ 'data': { 'location': 'BlockdevOptionsVitastor',
+ 'size': 'size' } }
+
##
# @BlockdevVmdkSubformat:
#
@@ -4864,6 +4898,7 @@
'ssh': 'BlockdevCreateOptionsSsh',
'vdi': 'BlockdevCreateOptionsVdi',
'vhdx': 'BlockdevCreateOptionsVhdx',
+ 'vitastor': 'BlockdevCreateOptionsVitastor',
'vmdk': 'BlockdevCreateOptionsVmdk',
'vpc': 'BlockdevCreateOptionsVpc'
} }
diff --git a/scripts/ci/org.centos/stream/8/x86_64/configure b/scripts/ci/org.centos/stream/8/x86_64/configure
index 9850dd4444..72b1287520 100755
--- a/scripts/ci/org.centos/stream/8/x86_64/configure
+++ b/scripts/ci/org.centos/stream/8/x86_64/configure
@@ -31,7 +31,7 @@
--with-git=meson \
--with-git-submodules=update \
--target-list="x86_64-softmmu" \
---block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
+--block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,vitastor,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
--audio-drv-list="" \
--block-drv-ro-whitelist="vmdk,vhdx,vpc,https,ssh" \
--with-coroutine=ucontext \
@@ -181,6 +181,7 @@
--enable-opengl \
--enable-pie \
--enable-rbd \
+--enable-vitastor \
--enable-rdma \
--enable-seccomp \
--enable-snappy \
diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh
index 1e26f4571e..370898d48c 100644
--- a/scripts/meson-buildoptions.sh
+++ b/scripts/meson-buildoptions.sh
@@ -98,6 +98,7 @@ meson_options_help() {
printf "%s\n" ' qed qed image format support'
printf "%s\n" ' qga-vss build QGA VSS support (broken with MinGW)'
printf "%s\n" ' rbd Ceph block device driver'
+ printf "%s\n" ' vitastor Vitastor block device driver'
printf "%s\n" ' replication replication support'
printf "%s\n" ' sdl SDL user interface'
printf "%s\n" ' sdl-image SDL Image support for icons'
@@ -289,6 +290,8 @@ _meson_option_parse() {
--disable-qom-cast-debug) printf "%s" -Dqom_cast_debug=false ;;
--enable-rbd) printf "%s" -Drbd=enabled ;;
--disable-rbd) printf "%s" -Drbd=disabled ;;
+ --enable-vitastor) printf "%s" -Dvitastor=enabled ;;
+ --disable-vitastor) printf "%s" -Dvitastor=disabled ;;
--enable-replication) printf "%s" -Dreplication=enabled ;;
--disable-replication) printf "%s" -Dreplication=disabled ;;
--enable-rng-none) printf "%s" -Drng_none=true ;;

View File

@@ -0,0 +1,190 @@
diff --git a/block/meson.build b/block/meson.build
index 60bc305597..89a042216f 100644
--- a/block/meson.build
+++ b/block/meson.build
@@ -98,6 +98,7 @@ foreach m : [
[libnfs, 'nfs', files('nfs.c')],
[libssh, 'ssh', files('ssh.c')],
[rbd, 'rbd', files('rbd.c')],
+ [vitastor, 'vitastor', files('vitastor.c')],
]
if m[0].found()
module_ss = ss.source_set()
diff --git a/meson.build b/meson.build
index 20fddbd707..600db4e2fb 100644
--- a/meson.build
+++ b/meson.build
@@ -967,6 +967,26 @@ if not get_option('rbd').auto() or have_block
endif
endif
+vitastor = not_found
+if not get_option('vitastor').auto() or have_block
+ libvitastor_client = cc.find_library('vitastor_client', has_headers: ['vitastor_c.h'],
+ required: get_option('vitastor'), kwargs: static_kwargs)
+ if libvitastor_client.found()
+ if cc.links('''
+ #include <vitastor_c.h>
+ int main(void) {
+ vitastor_c_create_qemu(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ return 0;
+ }''', dependencies: libvitastor_client)
+ vitastor = declare_dependency(dependencies: libvitastor_client)
+ elif get_option('vitastor').enabled()
+ error('could not link libvitastor_client')
+ else
+ warning('could not link libvitastor_client, disabling')
+ endif
+ endif
+endif
+
glusterfs = not_found
glusterfs_ftruncate_has_stat = false
glusterfs_iocb_has_stat = false
@@ -1799,6 +1819,7 @@ config_host_data.set('CONFIG_NUMA', numa.found())
config_host_data.set('CONFIG_OPENGL', opengl.found())
config_host_data.set('CONFIG_PROFILER', get_option('profiler'))
config_host_data.set('CONFIG_RBD', rbd.found())
+config_host_data.set('CONFIG_VITASTOR', vitastor.found())
config_host_data.set('CONFIG_RDMA', rdma.found())
config_host_data.set('CONFIG_SDL', sdl.found())
config_host_data.set('CONFIG_SDL_IMAGE', sdl_image.found())
@@ -3954,6 +3975,7 @@ if spice_protocol.found()
summary_info += {' spice server support': spice}
endif
summary_info += {'rbd support': rbd}
+summary_info += {'vitastor support': vitastor}
summary_info += {'smartcard support': cacard}
summary_info += {'U2F support': u2f}
summary_info += {'libusb': libusb}
diff --git a/meson_options.txt b/meson_options.txt
index e58e158396..9747b38fd0 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -167,6 +167,8 @@ option('lzo', type : 'feature', value : 'auto',
description: 'lzo compression support')
option('rbd', type : 'feature', value : 'auto',
description: 'Ceph block device driver')
+option('vitastor', type : 'feature', value : 'auto',
+ description: 'Vitastor block device driver')
option('opengl', type : 'feature', value : 'auto',
description: 'OpenGL support')
option('rdma', type : 'feature', value : 'auto',
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 2173e7734a..5a4900b322 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -2955,7 +2955,7 @@
'luks', 'nbd', 'nfs', 'null-aio', 'null-co', 'nvme', 'parallels',
'preallocate', 'qcow', 'qcow2', 'qed', 'quorum', 'raw', 'rbd',
{ 'name': 'replication', 'if': 'CONFIG_REPLICATION' },
- 'ssh', 'throttle', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat' ] }
+ 'ssh', 'throttle', 'vdi', 'vhdx', 'vitastor', 'vmdk', 'vpc', 'vvfat' ] }
##
# @BlockdevOptionsFile:
@@ -3883,6 +3883,28 @@
'*key-secret': 'str',
'*server': ['InetSocketAddressBase'] } }
+##
+# @BlockdevOptionsVitastor:
+#
+# Driver specific block device options for vitastor
+#
+# @image: Image name
+# @inode: Inode number
+# @pool: Pool ID
+# @size: Desired image size in bytes
+# @config-path: Path to Vitastor configuration
+# @etcd-host: etcd connection address(es)
+# @etcd-prefix: etcd key/value prefix
+##
+{ 'struct': 'BlockdevOptionsVitastor',
+ 'data': { '*inode': 'uint64',
+ '*pool': 'uint64',
+ '*size': 'uint64',
+ '*image': 'str',
+ '*config-path': 'str',
+ '*etcd-host': 'str',
+ '*etcd-prefix': 'str' } }
+
##
# @ReplicationMode:
#
@@ -4327,6 +4349,7 @@
'throttle': 'BlockdevOptionsThrottle',
'vdi': 'BlockdevOptionsGenericFormat',
'vhdx': 'BlockdevOptionsGenericFormat',
+ 'vitastor': 'BlockdevOptionsVitastor',
'vmdk': 'BlockdevOptionsGenericCOWFormat',
'vpc': 'BlockdevOptionsGenericFormat',
'vvfat': 'BlockdevOptionsVVFAT'
@@ -4717,6 +4740,17 @@
'*cluster-size' : 'size',
'*encrypt' : 'RbdEncryptionCreateOptions' } }
+##
+# @BlockdevCreateOptionsVitastor:
+#
+# Driver specific image creation options for Vitastor.
+#
+# @size: Size of the virtual disk in bytes
+##
+{ 'struct': 'BlockdevCreateOptionsVitastor',
+ 'data': { 'location': 'BlockdevOptionsVitastor',
+ 'size': 'size' } }
+
##
# @BlockdevVmdkSubformat:
#
@@ -4915,6 +4949,7 @@
'ssh': 'BlockdevCreateOptionsSsh',
'vdi': 'BlockdevCreateOptionsVdi',
'vhdx': 'BlockdevCreateOptionsVhdx',
+ 'vitastor': 'BlockdevCreateOptionsVitastor',
'vmdk': 'BlockdevCreateOptionsVmdk',
'vpc': 'BlockdevCreateOptionsVpc'
} }
diff --git a/scripts/ci/org.centos/stream/8/x86_64/configure b/scripts/ci/org.centos/stream/8/x86_64/configure
index a7f92aff90..53dc55be2e 100755
--- a/scripts/ci/org.centos/stream/8/x86_64/configure
+++ b/scripts/ci/org.centos/stream/8/x86_64/configure
@@ -31,7 +31,7 @@
--with-git=meson \
--with-git-submodules=update \
--target-list="x86_64-softmmu" \
---block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
+--block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,vitastor,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
--audio-drv-list="" \
--block-drv-ro-whitelist="vmdk,vhdx,vpc,https,ssh" \
--with-coroutine=ucontext \
@@ -179,6 +179,7 @@
--enable-opengl \
--enable-pie \
--enable-rbd \
+--enable-vitastor \
--enable-rdma \
--enable-seccomp \
--enable-snappy \
diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh
index 359b04e0e6..f5b85ba78c 100644
--- a/scripts/meson-buildoptions.sh
+++ b/scripts/meson-buildoptions.sh
@@ -135,6 +135,7 @@ meson_options_help() {
printf "%s\n" ' qed qed image format support'
printf "%s\n" ' qga-vss build QGA VSS support (broken with MinGW)'
printf "%s\n" ' rbd Ceph block device driver'
+ printf "%s\n" ' vitastor Vitastor block device driver'
printf "%s\n" ' rdma Enable RDMA-based migration'
printf "%s\n" ' replication replication support'
printf "%s\n" ' sdl SDL user interface'
@@ -370,6 +371,8 @@ _meson_option_parse() {
--disable-qom-cast-debug) printf "%s" -Dqom_cast_debug=false ;;
--enable-rbd) printf "%s" -Drbd=enabled ;;
--disable-rbd) printf "%s" -Drbd=disabled ;;
+ --enable-vitastor) printf "%s" -Dvitastor=enabled ;;
+ --disable-vitastor) printf "%s" -Dvitastor=disabled ;;
--enable-rdma) printf "%s" -Drdma=enabled ;;
--disable-rdma) printf "%s" -Drdma=disabled ;;
--enable-replication) printf "%s" -Dreplication=enabled ;;

View File

@@ -0,0 +1,190 @@
diff --git a/block/meson.build b/block/meson.build
index b7c68b83a3..95d8a6f15d 100644
--- a/block/meson.build
+++ b/block/meson.build
@@ -100,6 +100,7 @@ foreach m : [
[libnfs, 'nfs', files('nfs.c')],
[libssh, 'ssh', files('ssh.c')],
[rbd, 'rbd', files('rbd.c')],
+ [vitastor, 'vitastor', files('vitastor.c')],
]
if m[0].found()
module_ss = ss.source_set()
diff --git a/meson.build b/meson.build
index 5c6b5a1c75..f31f73612e 100644
--- a/meson.build
+++ b/meson.build
@@ -1026,6 +1026,26 @@ if not get_option('rbd').auto() or have_block
endif
endif
+vitastor = not_found
+if not get_option('vitastor').auto() or have_block
+ libvitastor_client = cc.find_library('vitastor_client', has_headers: ['vitastor_c.h'],
+ required: get_option('vitastor'), kwargs: static_kwargs)
+ if libvitastor_client.found()
+ if cc.links('''
+ #include <vitastor_c.h>
+ int main(void) {
+ vitastor_c_create_qemu(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ return 0;
+ }''', dependencies: libvitastor_client)
+ vitastor = declare_dependency(dependencies: libvitastor_client)
+ elif get_option('vitastor').enabled()
+ error('could not link libvitastor_client')
+ else
+ warning('could not link libvitastor_client, disabling')
+ endif
+ endif
+endif
+
glusterfs = not_found
glusterfs_ftruncate_has_stat = false
glusterfs_iocb_has_stat = false
@@ -1861,6 +1881,7 @@ config_host_data.set('CONFIG_NUMA', numa.found())
config_host_data.set('CONFIG_OPENGL', opengl.found())
config_host_data.set('CONFIG_PROFILER', get_option('profiler'))
config_host_data.set('CONFIG_RBD', rbd.found())
+config_host_data.set('CONFIG_VITASTOR', vitastor.found())
config_host_data.set('CONFIG_RDMA', rdma.found())
config_host_data.set('CONFIG_SDL', sdl.found())
config_host_data.set('CONFIG_SDL_IMAGE', sdl_image.found())
@@ -3945,6 +3966,7 @@ if spice_protocol.found()
summary_info += {' spice server support': spice}
endif
summary_info += {'rbd support': rbd}
+summary_info += {'vitastor support': vitastor}
summary_info += {'smartcard support': cacard}
summary_info += {'U2F support': u2f}
summary_info += {'libusb': libusb}
diff --git a/meson_options.txt b/meson_options.txt
index 4b749ca549..6b37bd6b77 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -169,6 +169,8 @@ option('lzo', type : 'feature', value : 'auto',
description: 'lzo compression support')
option('rbd', type : 'feature', value : 'auto',
description: 'Ceph block device driver')
+option('vitastor', type : 'feature', value : 'auto',
+ description: 'Vitastor block device driver')
option('opengl', type : 'feature', value : 'auto',
description: 'OpenGL support')
option('rdma', type : 'feature', value : 'auto',
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 95ac4fa634..7a240827e4 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -2959,7 +2959,7 @@
'parallels', 'preallocate', 'qcow', 'qcow2', 'qed', 'quorum',
'raw', 'rbd',
{ 'name': 'replication', 'if': 'CONFIG_REPLICATION' },
- 'ssh', 'throttle', 'vdi', 'vhdx',
+ 'ssh', 'throttle', 'vdi', 'vhdx', 'vitastor',
{ 'name': 'virtio-blk-vfio-pci', 'if': 'CONFIG_BLKIO' },
{ 'name': 'virtio-blk-vhost-user', 'if': 'CONFIG_BLKIO' },
{ 'name': 'virtio-blk-vhost-vdpa', 'if': 'CONFIG_BLKIO' },
@@ -3957,6 +3957,28 @@
'*key-secret': 'str',
'*server': ['InetSocketAddressBase'] } }
+##
+# @BlockdevOptionsVitastor:
+#
+# Driver specific block device options for vitastor
+#
+# @image: Image name
+# @inode: Inode number
+# @pool: Pool ID
+# @size: Desired image size in bytes
+# @config-path: Path to Vitastor configuration
+# @etcd-host: etcd connection address(es)
+# @etcd-prefix: etcd key/value prefix
+##
+{ 'struct': 'BlockdevOptionsVitastor',
+ 'data': { '*inode': 'uint64',
+ '*pool': 'uint64',
+ '*size': 'uint64',
+ '*image': 'str',
+ '*config-path': 'str',
+ '*etcd-host': 'str',
+ '*etcd-prefix': 'str' } }
+
##
# @ReplicationMode:
#
@@ -4405,6 +4427,7 @@
'throttle': 'BlockdevOptionsThrottle',
'vdi': 'BlockdevOptionsGenericFormat',
'vhdx': 'BlockdevOptionsGenericFormat',
+ 'vitastor': 'BlockdevOptionsVitastor',
'virtio-blk-vfio-pci':
{ 'type': 'BlockdevOptionsVirtioBlkVfioPci',
'if': 'CONFIG_BLKIO' },
@@ -4804,6 +4827,17 @@
'*cluster-size' : 'size',
'*encrypt' : 'RbdEncryptionCreateOptions' } }
+##
+# @BlockdevCreateOptionsVitastor:
+#
+# Driver specific image creation options for Vitastor.
+#
+# @size: Size of the virtual disk in bytes
+##
+{ 'struct': 'BlockdevCreateOptionsVitastor',
+ 'data': { 'location': 'BlockdevOptionsVitastor',
+ 'size': 'size' } }
+
##
# @BlockdevVmdkSubformat:
#
@@ -5002,6 +5036,7 @@
'ssh': 'BlockdevCreateOptionsSsh',
'vdi': 'BlockdevCreateOptionsVdi',
'vhdx': 'BlockdevCreateOptionsVhdx',
+ 'vitastor': 'BlockdevCreateOptionsVitastor',
'vmdk': 'BlockdevCreateOptionsVmdk',
'vpc': 'BlockdevCreateOptionsVpc'
} }
diff --git a/scripts/ci/org.centos/stream/8/x86_64/configure b/scripts/ci/org.centos/stream/8/x86_64/configure
index a7f92aff90..53dc55be2e 100755
--- a/scripts/ci/org.centos/stream/8/x86_64/configure
+++ b/scripts/ci/org.centos/stream/8/x86_64/configure
@@ -31,7 +31,7 @@
--with-git=meson \
--with-git-submodules=update \
--target-list="x86_64-softmmu" \
---block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
+--block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,vitastor,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
--audio-drv-list="" \
--block-drv-ro-whitelist="vmdk,vhdx,vpc,https,ssh" \
--with-coroutine=ucontext \
@@ -179,6 +179,7 @@
--enable-opengl \
--enable-pie \
--enable-rbd \
+--enable-vitastor \
--enable-rdma \
--enable-seccomp \
--enable-snappy \
diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh
index aa6e30ea91..c45d21c40f 100644
--- a/scripts/meson-buildoptions.sh
+++ b/scripts/meson-buildoptions.sh
@@ -135,6 +135,7 @@ meson_options_help() {
printf "%s\n" ' qed qed image format support'
printf "%s\n" ' qga-vss build QGA VSS support (broken with MinGW)'
printf "%s\n" ' rbd Ceph block device driver'
+ printf "%s\n" ' vitastor Vitastor block device driver'
printf "%s\n" ' rdma Enable RDMA-based migration'
printf "%s\n" ' replication replication support'
printf "%s\n" ' sdl SDL user interface'
@@ -376,6 +377,8 @@ _meson_option_parse() {
--disable-qom-cast-debug) printf "%s" -Dqom_cast_debug=false ;;
--enable-rbd) printf "%s" -Drbd=enabled ;;
--disable-rbd) printf "%s" -Drbd=disabled ;;
+ --enable-vitastor) printf "%s" -Dvitastor=enabled ;;
+ --disable-vitastor) printf "%s" -Dvitastor=disabled ;;
--enable-rdma) printf "%s" -Drdma=enabled ;;
--disable-rdma) printf "%s" -Drdma=disabled ;;
--enable-replication) printf "%s" -Dreplication=enabled ;;

View File

@@ -0,0 +1,190 @@
diff --git a/block/meson.build b/block/meson.build
index 382bec0e7d..af6207dbce 100644
--- a/block/meson.build
+++ b/block/meson.build
@@ -101,6 +101,7 @@ foreach m : [
[libnfs, 'nfs', files('nfs.c')],
[libssh, 'ssh', files('ssh.c')],
[rbd, 'rbd', files('rbd.c')],
+ [vitastor, 'vitastor', files('vitastor.c')],
]
if m[0].found()
module_ss = ss.source_set()
diff --git a/meson.build b/meson.build
index c44d05a13f..ebedb42843 100644
--- a/meson.build
+++ b/meson.build
@@ -1028,6 +1028,26 @@ if not get_option('rbd').auto() or have_block
endif
endif
+vitastor = not_found
+if not get_option('vitastor').auto() or have_block
+ libvitastor_client = cc.find_library('vitastor_client', has_headers: ['vitastor_c.h'],
+ required: get_option('vitastor'), kwargs: static_kwargs)
+ if libvitastor_client.found()
+ if cc.links('''
+ #include <vitastor_c.h>
+ int main(void) {
+ vitastor_c_create_qemu(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ return 0;
+ }''', dependencies: libvitastor_client)
+ vitastor = declare_dependency(dependencies: libvitastor_client)
+ elif get_option('vitastor').enabled()
+ error('could not link libvitastor_client')
+ else
+ warning('could not link libvitastor_client, disabling')
+ endif
+ endif
+endif
+
glusterfs = not_found
glusterfs_ftruncate_has_stat = false
glusterfs_iocb_has_stat = false
@@ -1878,6 +1898,7 @@ endif
config_host_data.set('CONFIG_OPENGL', opengl.found())
config_host_data.set('CONFIG_PROFILER', get_option('profiler'))
config_host_data.set('CONFIG_RBD', rbd.found())
+config_host_data.set('CONFIG_VITASTOR', vitastor.found())
config_host_data.set('CONFIG_RDMA', rdma.found())
config_host_data.set('CONFIG_SDL', sdl.found())
config_host_data.set('CONFIG_SDL_IMAGE', sdl_image.found())
@@ -4002,6 +4023,7 @@ if spice_protocol.found()
summary_info += {' spice server support': spice}
endif
summary_info += {'rbd support': rbd}
+summary_info += {'vitastor support': vitastor}
summary_info += {'smartcard support': cacard}
summary_info += {'U2F support': u2f}
summary_info += {'libusb': libusb}
diff --git a/meson_options.txt b/meson_options.txt
index fc9447d267..c4ac55c283 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -173,6 +173,8 @@ option('lzo', type : 'feature', value : 'auto',
description: 'lzo compression support')
option('rbd', type : 'feature', value : 'auto',
description: 'Ceph block device driver')
+option('vitastor', type : 'feature', value : 'auto',
+ description: 'Vitastor block device driver')
option('opengl', type : 'feature', value : 'auto',
description: 'OpenGL support')
option('rdma', type : 'feature', value : 'auto',
diff --git a/qapi/block-core.json b/qapi/block-core.json
index c05ad0c07e..f5eb701604 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -3054,7 +3054,7 @@
'parallels', 'preallocate', 'qcow', 'qcow2', 'qed', 'quorum',
'raw', 'rbd',
{ 'name': 'replication', 'if': 'CONFIG_REPLICATION' },
- 'ssh', 'throttle', 'vdi', 'vhdx',
+ 'ssh', 'throttle', 'vdi', 'vhdx', 'vitastor',
{ 'name': 'virtio-blk-vfio-pci', 'if': 'CONFIG_BLKIO' },
{ 'name': 'virtio-blk-vhost-user', 'if': 'CONFIG_BLKIO' },
{ 'name': 'virtio-blk-vhost-vdpa', 'if': 'CONFIG_BLKIO' },
@@ -4073,6 +4073,28 @@
'*key-secret': 'str',
'*server': ['InetSocketAddressBase'] } }
+##
+# @BlockdevOptionsVitastor:
+#
+# Driver specific block device options for vitastor
+#
+# @image: Image name
+# @inode: Inode number
+# @pool: Pool ID
+# @size: Desired image size in bytes
+# @config-path: Path to Vitastor configuration
+# @etcd-host: etcd connection address(es)
+# @etcd-prefix: etcd key/value prefix
+##
+{ 'struct': 'BlockdevOptionsVitastor',
+ 'data': { '*inode': 'uint64',
+ '*pool': 'uint64',
+ '*size': 'uint64',
+ '*image': 'str',
+ '*config-path': 'str',
+ '*etcd-host': 'str',
+ '*etcd-prefix': 'str' } }
+
##
# @ReplicationMode:
#
@@ -4521,6 +4543,7 @@
'throttle': 'BlockdevOptionsThrottle',
'vdi': 'BlockdevOptionsGenericFormat',
'vhdx': 'BlockdevOptionsGenericFormat',
+ 'vitastor': 'BlockdevOptionsVitastor',
'virtio-blk-vfio-pci':
{ 'type': 'BlockdevOptionsVirtioBlkVfioPci',
'if': 'CONFIG_BLKIO' },
@@ -4920,6 +4943,17 @@
'*cluster-size' : 'size',
'*encrypt' : 'RbdEncryptionCreateOptions' } }
+##
+# @BlockdevCreateOptionsVitastor:
+#
+# Driver specific image creation options for Vitastor.
+#
+# @size: Size of the virtual disk in bytes
+##
+{ 'struct': 'BlockdevCreateOptionsVitastor',
+ 'data': { 'location': 'BlockdevOptionsVitastor',
+ 'size': 'size' } }
+
##
# @BlockdevVmdkSubformat:
#
@@ -5118,6 +5152,7 @@
'ssh': 'BlockdevCreateOptionsSsh',
'vdi': 'BlockdevCreateOptionsVdi',
'vhdx': 'BlockdevCreateOptionsVhdx',
+ 'vitastor': 'BlockdevCreateOptionsVitastor',
'vmdk': 'BlockdevCreateOptionsVmdk',
'vpc': 'BlockdevCreateOptionsVpc'
} }
diff --git a/scripts/ci/org.centos/stream/8/x86_64/configure b/scripts/ci/org.centos/stream/8/x86_64/configure
index 6e8983f39c..1b0b9fcf3e 100755
--- a/scripts/ci/org.centos/stream/8/x86_64/configure
+++ b/scripts/ci/org.centos/stream/8/x86_64/configure
@@ -32,7 +32,7 @@
--with-git=meson \
--with-git-submodules=update \
--target-list="x86_64-softmmu" \
---block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
+--block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,vitastor,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
--audio-drv-list="" \
--block-drv-ro-whitelist="vmdk,vhdx,vpc,https,ssh" \
--with-coroutine=ucontext \
@@ -179,6 +179,7 @@
--enable-opengl \
--enable-pie \
--enable-rbd \
+--enable-vitastor \
--enable-rdma \
--enable-seccomp \
--enable-snappy \
diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh
index 009fab1515..95914e6ebc 100644
--- a/scripts/meson-buildoptions.sh
+++ b/scripts/meson-buildoptions.sh
@@ -142,6 +142,7 @@ meson_options_help() {
printf "%s\n" ' qed qed image format support'
printf "%s\n" ' qga-vss build QGA VSS support (broken with MinGW)'
printf "%s\n" ' rbd Ceph block device driver'
+ printf "%s\n" ' vitastor Vitastor block device driver'
printf "%s\n" ' rdma Enable RDMA-based migration'
printf "%s\n" ' replication replication support'
printf "%s\n" ' sdl SDL user interface'
@@ -388,6 +389,8 @@ _meson_option_parse() {
--disable-qom-cast-debug) printf "%s" -Dqom_cast_debug=false ;;
--enable-rbd) printf "%s" -Drbd=enabled ;;
--disable-rbd) printf "%s" -Drbd=disabled ;;
+ --enable-vitastor) printf "%s" -Dvitastor=enabled ;;
+ --disable-vitastor) printf "%s" -Dvitastor=disabled ;;
--enable-rdma) printf "%s" -Drdma=enabled ;;
--disable-rdma) printf "%s" -Drdma=disabled ;;
--enable-replication) printf "%s" -Dreplication=enabled ;;

View File

@@ -9,7 +9,7 @@ for i in "$DIR"/qemu-*-vitastor.patch "$DIR"/pve-qemu-*-vitastor.patch; do
echo '===================================================================' >> $i
echo '--- /dev/null' >> $i
echo '+++ a/block/vitastor.c' >> $i
echo '@@ -0,0 +1,'$(wc -l "$DIR"/../src/qemu_driver.c)' @@' >> $i
echo '@@ -0,0 +1,'$(wc -l "$DIR"/../src/qemu_driver.c | cut -d ' ' -f 1)' @@' >> $i
cat "$DIR"/../src/qemu_driver.c | sed 's/^/+/' >> $i
fi
done

View File

@@ -7,13 +7,12 @@ set -e
VITASTOR=$(dirname $0)
VITASTOR=$(realpath "$VITASTOR/..")
if [ -d /opt/rh/gcc-toolset-9 ]; then
EL=$(rpm --eval '%dist')
if [ "$EL" = ".el8" ]; then
# CentOS 8
EL=8
. /opt/rh/gcc-toolset-9/enable
else
elif [ "$EL" = ".el7" ]; then
# CentOS 7
EL=7
. /opt/rh/devtoolset-9/enable
fi
cd ~/rpmbuild/SPECS
@@ -25,4 +24,4 @@ rm fio
mv fio-copy fio
FIO=`rpm -qi fio | perl -e 'while(<>) { /^Epoch[\s:]+(\S+)/ && print "$1:"; /^Version[\s:]+(\S+)/ && print $1; /^Release[\s:]+(\S+)/ && print "-$1"; }'`
perl -i -pe 's/(Requires:\s*fio)([^\n]+)?/$1 = '$FIO'/' $VITASTOR/rpm/vitastor-el$EL.spec
tar --transform 's#^#vitastor-0.8.1/#' --exclude 'rpm/*.rpm' -czf $VITASTOR/../vitastor-0.8.1$(rpm --eval '%dist').tar.gz *
tar --transform 's#^#vitastor-0.9.2/#' --exclude 'rpm/*.rpm' -czf $VITASTOR/../vitastor-0.9.2$(rpm --eval '%dist').tar.gz *

View File

@@ -58,7 +58,7 @@
+BuildRequires: gperftools-devel
+BuildRequires: libusbx-devel >= 1.0.21
%if %{have_usbredir}
BuildRequires: usbredir-devel >= 0.8.1
BuildRequires: usbredir-devel >= 0.8.2
%endif
@@ -856,12 +861,13 @@ BuildRequires: virglrenderer-devel
# For smartcard NSS support

View File

@@ -0,0 +1,93 @@
--- qemu-kvm.spec.orig 2023-02-28 08:04:06.000000000 +0000
+++ qemu-kvm.spec 2023-04-27 22:29:18.094878829 +0000
@@ -100,8 +100,6 @@
%endif
%global target_list %{kvm_target}-softmmu
-%global block_drivers_rw_list qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle,compress
-%global block_drivers_ro_list vdi,vmdk,vhdx,vpc,https
%define qemudocdir %{_docdir}/%{name}
%global firmwaredirs "%{_datadir}/qemu-firmware:%{_datadir}/ipxe/qemu:%{_datadir}/seavgabios:%{_datadir}/seabios"
@@ -129,6 +127,7 @@ Requires: %{name}-device-usb-host = %{ep
Requires: %{name}-device-usb-redirect = %{epoch}:%{version}-%{release} \
%endif \
Requires: %{name}-block-rbd = %{epoch}:%{version}-%{release} \
+Requires: %{name}-block-vitastor = %{epoch}:%{version}-%{release}\
Requires: %{name}-audio-pa = %{epoch}:%{version}-%{release}
# Since SPICE is removed from RHEL-9, the following Obsoletes:
@@ -151,7 +150,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}
Summary: QEMU is a machine emulator and virtualizer
Name: qemu-kvm
Version: 7.0.0
-Release: 13%{?rcrel}%{?dist}%{?cc_suffix}.2
+Release: 13.vitastor%{?rcrel}%{?dist}%{?cc_suffix}
# Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped
# Epoch 15 used for RHEL 8
# Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5)
@@ -174,6 +173,7 @@ Source28: 95-kvm-memlock.conf
Source30: kvm-s390x.conf
Source31: kvm-x86.conf
Source36: README.tests
+Source37: qemu-vitastor.c
Patch0004: 0004-Initial-redhat-build.patch
@@ -498,6 +498,7 @@ Patch171: kvm-i386-do-kvm_put_msr_featur
Patch172: kvm-target-i386-kvm-fix-kvmclock_current_nsec-Assertion-.patch
# For bz#2168221 - while live-migrating many instances concurrently, libvirt sometimes return internal error: migration was active, but no RAM info was set [rhel-9.1.0.z]
Patch173: kvm-migration-Read-state-once.patch
+Patch174: qemu-7.0-vitastor.patch
# Source-git patches
@@ -531,6 +532,7 @@ BuildRequires: libcurl-devel
%if %{have_block_rbd}
BuildRequires: librbd-devel
%endif
+BuildRequires: vitastor-client-devel
# We need both because the 'stap' binary is probed for by configure
BuildRequires: systemtap
BuildRequires: systemtap-sdt-devel
@@ -718,6 +720,14 @@ using the rbd protocol.
%endif
+%package block-vitastor
+Summary: QEMU Vitastor block driver
+Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release}
+
+%description block-vitastor
+This package provides the additional Vitastor block driver for QEMU.
+
+
%package audio-pa
Summary: QEMU PulseAudio audio driver
Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release}
@@ -811,6 +821,7 @@ This package provides usbredir support.
%prep
%setup -q -n qemu-%{version}%{?rcstr}
%autopatch -p1
+cp %{SOURCE37} ./block/vitastor.c
%global qemu_kvm_build qemu_kvm_build
mkdir -p %{qemu_kvm_build}
@@ -1032,6 +1043,7 @@ run_configure \
%if %{have_block_rbd}
--enable-rbd \
%endif
+ --enable-vitastor \
%if %{have_librdma}
--enable-rdma \
%endif
@@ -1511,6 +1523,9 @@ useradd -r -u 107 -g qemu -G kvm -d / -s
%files block-rbd
%{_libdir}/%{name}/block-rbd.so
%endif
+%files block-vitastor
+%{_libdir}/%{name}/block-vitastor.so
+
%files audio-pa
%{_libdir}/%{name}/audio-pa.so

View File

@@ -35,7 +35,7 @@ ADD . /root/vitastor
RUN set -e; \
cd /root/vitastor/rpm; \
sh build-tarball.sh; \
cp /root/vitastor-0.8.1.el7.tar.gz ~/rpmbuild/SOURCES; \
cp /root/vitastor-0.9.2.el7.tar.gz ~/rpmbuild/SOURCES; \
cp vitastor-el7.spec ~/rpmbuild/SPECS/vitastor.spec; \
cd ~/rpmbuild/SPECS/; \
rpmbuild -ba vitastor.spec; \

View File

@@ -1,11 +1,11 @@
Name: vitastor
Version: 0.8.1
Version: 0.9.2
Release: 1%{?dist}
Summary: Vitastor, a fast software-defined clustered block storage
License: Vitastor Network Public License 1.1
URL: https://vitastor.io/
Source0: vitastor-0.8.1.el7.tar.gz
Source0: vitastor-0.9.2.el7.tar.gz
BuildRequires: liburing-devel >= 0.6
BuildRequires: gperftools-devel
@@ -35,6 +35,7 @@ Summary: Vitastor - OSD
Requires: libJerasure2
Requires: libisa-l
Requires: liburing >= 0.6
Requires: liburing < 2
Requires: vitastor-client = %{version}-%{release}
Requires: util-linux
Requires: parted
@@ -59,6 +60,7 @@ scheduling cluster-level operations.
%package -n vitastor-client
Summary: Vitastor - client
Requires: liburing >= 0.6
Requires: liburing < 2
%description -n vitastor-client

View File

@@ -35,7 +35,7 @@ ADD . /root/vitastor
RUN set -e; \
cd /root/vitastor/rpm; \
sh build-tarball.sh; \
cp /root/vitastor-0.8.1.el8.tar.gz ~/rpmbuild/SOURCES; \
cp /root/vitastor-0.9.2.el8.tar.gz ~/rpmbuild/SOURCES; \
cp vitastor-el8.spec ~/rpmbuild/SPECS/vitastor.spec; \
cd ~/rpmbuild/SPECS/; \
rpmbuild -ba vitastor.spec; \

View File

@@ -1,11 +1,11 @@
Name: vitastor
Version: 0.8.1
Version: 0.9.2
Release: 1%{?dist}
Summary: Vitastor, a fast software-defined clustered block storage
License: Vitastor Network Public License 1.1
URL: https://vitastor.io/
Source0: vitastor-0.8.1.el8.tar.gz
Source0: vitastor-0.9.2.el8.tar.gz
BuildRequires: liburing-devel >= 0.6
BuildRequires: gperftools-devel
@@ -34,6 +34,7 @@ Summary: Vitastor - OSD
Requires: libJerasure2
Requires: libisa-l
Requires: liburing >= 0.6
Requires: liburing < 2
Requires: vitastor-client = %{version}-%{release}
Requires: util-linux
Requires: parted
@@ -57,6 +58,7 @@ scheduling cluster-level operations.
%package -n vitastor-client
Summary: Vitastor - client
Requires: liburing >= 0.6
Requires: liburing < 2
%description -n vitastor-client

View File

@@ -0,0 +1,28 @@
# Build packages for AlmaLinux 9 inside a container
# cd ..; podman build -t vitastor-el9 -v `pwd`/packages:/root/packages -f rpm/vitastor-el9.Dockerfile .
FROM almalinux:9
WORKDIR /root
RUN sed -i 's/enabled=0/enabled=1/' /etc/yum.repos.d/*.repo
RUN dnf -y install epel-release dnf-plugins-core
RUN dnf -y install https://vitastor.io/rpms/centos/9/vitastor-release-1.0-1.el9.noarch.rpm
RUN dnf -y install gcc-c++ gperftools-devel fio nodejs rpm-build jerasure-devel libisa-l-devel gf-complete-devel rdma-core-devel libarchive liburing-devel cmake
RUN dnf download --source fio
RUN rpm --nomd5 -i fio*.src.rpm
RUN cd ~/rpmbuild/SPECS && dnf builddep -y --spec fio.spec
ADD . /root/vitastor
RUN set -e; \
cd /root/vitastor/rpm; \
sh build-tarball.sh; \
cp /root/vitastor-0.9.2.el9.tar.gz ~/rpmbuild/SOURCES; \
cp vitastor-el9.spec ~/rpmbuild/SPECS/vitastor.spec; \
cd ~/rpmbuild/SPECS/; \
rpmbuild -ba vitastor.spec; \
mkdir -p /root/packages/vitastor-el9; \
rm -rf /root/packages/vitastor-el9/*; \
cp ~/rpmbuild/RPMS/*/vitastor* /root/packages/vitastor-el9/; \
cp ~/rpmbuild/SRPMS/vitastor* /root/packages/vitastor-el9/

158
rpm/vitastor-el9.spec Normal file
View File

@@ -0,0 +1,158 @@
Name: vitastor
Version: 0.9.2
Release: 1%{?dist}
Summary: Vitastor, a fast software-defined clustered block storage
License: Vitastor Network Public License 1.1
URL: https://vitastor.io/
Source0: vitastor-0.9.2.el9.tar.gz
BuildRequires: liburing-devel >= 0.6
BuildRequires: gperftools-devel
BuildRequires: gcc-c++
BuildRequires: nodejs >= 10
BuildRequires: jerasure-devel
BuildRequires: libisa-l-devel
BuildRequires: gf-complete-devel
BuildRequires: rdma-core-devel
BuildRequires: cmake
Requires: vitastor-osd = %{version}-%{release}
Requires: vitastor-mon = %{version}-%{release}
Requires: vitastor-client = %{version}-%{release}
Requires: vitastor-client-devel = %{version}-%{release}
Requires: vitastor-fio = %{version}-%{release}
%description
Vitastor is a small, simple and fast clustered block storage (storage for VM drives),
architecturally similar to Ceph which means strong consistency, primary-replication,
symmetric clustering and automatic data distribution over any number of drives of any
size with configurable redundancy (replication or erasure codes/XOR).
%package -n vitastor-osd
Summary: Vitastor - OSD
Requires: vitastor-client = %{version}-%{release}
Requires: util-linux
Requires: parted
%description -n vitastor-osd
Vitastor object storage daemon, i.e. server program that stores data.
%package -n vitastor-mon
Summary: Vitastor - monitor
Requires: nodejs >= 10
Requires: lpsolve
%description -n vitastor-mon
Vitastor monitor, i.e. server program responsible for watching cluster state and
scheduling cluster-level operations.
%package -n vitastor-client
Summary: Vitastor - client
%description -n vitastor-client
Vitastor client library and command-line interface.
%package -n vitastor-client-devel
Summary: Vitastor - development files
Group: Development/Libraries
Requires: vitastor-client = %{version}-%{release}
%description -n vitastor-client-devel
Vitastor library headers for development.
%package -n vitastor-fio
Summary: Vitastor - fio drivers
Group: Development/Libraries
Requires: vitastor-client = %{version}-%{release}
Requires: fio = 3.27-8.el9
%description -n vitastor-fio
Vitastor fio drivers for benchmarking.
%prep
%setup -q
%build
%cmake
%cmake_build
%install
rm -rf $RPM_BUILD_ROOT
%cmake_install
cd mon
npm install
cd ..
mkdir -p %buildroot/usr/lib/vitastor
cp -r mon %buildroot/usr/lib/vitastor
mkdir -p %buildroot/lib/systemd/system
cp mon/vitastor.target mon/vitastor-mon.service mon/vitastor-osd@.service %buildroot/lib/systemd/system
mkdir -p %buildroot/lib/udev/rules.d
cp mon/90-vitastor.rules %buildroot/lib/udev/rules.d
%files
%doc GPL-2.0.txt VNPL-1.1.txt README.md README-ru.md
%files -n vitastor-osd
%_bindir/vitastor-osd
%_bindir/vitastor-disk
%_bindir/vitastor-dump-journal
/lib/systemd/system/vitastor-osd@.service
/lib/systemd/system/vitastor.target
/lib/udev/rules.d/90-vitastor.rules
%pre -n vitastor-osd
groupadd -r -f vitastor 2>/dev/null ||:
useradd -r -g vitastor -s /sbin/nologin -c "Vitastor daemons" -M -d /nonexistent vitastor 2>/dev/null ||:
install -o vitastor -g vitastor -d /var/log/vitastor
mkdir -p /etc/vitastor
%files -n vitastor-mon
/usr/lib/vitastor/mon
/lib/systemd/system/vitastor-mon.service
%pre -n vitastor-mon
groupadd -r -f vitastor 2>/dev/null ||:
useradd -r -g vitastor -s /sbin/nologin -c "Vitastor daemons" -M -d /nonexistent vitastor 2>/dev/null ||:
mkdir -p /etc/vitastor
%files -n vitastor-client
%_bindir/vitastor-nbd
%_bindir/vitastor-nfs
%_bindir/vitastor-cli
%_bindir/vitastor-rm
%_bindir/vita
%_libdir/libvitastor_blk.so*
%_libdir/libvitastor_client.so*
%files -n vitastor-client-devel
%_includedir/vitastor_c.h
%_libdir/pkgconfig
%files -n vitastor-fio
%_libdir/libfio_vitastor.so
%_libdir/libfio_vitastor_blk.so
%_libdir/libfio_vitastor_sec.so
%changelog

View File

@@ -1,8 +1,9 @@
cmake_minimum_required(VERSION 2.8)
cmake_minimum_required(VERSION 2.8.12)
project(vitastor)
include(GNUInstallDirs)
include(CTest)
set(WITH_QEMU false CACHE BOOL "Build QEMU driver inside Vitastor source tree")
set(WITH_FIO true CACHE BOOL "Build FIO driver")
@@ -15,7 +16,7 @@ if("${CMAKE_INSTALL_PREFIX}" MATCHES "^/usr/local/?$")
set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}")
endif()
add_definitions(-DVERSION="0.8.1")
add_definitions(-DVERSION="0.9.2")
add_definitions(-Wall -Wno-sign-compare -Wno-comment -Wno-parentheses -Wno-pointer-arith -fdiagnostics-color=always -I ${CMAKE_SOURCE_DIR}/src)
if (${WITH_ASAN})
add_definitions(-fsanitize=address -fno-omit-frame-pointer)
@@ -55,6 +56,14 @@ if (ISAL_LIBRARIES)
add_definitions(-DWITH_ISAL)
endif (ISAL_LIBRARIES)
add_custom_target(build_tests)
add_custom_target(test
COMMAND
echo leak:tcmalloc > ${CMAKE_CURRENT_BINARY_DIR}/lsan-suppress.txt &&
env LSAN_OPTIONS=suppressions=${CMAKE_CURRENT_BINARY_DIR}/lsan-suppress.txt ${CMAKE_CTEST_COMMAND}
)
add_dependencies(test build_tests)
include_directories(
../
/usr/include/jerasure
@@ -102,7 +111,7 @@ target_compile_options(vitastor_common PUBLIC -fPIC)
add_executable(vitastor-osd
osd_main.cpp osd.cpp osd_secondary.cpp osd_peering.cpp osd_flush.cpp osd_peering_pg.cpp
osd_primary.cpp osd_primary_chain.cpp osd_primary_sync.cpp osd_primary_write.cpp osd_primary_subops.cpp
osd_cluster.cpp osd_rmw.cpp
osd_cluster.cpp osd_rmw.cpp osd_scrub.cpp osd_primary_describe.cpp
)
target_link_libraries(vitastor-osd
vitastor_common
@@ -132,6 +141,8 @@ add_library(vitastor_client SHARED
cli_common.cpp
cli_alloc_osd.cpp
cli_status.cpp
cli_describe.cpp
cli_fix.cpp
cli_df.cpp
cli_ls.cpp
cli_create.cpp
@@ -140,11 +151,11 @@ add_library(vitastor_client SHARED
cli_merge.cpp
cli_rm_data.cpp
cli_rm.cpp
cli_rm_osd.cpp
)
set_target_properties(vitastor_client PROPERTIES PUBLIC_HEADER "vitastor_c.h")
target_link_libraries(vitastor_client
vitastor_common
tcmalloc_minimal
${LIBURING_LIBRARIES}
${IBVERBS_LIBRARIES}
)
@@ -234,8 +245,18 @@ add_executable(osd_test osd_test.cpp rw_blocking.cpp addr_util.cpp)
target_link_libraries(osd_test tcmalloc_minimal)
# osd_rmw_test
add_executable(osd_rmw_test osd_rmw_test.cpp allocator.cpp)
add_executable(osd_rmw_test EXCLUDE_FROM_ALL osd_rmw_test.cpp allocator.cpp)
target_link_libraries(osd_rmw_test Jerasure ${ISAL_LIBRARIES} tcmalloc_minimal)
add_dependencies(build_tests osd_rmw_test)
add_test(NAME osd_rmw_test COMMAND osd_rmw_test)
if (ISAL_LIBRARIES)
add_executable(osd_rmw_test_je EXCLUDE_FROM_ALL osd_rmw_test.cpp allocator.cpp)
target_compile_definitions(osd_rmw_test_je PUBLIC -DNO_ISAL)
target_link_libraries(osd_rmw_test_je Jerasure tcmalloc_minimal)
add_dependencies(build_tests osd_rmw_test_je)
add_test(NAME osd_rmw_test_jerasure COMMAND osd_rmw_test_je)
endif (ISAL_LIBRARIES)
# stub_uring_osd
add_executable(stub_uring_osd
@@ -249,11 +270,15 @@ target_link_libraries(stub_uring_osd
)
# osd_peering_pg_test
add_executable(osd_peering_pg_test osd_peering_pg_test.cpp osd_peering_pg.cpp)
add_executable(osd_peering_pg_test EXCLUDE_FROM_ALL osd_peering_pg_test.cpp osd_peering_pg.cpp)
target_link_libraries(osd_peering_pg_test tcmalloc_minimal)
add_dependencies(build_tests osd_peering_pg_test)
add_test(NAME osd_peering_pg_test COMMAND osd_peering_pg_test)
# test_allocator
add_executable(test_allocator test_allocator.cpp allocator.cpp)
add_executable(test_allocator EXCLUDE_FROM_ALL test_allocator.cpp allocator.cpp)
add_dependencies(build_tests test_allocator)
add_test(NAME test_allocator COMMAND test_allocator)
# test_cas
add_executable(test_cas
@@ -273,12 +298,15 @@ target_link_libraries(test_crc32
# test_cluster_client
add_executable(test_cluster_client
EXCLUDE_FROM_ALL
test_cluster_client.cpp
pg_states.cpp osd_ops.cpp cluster_client.cpp cluster_client_list.cpp msgr_op.cpp mock/messenger.cpp msgr_stop.cpp
etcd_state_client.cpp timerfd_manager.cpp ../json11/json11.cpp
etcd_state_client.cpp timerfd_manager.cpp str_util.cpp ../json11/json11.cpp
)
target_compile_definitions(test_cluster_client PUBLIC -D__MOCK__)
target_include_directories(test_cluster_client PUBLIC ${CMAKE_SOURCE_DIR}/src/mock)
add_dependencies(build_tests test_cluster_client)
add_test(NAME test_cluster_client COMMAND test_cluster_client)
## test_blockstore, test_shit
#add_executable(test_blockstore test_blockstore.cpp)

View File

@@ -13,6 +13,11 @@ blockstore_t::~blockstore_t()
delete impl;
}
void blockstore_t::parse_config(blockstore_config_t & config)
{
impl->parse_config(config, false);
}
void blockstore_t::loop()
{
impl->loop();

View File

@@ -73,7 +73,10 @@ Input:
write request is copied into the metadata area bitwise and stored there.
Output:
- retval = number of bytes actually read/written or negative error number (-EINVAL or -ENOSPC)
- retval = number of bytes actually read/written or negative error number
-EINVAL = invalid input parameters
-ENOENT = requested object/version does not exist for reads
-ENOSPC = no space left in the store for writes
- version = the version actually read or written
## BS_OP_DELETE
@@ -107,7 +110,7 @@ Input:
- buf = pre-allocated obj_ver_id array <len> units long
Output:
- retval = 0 or negative error number (-EINVAL, -ENOENT if no such version or -EBUSY if not synced)
- retval = 0 or negative error number (-ENOENT if no such version for stabilize)
## BS_OP_SYNC_STAB_ALL
@@ -122,11 +125,14 @@ Output:
Get a list of all objects in this Blockstore.
Input:
- oid.stripe = PG alignment
- len = PG count or 0 to list all objects
- offset = PG number
- oid.inode = min inode number or 0 to list all inodes
- version = max inode number or 0 to list all inodes
- pg_alignment = PG alignment
- pg_count = PG count or 0 to list all objects
- pg_number = PG number
- list_stable_limit = max number of clean objects in the reply
it's guaranteed that dirty objects are returned from the same interval,
i.e. from (min_oid .. min(max_oid, max(returned stable OIDs)))
- min_oid = min inode/stripe or 0 to list all objects
- max_oid = max inode/stripe or 0 to list all objects
Output:
- retval = total obj_ver_id count
@@ -143,10 +149,27 @@ struct blockstore_op_t
uint64_t opcode;
// finish callback
std::function<void (blockstore_op_t*)> callback;
object_id oid;
uint64_t version;
uint32_t offset;
uint32_t len;
union __attribute__((__packed__))
{
// R/W
struct __attribute__((__packed__))
{
object_id oid;
uint64_t version;
uint32_t offset;
uint32_t len;
};
// List
struct __attribute__((__packed__))
{
object_id min_oid;
object_id max_oid;
uint32_t pg_alignment;
uint32_t pg_count;
uint32_t pg_number;
uint32_t list_stable_limit;
};
};
void *buf;
void *bitmap;
int retval;
@@ -165,6 +188,9 @@ public:
blockstore_t(blockstore_config_t & config, ring_loop_t *ringloop, timerfd_manager_t *tfd);
~blockstore_t();
// Update configuration
void parse_config(blockstore_config_t & config);
// Event loop
void loop();

View File

@@ -35,24 +35,14 @@ journal_flusher_co::journal_flusher_co()
{
bs->live = true;
if (data->res != data->iov.iov_len)
{
throw std::runtime_error(
"data read operation failed during flush ("+std::to_string(data->res)+" != "+std::to_string(data->iov.iov_len)+
"). can't continue, sorry :-("
);
}
bs->disk_error_abort("read operation during flush", data->res, data->iov.iov_len);
wait_count--;
};
simple_callback_w = [this](ring_data_t* data)
{
bs->live = true;
if (data->res != data->iov.iov_len)
{
throw std::runtime_error(
"write operation failed ("+std::to_string(data->res)+" != "+std::to_string(data->iov.iov_len)+
"). state "+std::to_string(wait_state)+". in-memory state is corrupted. AAAAAAAaaaaaaaaa!!!111"
);
}
bs->disk_error_abort("write operation during flush", data->res, data->iov.iov_len);
wait_count--;
};
}
@@ -87,7 +77,7 @@ void journal_flusher_t::loop()
cur_flusher_count--;
}
}
for (int i = 0; (active_flushers > 0 || dequeuing) && i < cur_flusher_count; i++)
for (int i = 0; (active_flushers > 0 || dequeuing || trim_wanted > 0) && i < cur_flusher_count; i++)
co[i].loop();
}
@@ -172,7 +162,8 @@ void journal_flusher_t::mark_trim_possible()
if (trim_wanted > 0)
{
dequeuing = true;
journal_trim_counter++;
if (!journal_trim_counter)
journal_trim_counter = journal_trim_interval;
bs->ringloop->wakeup();
}
}
@@ -306,6 +297,8 @@ bool journal_flusher_co::loop()
goto resume_20;
else if (wait_state == 21)
goto resume_21;
else if (wait_state == 22)
goto resume_22;
resume_0:
if (flusher->flush_queue.size() < flusher->min_flusher_count && !flusher->trim_wanted ||
!flusher->flush_queue.size() || !flusher->dequeuing)
@@ -511,6 +504,13 @@ resume_1:
);
wait_count++;
}
// Wait for data writes before fsyncing it
resume_22:
if (wait_count > 0)
{
wait_state = 22;
return false;
}
// Sync data before writing metadata
resume_16:
resume_17:
@@ -521,7 +521,7 @@ resume_1:
return false;
}
resume_5:
// And metadata writes, but only after data writes complete
// Submit metadata writes, but only when data is written and fsynced
if (!bs->inmemory_meta && meta_new.it->second.state == 0 || wait_count > 0)
{
// metadata sector is still being read or data is still being written, wait for it
@@ -536,14 +536,27 @@ resume_1:
return false;
}
// zero out old metadata entry
{
clean_disk_entry *old_entry = (clean_disk_entry*)((uint8_t*)meta_old.buf + meta_old.pos*bs->dsk.clean_entry_size);
if (old_entry->oid.inode != 0 && old_entry->oid != cur.oid)
{
printf("Fatal error (metadata corruption or bug): tried to wipe metadata entry %lu (%lx:%lx v%lu) as old location of %lx:%lx\n",
old_clean_loc >> bs->dsk.block_order, old_entry->oid.inode, old_entry->oid.stripe,
old_entry->version, cur.oid.inode, cur.oid.stripe);
exit(1);
}
}
memset((uint8_t*)meta_old.buf + meta_old.pos*bs->dsk.clean_entry_size, 0, bs->dsk.clean_entry_size);
await_sqe(15);
data->iov = (struct iovec){ meta_old.buf, bs->dsk.meta_block_size };
data->callback = simple_callback_w;
my_uring_prep_writev(
sqe, bs->dsk.meta_fd, &data->iov, 1, bs->dsk.meta_offset + bs->dsk.meta_block_size + meta_old.sector
);
wait_count++;
if (meta_old.sector != meta_new.sector)
{
await_sqe(15);
data->iov = (struct iovec){ meta_old.buf, bs->dsk.meta_block_size };
data->callback = simple_callback_w;
my_uring_prep_writev(
sqe, bs->dsk.meta_fd, &data->iov, 1, bs->dsk.meta_offset + bs->dsk.meta_block_size + meta_old.sector
);
wait_count++;
}
}
if (has_delete)
{
@@ -616,7 +629,8 @@ resume_1:
for (it = v.begin(); it != v.end(); it++)
{
// Free it if it's not taken from the journal
if (it->buf && (!bs->journal.inmemory || it->buf < bs->journal.buffer || it->buf >= bs->journal.buffer + bs->journal.len))
if (it->buf && (!bs->journal.inmemory || it->buf < bs->journal.buffer ||
it->buf >= (uint8_t*)bs->journal.buffer + bs->journal.len))
{
free(it->buf);
}
@@ -661,53 +675,58 @@ resume_1:
return false;
}
flusher->trimming = true;
// First update journal "superblock" and only then update <used_start> in memory
await_sqe(12);
*((journal_entry_start*)flusher->journal_superblock) = {
.crc32 = 0,
.magic = JOURNAL_MAGIC,
.type = JE_START,
.size = sizeof(journal_entry_start),
.reserved = 0,
.journal_start = new_trim_pos,
.version = JOURNAL_VERSION,
};
((journal_entry_start*)flusher->journal_superblock)->crc32 = je_crc32((journal_entry*)flusher->journal_superblock);
data->iov = (struct iovec){ flusher->journal_superblock, bs->dsk.journal_block_size };
data->callback = simple_callback_w;
my_uring_prep_writev(sqe, bs->dsk.journal_fd, &data->iov, 1, bs->journal.offset);
wait_count++;
resume_13:
if (wait_count > 0)
// Recheck the position with the "lock" taken
new_trim_pos = bs->journal.get_trim_pos();
if (new_trim_pos != bs->journal.used_start)
{
wait_state = 13;
return false;
}
if (!bs->disable_journal_fsync)
{
await_sqe(20);
my_uring_prep_fsync(sqe, bs->dsk.journal_fd, IORING_FSYNC_DATASYNC);
data->iov = { 0 };
// First update journal "superblock" and only then update <used_start> in memory
await_sqe(12);
*((journal_entry_start*)flusher->journal_superblock) = {
.crc32 = 0,
.magic = JOURNAL_MAGIC,
.type = JE_START,
.size = sizeof(journal_entry_start),
.reserved = 0,
.journal_start = new_trim_pos,
.version = JOURNAL_VERSION,
};
((journal_entry_start*)flusher->journal_superblock)->crc32 = je_crc32((journal_entry*)flusher->journal_superblock);
data->iov = (struct iovec){ flusher->journal_superblock, bs->dsk.journal_block_size };
data->callback = simple_callback_w;
resume_21:
my_uring_prep_writev(sqe, bs->dsk.journal_fd, &data->iov, 1, bs->journal.offset);
wait_count++;
resume_13:
if (wait_count > 0)
{
wait_state = 21;
wait_state = 13;
return false;
}
}
bs->journal.used_start = new_trim_pos;
if (!bs->disable_journal_fsync)
{
await_sqe(20);
my_uring_prep_fsync(sqe, bs->dsk.journal_fd, IORING_FSYNC_DATASYNC);
data->iov = { 0 };
data->callback = simple_callback_w;
resume_21:
if (wait_count > 0)
{
wait_state = 21;
return false;
}
}
bs->journal.used_start = new_trim_pos;
#ifdef BLOCKSTORE_DEBUG
printf("Journal trimmed to %08lx (next_free=%08lx)\n", bs->journal.used_start, bs->journal.next_free);
printf("Journal trimmed to %08lx (next_free=%08lx)\n", bs->journal.used_start, bs->journal.next_free);
#endif
if (bs->journal.flush_journal && !flusher->flush_queue.size())
{
assert(bs->journal.used_start == bs->journal.next_free);
printf("Journal flushed\n");
exit(0);
}
}
flusher->trimming = false;
}
if (bs->journal.flush_journal && !flusher->flush_queue.size())
{
assert(bs->journal.used_start == bs->journal.next_free);
printf("Journal flushed\n");
exit(0);
}
}
// All done
flusher->active_flushers--;
@@ -931,7 +950,7 @@ bool journal_flusher_co::fsync_batch(bool fsync_meta, int wait_base)
resume_1:
if (!cur_sync->state)
{
if (flusher->syncing_flushers >= flusher->cur_flusher_count || !flusher->flush_queue.size())
if (flusher->syncing_flushers >= flusher->active_flushers || !flusher->flush_queue.size())
{
// Sync batch is ready. Do it.
await_sqe(0);

View File

@@ -11,7 +11,7 @@ blockstore_impl_t::blockstore_impl_t(blockstore_config_t & config, ring_loop_t *
ring_consumer.loop = [this]() { loop(); };
ringloop->register_consumer(&ring_consumer);
initialized = 0;
parse_config(config);
parse_config(config, true);
zero_object = (uint8_t*)memalign_or_die(MEM_ALIGNMENT, dsk.data_block_size);
try
{
@@ -107,7 +107,7 @@ void blockstore_impl_t::loop()
// has_writes == 0 - no writes before the current queue item
// has_writes == 1 - some writes in progress
// has_writes == 2 - tried to submit some writes, but failed
int has_writes = 0, op_idx = 0, new_idx = 0, done_lists = 0;
int has_writes = 0, op_idx = 0, new_idx = 0;
for (; op_idx < submit_queue.size(); op_idx++, new_idx++)
{
auto op = submit_queue[op_idx];
@@ -171,7 +171,7 @@ void blockstore_impl_t::loop()
// Can't submit SYNC before previous writes
continue;
}
wr_st = continue_sync(op, false);
wr_st = continue_sync(op);
if (wr_st != 2)
{
has_writes = wr_st > 0 ? 1 : 2;
@@ -188,16 +188,12 @@ void blockstore_impl_t::loop()
else if (op->opcode == BS_OP_LIST)
{
// LIST doesn't have to be blocked by previous modifications
// But don't do a lot of LISTs at once, because they're blocking and potentially slow
if (single_tick_list_limit <= 0 || done_lists < single_tick_list_limit)
{
process_list(op);
done_lists++;
wr_st = 2;
}
process_list(op);
wr_st = 2;
}
if (wr_st == 2)
{
submit_queue[op_idx] = NULL;
new_idx--;
}
if (wr_st == 0)
@@ -313,13 +309,14 @@ void blockstore_impl_t::check_wait(blockstore_op_t *op)
}
else if (PRIV(op)->wait_for == WAIT_FREE)
{
if (!data_alloc->get_free_count() && flusher->is_active())
if (!data_alloc->get_free_count() && big_to_flush > 0)
{
#ifdef BLOCKSTORE_DEBUG
printf("Still waiting for free space on the data device\n");
#endif
return;
}
flusher->release_trim();
PRIV(op)->wait_for = 0;
}
else
@@ -340,7 +337,7 @@ void blockstore_impl_t::enqueue_op(blockstore_op_t *op)
{
// Basic verification not passed
op->retval = -EINVAL;
std::function<void (blockstore_op_t*)>(op->callback)(op);
ringloop->set_immediate([op]() { std::function<void (blockstore_op_t*)>(op->callback)(op); });
return;
}
if (op->opcode == BS_OP_SYNC_STAB_ALL)
@@ -383,16 +380,21 @@ void blockstore_impl_t::enqueue_op(blockstore_op_t *op)
}
if ((op->opcode == BS_OP_WRITE || op->opcode == BS_OP_WRITE_STABLE || op->opcode == BS_OP_DELETE) && !enqueue_write(op))
{
std::function<void (blockstore_op_t*)>(op->callback)(op);
ringloop->set_immediate([op]() { std::function<void (blockstore_op_t*)>(op->callback)(op); });
return;
}
init_op(op);
submit_queue.push_back(op);
ringloop->wakeup();
}
void blockstore_impl_t::init_op(blockstore_op_t *op)
{
// Call constructor without allocating memory. We'll call destructor before returning op back
new ((void*)op->private_data) blockstore_op_private_t;
PRIV(op)->wait_for = 0;
PRIV(op)->op_state = 0;
PRIV(op)->pending_ops = 0;
submit_queue.push_back(op);
ringloop->wakeup();
}
static bool replace_stable(object_id oid, uint64_t version, int search_start, int search_end, obj_ver_id* list)
@@ -460,11 +462,11 @@ void blockstore_impl_t::reshard_clean_db(pool_id_t pool, uint32_t pg_count, uint
void blockstore_impl_t::process_list(blockstore_op_t *op)
{
uint32_t list_pg = op->offset+1;
uint32_t pg_count = op->len;
uint64_t pg_stripe_size = op->oid.stripe;
uint64_t min_inode = op->oid.inode;
uint64_t max_inode = op->version;
uint32_t list_pg = op->pg_number+1;
uint32_t pg_count = op->pg_count;
uint64_t pg_stripe_size = op->pg_alignment;
uint64_t min_inode = op->min_oid.inode;
uint64_t max_inode = op->max_oid.inode;
// Check PG
if (pg_count != 0 && (pg_stripe_size < MIN_DATA_BLOCK_SIZE || list_pg > pg_count))
{
@@ -511,7 +513,13 @@ void blockstore_impl_t::process_list(blockstore_op_t *op)
stable_alloc += clean_db.size();
}
}
else
if (op->list_stable_limit > 0)
{
stable_alloc = op->list_stable_limit;
if (stable_alloc > 1024*1024)
stable_alloc = 1024*1024;
}
if (stable_alloc < 32768)
{
stable_alloc = 32768;
}
@@ -522,22 +530,22 @@ void blockstore_impl_t::process_list(blockstore_op_t *op)
FINISH_OP(op);
return;
}
auto max_oid = op->max_oid;
bool limited = false;
pool_pg_id_t last_shard_id = 0;
for (auto shard_it = clean_db_shards.lower_bound(first_shard);
shard_it != clean_db_shards.end() && shard_it->first <= last_shard;
shard_it++)
{
auto & clean_db = shard_it->second;
auto clean_it = clean_db.begin(), clean_end = clean_db.end();
if ((min_inode != 0 || max_inode != 0) && min_inode <= max_inode)
if (op->min_oid.inode != 0 || op->min_oid.stripe != 0)
{
clean_it = clean_db.lower_bound({
.inode = min_inode,
.stripe = 0,
});
clean_end = clean_db.upper_bound({
.inode = max_inode,
.stripe = UINT64_MAX,
});
clean_it = clean_db.lower_bound(op->min_oid);
}
if ((max_oid.inode != 0 || max_oid.stripe != 0) && !(max_oid < op->min_oid))
{
clean_end = clean_db.upper_bound(max_oid);
}
for (; clean_it != clean_end; clean_it++)
{
@@ -556,11 +564,29 @@ void blockstore_impl_t::process_list(blockstore_op_t *op)
.oid = clean_it->first,
.version = clean_it->second.version,
};
if (op->list_stable_limit > 0 && stable_count >= op->list_stable_limit)
{
if (!limited)
{
limited = true;
max_oid = stable[stable_count-1].oid;
}
break;
}
}
if (op->list_stable_limit > 0)
{
// To maintain the order, we have to include objects in the same range from other shards
if (last_shard_id != 0 && last_shard_id != shard_it->first)
std::sort(stable, stable+stable_count);
if (stable_count > op->list_stable_limit)
stable_count = op->list_stable_limit;
}
last_shard_id = shard_it->first;
}
if (first_shard != last_shard)
if (op->list_stable_limit == 0 && first_shard != last_shard)
{
// If that's not a per-PG listing, sort clean entries
// If that's not a per-PG listing, sort clean entries (already sorted if list_stable_limit != 0)
std::sort(stable, stable+stable_count);
}
int clean_stable_count = stable_count;
@@ -569,20 +595,17 @@ void blockstore_impl_t::process_list(blockstore_op_t *op)
obj_ver_id *unstable = NULL;
{
auto dirty_it = dirty_db.begin(), dirty_end = dirty_db.end();
if ((min_inode != 0 || max_inode != 0) && min_inode <= max_inode)
if (op->min_oid.inode != 0 || op->min_oid.stripe != 0)
{
dirty_it = dirty_db.lower_bound({
.oid = {
.inode = min_inode,
.stripe = 0,
},
.oid = op->min_oid,
.version = 0,
});
}
if ((max_oid.inode != 0 || max_oid.stripe != 0) && !(max_oid < op->min_oid))
{
dirty_end = dirty_db.upper_bound({
.oid = {
.inode = max_inode,
.stripe = UINT64_MAX,
},
.oid = max_oid,
.version = UINT64_MAX,
});
}
@@ -598,7 +621,7 @@ void blockstore_impl_t::process_list(blockstore_op_t *op)
replace_stable(dirty_it->first.oid, 0, clean_stable_count, stable_count, stable);
}
}
else if (IS_STABLE(dirty_it->second.state))
else if (IS_STABLE(dirty_it->second.state) || (dirty_it->second.state & BS_ST_INSTANT))
{
// First try to replace a clean stable version in the first part of the list
if (!replace_stable(dirty_it->first.oid, dirty_it->first.version, 0, clean_stable_count, stable))
@@ -626,6 +649,11 @@ void blockstore_impl_t::process_list(blockstore_op_t *op)
stable[stable_count++] = dirty_it->first;
}
}
if (op->list_stable_limit > 0 && stable_count >= op->list_stable_limit)
{
// Stop here
break;
}
}
else
{
@@ -687,3 +715,16 @@ void blockstore_impl_t::dump_diagnostics()
journal.dump_diagnostics();
flusher->dump_diagnostics();
}
void blockstore_impl_t::disk_error_abort(const char *op, int retval, int expected)
{
if (retval == -EAGAIN)
{
fprintf(stderr, "EAGAIN error received from a disk %s during flush."
" It must never happen with io_uring and indicates a kernel bug."
" Please upgrade your kernel. Aborting.\n", op);
exit(1);
}
fprintf(stderr, "Disk %s failed: result is %d, expected %d. Can't continue, sorry :-(\n", op, retval, expected);
exit(1);
}

View File

@@ -218,6 +218,11 @@ struct pool_shard_settings_t
uint32_t pg_stripe_size;
};
#define STAB_SPLIT_DONE 1
#define STAB_SPLIT_WAIT 2
#define STAB_SPLIT_SYNC 3
#define STAB_SPLIT_TODO 4
class blockstore_impl_t
{
blockstore_disk_t dsk;
@@ -242,8 +247,6 @@ class blockstore_impl_t
int throttle_target_parallelism = 1;
// Minimum difference in microseconds between target and real execution times to throttle the response
int throttle_threshold_us = 50;
// Maximum number of LIST operations to be processed between
int single_tick_list_limit = 1;
/******* END OF OPTIONS *******/
struct ring_consumer_t ring_consumer;
@@ -262,6 +265,7 @@ class blockstore_impl_t
struct journal_t journal;
journal_flusher_t *flusher;
int big_to_flush = 0;
int write_iodepth = 0;
bool live = false, queue_stall = false;
@@ -281,7 +285,6 @@ class blockstore_impl_t
friend class journal_flusher_t;
friend class journal_flusher_co;
void parse_config(blockstore_config_t & config);
void calc_lengths();
void open_data();
void open_meta();
@@ -294,6 +297,7 @@ class blockstore_impl_t
// Journaling
void prepare_journal_sector_write(int sector, blockstore_op_t *op);
void handle_journal_write(ring_data_t *data, uint64_t flush_id);
void disk_error_abort(const char *op, int retval, int expected);
// Asynchronous init
int initialized;
@@ -302,6 +306,7 @@ class blockstore_impl_t
blockstore_init_journal* journal_init_reader;
void check_wait(blockstore_op_t *op);
void init_op(blockstore_op_t *op);
// Read
int dequeue_read(blockstore_op_t *read_op);
@@ -321,7 +326,7 @@ class blockstore_impl_t
void handle_write_event(ring_data_t *data, blockstore_op_t *op);
// Sync
int continue_sync(blockstore_op_t *op, bool queue_has_in_progress_sync);
int continue_sync(blockstore_op_t *op);
void ack_sync(blockstore_op_t *op);
// Stabilize
@@ -329,6 +334,8 @@ class blockstore_impl_t
int continue_stable(blockstore_op_t *op);
void mark_stable(const obj_ver_id & ov, bool forget_dirty = false);
void stabilize_object(object_id oid, uint64_t max_ver);
blockstore_op_t* selective_sync(blockstore_op_t *op);
int split_stab_op(blockstore_op_t *op, std::function<int(obj_ver_id v)> decider);
// Rollback
int dequeue_rollback(blockstore_op_t *op);
@@ -344,6 +351,8 @@ public:
blockstore_impl_t(blockstore_config_t & config, ring_loop_t *ringloop, timerfd_manager_t *tfd);
~blockstore_impl_t();
void parse_config(blockstore_config_t & config, bool init);
// Event loop
void loop();

View File

@@ -48,14 +48,12 @@ void blockstore_init_meta::handle_event(ring_data_t *data, int buf_num)
int blockstore_init_meta::loop()
{
if (wait_state == 1)
goto resume_1;
else if (wait_state == 2)
goto resume_2;
else if (wait_state == 3)
goto resume_3;
else if (wait_state == 4)
goto resume_4;
if (wait_state == 1) goto resume_1;
else if (wait_state == 2) goto resume_2;
else if (wait_state == 3) goto resume_3;
else if (wait_state == 4) goto resume_4;
else if (wait_state == 5) goto resume_5;
else if (wait_state == 6) goto resume_6;
printf("Reading blockstore metadata\n");
if (bs->inmemory_meta)
metadata_buffer = bs->metadata_buffer;
@@ -140,6 +138,7 @@ resume_1:
// Skip superblock
md_offset = bs->dsk.meta_block_size;
next_offset = md_offset;
entries_per_block = bs->dsk.meta_block_size / bs->dsk.clean_entry_size;
// Read the rest of the metadata
resume_2:
if (next_offset < bs->dsk.meta_len && submitted == 0)
@@ -179,17 +178,15 @@ resume_2:
if (bufs[i].state == INIT_META_READ_DONE)
{
// Handle result
unsigned entries_per_block = bs->dsk.meta_block_size / bs->dsk.clean_entry_size;
bool changed = false;
for (uint64_t sector = 0; sector < bufs[i].size; sector += bs->dsk.meta_block_size)
{
// handle <count> entries
changed = changed || handle_entries(
bufs[i].buf + sector, entries_per_block,
((bufs[i].offset + sector - md_offset) / bs->dsk.meta_block_size) * entries_per_block
);
if (handle_meta_block(bufs[i].buf + sector, entries_per_block,
((bufs[i].offset + sector - md_offset) / bs->dsk.meta_block_size) * entries_per_block))
changed = true;
}
if (changed && !bs->inmemory_meta)
if (changed && !bs->inmemory_meta && !bs->readonly)
{
// write the modified buffer back
GET_SQE();
@@ -211,6 +208,43 @@ resume_2:
wait_state = 2;
return 1;
}
if (entries_to_zero.size() && !bs->inmemory_meta && !bs->readonly)
{
// we have to zero out additional entries
for (i = 0; i < entries_to_zero.size(); )
{
next_offset = entries_to_zero[i]/entries_per_block;
for (j = i; j < entries_to_zero.size() && entries_to_zero[j]/entries_per_block == next_offset; j++) {}
GET_SQE();
data->iov = { metadata_buffer, bs->dsk.meta_block_size };
data->callback = [this](ring_data_t *data) { handle_event(data, -1); };
my_uring_prep_readv(sqe, bs->dsk.meta_fd, &data->iov, 1, bs->dsk.meta_offset + (1+next_offset)*bs->dsk.meta_block_size);
submitted++;
resume_5:
if (submitted > 0)
{
wait_state = 5;
return 1;
}
for (; i < j; i++)
{
uint64_t pos = (entries_to_zero[i] % entries_per_block);
memset((uint8_t*)metadata_buffer + pos*bs->dsk.clean_entry_size, 0, bs->dsk.clean_entry_size);
}
GET_SQE();
data->iov = { metadata_buffer, bs->dsk.meta_block_size };
data->callback = [this](ring_data_t *data) { handle_event(data, -1); };
my_uring_prep_writev(sqe, bs->dsk.meta_fd, &data->iov, 1, bs->dsk.meta_offset + (1+next_offset)*bs->dsk.meta_block_size);
submitted++;
resume_6:
if (submitted > 0)
{
wait_state = 6;
return 1;
}
}
entries_to_zero.clear();
}
// metadata read finished
printf("Metadata entries loaded: %lu, free blocks: %lu / %lu\n", entries_loaded, bs->data_alloc->get_free_count(), bs->dsk.block_count);
if (!bs->inmemory_meta)
@@ -236,10 +270,13 @@ resume_2:
return 0;
}
bool blockstore_init_meta::handle_entries(uint8_t *buf, uint64_t count, uint64_t done_cnt)
bool blockstore_init_meta::handle_meta_block(uint8_t *buf, uint64_t entries_per_block, uint64_t done_cnt)
{
bool updated = false;
for (uint64_t i = 0; i < count; i++)
uint64_t max_i = entries_per_block;
if (max_i > bs->dsk.block_count-done_cnt)
max_i = bs->dsk.block_count-done_cnt;
for (uint64_t i = 0; i < max_i; i++)
{
clean_disk_entry *entry = (clean_disk_entry*)(buf + i*bs->dsk.clean_entry_size);
if (!bs->inmemory_meta && bs->dsk.clean_entry_bitmap_size)
@@ -255,17 +292,35 @@ bool blockstore_init_meta::handle_entries(uint8_t *buf, uint64_t count, uint64_t
if (clean_it != clean_db.end())
{
// free the previous block
// here we have to zero out the entry because otherwise we'll hit
// here we have to zero out the previous entry because otherwise we'll hit
// "tried to overwrite non-zero metadata entry" later
updated = true;
memset(entry, 0, bs->dsk.clean_entry_size);
uint64_t old_clean_loc = clean_it->second.location >> bs->dsk.block_order;
if (bs->inmemory_meta)
{
uint64_t sector = (old_clean_loc / entries_per_block) * bs->dsk.meta_block_size;
uint64_t pos = (old_clean_loc % entries_per_block);
clean_disk_entry *old_entry = (clean_disk_entry*)((uint8_t*)bs->metadata_buffer + sector + pos*bs->dsk.clean_entry_size);
memset(old_entry, 0, bs->dsk.clean_entry_size);
}
else if (old_clean_loc >= done_cnt)
{
updated = true;
uint64_t sector = ((old_clean_loc - done_cnt) / entries_per_block) * bs->dsk.meta_block_size;
uint64_t pos = (old_clean_loc % entries_per_block);
clean_disk_entry *old_entry = (clean_disk_entry*)(buf + sector + pos*bs->dsk.clean_entry_size);
memset(old_entry, 0, bs->dsk.clean_entry_size);
}
else
{
entries_to_zero.push_back(clean_it->second.location >> bs->dsk.block_order);
}
#ifdef BLOCKSTORE_DEBUG
printf("Free block %lu from %lx:%lx v%lu (new location is %lu)\n",
clean_it->second.location >> bs->dsk.block_order,
old_clean_loc,
clean_it->first.inode, clean_it->first.stripe, clean_it->second.version,
done_cnt+i);
#endif
bs->data_alloc->set(clean_it->second.location >> bs->dsk.block_order, false);
bs->data_alloc->set(old_clean_loc, false);
}
else
{

View File

@@ -24,7 +24,10 @@ class blockstore_init_meta
uint64_t md_offset = 0;
uint64_t next_offset = 0;
uint64_t entries_loaded = 0;
bool handle_entries(uint8_t *buf, uint64_t count, uint64_t done_cnt);
unsigned entries_per_block = 0;
int i = 0, j = 0;
std::vector<uint64_t> entries_to_zero;
bool handle_meta_block(uint8_t *buf, uint64_t count, uint64_t done_cnt);
void handle_event(ring_data_t *data, int buf_num);
public:
blockstore_init_meta(blockstore_impl_t *bs);

View File

@@ -198,10 +198,7 @@ void blockstore_impl_t::handle_journal_write(ring_data_t *data, uint64_t flush_i
if (data->res != data->iov.iov_len)
{
// FIXME: our state becomes corrupted after a write error. maybe do something better than just die
throw std::runtime_error(
"journal write failed ("+std::to_string(data->res)+" != "+std::to_string(data->iov.iov_len)+
"). in-memory state is corrupted. AAAAAAAaaaaaaaaa!!!111"
);
disk_error_abort("journal write", data->res, data->iov.iov_len);
}
auto fl_it = journal.flushing_ops.upper_bound((pending_journaling_t){ .flush_id = flush_id });
if (fl_it != journal.flushing_ops.end() && fl_it->flush_id == flush_id)
@@ -239,14 +236,6 @@ journal_t::~journal_t()
uint64_t journal_t::get_trim_pos()
{
auto journal_used_it = used_sectors.lower_bound(used_start);
#ifdef BLOCKSTORE_DEBUG
printf(
"Trimming journal (used_start=%08lx, next_free=%08lx, dirty_start=%08lx, new_start=%08lx, new_refcount=%ld)\n",
used_start, next_free, dirty_start,
journal_used_it == used_sectors.end() ? 0 : journal_used_it->first,
journal_used_it == used_sectors.end() ? 0 : journal_used_it->second
);
#endif
if (journal_used_it == used_sectors.end())
{
// Journal is cleared to its end, restart from the beginning
@@ -259,12 +248,26 @@ uint64_t journal_t::get_trim_pos()
else
{
// next_free does not need updating during trim
#ifdef BLOCKSTORE_DEBUG
printf(
"Trimming journal (used_start=%08lx, next_free=%08lx, dirty_start=%08lx, new_start=%08lx, new_refcount=%ld)\n",
used_start, next_free, dirty_start,
journal_used_it->first, journal_used_it->second
);
#endif
return journal_used_it->first;
}
}
else if (journal_used_it->first > used_start)
{
// Journal is cleared up to <journal_used_it>
#ifdef BLOCKSTORE_DEBUG
printf(
"Trimming journal (used_start=%08lx, next_free=%08lx, dirty_start=%08lx, new_start=%08lx, new_refcount=%ld)\n",
used_start, next_free, dirty_start,
journal_used_it->first, journal_used_it->second
);
#endif
return journal_used_it->first;
}
// Can't trim journal

Some files were not shown because too many files have changed in this diff Show More