Compare commits

..

51 Commits

Author SHA1 Message Date
249a233b37 WIP another experiment - "smart" iothreads
Some checks failed
Test / test_snapshot_chain_ec (push) Successful in 2m55s
Test / test_rebalance_verify_imm (push) Successful in 3m55s
Test / test_root_node (push) Successful in 11s
Test / test_rebalance_verify (push) Successful in 4m32s
Test / test_switch_primary (push) Successful in 37s
Test / test_rebalance_verify_ec_imm (push) Successful in 2m53s
Test / test_write (push) Successful in 46s
Test / test_write_no_same (push) Successful in 17s
Test / test_write_xor (push) Successful in 1m55s
Test / test_rebalance_verify_ec (push) Successful in 5m59s
Test / test_heal_pg_size_2 (push) Successful in 3m58s
Test / test_heal_ec (push) Successful in 3m51s
Test / test_heal_csum_32k_dmj (push) Successful in 5m47s
Test / test_heal_csum_32k_dj (push) Successful in 6m10s
Test / test_heal_csum_4k_dmj (push) Successful in 6m50s
Test / test_osd_tags (push) Successful in 19s
Test / test_enospc (push) Successful in 1m11s
Test / test_heal_csum_4k_dj (push) Successful in 6m1s
Test / test_enospc_xor (push) Successful in 1m11s
Test / test_heal_csum_32k (push) Failing after 10m14s
Test / test_enospc_imm (push) Successful in 46s
Test / test_enospc_imm_xor (push) Successful in 1m3s
Test / test_scrub (push) Successful in 28s
Test / test_scrub_zero_osd_2 (push) Successful in 28s
Test / test_scrub_xor (push) Successful in 34s
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 39s
Test / test_scrub_pg_size_3 (push) Successful in 49s
Test / test_scrub_ec (push) Successful in 25s
Test / test_nfs (push) Successful in 16s
Test / test_heal_csum_4k (push) Successful in 8m55s
2024-07-03 11:05:45 +03:00
d07e072212 Change bool wr to event mask in epoll_manager 2024-07-01 00:30:59 +03:00
21d1171ba4 Fix parsing after "slightly decopypasting" :)
Some checks failed
Test / test_rebalance_verify (push) Failing after 1m56s
Test / test_snapshot_chain_ec (push) Failing after 6m25s
Test / test_root_node (push) Successful in 17s
Test / test_rebalance_verify_imm (push) Successful in 5m47s
Test / test_switch_primary (push) Successful in 39s
Test / test_write (push) Successful in 43s
Test / test_write_no_same (push) Successful in 18s
Test / test_rebalance_verify_ec_imm (push) Successful in 5m16s
Test / test_write_xor (push) Successful in 1m19s
Test / test_rebalance_verify_ec (push) Successful in 9m23s
Test / test_heal_pg_size_2 (push) Successful in 3m51s
Test / test_heal_ec (push) Successful in 4m57s
Test / test_heal_csum_32k_dmj (push) Successful in 4m44s
Test / test_heal_csum_32k_dj (push) Successful in 5m59s
Test / test_heal_csum_4k (push) Successful in 3m32s
Test / test_osd_tags (push) Successful in 12s
Test / test_enospc (push) Successful in 37s
Test / test_heal_csum_32k (push) Failing after 10m18s
Test / test_heal_csum_4k_dmj (push) Successful in 9m38s
Test / test_enospc_imm (push) Successful in 58s
Test / test_enospc_xor (push) Successful in 1m10s
Test / test_enospc_imm_xor (push) Successful in 57s
Test / test_scrub (push) Successful in 27s
Test / test_heal_csum_4k_dj (push) Failing after 10m14s
Test / test_scrub_zero_osd_2 (push) Successful in 23s
Test / test_scrub_xor (push) Successful in 35s
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 37s
Test / test_scrub_ec (push) Successful in 36s
Test / test_scrub_pg_size_3 (push) Successful in 44s
Test / test_nfs (push) Successful in 14s
2024-06-29 00:09:30 +03:00
ace
8f83086889 Nova and cinder driver patches for OpenStack 2023.2 2024-06-28 00:04:57 +03:00
ceb18f25db Add libvirt 10.0 patch (same as 9.10 and 10.4 actually) 2024-06-28 00:03:46 +03:00
ed51a89f70 Add QEMU 8.2 and 9.0 patches 2024-06-27 12:33:16 +03:00
f59456f22d Add libvirt 10.4 patch (same as 9.10 actually) 2024-06-27 01:35:29 +03:00
ca63cd507d Fix possible infinite loop in flusher (surprisingly reproduced in test_write.sh with iothreads)
All checks were successful
Test / test_snapshot_chain_ec (push) Successful in 3m5s
Test / test_rebalance_verify_imm (push) Successful in 3m41s
Test / test_root_node (push) Successful in 11s
Test / test_rebalance_verify (push) Successful in 4m22s
Test / test_switch_primary (push) Successful in 37s
Test / test_write (push) Successful in 40s
Test / test_write_no_same (push) Successful in 20s
Test / test_rebalance_verify_ec_imm (push) Successful in 3m5s
Test / test_write_xor (push) Successful in 1m13s
Test / test_rebalance_verify_ec (push) Successful in 5m21s
Test / test_heal_pg_size_2 (push) Successful in 4m15s
Test / test_heal_ec (push) Successful in 4m58s
Test / test_heal_csum_32k_dmj (push) Successful in 5m30s
Test / test_heal_csum_32k_dj (push) Successful in 6m12s
Test / test_heal_csum_4k (push) Successful in 5m32s
Test / test_osd_tags (push) Successful in 12s
Test / test_enospc (push) Successful in 41s
Test / test_heal_csum_4k_dj (push) Successful in 8m33s
Test / test_enospc_xor (push) Successful in 53s
Test / test_enospc_imm (push) Successful in 43s
Test / test_scrub (push) Successful in 27s
Test / test_enospc_imm_xor (push) Successful in 52s
Test / test_scrub_zero_osd_2 (push) Successful in 29s
Test / test_scrub_xor (push) Successful in 36s
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 41s
Test / test_scrub_ec (push) Successful in 25s
Test / test_scrub_pg_size_3 (push) Successful in 49s
Test / test_nfs (push) Successful in 16s
Test / test_heal_csum_32k (push) Successful in 5m9s
Test / test_heal_csum_4k_dmj (push) Successful in 5m7s
2024-06-27 00:38:01 +03:00
ea0d72289c Treat copied buffers as written only after completing the write in client
All checks were successful
Test / test_snapshot_chain_ec (push) Successful in 3m18s
Test / test_rebalance_verify_imm (push) Successful in 3m22s
Test / test_root_node (push) Successful in 57s
Test / test_rebalance_verify (push) Successful in 5m10s
Test / test_switch_primary (push) Successful in 37s
Test / test_write (push) Successful in 55s
Test / test_write_xor (push) Successful in 53s
Test / test_rebalance_verify_ec_imm (push) Successful in 3m40s
Test / test_write_no_same (push) Successful in 15s
Test / test_rebalance_verify_ec (push) Successful in 6m34s
Test / test_heal_pg_size_2 (push) Successful in 4m32s
Test / test_heal_csum_32k_dmj (push) Successful in 4m24s
Test / test_heal_ec (push) Successful in 4m54s
Test / test_heal_csum_32k_dj (push) Successful in 5m58s
Test / test_heal_csum_32k (push) Successful in 9m43s
Test / test_heal_csum_4k_dmj (push) Successful in 9m41s
Test / test_osd_tags (push) Successful in 13s
Test / test_heal_csum_4k_dj (push) Successful in 10m0s
Test / test_enospc (push) Successful in 44s
Test / test_enospc_xor (push) Successful in 55s
Test / test_enospc_imm (push) Successful in 44s
Test / test_scrub (push) Successful in 31s
Test / test_enospc_imm_xor (push) Successful in 57s
Test / test_scrub_zero_osd_2 (push) Successful in 23s
Test / test_scrub_xor (push) Successful in 27s
Test / test_scrub_pg_size_3 (push) Successful in 46s
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 40s
Test / test_scrub_ec (push) Successful in 25s
Test / test_nfs (push) Successful in 14s
Test / test_heal_csum_4k (push) Successful in 9m27s
SYNC operation fsyncs only completed operations, so treating writes as "eligible
for fsync" before actually completing them is incorrect

It affected SCHEME=ec test_heal.sh (with immediate_commit=none) test - it was
flapping with lost writes - some non-fsynced writes were legitimately lost by
the OSD, but weren't repeated by the client
2024-06-20 02:11:53 +03:00
e400a851f4 Repeat dirty buffer flushes on any PG primary change because the new primary may not know about unfinished operations of the old primary
Some checks reported warnings
Test / test_rebalance_verify_ec (push) Has been cancelled
Test / test_rebalance_verify_ec_imm (push) Has been cancelled
Test / test_root_node (push) Has been cancelled
Test / test_switch_primary (push) Has been cancelled
Test / test_write (push) Has been cancelled
Test / test_write_xor (push) Has been cancelled
Test / test_write_no_same (push) Has been cancelled
Test / test_heal_pg_size_2 (push) Has been cancelled
Test / test_heal_ec (push) Has been cancelled
Test / test_heal_csum_32k_dmj (push) Has been cancelled
Test / test_heal_csum_32k_dj (push) Has been cancelled
Test / test_heal_csum_32k (push) Has been cancelled
Test / test_heal_csum_4k_dmj (push) Has been cancelled
Test / test_minsize_1 (push) Has been cancelled
Test / test_heal_csum_4k_dj (push) Has been cancelled
Test / test_heal_csum_4k (push) Has been cancelled
Test / test_osd_tags (push) Has been cancelled
Test / test_enospc (push) Has been cancelled
Test / test_enospc_xor (push) Has been cancelled
Test / test_enospc_imm (push) Has been cancelled
Test / test_enospc_imm_xor (push) Has been cancelled
Test / test_scrub (push) Has been cancelled
Test / test_scrub_zero_osd_2 (push) Has been cancelled
Test / test_scrub_xor (push) Has been cancelled
Test / test_scrub_pg_size_3 (push) Has been cancelled
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Has been cancelled
Test / test_scrub_ec (push) Has been cancelled
Test / test_nfs (push) Has been cancelled
Test / test_interrupted_rebalance_ec_imm (push) Has been cancelled
Test / test_snapshot (push) Has been cancelled
2024-06-19 00:28:26 +03:00
0fec7a9fea Drop dirty peer connections also when stopping PG to guarantee that clients do not miss fsync 2024-06-19 00:28:26 +03:00
b9de2a92a9 Print OSD performance stats
Some checks failed
Test / test_snapshot_chain_ec (push) Successful in 3m9s
Test / test_rebalance_verify_imm (push) Successful in 3m47s
Test / test_root_node (push) Successful in 10s
Test / test_rebalance_verify (push) Successful in 4m35s
Test / test_switch_primary (push) Successful in 35s
Test / test_rebalance_verify_ec_imm (push) Successful in 2m55s
Test / test_write (push) Successful in 55s
Test / test_write_no_same (push) Successful in 21s
Test / test_write_xor (push) Successful in 1m19s
Test / test_rebalance_verify_ec (push) Successful in 6m10s
Test / test_heal_pg_size_2 (push) Successful in 3m51s
Test / test_heal_ec (push) Successful in 5m10s
Test / test_heal_csum_32k_dmj (push) Successful in 4m46s
Test / test_heal_csum_32k_dj (push) Successful in 6m25s
Test / test_heal_csum_32k (push) Successful in 6m13s
Test / test_osd_tags (push) Successful in 41s
Test / test_heal_csum_4k_dj (push) Successful in 6m26s
Test / test_heal_csum_4k_dmj (push) Successful in 6m28s
Test / test_enospc (push) Successful in 1m53s
Test / test_enospc_imm (push) Successful in 51s
Test / test_enospc_xor (push) Successful in 1m37s
Test / test_scrub (push) Successful in 1m11s
Test / test_scrub_zero_osd_2 (push) Successful in 34s
Test / test_enospc_imm_xor (push) Successful in 1m29s
Test / test_heal_csum_4k (push) Successful in 5m46s
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 43s
Test / test_scrub_pg_size_3 (push) Successful in 53s
Test / test_scrub_ec (push) Successful in 21s
Test / test_nfs (push) Successful in 15s
Test / test_scrub_xor (push) Failing after 3m7s
2024-06-17 13:02:58 +03:00
5360a70853 Make OSD also report derived stats 2024-06-17 13:02:52 +03:00
4c2328eb13 Implement ls-osd command
All checks were successful
Test / test_snapshot_chain_ec (push) Successful in 2m45s
Test / test_rebalance_verify_imm (push) Successful in 2m27s
Test / test_root_node (push) Successful in 11s
Test / test_rebalance_verify (push) Successful in 3m1s
Test / test_switch_primary (push) Successful in 35s
Test / test_write (push) Successful in 41s
Test / test_write_no_same (push) Successful in 19s
Test / test_write_xor (push) Successful in 1m2s
Test / test_rebalance_verify_ec (push) Successful in 3m53s
Test / test_rebalance_verify_ec_imm (push) Successful in 3m40s
Test / test_heal_pg_size_2 (push) Successful in 3m46s
Test / test_heal_ec (push) Successful in 3m49s
Test / test_heal_csum_32k_dmj (push) Successful in 5m41s
Test / test_heal_csum_32k_dj (push) Successful in 6m12s
Test / test_heal_csum_32k (push) Successful in 6m57s
Test / test_heal_csum_4k_dmj (push) Successful in 6m42s
Test / test_osd_tags (push) Successful in 27s
Test / test_enospc (push) Successful in 1m59s
Test / test_heal_csum_4k_dj (push) Successful in 6m33s
Test / test_enospc_xor (push) Successful in 2m24s
Test / test_enospc_imm (push) Successful in 1m32s
Test / test_scrub (push) Successful in 1m1s
Test / test_heal_csum_4k (push) Successful in 6m38s
Test / test_enospc_imm_xor (push) Successful in 1m11s
Test / test_scrub_zero_osd_2 (push) Successful in 33s
Test / test_scrub_xor (push) Successful in 32s
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 39s
Test / test_nfs (push) Successful in 16s
Test / test_scrub_pg_size_3 (push) Successful in 48s
Test / test_scrub_ec (push) Successful in 21s
2024-06-17 02:22:14 +03:00
313daef12d Slightly decopypaste etcd key parsing 2024-06-17 01:38:42 +03:00
ad9c12e1b9 Fix Pseudo-FS initialization leading to ENOENTs some time after start
Some checks failed
Test / test_snapshot_chain_ec (push) Successful in 2m44s
Test / test_rebalance_verify_imm (push) Successful in 3m57s
Test / test_root_node (push) Successful in 12s
Test / test_rebalance_verify (push) Successful in 5m9s
Test / test_switch_primary (push) Successful in 40s
Test / test_write (push) Successful in 1m2s
Test / test_write_no_same (push) Successful in 14s
Test / test_write_xor (push) Successful in 1m23s
Test / test_rebalance_verify_ec (push) Successful in 6m33s
Test / test_rebalance_verify_ec_imm (push) Successful in 5m57s
Test / test_heal_pg_size_2 (push) Successful in 3m54s
Test / test_heal_csum_32k_dmj (push) Successful in 5m10s
Test / test_heal_csum_32k_dj (push) Successful in 5m9s
Test / test_heal_csum_32k (push) Successful in 5m43s
Test / test_osd_tags (push) Successful in 1m3s
Test / test_heal_csum_4k_dmj (push) Successful in 5m38s
Test / test_heal_csum_4k_dj (push) Successful in 5m38s
Test / test_enospc (push) Successful in 1m37s
Test / test_enospc_imm (push) Successful in 1m23s
Test / test_enospc_xor (push) Successful in 2m0s
Test / test_scrub (push) Successful in 45s
Test / test_enospc_imm_xor (push) Successful in 1m32s
Test / test_scrub_xor (push) Successful in 30s
Test / test_scrub_zero_osd_2 (push) Successful in 41s
Test / test_heal_csum_4k (push) Successful in 5m29s
Test / test_nfs (push) Successful in 21s
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 33s
Test / test_scrub_ec (push) Successful in 31s
Test / test_scrub_pg_size_3 (push) Successful in 56s
Test / test_heal_ec (push) Failing after 2m54s
2024-06-16 23:43:09 +03:00
4473eb5512 Fix slow & failing CAS layer merge
All checks were successful
Test / test_snapshot_chain_ec (push) Successful in 2m56s
Test / test_rebalance_verify_imm (push) Successful in 2m44s
Test / test_root_node (push) Successful in 12s
Test / test_rebalance_verify (push) Successful in 3m24s
Test / test_switch_primary (push) Successful in 34s
Test / test_rebalance_verify_ec (push) Successful in 3m3s
Test / test_write_xor (push) Successful in 44s
Test / test_write_no_same (push) Successful in 15s
Test / test_rebalance_verify_ec_imm (push) Successful in 2m30s
Test / test_heal_pg_size_2 (push) Successful in 4m37s
Test / test_heal_csum_32k_dmj (push) Successful in 4m30s
Test / test_heal_ec (push) Successful in 4m45s
Test / test_heal_csum_32k_dj (push) Successful in 6m8s
Test / test_heal_csum_4k_dmj (push) Successful in 6m39s
Test / test_heal_csum_32k (push) Successful in 6m42s
Test / test_heal_csum_4k_dj (push) Successful in 6m30s
Test / test_osd_tags (push) Successful in 18s
Test / test_enospc (push) Successful in 1m21s
Test / test_enospc_imm (push) Successful in 1m13s
Test / test_enospc_xor (push) Successful in 2m2s
Test / test_scrub (push) Successful in 1m5s
Test / test_enospc_imm_xor (push) Successful in 1m42s
Test / test_scrub_zero_osd_2 (push) Successful in 1m1s
Test / test_heal_csum_4k (push) Successful in 6m18s
Test / test_scrub_xor (push) Successful in 31s
Test / test_nfs (push) Successful in 24s
Test / test_scrub_ec (push) Successful in 34s
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 40s
Test / test_scrub_pg_size_3 (push) Successful in 43s
Test / test_write (push) Successful in 40s
2024-06-14 02:15:49 +03:00
6501abc060 Set default etcd_ws_keepalive_interval to 5
All checks were successful
Test / test_snapshot_chain_ec (push) Successful in 2m52s
Test / test_rebalance_verify_imm (push) Successful in 3m4s
Test / test_root_node (push) Successful in 13s
Test / test_rebalance_verify (push) Successful in 3m46s
Test / test_switch_primary (push) Successful in 36s
Test / test_write (push) Successful in 44s
Test / test_write_no_same (push) Successful in 20s
Test / test_write_xor (push) Successful in 1m7s
Test / test_rebalance_verify_ec_imm (push) Successful in 4m8s
Test / test_rebalance_verify_ec (push) Successful in 5m25s
Test / test_heal_pg_size_2 (push) Successful in 3m27s
Test / test_heal_csum_32k_dmj (push) Successful in 5m42s
Test / test_heal_csum_32k_dj (push) Successful in 6m28s
Test / test_heal_csum_32k (push) Successful in 6m40s
Test / test_osd_tags (push) Successful in 25s
Test / test_heal_csum_4k_dmj (push) Successful in 7m16s
Test / test_enospc (push) Successful in 2m12s
Test / test_heal_csum_4k_dj (push) Successful in 6m26s
Test / test_enospc_imm (push) Successful in 1m41s
Test / test_enospc_xor (push) Successful in 2m21s
Test / test_heal_csum_4k (push) Successful in 6m19s
Test / test_enospc_imm_xor (push) Successful in 1m32s
Test / test_scrub (push) Successful in 49s
Test / test_scrub_zero_osd_2 (push) Successful in 34s
Test / test_scrub_xor (push) Successful in 31s
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 40s
Test / test_nfs (push) Successful in 18s
Test / test_scrub_pg_size_3 (push) Successful in 46s
Test / test_scrub_ec (push) Successful in 27s
Test / test_heal_ec (push) Successful in 2m57s
2024-06-08 00:38:48 +03:00
1228403e74 Implement internal restart / run_forever in monitor
Some checks failed
Test / test_rebalance_verify_imm (push) Successful in 1m55s
Test / test_rebalance_verify (push) Successful in 2m42s
Test / test_root_node (push) Successful in 1m19s
Test / test_switch_primary (push) Successful in 33s
Test / test_rebalance_verify_ec (push) Successful in 3m21s
Test / test_etcd_fail (push) Failing after 10m8s
Test / test_write (push) Successful in 53s
Test / test_write_no_same (push) Successful in 16s
Test / test_write_xor (push) Successful in 1m0s
Test / test_rebalance_verify_ec_imm (push) Successful in 3m59s
Test / test_heal_pg_size_2 (push) Successful in 4m37s
Test / test_heal_csum_32k_dmj (push) Failing after 4m45s
Test / test_heal_ec (push) Successful in 5m48s
Test / test_heal_csum_32k_dj (push) Successful in 6m12s
Test / test_heal_csum_32k (push) Successful in 6m30s
Test / test_heal_csum_4k_dmj (push) Successful in 6m16s
Test / test_osd_tags (push) Successful in 34s
Test / test_heal_csum_4k_dj (push) Successful in 6m50s
Test / test_enospc (push) Successful in 1m34s
Test / test_enospc_imm (push) Successful in 1m4s
Test / test_enospc_xor (push) Successful in 2m6s
Test / test_heal_csum_4k (push) Successful in 6m47s
Test / test_enospc_imm_xor (push) Successful in 1m26s
Test / test_scrub (push) Successful in 37s
Test / test_scrub_zero_osd_2 (push) Successful in 35s
Test / test_scrub_xor (push) Successful in 28s
Test / test_nfs (push) Successful in 19s
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 31s
Test / test_scrub_pg_size_3 (push) Successful in 45s
Test / test_scrub_ec (push) Successful in 29s
2024-06-08 00:35:18 +03:00
4eabebd245 Put all configuration to Mon.config
All checks were successful
Test / test_snapshot_chain_ec (push) Successful in 2m50s
Test / test_rebalance_verify_imm (push) Successful in 2m48s
Test / test_rebalance_verify (push) Successful in 3m18s
Test / test_root_node (push) Successful in 11s
Test / test_switch_primary (push) Successful in 35s
Test / test_write (push) Successful in 39s
Test / test_write_no_same (push) Successful in 19s
Test / test_write_xor (push) Successful in 1m2s
Test / test_rebalance_verify_ec (push) Successful in 4m7s
Test / test_rebalance_verify_ec_imm (push) Successful in 3m49s
Test / test_heal_pg_size_2 (push) Successful in 3m24s
Test / test_heal_ec (push) Successful in 5m29s
Test / test_heal_csum_32k_dmj (push) Successful in 6m5s
Test / test_heal_csum_32k_dj (push) Successful in 5m52s
Test / test_heal_csum_32k (push) Successful in 6m44s
Test / test_osd_tags (push) Successful in 21s
Test / test_enospc (push) Successful in 2m16s
Test / test_heal_csum_4k_dmj (push) Successful in 6m44s
Test / test_heal_csum_4k (push) Successful in 6m31s
Test / test_heal_csum_4k_dj (push) Successful in 6m45s
Test / test_enospc_imm (push) Successful in 1m21s
Test / test_enospc_xor (push) Successful in 1m28s
Test / test_scrub_zero_osd_2 (push) Successful in 35s
Test / test_scrub (push) Successful in 39s
Test / test_scrub_xor (push) Successful in 36s
Test / test_enospc_imm_xor (push) Successful in 49s
Test / test_nfs (push) Successful in 24s
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 35s
Test / test_scrub_ec (push) Successful in 33s
Test / test_scrub_pg_size_3 (push) Successful in 41s
2024-06-07 00:20:38 +03:00
cf60b6818c Extract PG generation into pg_gen.js
All checks were successful
Test / test_snapshot_chain_ec (push) Successful in 2m56s
Test / test_rebalance_verify_imm (push) Successful in 3m2s
Test / test_root_node (push) Successful in 12s
Test / test_rebalance_verify (push) Successful in 3m35s
Test / test_switch_primary (push) Successful in 33s
Test / test_write (push) Successful in 40s
Test / test_write_no_same (push) Successful in 19s
Test / test_write_xor (push) Successful in 1m3s
Test / test_rebalance_verify_ec (push) Successful in 4m0s
Test / test_rebalance_verify_ec_imm (push) Successful in 3m39s
Test / test_heal_pg_size_2 (push) Successful in 3m29s
Test / test_heal_csum_32k_dmj (push) Successful in 5m22s
Test / test_heal_ec (push) Successful in 6m27s
Test / test_heal_csum_32k_dj (push) Successful in 5m40s
Test / test_heal_csum_32k (push) Successful in 6m50s
Test / test_osd_tags (push) Successful in 25s
Test / test_heal_csum_4k_dmj (push) Successful in 6m28s
Test / test_enospc (push) Successful in 2m13s
Test / test_heal_csum_4k_dj (push) Successful in 6m10s
Test / test_heal_csum_4k (push) Successful in 6m12s
Test / test_scrub (push) Successful in 44s
Test / test_enospc_imm (push) Successful in 1m1s
Test / test_enospc_xor (push) Successful in 1m24s
Test / test_enospc_imm_xor (push) Successful in 1m5s
Test / test_scrub_zero_osd_2 (push) Successful in 27s
Test / test_scrub_xor (push) Successful in 28s
Test / test_nfs (push) Successful in 19s
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 37s
Test / test_scrub_pg_size_3 (push) Successful in 48s
Test / test_scrub_ec (push) Successful in 29s
2024-06-05 11:22:06 +03:00
1a4a7cdc37 Extract OSD Tree generation functions to osd_tree.js 2024-06-05 11:19:35 +03:00
1b48085e21 Extract remote etcd interaction to etcd_adapter.js 2024-06-05 11:19:35 +03:00
a71847244e Rename PGUtil.js to pg_utils.js 2024-06-05 10:51:20 +03:00
848c2d2722 Move LPOptimizer, DSL and tests to lp_optimizer/ 2024-06-05 10:51:20 +03:00
86832dc43f Add eslint import/no-unresolved 2024-06-05 10:51:20 +03:00
1f6da79463 Extract stats calculation into a separate file 2024-06-05 10:51:20 +03:00
9bf57c3760 Mention generic Toshiba MG instead of specific MGxx, fix russian vitastorfs link 2024-06-05 02:08:09 +03:00
a0305b5b4a Extract pool configuration validation into a separate file 2024-06-05 02:08:08 +03:00
1546f8e447 Extract etcd data "schema" into a separate file 2024-06-05 02:07:53 +03:00
8ce962b312 Move scripts 2024-06-05 02:07:53 +03:00
50e56b3b92 Add vitastor_c_inode_get_immediate_commit
All checks were successful
Test / test_snapshot_chain_ec (push) Successful in 2m48s
Test / test_rebalance_verify_imm (push) Successful in 2m58s
Test / test_root_node (push) Successful in 9s
Test / test_rebalance_verify (push) Successful in 3m33s
Test / test_switch_primary (push) Successful in 35s
Test / test_write (push) Successful in 40s
Test / test_write_no_same (push) Successful in 18s
Test / test_write_xor (push) Successful in 1m3s
Test / test_rebalance_verify_ec (push) Successful in 4m12s
Test / test_rebalance_verify_ec_imm (push) Successful in 4m41s
Test / test_heal_pg_size_2 (push) Successful in 3m30s
Test / test_heal_ec (push) Successful in 4m46s
Test / test_heal_csum_32k_dmj (push) Successful in 5m6s
Test / test_heal_csum_32k_dj (push) Successful in 6m33s
Test / test_heal_csum_32k (push) Successful in 6m38s
Test / test_osd_tags (push) Successful in 33s
Test / test_heal_csum_4k_dmj (push) Successful in 6m59s
Test / test_enospc (push) Successful in 2m16s
Test / test_heal_csum_4k_dj (push) Successful in 6m48s
Test / test_enospc_imm (push) Successful in 1m42s
Test / test_enospc_xor (push) Successful in 2m26s
Test / test_enospc_imm_xor (push) Successful in 2m11s
Test / test_heal_csum_4k (push) Successful in 6m15s
Test / test_scrub_zero_osd_2 (push) Successful in 29s
Test / test_scrub (push) Successful in 37s
Test / test_scrub_xor (push) Successful in 36s
Test / test_scrub_ec (push) Successful in 38s
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 40s
Test / test_scrub_pg_size_3 (push) Successful in 47s
Test / test_nfs (push) Successful in 15s
2024-05-19 01:57:18 +03:00
a12d328793 Rename cli/ to cmd/, fix cmake install
All checks were successful
Test / test_snapshot_chain_ec (push) Successful in 2m39s
Test / test_rebalance_verify_imm (push) Successful in 3m2s
Test / test_root_node (push) Successful in 9s
Test / test_rebalance_verify (push) Successful in 3m39s
Test / test_switch_primary (push) Successful in 37s
Test / test_write (push) Successful in 41s
Test / test_write_no_same (push) Successful in 18s
Test / test_write_xor (push) Successful in 1m4s
Test / test_rebalance_verify_ec_imm (push) Successful in 3m44s
Test / test_rebalance_verify_ec (push) Successful in 4m17s
Test / test_heal_pg_size_2 (push) Successful in 3m30s
Test / test_heal_ec (push) Successful in 5m4s
Test / test_heal_csum_32k_dmj (push) Successful in 5m53s
Test / test_heal_csum_32k_dj (push) Successful in 5m46s
Test / test_heal_csum_32k (push) Successful in 6m31s
Test / test_osd_tags (push) Successful in 25s
Test / test_heal_csum_4k_dmj (push) Successful in 7m7s
Test / test_enospc (push) Successful in 2m18s
Test / test_heal_csum_4k_dj (push) Successful in 5m57s
Test / test_heal_csum_4k (push) Successful in 6m27s
Test / test_enospc_imm (push) Successful in 1m6s
Test / test_enospc_xor (push) Successful in 1m18s
Test / test_scrub (push) Successful in 30s
Test / test_enospc_imm_xor (push) Successful in 1m5s
Test / test_scrub_zero_osd_2 (push) Successful in 25s
Test / test_scrub_xor (push) Successful in 31s
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 37s
Test / test_scrub_ec (push) Successful in 26s
Test / test_nfs (push) Successful in 14s
Test / test_scrub_pg_size_3 (push) Successful in 46s
2024-05-15 23:04:50 +03:00
c79b38bd26 Move all sources to subdirs
All checks were successful
Test / test_snapshot_chain_ec (push) Successful in 2m50s
Test / test_rebalance_verify_imm (push) Successful in 3m17s
Test / test_root_node (push) Successful in 9s
Test / test_rebalance_verify (push) Successful in 4m6s
Test / test_switch_primary (push) Successful in 33s
Test / test_write (push) Successful in 53s
Test / test_write_no_same (push) Successful in 12s
Test / test_write_xor (push) Successful in 51s
Test / test_rebalance_verify_ec_imm (push) Successful in 3m26s
Test / test_rebalance_verify_ec (push) Successful in 4m24s
Test / test_heal_pg_size_2 (push) Successful in 5m11s
Test / test_heal_ec (push) Successful in 5m8s
Test / test_heal_csum_32k_dmj (push) Successful in 5m1s
Test / test_heal_csum_32k_dj (push) Successful in 6m0s
Test / test_heal_csum_4k_dmj (push) Successful in 7m3s
Test / test_heal_csum_32k (push) Successful in 7m6s
Test / test_heal_csum_4k_dj (push) Successful in 6m53s
Test / test_osd_tags (push) Successful in 28s
Test / test_enospc (push) Successful in 1m10s
Test / test_enospc_imm (push) Successful in 57s
Test / test_enospc_xor (push) Successful in 1m24s
Test / test_heal_csum_4k (push) Successful in 6m58s
Test / test_scrub_zero_osd_2 (push) Successful in 29s
Test / test_scrub (push) Successful in 33s
Test / test_scrub_xor (push) Successful in 26s
Test / test_enospc_imm_xor (push) Successful in 51s
Test / test_nfs (push) Successful in 24s
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 34s
Test / test_scrub_ec (push) Successful in 32s
Test / test_scrub_pg_size_3 (push) Successful in 39s
2024-05-15 11:06:01 +03:00
44692d148a Make vitastor_kv.h header public
All checks were successful
Test / test_snapshot_chain_ec (push) Successful in 2m47s
Test / test_rebalance_verify_imm (push) Successful in 2m42s
Test / test_root_node (push) Successful in 9s
Test / test_rebalance_verify (push) Successful in 3m21s
Test / test_switch_primary (push) Successful in 37s
Test / test_write (push) Successful in 46s
Test / test_write_no_same (push) Successful in 17s
Test / test_write_xor (push) Successful in 49s
Test / test_rebalance_verify_ec (push) Successful in 4m51s
Test / test_rebalance_verify_ec_imm (push) Successful in 4m31s
Test / test_heal_pg_size_2 (push) Successful in 3m36s
Test / test_heal_ec (push) Successful in 3m37s
Test / test_heal_csum_32k_dmj (push) Successful in 6m7s
Test / test_heal_csum_32k_dj (push) Successful in 6m12s
Test / test_heal_csum_32k (push) Successful in 7m20s
Test / test_heal_csum_4k_dmj (push) Successful in 7m7s
Test / test_osd_tags (push) Successful in 19s
Test / test_enospc (push) Successful in 1m27s
Test / test_enospc_xor (push) Successful in 2m24s
Test / test_heal_csum_4k_dj (push) Successful in 5m42s
Test / test_enospc_imm (push) Successful in 1m39s
Test / test_heal_csum_4k (push) Successful in 6m0s
Test / test_scrub_zero_osd_2 (push) Successful in 47s
Test / test_scrub (push) Successful in 50s
Test / test_enospc_imm_xor (push) Successful in 1m15s
Test / test_scrub_xor (push) Successful in 25s
Test / test_nfs (push) Successful in 23s
Test / test_scrub_ec (push) Successful in 32s
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 35s
Test / test_scrub_pg_size_3 (push) Successful in 41s
2024-05-15 01:49:38 +03:00
ba52359611 Fix last master commit 2024-05-15 01:49:31 +03:00
23a9aa93b5 Fix pool create/modify --block_size validation
All checks were successful
Test / test_splitbrain (push) Has been skipped
Test / test_rebalance_verify (push) Has been skipped
Test / test_rebalance_verify_imm (push) Has been skipped
Test / test_rebalance_verify_ec (push) Has been skipped
Test / test_rebalance_verify_ec_imm (push) Has been skipped
Test / test_root_node (push) Has been skipped
Test / test_switch_primary (push) Has been skipped
Test / test_write (push) Has been skipped
Test / test_write_xor (push) Has been skipped
Test / test_write_no_same (push) Has been skipped
Test / test_heal_pg_size_2 (push) Has been skipped
Test / test_heal_ec (push) Has been skipped
Test / test_heal_csum_32k_dmj (push) Has been skipped
Test / test_heal_csum_32k_dj (push) Has been skipped
Test / test_heal_csum_32k (push) Has been skipped
Test / test_heal_csum_4k_dmj (push) Has been skipped
Test / test_heal_csum_4k_dj (push) Has been skipped
Test / test_heal_csum_4k (push) Has been skipped
Test / test_osd_tags (push) Has been skipped
Test / test_enospc (push) Has been skipped
Test / test_enospc_xor (push) Has been skipped
Test / test_enospc_imm (push) Has been skipped
Test / test_enospc_imm_xor (push) Has been skipped
Test / test_scrub (push) Has been skipped
Test / test_scrub_zero_osd_2 (push) Has been skipped
Test / test_scrub_xor (push) Has been skipped
Test / test_scrub_pg_size_3 (push) Has been skipped
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Has been skipped
Test / test_scrub_ec (push) Has been skipped
Test / test_nfs (push) Has been skipped
2024-05-04 16:33:22 +03:00
2412d9e239 Fix TTL comparison for lease/keepalive
All checks were successful
Test / test_snapshot_chain_ec (push) Successful in 3m5s
Test / test_rebalance_verify_imm (push) Successful in 3m29s
Test / test_root_node (push) Successful in 9s
Test / test_rebalance_verify (push) Successful in 4m3s
Test / test_switch_primary (push) Successful in 35s
Test / test_write (push) Successful in 54s
Test / test_write_no_same (push) Successful in 13s
Test / test_write_xor (push) Successful in 54s
Test / test_rebalance_verify_ec_imm (push) Successful in 3m58s
Test / test_rebalance_verify_ec (push) Successful in 4m58s
Test / test_heal_pg_size_2 (push) Successful in 4m6s
Test / test_heal_ec (push) Successful in 4m15s
Test / test_heal_csum_32k_dmj (push) Successful in 5m52s
Test / test_heal_csum_32k_dj (push) Successful in 5m59s
Test / test_heal_csum_32k (push) Successful in 7m7s
Test / test_heal_csum_4k_dmj (push) Successful in 6m57s
Test / test_osd_tags (push) Successful in 28s
Test / test_enospc (push) Successful in 1m58s
Test / test_heal_csum_4k_dj (push) Successful in 6m53s
Test / test_heal_csum_4k (push) Successful in 6m20s
Test / test_enospc_xor (push) Successful in 2m9s
Test / test_enospc_imm (push) Successful in 41s
Test / test_scrub_zero_osd_2 (push) Successful in 35s
Test / test_scrub (push) Successful in 38s
Test / test_scrub_xor (push) Successful in 34s
Test / test_enospc_imm_xor (push) Successful in 58s
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 35s
Test / test_scrub_ec (push) Successful in 33s
Test / test_nfs (push) Successful in 19s
Test / test_scrub_pg_size_3 (push) Successful in 41s
2024-04-30 01:53:05 +03:00
9301c857b1 Release 1.6.1
All checks were successful
Test / test_snapshot_chain_ec (push) Successful in 2m59s
Test / test_rebalance_verify_imm (push) Successful in 3m16s
Test / test_root_node (push) Successful in 10s
Test / test_rebalance_verify (push) Successful in 3m50s
Test / test_switch_primary (push) Successful in 37s
Test / test_write (push) Successful in 39s
Test / test_write_no_same (push) Successful in 13s
Test / test_write_xor (push) Successful in 1m20s
Test / test_rebalance_verify_ec (push) Successful in 4m20s
Test / test_rebalance_verify_ec_imm (push) Successful in 3m54s
Test / test_heal_pg_size_2 (push) Successful in 3m25s
Test / test_heal_csum_32k_dmj (push) Successful in 5m52s
Test / test_heal_ec (push) Successful in 6m12s
Test / test_heal_csum_32k_dj (push) Successful in 5m40s
Test / test_heal_csum_32k (push) Successful in 6m21s
Test / test_osd_tags (push) Successful in 21s
Test / test_enospc (push) Successful in 2m25s
Test / test_heal_csum_4k_dmj (push) Successful in 6m5s
Test / test_heal_csum_4k_dj (push) Successful in 6m3s
Test / test_heal_csum_4k (push) Successful in 6m1s
Test / test_scrub (push) Successful in 43s
Test / test_enospc_imm (push) Successful in 47s
Test / test_enospc_xor (push) Successful in 1m38s
Test / test_enospc_imm_xor (push) Successful in 1m0s
Test / test_scrub_zero_osd_2 (push) Successful in 26s
Test / test_scrub_xor (push) Successful in 36s
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 33s
Test / test_scrub_ec (push) Successful in 26s
Test / test_scrub_pg_size_3 (push) Successful in 47s
Test / test_nfs (push) Successful in 16s
A bunch of monitor fixes

- Add noout flag for OSDs (/vitastor/config/osd/xx)
- Fix "effective" size of degraded PGs (and thus "used space") calculation in monitor
- Fix monitor not clearing PGs of deleted pools
- Fix incorrect PG generation with hosts with 0 OSDs
- Fix monitor crashing during primary OSD recheck when pool has no PGs
- Fix monitor crashing when node_placement included non-existing OSDs
- Fix possible data movement after removing OSDs reweighted to 0
- Remove extra empty keys from pool configurations created by vitastor-cli create-pool
- Fix 32-bit build
2024-04-22 02:01:29 +03:00
3094358ec2 Fix autovivification leading to extra empty keys in pool-create
All checks were successful
Test / test_snapshot_chain_ec (push) Successful in 2m48s
Test / test_rebalance_verify_imm (push) Successful in 3m4s
Test / test_root_node (push) Successful in 10s
Test / test_rebalance_verify (push) Successful in 3m44s
Test / test_switch_primary (push) Successful in 36s
Test / test_write (push) Successful in 39s
Test / test_write_no_same (push) Successful in 19s
Test / test_write_xor (push) Successful in 1m4s
Test / test_rebalance_verify_ec_imm (push) Successful in 3m36s
Test / test_rebalance_verify_ec (push) Successful in 4m21s
Test / test_heal_pg_size_2 (push) Successful in 3m33s
Test / test_heal_csum_32k_dmj (push) Successful in 5m41s
Test / test_heal_ec (push) Successful in 6m5s
Test / test_heal_csum_32k_dj (push) Successful in 5m29s
Test / test_heal_csum_32k (push) Successful in 6m11s
Test / test_osd_tags (push) Successful in 22s
Test / test_enospc (push) Successful in 2m30s
Test / test_heal_csum_4k (push) Successful in 6m9s
Test / test_heal_csum_4k_dj (push) Successful in 6m11s
Test / test_heal_csum_4k_dmj (push) Successful in 6m14s
Test / test_scrub (push) Successful in 42s
Test / test_enospc_imm (push) Successful in 47s
Test / test_enospc_xor (push) Successful in 1m4s
Test / test_enospc_imm_xor (push) Successful in 1m1s
Test / test_scrub_zero_osd_2 (push) Successful in 27s
Test / test_scrub_xor (push) Successful in 27s
Test / test_nfs (push) Successful in 20s
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 34s
Test / test_scrub_pg_size_3 (push) Successful in 49s
Test / test_scrub_ec (push) Successful in 31s
2024-04-20 02:04:09 +03:00
87f666d2a2 Filter out OSDs reweighted to 0 2024-04-20 02:03:53 +03:00
bd7fe4ef8f Filter out non-existing OSDs added in node_placement 2024-04-20 02:03:36 +03:00
1b3f9a1416 Do not set non-existing OSD weight to 0, we'll remove them instead 2024-04-20 02:03:11 +03:00
a7b7354f38 Do not recheck primary distribution when pool has no PGs 2024-04-20 02:02:47 +03:00
765befa22f Remove empty nodes from tree because PG DSL expects that all leaf nodes are OSDs 2024-04-20 02:02:28 +03:00
87b3ab94fe Do not disable require-atomic-updates and no-unused-vars 2024-04-20 02:02:13 +03:00
2c0801f6e4 Configure ESLint and add it to CI
All checks were successful
Test / test_snapshot_chain_ec (push) Successful in 3m0s
Test / test_rebalance_verify_imm (push) Successful in 3m20s
Test / test_root_node (push) Successful in 10s
Test / test_rebalance_verify (push) Successful in 3m50s
Test / test_switch_primary (push) Successful in 40s
Test / test_write (push) Successful in 41s
Test / test_write_no_same (push) Successful in 18s
Test / test_write_xor (push) Successful in 1m5s
Test / test_rebalance_verify_ec (push) Successful in 4m38s
Test / test_rebalance_verify_ec_imm (push) Successful in 4m17s
Test / test_heal_pg_size_2 (push) Successful in 3m25s
Test / test_heal_ec (push) Successful in 4m46s
Test / test_heal_csum_32k_dmj (push) Successful in 5m38s
Test / test_heal_csum_32k_dj (push) Successful in 6m16s
Test / test_heal_csum_32k (push) Successful in 6m45s
Test / test_osd_tags (push) Successful in 27s
Test / test_heal_csum_4k_dmj (push) Successful in 7m12s
Test / test_enospc (push) Successful in 2m6s
Test / test_heal_csum_4k_dj (push) Successful in 6m34s
Test / test_enospc_imm (push) Successful in 1m43s
Test / test_heal_csum_4k (push) Successful in 6m23s
Test / test_enospc_xor (push) Successful in 1m57s
Test / test_enospc_imm_xor (push) Successful in 1m0s
Test / test_scrub (push) Successful in 32s
Test / test_scrub_zero_osd_2 (push) Successful in 31s
Test / test_scrub_xor (push) Successful in 33s
Test / test_nfs (push) Successful in 18s
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 27s
Test / test_scrub_ec (push) Successful in 28s
Test / test_scrub_pg_size_3 (push) Successful in 57s
2024-04-16 02:39:31 +03:00
fd83fef1d9 Fix pool deletion
Some checks reported warnings
Test / test_snapshot_chain_ec (push) Successful in 3m1s
Test / test_rebalance_verify_imm (push) Successful in 3m11s
Test / test_root_node (push) Successful in 9s
Test / test_rebalance_verify (push) Successful in 3m53s
Test / test_switch_primary (push) Successful in 39s
Test / test_write (push) Successful in 39s
Test / test_write_no_same (push) Successful in 18s
Test / test_write_xor (push) Successful in 1m9s
Test / test_rebalance_verify_ec_imm (push) Successful in 3m53s
Test / test_rebalance_verify_ec (push) Successful in 4m33s
Test / test_heal_pg_size_2 (push) Successful in 3m27s
Test / test_heal_csum_4k_dmj (push) Has been cancelled
Test / test_heal_csum_4k_dj (push) Has been cancelled
Test / test_heal_csum_4k (push) Has been cancelled
Test / test_osd_tags (push) Has been cancelled
Test / test_enospc (push) Has been cancelled
Test / test_enospc_xor (push) Has been cancelled
Test / test_enospc_imm (push) Has been cancelled
Test / test_enospc_imm_xor (push) Has been cancelled
Test / test_scrub (push) Has been cancelled
Test / test_scrub_zero_osd_2 (push) Has been cancelled
Test / test_scrub_xor (push) Has been cancelled
Test / test_scrub_pg_size_3 (push) Has been cancelled
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Has been cancelled
Test / test_scrub_ec (push) Has been cancelled
Test / test_nfs (push) Has been cancelled
Test / test_heal_csum_32k_dj (push) Has been cancelled
Test / test_heal_ec (push) Has been cancelled
Test / test_heal_csum_32k_dmj (push) Has been cancelled
Test / test_heal_csum_32k (push) Has been cancelled
2024-04-16 02:20:26 +03:00
8d1067971b Fix pg_effsize (and thus "used space") calculation in monitor 2024-04-16 02:20:18 +03:00
ae5af04fde Add noout flag for OSDs 2024-04-16 02:19:55 +03:00
266d038b11 Fix 32-bit build warnings and one error again :-)
All checks were successful
Test / test_snapshot_chain_ec (push) Successful in 2m52s
Test / test_rebalance_verify_imm (push) Successful in 3m7s
Test / test_root_node (push) Successful in 8s
Test / test_rebalance_verify (push) Successful in 3m36s
Test / test_switch_primary (push) Successful in 40s
Test / test_write (push) Successful in 41s
Test / test_write_no_same (push) Successful in 18s
Test / test_write_xor (push) Successful in 1m6s
Test / test_rebalance_verify_ec (push) Successful in 4m25s
Test / test_rebalance_verify_ec_imm (push) Successful in 3m52s
Test / test_heal_pg_size_2 (push) Successful in 3m21s
Test / test_heal_ec (push) Successful in 5m27s
Test / test_heal_csum_32k_dmj (push) Successful in 5m56s
Test / test_heal_csum_32k_dj (push) Successful in 5m49s
Test / test_heal_csum_32k (push) Successful in 6m43s
Test / test_osd_tags (push) Successful in 21s
Test / test_enospc (push) Successful in 2m18s
Test / test_heal_csum_4k_dmj (push) Successful in 6m43s
Test / test_heal_csum_4k (push) Successful in 6m27s
Test / test_heal_csum_4k_dj (push) Successful in 6m29s
Test / test_enospc_imm (push) Successful in 1m5s
Test / test_enospc_xor (push) Successful in 1m38s
Test / test_scrub (push) Successful in 37s
Test / test_scrub_zero_osd_2 (push) Successful in 32s
Test / test_enospc_imm_xor (push) Successful in 45s
Test / test_scrub_xor (push) Successful in 33s
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 34s
Test / test_scrub_ec (push) Successful in 34s
Test / test_scrub_pg_size_3 (push) Successful in 43s
Test / test_nfs (push) Successful in 13s
2024-04-11 22:49:33 +03:00
279 changed files with 6070 additions and 2567 deletions

View File

@@ -64,6 +64,13 @@ jobs:
# leak sanitizer sometimes crashes
- run: cd /root/vitastor/build && ASAN_OPTIONS=detect_leaks=0 make -j16 test
npm_lint:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- run: cd /root/vitastor/mon && npm run lint
test_add_osd:
runs-on: ubuntu-latest
needs: build

13
.gitignore vendored
View File

@@ -3,16 +3,3 @@
package-lock.json
fio
qemu
osd
stub_osd
stub_uring_osd
stub_bench
osd_test
osd_peering_pg_test
dump_journal
nbd_proxy
rm_inode
test_allocator
test_blockstore
test_shit
osd_rmw_test

View File

@@ -2,6 +2,6 @@ cmake_minimum_required(VERSION 2.8.12)
project(vitastor)
set(VERSION "1.6.0")
set(VERSION "1.6.1")
add_subdirectory(src)

View File

@@ -1,6 +1,6 @@
#!/bin/bash
gcc -I. -E -o fio_headers.i src/fio_headers.h
gcc -I. -E -o fio_headers.i src/util/fio_headers.h
rm -rf fio-copy
for i in `grep -Po 'fio/[^"]+' fio_headers.i | sort | uniq`; do

View File

@@ -5,7 +5,7 @@
#cd b/qemu; make qapi
gcc -I qemu/b/qemu `pkg-config glib-2.0 --cflags` \
-I qemu/include -E -o qemu_driver.i src/qemu_driver.c
-I qemu/include -E -o qemu_driver.i src/client/qemu_driver.c
rm -rf qemu-copy
for i in `grep -Po 'qemu/[^"]+' qemu_driver.i | sort | uniq`; do

View File

@@ -1,4 +1,4 @@
VERSION ?= v1.6.0
VERSION ?= v1.6.1
all: build push

View File

@@ -49,7 +49,7 @@ spec:
capabilities:
add: ["SYS_ADMIN"]
allowPrivilegeEscalation: true
image: vitalif/vitastor-csi:v1.6.0
image: vitalif/vitastor-csi:v1.6.1
args:
- "--node=$(NODE_ID)"
- "--endpoint=$(CSI_ENDPOINT)"

View File

@@ -121,7 +121,7 @@ spec:
privileged: true
capabilities:
add: ["SYS_ADMIN"]
image: vitalif/vitastor-csi:v1.6.0
image: vitalif/vitastor-csi:v1.6.1
args:
- "--node=$(NODE_ID)"
- "--endpoint=$(CSI_ENDPOINT)"

View File

@@ -5,7 +5,7 @@ package vitastor
const (
vitastorCSIDriverName = "csi.vitastor.io"
vitastorCSIDriverVersion = "1.6.0"
vitastorCSIDriverVersion = "1.6.1"
)
// Config struct fills the parameters of request or user input

2
debian/changelog vendored
View File

@@ -1,4 +1,4 @@
vitastor (1.6.0-1) unstable; urgency=medium
vitastor (1.6.1-1) unstable; urgency=medium
* Bugfixes

View File

@@ -27,7 +27,7 @@ RUN apt-get -y build-dep qemu
RUN apt-get --download-only source qemu
ADD patches /root/vitastor/patches
ADD src/qemu_driver.c /root/vitastor/src/qemu_driver.c
ADD src/client/qemu_driver.c /root/qemu_driver.c
#RUN set -e; \
# apt-get install -y wget; \
@@ -52,7 +52,7 @@ RUN set -e; \
cd /root/packages/qemu-$REL/qemu-*/; \
quilt push -a; \
quilt add block/vitastor.c; \
cp /root/vitastor/src/qemu_driver.c block/vitastor.c; \
cp /root/qemu_driver.c block/vitastor.c; \
quilt refresh; \
V=$(head -n1 debian/changelog | perl -pe 's/5\.2\+dfsg-9/5.2+dfsg-11/; s/^.*\((.*?)(~bpo[\d\+]*)?\).*$/$1/')+vitastor4; \
if [ "$REL" = bullseye ]; then V=${V}bullseye; fi; \

View File

@@ -1,2 +1,3 @@
mon usr/lib/vitastor
mon/vitastor-mon.service /lib/systemd/system
mon usr/lib/vitastor/mon
mon/scripts/make-etcd usr/lib/vitastor/mon
mon/scripts/vitastor-mon.service /lib/systemd/system

View File

@@ -1,6 +1,6 @@
usr/bin/vitastor-osd
usr/bin/vitastor-disk
usr/bin/vitastor-dump-journal
mon/vitastor-osd@.service /lib/systemd/system
mon/vitastor.target /lib/systemd/system
mon/90-vitastor.rules /lib/udev/rules.d
mon/scripts/vitastor-osd@.service /lib/systemd/system
mon/scripts/vitastor.target /lib/systemd/system
mon/scripts/90-vitastor.rules /lib/udev/rules.d

View File

@@ -37,8 +37,8 @@ RUN set -e -x; \
mkdir -p /root/packages/vitastor-$REL; \
rm -rf /root/packages/vitastor-$REL/*; \
cd /root/packages/vitastor-$REL; \
cp -r /root/vitastor vitastor-1.6.0; \
cd vitastor-1.6.0; \
cp -r /root/vitastor vitastor-1.6.1; \
cd vitastor-1.6.1; \
ln -s /root/fio-build/fio-*/ ./fio; \
FIO=$(head -n1 fio/debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
ls /usr/include/linux/raw.h || cp ./debian/raw.h /usr/include/linux/raw.h; \
@@ -51,8 +51,8 @@ RUN set -e -x; \
rm -rf a b; \
echo "dep:fio=$FIO" > debian/fio_version; \
cd /root/packages/vitastor-$REL; \
tar --sort=name --mtime='2020-01-01' --owner=0 --group=0 --exclude=debian -cJf vitastor_1.6.0.orig.tar.xz vitastor-1.6.0; \
cd vitastor-1.6.0; \
tar --sort=name --mtime='2020-01-01' --owner=0 --group=0 --exclude=debian -cJf vitastor_1.6.1.orig.tar.xz vitastor-1.6.1; \
cd vitastor-1.6.1; \
V=$(head -n1 debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
DEBFULLNAME="Vitaliy Filippov <vitalif@yourcmc.ru>" dch -D $REL -v "$V""$REL" "Rebuild for $REL"; \
DEB_BUILD_OPTIONS=nocheck dpkg-buildpackage --jobs=auto -sa; \

View File

@@ -248,7 +248,7 @@ etcd_report_interval to guarantee that keepalive actually works.
## etcd_ws_keepalive_interval
- Type: seconds
- Default: 30
- Default: 5
- Can be changed online: yes
etcd websocket ping interval required to keep the connection alive and

View File

@@ -259,7 +259,7 @@ etcd_report_interval, чтобы keepalive гарантированно рабо
## etcd_ws_keepalive_interval
- Тип: секунды
- Значение по умолчанию: 30
- Значение по умолчанию: 5
- Можно менять на лету: да
Интервал проверки живости вебсокет-подключений к etcd.

View File

@@ -86,7 +86,11 @@ Parent node reference is required for intermediate tree nodes.
Separate OSD settings are set in etc keys `/vitastor/config/osd/<number>`
in JSON format `{"<key>":<value>}`.
As of now, two settings are supported:
As of now, the following settings are supported:
- [reweight](#reweight)
- [tags](#tags)
- [noout](#noout)
## reweight
@@ -109,6 +113,14 @@ subsets and then use a specific subset for pool instead of all OSDs.
For example you can mark SSD OSDs with tag "ssd" and HDD OSDs with "hdd" and
such tags will work as device classes.
## noout
- Type: boolean
- Default: false
If set to true, [osd_out_time](monitor.en.md#osd_out_time) is ignored for this
OSD and it's never removed from data distribution by the monitor.
# Pool parameters
## name

View File

@@ -85,10 +85,11 @@
Настройки отдельных OSD задаются в ключах etcd `/vitastor/config/osd/<number>`
в JSON-формате `{"<key>":<value>}`.
На данный момент поддерживаются две настройки:
На данный момент поддерживаются следующие настройки:
- [reweight](#reweight)
- [tags](#tags)
- [noout](#noout)
## reweight
@@ -112,6 +113,14 @@
всех. Можно, например, пометить SSD OSD тегом "ssd", а HDD тегом "hdd", в
этом смысле теги работают аналогично классам устройств.
## noout
- Тип: булево (да/нет)
- Значение по умолчанию: false
Если установлено в true, то [osd_out_time](monitor.ru.md#osd_out_time) для этого
OSD игнорируется и OSD не удаляется из распределения данных монитором.
# Параметры
## name

View File

@@ -282,7 +282,7 @@
etcd_report_interval, чтобы keepalive гарантированно работал.
- name: etcd_ws_keepalive_interval
type: sec
default: 30
default: 5
online: true
info: |
etcd websocket ping interval required to keep the connection alive and

View File

@@ -41,7 +41,7 @@ It's recommended to build the QEMU driver (qemu_driver.c) in-tree, as a part of
QEMU build process. To do that:
- Install vitastor client library headers (from source or from vitastor-client-dev package)
- Take a corresponding patch from `patches/qemu-*-vitastor.patch` and apply it to QEMU source
- Copy `src/qemu_driver.c` to QEMU source directory as `block/vitastor.c`
- Copy `src/client/qemu_driver.c` to QEMU source directory as `block/vitastor.c`
- Build QEMU as usual
But it is also possible to build it out-of-tree. To do that:

View File

@@ -41,7 +41,7 @@ cmake .. && make -j8 install
Драйвер QEMU (qemu_driver.c) рекомендуется собирать вместе с самим QEMU. Для этого:
- Установите заголовки клиентской библиотеки Vitastor (из исходников или из пакета vitastor-client-dev)
- Возьмите соответствующий патч из `patches/qemu-*-vitastor.patch` и примените его к исходникам QEMU
- Скопируйте [src/qemu_driver.c](../../src/qemu_driver.c) в директорию исходников QEMU как `block/vitastor.c`
- Скопируйте [src/client/qemu_driver.c](../../src/client/qemu_driver.c) в директорию исходников QEMU как `block/vitastor.c`
- Соберите QEMU как обычно
Однако в целях отладки драйвер также можно собирать отдельно от QEMU. Для этого:

View File

@@ -22,7 +22,7 @@
with lazy fsync, but prepare for inferior single-thread latency. Read more about capacitors
[here](../config/layout-cluster.en.md#immediate_commit).
- If you want to use HDDs, get modern HDDs with Media Cache or SSD Cache: HGST Ultrastar,
Toshiba MG08, Seagate EXOS or something similar. If your drives don't have such cache then
Toshiba MG, Seagate EXOS or something similar. If your drives don't have such cache then
you also need small SSDs for journal and metadata (even 2 GB per 1 TB of HDD space is enough).
- Get a fast network (at least 10 Gbit/s). Something like Mellanox ConnectX-4 with RoCEv2 is ideal.
- Disable CPU powersaving: `cpupower idle-set -D 0 && cpupower frequency-set -g performance`.
@@ -33,7 +33,7 @@
- SATA SSD: Micron 5100/5200/5300/5400, Samsung PM863/PM883/PM893, Intel D3-S4510/4520/4610/4620, Kingston DC500M
- NVMe: Micron 9100/9200/9300/9400, Micron 7300/7450, Samsung PM983/PM9A3, Samsung PM1723/1735/1743,
Intel DC-P3700/P4500/P4600, Intel D7-P5500/P5600, Intel Optane, Kingston DC1000B/DC1500M
- HDD: HGST Ultrastar, Toshiba MG06/MG07/MG08, Seagate EXOS
- HDD: HGST Ultrastar, Toshiba MG, Seagate EXOS
## Configure monitors

View File

@@ -123,4 +123,4 @@ vitastor-cli create -s 10G testimg
Если вы хотите использовать не только блочные образы виртуальных машин или контейнеров,
а также кластерную файловую систему, то:
- [Следуйте инструкциям](../usage/nfs.en.md#vitastorfs)
- [Следуйте инструкциям](../usage/nfs.ru.md#vitastorfs)

54
mon/.eslintrc.js Normal file
View File

@@ -0,0 +1,54 @@
module.exports = {
"env": {
"es6": true,
"node": true
},
"extends": [
"eslint:recommended",
"plugin:node/recommended"
],
"parserOptions": {
"ecmaVersion": 2020
},
"plugins": [
"import"
],
"rules": {
"indent": [
"error",
4
],
"brace-style": [
"error",
"allman",
{ "allowSingleLine": true }
],
"linebreak-style": [
"error",
"unix"
],
"semi": [
"error",
"always"
],
"no-useless-escape": [
"off"
],
"no-control-regex": [
"off"
],
"no-empty": [
"off"
],
"no-process-exit": [
"off"
],
"node/shebang": [
"off"
],
"import/no-unresolved": [
2,
{ "commonjs": true }
]
}
};

356
mon/etcd_adapter.js Normal file
View File

@@ -0,0 +1,356 @@
// Copyright (c) Vitaliy Filippov, 2019+
// License: VNPL-1.1 (see README.md for details)
const http = require('http');
const WebSocket = require('ws');
const MON_STOPPED = 'Monitor instance is stopped';
class EtcdAdapter
{
constructor(mon)
{
this.mon = mon;
this.ws = null;
this.ws_alive = false;
this.ws_keepalive_timer = null;
}
parse_config(config)
{
this.parse_etcd_addresses(config.etcd_address||config.etcd_url);
}
parse_etcd_addresses(addrs)
{
const is_local_ip = this.mon.local_ips(true).reduce((a, c) => { a[c] = true; return a; }, {});
this.etcd_local = [];
this.etcd_urls = [];
this.selected_etcd_url = null;
this.etcd_urls_to_try = [];
if (!(addrs instanceof Array))
addrs = addrs ? (''+(addrs||'')).split(/,/) : [];
if (!addrs.length)
{
console.error('Vitastor etcd address(es) not specified. Please set on the command line or in the config file');
process.exit(1);
}
for (let url of addrs)
{
let scheme = 'http';
url = url.trim().replace(/^(https?):\/\//, (m, m1) => { scheme = m1; return ''; });
const slash = url.indexOf('/');
const colon = url.indexOf(':');
const is_local = is_local_ip[colon >= 0 ? url.substr(0, colon) : (slash >= 0 ? url.substr(0, slash) : url)];
url = scheme+'://'+(slash >= 0 ? url : url+'/v3');
if (is_local)
this.etcd_local.push(url);
else
this.etcd_urls.push(url);
}
}
pick_next_etcd()
{
if (this.selected_etcd_url)
return this.selected_etcd_url;
if (!this.etcd_urls_to_try || !this.etcd_urls_to_try.length)
{
this.etcd_urls_to_try = [ ...this.etcd_local ];
const others = [ ...this.etcd_urls ];
while (others.length)
{
const url = others.splice(0|(others.length*Math.random()), 1);
this.etcd_urls_to_try.push(url[0]);
}
}
this.selected_etcd_url = this.etcd_urls_to_try.shift();
return this.selected_etcd_url;
}
stop_watcher(cur_addr)
{
cur_addr = cur_addr || this.selected_etcd_url;
if (this.ws)
{
console.log('Disconnected from etcd at '+this.ws_used_url);
this.ws.close();
this.ws = null;
}
if (this.ws_keepalive_timer)
{
clearInterval(this.ws_keepalive_timer);
this.ws_keepalive_timer = null;
}
if (this.selected_etcd_url == cur_addr)
{
this.selected_etcd_url = null;
}
}
restart_watcher(cur_addr)
{
this.stop_watcher(cur_addr);
this.start_watcher(this.mon.config.etcd_mon_retries).catch(this.mon.die);
}
async start_watcher(retries)
{
let retry = 0;
if (!retries || retries < 1)
{
retries = 1;
}
const tried = {};
while (retries < 0 || retry < retries)
{
const cur_addr = this.pick_next_etcd();
const base = 'ws'+cur_addr.substr(4);
let now = Date.now();
if (tried[base] && now-tried[base] < this.mon.config.etcd_start_timeout)
{
await new Promise(ok => setTimeout(ok, this.mon.config.etcd_start_timeout-(now-tried[base])));
now = Date.now();
}
tried[base] = now;
if (this.mon.stopped)
{
return;
}
const ok = await new Promise(ok =>
{
const timer_id = setTimeout(() =>
{
if (this.ws)
{
console.log('Disconnected from etcd at '+this.ws_used_url);
this.ws.close();
this.ws = null;
}
ok(false);
}, this.mon.config.etcd_mon_timeout);
this.ws = new WebSocket(base+'/watch');
this.ws_used_url = cur_addr;
const fail = () =>
{
ok(false);
};
this.ws.on('error', fail);
this.ws.on('open', () =>
{
this.ws.removeListener('error', fail);
if (timer_id)
clearTimeout(timer_id);
ok(true);
});
});
if (ok)
break;
if (this.selected_etcd_url == cur_addr)
this.selected_etcd_url = null;
this.ws = null;
retry++;
}
if (!this.ws)
{
this.mon.die('Failed to open etcd watch websocket');
return;
}
if (this.mon.stopped)
{
this.stop_watcher();
return;
}
const cur_addr = this.selected_etcd_url;
this.ws_alive = true;
this.ws_keepalive_timer = setInterval(() =>
{
if (this.ws_alive && this.ws)
{
this.ws_alive = false;
this.ws.send(JSON.stringify({ progress_request: {} }));
}
else
{
console.log('etcd websocket timed out, restarting it');
this.restart_watcher(cur_addr);
}
}, (Number(this.mon.config.etcd_ws_keepalive_interval) || 5)*1000);
this.ws.on('error', () => this.restart_watcher(cur_addr));
this.ws.send(JSON.stringify({
create_request: {
key: b64(this.mon.config.etcd_prefix+'/'),
range_end: b64(this.mon.config.etcd_prefix+'0'),
start_revision: ''+this.mon.etcd_watch_revision,
watch_id: 1,
progress_notify: true,
},
}));
this.ws.on('message', (msg) =>
{
if (this.mon.stopped)
{
this.stop_watcher();
return;
}
this.ws_alive = true;
let data;
try
{
data = JSON.parse(msg);
}
catch (e)
{
}
if (!data || !data.result)
{
console.error('Unknown message received from watch websocket: '+msg);
}
else if (data.result.canceled)
{
// etcd watch canceled
if (data.result.compact_revision)
{
// we may miss events if we proceed
this.mon.die('Revisions before '+data.result.compact_revision+' were compacted by etcd, exiting');
}
this.mon.die('Watch canceled by etcd, reason: '+data.result.cancel_reason+', exiting');
}
else if (data.result.created)
{
// etcd watch created
console.log('Successfully subscribed to etcd at '+this.selected_etcd_url+', revision '+data.result.header.revision);
}
else
{
this.mon.on_message(data.result);
}
});
}
async become_master()
{
const state = { ...this.mon.get_mon_state(), id: ''+this.mon.etcd_lease_id };
// eslint-disable-next-line no-constant-condition
while (1)
{
const res = await this.etcd_call('/kv/txn', {
compare: [ { target: 'CREATE', create_revision: 0, key: b64(this.mon.config.etcd_prefix+'/mon/master') } ],
success: [ { requestPut: { key: b64(this.mon.config.etcd_prefix+'/mon/master'), value: b64(JSON.stringify(state)), lease: ''+this.mon.etcd_lease_id } } ],
}, this.mon.config.etcd_start_timeout, 0);
if (res.succeeded)
{
break;
}
console.log('Waiting to become master');
await new Promise(ok => setTimeout(ok, this.mon.config.etcd_start_timeout));
}
console.log('Became master');
}
async etcd_call(path, body, timeout, retries)
{
let retry = 0;
if (retries >= 0 && retries < 1)
{
retries = 1;
}
const tried = {};
while (retries < 0 || retry < retries)
{
retry++;
const base = this.pick_next_etcd();
let now = Date.now();
if (tried[base] && now-tried[base] < timeout)
{
await new Promise(ok => setTimeout(ok, timeout-(now-tried[base])));
now = Date.now();
}
tried[base] = now;
if (this.mon.stopped)
{
throw new Error(MON_STOPPED);
}
const res = await POST(base+path, body, timeout);
if (this.mon.stopped)
{
throw new Error(MON_STOPPED);
}
if (res.error)
{
if (this.selected_etcd_url == base)
this.selected_etcd_url = null;
console.error('Failed to query etcd '+path+' (retry '+retry+'/'+retries+'): '+res.error);
continue;
}
if (res.json)
{
if (res.json.error)
{
console.error(path+': etcd returned error: '+res.json.error);
break;
}
return res.json;
}
}
throw new Error('Failed to query etcd ('+retries+' retries)');
}
}
function POST(url, body, timeout)
{
return new Promise(ok =>
{
const body_text = Buffer.from(JSON.stringify(body));
let timer_id = timeout > 0 ? setTimeout(() =>
{
if (req)
req.abort();
req = null;
ok({ error: 'timeout' });
}, timeout) : null;
let req = http.request(url, { method: 'POST', headers: {
'Content-Type': 'application/json',
'Content-Length': body_text.length,
} }, (res) =>
{
if (!req)
{
return;
}
clearTimeout(timer_id);
let res_body = '';
res.setEncoding('utf8');
res.on('error', (error) => ok({ error }));
res.on('data', chunk => { res_body += chunk; });
res.on('end', () =>
{
if (res.statusCode != 200)
{
ok({ error: res_body, code: res.statusCode });
return;
}
try
{
res_body = JSON.parse(res_body);
ok({ response: res, json: res_body });
}
catch (e)
{
ok({ error: e, response: res, body: res_body });
}
});
});
req.on('error', (error) => ok({ error }));
req.on('close', () => ok({ error: new Error('Connection closed prematurely') }));
req.write(body_text);
req.end();
});
}
function b64(str)
{
return Buffer.from(str).toString('base64');
}
module.exports = EtcdAdapter;

391
mon/etcd_schema.js Normal file
View File

@@ -0,0 +1,391 @@
// Copyright (c) Vitaliy Filippov, 2019+
// License: VNPL-1.1 (see README.md for details)
// FIXME document all etcd keys and config variables in the form of JSON schema or similar
const etcd_nonempty_keys = {
'config/global': 1,
'config/node_placement': 1,
'config/pools': 1,
'config/pgs': 1,
'history/last_clean_pgs': 1,
'stats': 1,
};
const etcd_allow = new RegExp('^'+[
'config/global',
'config/node_placement',
'config/pools',
'config/osd/[1-9]\\d*',
'config/pgs',
'config/inode/[1-9]\\d*/[1-9]\\d*',
'osd/state/[1-9]\\d*',
'osd/stats/[1-9]\\d*',
'osd/inodestats/[1-9]\\d*',
'osd/space/[1-9]\\d*',
'mon/master',
'mon/member/[a-f0-9]+',
'pg/state/[1-9]\\d*/[1-9]\\d*',
'pg/stats/[1-9]\\d*/[1-9]\\d*',
'pg/history/[1-9]\\d*/[1-9]\\d*',
'history/last_clean_pgs',
'inode/stats/[1-9]\\d*/\\d+',
'pool/stats/[1-9]\\d*',
'stats',
'index/image/.*',
'index/maxid/[1-9]\\d*',
].join('$|^')+'$');
const etcd_tree = {
config: {
/* global: {
// WARNING: NOT ALL OF THESE ARE ACTUALLY CONFIGURABLE HERE
// THIS IS JUST A POOR MAN'S CONFIG DOCUMENTATION
// etcd connection
config_path: "/etc/vitastor/vitastor.conf",
etcd_prefix: "/vitastor",
// etcd connection - configurable online
etcd_address: "10.0.115.10:2379/v3",
// mon
etcd_mon_ttl: 5, // min: 1
etcd_mon_timeout: 1000, // ms. min: 0
etcd_mon_retries: 5, // min: 0
mon_change_timeout: 1000, // ms. min: 100
mon_retry_change_timeout: 50, // ms. min: 10
mon_stats_timeout: 1000, // ms. min: 100
osd_out_time: 600, // seconds. min: 0
placement_levels: { datacenter: 1, rack: 2, host: 3, osd: 4, ... },
use_old_pg_combinator: false,
// client and osd
tcp_header_buffer_size: 65536,
use_sync_send_recv: false,
use_rdma: true,
rdma_device: null, // for example, "rocep5s0f0"
rdma_port_num: 1,
rdma_gid_index: 0,
rdma_mtu: 4096,
rdma_max_sge: 128,
rdma_max_send: 8,
rdma_max_recv: 16,
rdma_max_msg: 132096,
block_size: 131072,
disk_alignment: 4096,
bitmap_granularity: 4096,
immediate_commit: false, // 'all' or 'small'
// client - configurable online
client_max_dirty_bytes: 33554432,
client_max_dirty_ops: 1024,
client_enable_writeback: false,
client_max_buffered_bytes: 33554432,
client_max_buffered_ops: 1024,
client_max_writeback_iodepth: 256,
client_retry_interval: 50, // ms. min: 10
client_eio_retry_interval: 1000, // ms
client_retry_enospc: true,
osd_nearfull_ratio: 0.95,
// client and osd - configurable online
log_level: 0,
peer_connect_interval: 5, // seconds. min: 1
peer_connect_timeout: 5, // seconds. min: 1
osd_idle_timeout: 5, // seconds. min: 1
osd_ping_timeout: 5, // seconds. min: 1
max_etcd_attempts: 5,
etcd_quick_timeout: 1000, // ms
etcd_slow_timeout: 5000, // ms
etcd_keepalive_timeout: 30, // seconds, default is max(30, etcd_report_interval*2)
etcd_ws_keepalive_interval: 5, // seconds
// osd
etcd_report_interval: 5, // seconds
etcd_stats_interval: 30, // seconds
run_primary: true,
osd_network: null, // "192.168.7.0/24" or an array of masks
bind_address: "0.0.0.0",
bind_port: 0,
readonly: false,
osd_memlock: false,
// osd - configurable online
autosync_interval: 5,
autosync_writes: 128,
client_queue_depth: 128, // unused
recovery_queue_depth: 1,
recovery_sleep_us: 0,
recovery_tune_util_low: 0.1,
recovery_tune_client_util_low: 0,
recovery_tune_util_high: 1.0,
recovery_tune_client_util_high: 0.5,
recovery_tune_interval: 1,
recovery_tune_agg_interval: 10, // 10 times recovery_tune_interval
recovery_tune_sleep_min_us: 10, // 10 microseconds
recovery_pg_switch: 128,
recovery_sync_batch: 16,
no_recovery: false,
no_rebalance: false,
print_stats_interval: 3,
slow_log_interval: 10,
inode_vanish_time: 60,
auto_scrub: false,
no_scrub: false,
scrub_interval: '30d', // 1s/1m/1h/1d
scrub_queue_depth: 1,
scrub_sleep: 0, // milliseconds
scrub_list_limit: 1000, // objects to list on one scrub iteration
scrub_find_best: true,
scrub_ec_max_bruteforce: 100, // maximum EC error locator brute-force iterators
// blockstore - fixed in superblock
block_size,
disk_alignment,
journal_block_size,
meta_block_size,
bitmap_granularity,
journal_device,
journal_offset,
journal_size,
disable_journal_fsync,
data_device,
data_offset,
data_size,
disable_data_fsync,
meta_device,
meta_offset,
disable_meta_fsync,
disable_device_lock,
// blockstore - configurable offline
inmemory_metadata,
inmemory_journal,
journal_sector_buffer_count,
journal_no_same_sector_overwrites,
// blockstore - configurable online
max_write_iodepth,
min_flusher_count: 1,
max_flusher_count: 256,
throttle_small_writes: false,
throttle_target_iops: 100,
throttle_target_mbs: 100,
throttle_target_parallelism: 1,
throttle_threshold_us: 50,
}, */
global: {},
/* node_placement: {
host1: { level: 'host', parent: 'rack1' },
...
}, */
node_placement: {},
/* pools: {
<id>: {
name: 'testpool',
// 'ec' uses Reed-Solomon-Vandermonde codes, 'jerasure' is an alias for 'ec'
scheme: 'replicated' | 'xor' | 'ec' | 'jerasure',
pg_size: 3,
pg_minsize: 2,
// number of parity chunks, required for EC
parity_chunks?: 1,
pg_count: 100,
// default is failure_domain=host
failure_domain?: 'host',
// additional failure domain rules; failure_domain=x is equivalent to x=123..N
level_placement?: 'dc=112233 host=123456',
raw_placement?: 'any, dc=1 host!=1, dc=1 host!=(1,2)',
old_combinator: false,
max_osd_combinations: 10000,
// block_size, bitmap_granularity, immediate_commit must match all OSDs used in that pool
block_size: 131072,
bitmap_granularity: 4096,
// 'all'/'small'/'none', same as in OSD options
immediate_commit: 'none',
pg_stripe_size: 0,
root_node?: 'rack1',
// restrict pool to OSDs having all of these tags
osd_tags?: 'nvme' | [ 'nvme', ... ],
// prefer to put primary on OSD with these tags
primary_affinity_tags?: 'nvme' | [ 'nvme', ... ],
// scrub interval
scrub_interval?: '30d',
},
...
}, */
pools: {},
osd: {
/* <id>: { reweight?: 1, tags?: [ 'nvme', ... ], noout?: true }, ... */
},
/* pgs: {
hash: string,
items: {
<pool_id>: {
<pg_id>: {
osd_set: [ 1, 2, 3 ],
primary: 1,
pause: false,
}
}
}
}, */
pgs: {},
/* inode: {
<pool_id>: {
<inode_t>: {
name: string,
size?: uint64_t, // bytes
parent_pool?: <pool_id>,
parent_id?: <inode_t>,
readonly?: boolean,
}
}
}, */
inode: {},
},
osd: {
state: {
/* <osd_num_t>: {
state: "up",
addresses: string[],
host: string,
port: uint16_t,
primary_enabled: boolean,
blockstore_enabled: boolean,
}, */
},
stats: {
/* <osd_num_t>: {
time: number, // unix time
blockstore_ready: boolean,
size: uint64_t, // bytes
free: uint64_t, // bytes
host: string,
op_stats: {
<string>: { count: uint64_t, usec: uint64_t, bytes: uint64_t },
},
subop_stats: {
<string>: { count: uint64_t, usec: uint64_t },
},
recovery_stats: {
degraded: { count: uint64_t, bytes: uint64_t },
misplaced: { count: uint64_t, bytes: uint64_t },
},
}, */
},
inodestats: {
/* <pool_id>: {
<inode_t>: {
read: { count: uint64_t, usec: uint64_t, bytes: uint64_t },
write: { count: uint64_t, usec: uint64_t, bytes: uint64_t },
delete: { count: uint64_t, usec: uint64_t, bytes: uint64_t },
},
}, */
},
space: {
/* <osd_num_t>: {
<pool_id>: {
<inode_t>: uint64_t, // bytes
},
}, */
},
},
mon: {
master: {
/* ip: [ string ], id: uint64_t */
},
standby: {
/* <uint64_t>: { ip: [ string ] }, */
},
},
pg: {
state: {
/* <pool_id>: {
<pg_id>: {
primary: osd_num_t,
state: ("starting"|"peering"|"incomplete"|"active"|"repeering"|"stopping"|"offline"|
"degraded"|"has_incomplete"|"has_degraded"|"has_misplaced"|"has_unclean"|
"has_invalid"|"has_inconsistent"|"has_corrupted"|"left_on_dead"|"scrubbing")[],
}
}, */
},
stats: {
/* <pool_id>: {
<pg_id>: {
object_count: uint64_t,
clean_count: uint64_t,
misplaced_count: uint64_t,
degraded_count: uint64_t,
incomplete_count: uint64_t,
write_osd_set: osd_num_t[],
},
}, */
},
history: {
/* <pool_id>: {
<pg_id>: {
osd_sets: osd_num_t[][],
all_peers: osd_num_t[],
epoch: uint64_t,
next_scrub: uint64_t,
},
}, */
},
},
inode: {
stats: {
/* <pool_id>: {
<inode_t>: {
raw_used: uint64_t, // raw used bytes on OSDs
read: { count: uint64_t, usec: uint64_t, bytes: uint64_t, bps: uint64_t, iops: uint64_t, lat: uint64_t },
write: { count: uint64_t, usec: uint64_t, bytes: uint64_t, bps: uint64_t, iops: uint64_t, lat: uint64_t },
delete: { count: uint64_t, usec: uint64_t, bytes: uint64_t, bps: uint64_t, iops: uint64_t, lat: uint64_t },
},
}, */
},
},
pool: {
stats: {
/* <pool_id>: {
used_raw_tb: float, // used raw space in the pool
total_raw_tb: float, // maximum amount of space in the pool
raw_to_usable: float, // raw to usable ratio
space_efficiency: float, // 0..1
} */
},
},
stats: {
/* op_stats: {
<string>: { count: uint64_t, usec: uint64_t, bytes: uint64_t, bps: uint64_t, iops: uint64_t, lat: uint64_t },
},
subop_stats: {
<string>: { count: uint64_t, usec: uint64_t, iops: uint64_t, lat: uint64_t },
},
recovery_stats: {
degraded: { count: uint64_t, bytes: uint64_t, bps: uint64_t, iops: uint64_t },
misplaced: { count: uint64_t, bytes: uint64_t, bps: uint64_t, iops: uint64_t },
},
object_counts: {
object: uint64_t,
clean: uint64_t,
misplaced: uint64_t,
degraded: uint64_t,
incomplete: uint64_t,
},
object_bytes: {
total: uint64_t,
clean: uint64_t,
misplaced: uint64_t,
degraded: uint64_t,
incomplete: uint64_t,
}, */
},
history: {
last_clean_pgs: {},
},
index: {
image: {
/* <name>: {
id: uint64_t,
pool_id: uint64_t,
}, */
},
maxid: {
/* <pool_id>: uint64_t, */
},
},
};
module.exports = {
etcd_nonempty_keys,
etcd_allow,
etcd_tree,
};

View File

@@ -152,6 +152,7 @@ function parse_pg_dsl(text)
else if (rule[2] === '(')
{
rule[2] = [];
// eslint-disable-next-line no-constant-condition
while (true)
{
if (i > tokens.length-1)

View File

@@ -77,7 +77,7 @@ async function optimize_initial({ osd_weights, combinator, pg_count, pg_size = 3
{
if (osd !== NO_OSD)
{
let osd_pg_count = (osd_weights[osd]||0)/total_weight*pg_effsize*pg_count;
let osd_pg_count = osd_weights[osd]/total_weight*pg_effsize*pg_count;
lp += pg_per_osd[osd].join(' + ')+' <= '+osd_pg_count+';\n';
}
}
@@ -215,7 +215,7 @@ function calc_intersect_weights(old_pg_size, pg_size, pg_count, prev_weights, al
{
const intersect_count = ordered
? pg.reduce((a, osd, i) => a + (prev_hash[osd] == 1+i ? 1 : 0), 0)
: pg.reduce((a, osd, i) => a + (prev_hash[osd] ? 1 : 0), 0);
: pg.reduce((a, osd) => a + (prev_hash[osd] ? 1 : 0), 0);
if (max_int < intersect_count)
{
max_int = intersect_count;
@@ -299,7 +299,7 @@ async function optimize_change({ prev_pgs: prev_int_pgs, osd_weights, combinator
)).join(' + ');
const rm_osd_pg_count = (prev_pg_per_osd[osd]||[])
.reduce((a, [ old_pg_name, space ]) => (a + (all_pgs_hash[old_pg_name] ? space : 0)), 0);
const osd_pg_count = (osd_weights[osd]||0)*pg_effsize/total_weight*pg_count - rm_osd_pg_count;
const osd_pg_count = osd_weights[osd]*pg_effsize/total_weight*pg_count - rm_osd_pg_count;
lp += osd_sum + ' <= ' + osd_pg_count + ';\n';
}
}

View File

@@ -198,7 +198,6 @@ function all_combinations(osd_tree, pg_size, ordered, count)
function check_combinations(osd_tree, pgs)
{
const hosts = Object.keys(osd_tree).sort();
const host_per_osd = {};
for (const host in osd_tree)
{
@@ -235,6 +234,7 @@ function compat(params)
module.exports = {
flatten_tree,
all_combinations,
SimpleCombinator,
compat,
NO_OSD,

View File

@@ -8,7 +8,7 @@
// But we support this case with the "parity_space" parameter in optimize_initial()/optimize_change().
const { SimpleCombinator } = require('./simple_pgs.js');
const LPOptimizer = require('./lp-optimizer.js');
const LPOptimizer = require('./lp_optimizer.js');
const osd_tree = {
ripper5: {

View File

@@ -2,7 +2,7 @@
// License: VNPL-1.1 (see README.md for details)
const { compat } = require('./simple_pgs.js');
const LPOptimizer = require('./lp-optimizer.js');
const LPOptimizer = require('./lp_optimizer.js');
async function run()
{

View File

@@ -2,7 +2,7 @@
// License: VNPL-1.1 (see README.md for details)
const { compat, flatten_tree } = require('./simple_pgs.js');
const LPOptimizer = require('./lp-optimizer.js');
const LPOptimizer = require('./lp_optimizer.js');
const crush_tree = [
{ level: 1, children: [

View File

@@ -2,7 +2,7 @@
// License: VNPL-1.1 (see README.md for details)
const { compat } = require('./simple_pgs.js');
const LPOptimizer = require('./lp-optimizer.js');
const LPOptimizer = require('./lp_optimizer.js');
const osd_tree = {
100: {

View File

@@ -2,7 +2,7 @@
// License: VNPL-1.1 (see README.md for details)
const { compat, flatten_tree } = require('./simple_pgs.js');
const LPOptimizer = require('./lp-optimizer.js');
const LPOptimizer = require('./lp_optimizer.js');
const osd_tree = {
100: {
@@ -20,7 +20,7 @@ const osd_tree = {
},
500: {
4: 3.58498,
// 8: 3.58589,
/*8: 3.58589,*/
9: 3.63869,
},
600: {

View File

@@ -23,4 +23,4 @@ for (let i = 2; i < process.argv.length; i++)
}
}
new Mon(options).start().catch(e => { console.error(e); process.exit(1); });
Mon.run_forever(options);

1856
mon/mon.js

File diff suppressed because it is too large Load Diff

215
mon/osd_tree.js Normal file
View File

@@ -0,0 +1,215 @@
// Copyright (c) Vitaliy Filippov, 2019+
// License: VNPL-1.1 (see README.md for details)
function get_osd_tree(global_config, state)
{
const levels = global_config.placement_levels||{};
levels.host = levels.host || 100;
levels.osd = levels.osd || 101;
const tree = {};
let up_osds = {};
// This requires monitor system time to be in sync with OSD system times (at least to some extent)
const down_time = Date.now()/1000 - global_config.osd_out_time;
for (const osd_num of Object.keys(state.osd.stats).sort((a, b) => a - b))
{
const stat = state.osd.stats[osd_num];
const osd_cfg = state.config.osd[osd_num];
let reweight = osd_cfg == null ? 1 : Number(osd_cfg.reweight);
if (reweight < 0 || isNaN(reweight))
reweight = 1;
if (stat && stat.size && reweight && (state.osd.state[osd_num] || Number(stat.time) >= down_time ||
osd_cfg && osd_cfg.noout))
{
// Numeric IDs are reserved for OSDs
if (state.osd.state[osd_num] && reweight > 0)
{
// React to down OSDs immediately
up_osds[osd_num] = true;
}
tree[osd_num] = tree[osd_num] || {};
tree[osd_num].id = osd_num;
tree[osd_num].parent = tree[osd_num].parent || stat.host;
tree[osd_num].level = 'osd';
tree[osd_num].size = reweight * stat.size / 1024 / 1024 / 1024 / 1024; // terabytes
if (osd_cfg && osd_cfg.tags)
{
tree[osd_num].tags = (osd_cfg.tags instanceof Array ? [ ...osd_cfg.tags ] : [ osd_cfg.tags ])
.reduce((a, c) => { a[c] = true; return a; }, {});
}
delete tree[osd_num].children;
if (!tree[stat.host])
{
tree[stat.host] = {
id: stat.host,
level: 'host',
parent: null,
children: [],
};
}
}
}
for (const node_id in state.config.node_placement||{})
{
const node_cfg = state.config.node_placement[node_id];
if (/^\d+$/.exec(node_id))
{
node_cfg.level = 'osd';
}
if (!node_id || !node_cfg.level || !levels[node_cfg.level] ||
node_cfg.level === 'osd' && !tree[node_id])
{
// All nodes must have non-empty IDs and valid levels
// OSDs have to actually exist
continue;
}
tree[node_id] = tree[node_id] || {};
tree[node_id].id = node_id;
tree[node_id].level = node_cfg.level;
tree[node_id].parent = node_cfg.parent;
if (node_cfg.level !== 'osd')
{
tree[node_id].children = [];
}
}
return { up_osds, levels, osd_tree: tree };
}
function make_hier_tree(global_config, tree)
{
const levels = global_config.placement_levels||{};
levels.host = levels.host || 100;
levels.osd = levels.osd || 101;
tree = { ...tree };
for (const node_id in tree)
{
tree[node_id] = { ...tree[node_id], children: [] };
}
tree[''] = { children: [] };
for (const node_id in tree)
{
if (node_id === '' || tree[node_id].level === 'osd' && (!tree[node_id].size || tree[node_id].size <= 0))
{
continue;
}
const node_cfg = tree[node_id];
const node_level = levels[node_cfg.level] || node_cfg.level;
let parent_level = node_cfg.parent && tree[node_cfg.parent] && tree[node_cfg.parent].children
&& tree[node_cfg.parent].level;
parent_level = parent_level ? (levels[parent_level] || parent_level) : null;
// Parent's level must be less than child's; OSDs must be leaves
const parent = parent_level && parent_level < node_level ? node_cfg.parent : '';
tree[parent].children.push(tree[node_id]);
}
// Delete empty nodes
let deleted = 0;
do
{
deleted = 0;
for (const node_id in tree)
{
if (tree[node_id].level !== 'osd' && (!tree[node_id].children || !tree[node_id].children.length))
{
const parent = tree[node_id].parent;
if (parent)
{
tree[parent].children = tree[parent].children.filter(c => c != tree[node_id]);
}
deleted++;
delete tree[node_id];
}
}
} while (deleted > 0);
return tree;
}
function filter_osds_by_root_node(global_config, pool_tree, root_node)
{
if (!root_node)
{
return;
}
let hier_tree = make_hier_tree(global_config, pool_tree);
let included = [ ...(hier_tree[root_node] || {}).children||[] ];
for (let i = 0; i < included.length; i++)
{
if (included[i].children)
{
included.splice(i+1, 0, ...included[i].children);
}
}
let cur = pool_tree[root_node] || {};
while (cur && cur.id)
{
included.unshift(cur);
cur = pool_tree[cur.parent||''];
}
included = included.reduce((a, c) => { a[c.id||''] = true; return a; }, {});
for (const item in pool_tree)
{
if (!included[item])
{
delete pool_tree[item];
}
}
}
function filter_osds_by_tags(orig_tree, tags)
{
if (!tags)
{
return;
}
for (const tag of (tags instanceof Array ? tags : [ tags ]))
{
for (const osd in orig_tree)
{
if (orig_tree[osd].level === 'osd' &&
(!orig_tree[osd].tags || !orig_tree[osd].tags[tag]))
{
delete orig_tree[osd];
}
}
}
}
function filter_osds_by_block_layout(orig_tree, osd_stats, block_size, bitmap_granularity, immediate_commit)
{
for (const osd in orig_tree)
{
if (orig_tree[osd].level === 'osd')
{
const osd_stat = osd_stats[osd];
if (osd_stat && (osd_stat.bs_block_size && osd_stat.bs_block_size != block_size ||
osd_stat.bitmap_granularity && osd_stat.bitmap_granularity != bitmap_granularity ||
osd_stat.immediate_commit == 'small' && immediate_commit == 'all' ||
osd_stat.immediate_commit == 'none' && immediate_commit != 'none'))
{
delete orig_tree[osd];
}
}
}
}
function get_affinity_osds(pool_cfg, up_osds, osd_tree)
{
let aff_osds = up_osds;
if (pool_cfg.primary_affinity_tags)
{
aff_osds = Object.keys(up_osds).reduce((a, c) => { a[c] = osd_tree[c]; return a; }, {});
filter_osds_by_tags(aff_osds, pool_cfg.primary_affinity_tags);
for (const osd in aff_osds)
{
aff_osds[osd] = true;
}
}
return aff_osds;
}
module.exports = {
get_osd_tree,
make_hier_tree,
filter_osds_by_root_node,
filter_osds_by_tags,
filter_osds_by_block_layout,
get_affinity_osds,
};

View File

@@ -1,15 +1,23 @@
{
"name": "vitastor-mon",
"version": "1.6.0",
"version": "1.6.1",
"description": "Vitastor SDS monitor service",
"main": "mon-main.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
"lint": "eslint *.js lp_optimizer/*.js scripts/*.js"
},
"author": "Vitaliy Filippov",
"license": "UNLICENSED",
"dependencies": {
"sprintf-js": "^1.1.2",
"ws": "^7.2.5"
},
"devDependencies": {
"eslint": "^8.0.0",
"eslint-plugin-import": "^2.29.1",
"eslint-plugin-node": "^11.1.0"
},
"engines": {
"node": ">=12.0.0"
}
}

267
mon/pg_gen.js Normal file
View File

@@ -0,0 +1,267 @@
// Copyright (c) Vitaliy Filippov, 2019+
// License: VNPL-1.1 (see README.md for details)
const { RuleCombinator } = require('./lp_optimizer/dsl_pgs.js');
const { SimpleCombinator, flatten_tree } = require('./lp_optimizer/simple_pgs.js');
const { validate_pool_cfg, get_pg_rules } = require('./pool_config.js');
const LPOptimizer = require('./lp_optimizer/lp_optimizer.js');
const { scale_pg_count } = require('./pg_utils.js');
const { make_hier_tree, filter_osds_by_root_node,
filter_osds_by_tags, filter_osds_by_block_layout, get_affinity_osds } = require('./osd_tree.js');
let seed;
function reset_rng()
{
seed = 0x5f020e43;
}
function rng()
{
seed ^= seed << 13;
seed ^= seed >> 17;
seed ^= seed << 5;
return seed + 2147483648;
}
function pick_primary(pool_config, osd_set, up_osds, aff_osds)
{
let alive_set;
if (pool_config.scheme === 'replicated')
{
// Prefer "affinity" OSDs
alive_set = osd_set.filter(osd_num => osd_num && aff_osds[osd_num]);
if (!alive_set.length)
alive_set = osd_set.filter(osd_num => osd_num && up_osds[osd_num]);
}
else
{
// Prefer data OSDs for EC because they can actually read something without an additional network hop
const pg_data_size = (pool_config.pg_size||0) - (pool_config.parity_chunks||0);
alive_set = osd_set.slice(0, pg_data_size).filter(osd_num => osd_num && aff_osds[osd_num]);
if (!alive_set.length)
alive_set = osd_set.filter(osd_num => osd_num && aff_osds[osd_num]);
if (!alive_set.length)
{
alive_set = osd_set.slice(0, pg_data_size).filter(osd_num => osd_num && up_osds[osd_num]);
if (!alive_set.length)
alive_set = osd_set.filter(osd_num => osd_num && up_osds[osd_num]);
}
}
if (!alive_set.length)
{
return 0;
}
return alive_set[rng() % alive_set.length];
}
function recheck_primary(state, global_config, up_osds, osd_tree)
{
let new_config_pgs;
for (const pool_id in state.config.pools)
{
const pool_cfg = state.config.pools[pool_id];
if (!validate_pool_cfg(pool_id, pool_cfg, global_config.placement_levels, false))
{
continue;
}
const aff_osds = get_affinity_osds(pool_cfg, up_osds, osd_tree);
reset_rng();
for (let pg_num = 1; pg_num <= pool_cfg.pg_count; pg_num++)
{
if (!state.config.pgs.items[pool_id])
{
continue;
}
const pg_cfg = state.config.pgs.items[pool_id][pg_num];
if (pg_cfg)
{
const new_primary = pick_primary(state.config.pools[pool_id], pg_cfg.osd_set, up_osds, aff_osds);
if (pg_cfg.primary != new_primary)
{
if (!new_config_pgs)
{
new_config_pgs = JSON.parse(JSON.stringify(state.config.pgs));
}
console.log(
`Moving pool ${pool_id} (${pool_cfg.name || 'unnamed'}) PG ${pg_num}`+
` primary OSD from ${pg_cfg.primary} to ${new_primary}`
);
new_config_pgs.items[pool_id][pg_num].primary = new_primary;
}
}
}
}
return new_config_pgs;
}
function save_new_pgs_txn(save_to, request, state, etcd_prefix, etcd_watch_revision, pool_id, up_osds, osd_tree, prev_pgs, new_pgs, pg_history)
{
const aff_osds = get_affinity_osds(state.config.pools[pool_id] || {}, up_osds, osd_tree);
const pg_items = {};
reset_rng();
new_pgs.map((osd_set, i) =>
{
osd_set = osd_set.map(osd_num => osd_num === LPOptimizer.NO_OSD ? 0 : osd_num);
pg_items[i+1] = {
osd_set,
primary: pick_primary(state.config.pools[pool_id], osd_set, up_osds, aff_osds),
};
if (prev_pgs[i] && prev_pgs[i].join(' ') != osd_set.join(' ') &&
prev_pgs[i].filter(osd_num => osd_num).length > 0)
{
pg_history[i] = pg_history[i] || {};
pg_history[i].osd_sets = pg_history[i].osd_sets || [];
pg_history[i].osd_sets.push(prev_pgs[i]);
}
if (pg_history[i] && pg_history[i].osd_sets)
{
pg_history[i].osd_sets = Object.values(pg_history[i].osd_sets
.reduce((a, c) => { a[c.join(' ')] = c; return a; }, {}));
}
});
for (let i = 0; i < new_pgs.length || i < prev_pgs.length; i++)
{
// FIXME: etcd has max_txn_ops limit, and it's 128 by default
// Sooo we probably want to change our storage scheme for PG histories...
request.compare.push({
key: b64(etcd_prefix+'/pg/history/'+pool_id+'/'+(i+1)),
target: 'MOD',
mod_revision: ''+etcd_watch_revision,
result: 'LESS',
});
if (pg_history[i])
{
request.success.push({
requestPut: {
key: b64(etcd_prefix+'/pg/history/'+pool_id+'/'+(i+1)),
value: b64(JSON.stringify(pg_history[i])),
},
});
}
else
{
request.success.push({
requestDeleteRange: {
key: b64(etcd_prefix+'/pg/history/'+pool_id+'/'+(i+1)),
},
});
}
}
save_to.items = save_to.items || {};
if (!new_pgs.length)
{
delete save_to.items[pool_id];
}
else
{
save_to.items[pool_id] = pg_items;
}
}
async function generate_pool_pgs(state, global_config, pool_id, osd_tree, levels)
{
const pool_cfg = state.config.pools[pool_id];
if (!validate_pool_cfg(pool_id, pool_cfg, global_config.placement_levels, false))
{
return null;
}
let pool_tree = { ...osd_tree };
filter_osds_by_root_node(global_config, pool_tree, pool_cfg.root_node);
filter_osds_by_tags(pool_tree, pool_cfg.osd_tags);
filter_osds_by_block_layout(
pool_tree,
state.osd.stats,
pool_cfg.block_size || global_config.block_size || 131072,
pool_cfg.bitmap_granularity || global_config.bitmap_granularity || 4096,
pool_cfg.immediate_commit || global_config.immediate_commit || 'none'
);
pool_tree = make_hier_tree(global_config, pool_tree);
// First try last_clean_pgs to minimize data movement
let prev_pgs = [];
for (const pg in ((state.history.last_clean_pgs.items||{})[pool_id]||{}))
{
prev_pgs[pg-1] = [ ...state.history.last_clean_pgs.items[pool_id][pg].osd_set ];
}
if (!prev_pgs.length)
{
// Fall back to config/pgs if it's empty
for (const pg in ((state.config.pgs.items||{})[pool_id]||{}))
{
prev_pgs[pg-1] = [ ...state.config.pgs.items[pool_id][pg].osd_set ];
}
}
const old_pg_count = prev_pgs.length;
const optimize_cfg = {
osd_weights: Object.values(pool_tree).filter(item => item.level === 'osd').reduce((a, c) => { a[c.id] = c.size; return a; }, {}),
combinator: !global_config.use_old_pg_combinator || pool_cfg.level_placement || pool_cfg.raw_placement
// new algorithm:
? new RuleCombinator(pool_tree, get_pg_rules(pool_id, pool_cfg, global_config.placement_levels), pool_cfg.max_osd_combinations)
// old algorithm:
: new SimpleCombinator(flatten_tree(pool_tree[''].children, levels, pool_cfg.failure_domain, 'osd'), pool_cfg.pg_size, pool_cfg.max_osd_combinations),
pg_count: pool_cfg.pg_count,
pg_size: pool_cfg.pg_size,
pg_minsize: pool_cfg.pg_minsize,
ordered: pool_cfg.scheme != 'replicated',
};
let optimize_result;
// Re-shuffle PGs if config/pgs.hash is empty
if (old_pg_count > 0 && state.config.pgs.hash)
{
if (prev_pgs.length != pool_cfg.pg_count)
{
// Scale PG count
// Do it even if old_pg_count is already equal to pool_cfg.pg_count,
// because last_clean_pgs may still contain the old number of PGs
scale_pg_count(prev_pgs, pool_cfg.pg_count);
}
for (const pg of prev_pgs)
{
while (pg.length < pool_cfg.pg_size)
{
pg.push(0);
}
}
optimize_result = await LPOptimizer.optimize_change({
prev_pgs,
...optimize_cfg,
});
}
else
{
optimize_result = await LPOptimizer.optimize_initial(optimize_cfg);
}
console.log(`Pool ${pool_id} (${pool_cfg.name || 'unnamed'}):`);
LPOptimizer.print_change_stats(optimize_result);
let pg_effsize = pool_cfg.pg_size;
for (const pg of optimize_result.int_pgs)
{
const this_pg_size = pg.filter(osd => osd != LPOptimizer.NO_OSD).length;
if (this_pg_size && this_pg_size < pg_effsize)
{
pg_effsize = this_pg_size;
}
}
return {
pool_id,
pgs: optimize_result.int_pgs,
stats: {
total_raw_tb: optimize_result.space,
pg_real_size: pg_effsize || pool_cfg.pg_size,
raw_to_usable: (pg_effsize || pool_cfg.pg_size) / (pool_cfg.scheme === 'replicated'
? 1 : (pool_cfg.pg_size - (pool_cfg.parity_chunks||0))),
space_efficiency: optimize_result.space/(optimize_result.total_space||1),
},
};
}
function b64(str)
{
return Buffer.from(str).toString('base64');
}
module.exports = {
recheck_primary,
save_new_pgs_txn,
generate_pool_pgs,
};

View File

@@ -97,7 +97,6 @@ function scale_pg_history(prev_pg_history, prev_pgs, new_pgs)
function scale_pg_count(prev_pgs, new_pg_count)
{
const old_pg_count = prev_pgs.length;
// Just for the lp_solve optimizer - pick a "previous" PG for each "new" one
if (prev_pgs.length < new_pg_count)
{

169
mon/pool_config.js Normal file
View File

@@ -0,0 +1,169 @@
// Copyright (c) Vitaliy Filippov, 2019+
// License: VNPL-1.1 (see README.md for details)
const { parse_level_indexes, parse_pg_dsl } = require('./lp_optimizer/dsl_pgs.js');
function validate_pool_cfg(pool_id, pool_cfg, placement_levels, warn)
{
pool_cfg.pg_size = Math.floor(pool_cfg.pg_size);
pool_cfg.pg_minsize = Math.floor(pool_cfg.pg_minsize);
pool_cfg.parity_chunks = Math.floor(pool_cfg.parity_chunks) || undefined;
pool_cfg.pg_count = Math.floor(pool_cfg.pg_count);
pool_cfg.max_osd_combinations = Math.floor(pool_cfg.max_osd_combinations) || 10000;
if (!/^[1-9]\d*$/.exec(''+pool_id))
{
if (warn)
console.log('Pool ID '+pool_id+' is invalid');
return false;
}
if (pool_cfg.scheme !== 'xor' && pool_cfg.scheme !== 'replicated' &&
pool_cfg.scheme !== 'ec' && pool_cfg.scheme !== 'jerasure')
{
if (warn)
console.log('Pool '+pool_id+' has invalid coding scheme (one of "xor", "replicated", "ec" and "jerasure" required)');
return false;
}
if (!pool_cfg.pg_size || pool_cfg.pg_size < 1 || pool_cfg.pg_size > 256 ||
pool_cfg.scheme !== 'replicated' && pool_cfg.pg_size < 3)
{
if (warn)
console.log('Pool '+pool_id+' has invalid pg_size');
return false;
}
if (!pool_cfg.pg_minsize || pool_cfg.pg_minsize < 1 || pool_cfg.pg_minsize > pool_cfg.pg_size ||
pool_cfg.scheme === 'xor' && pool_cfg.pg_minsize < (pool_cfg.pg_size - 1))
{
if (warn)
console.log('Pool '+pool_id+' has invalid pg_minsize');
return false;
}
if (pool_cfg.scheme === 'xor' && pool_cfg.parity_chunks != 0 && pool_cfg.parity_chunks != 1)
{
if (warn)
console.log('Pool '+pool_id+' has invalid parity_chunks (must be 1)');
return false;
}
if ((pool_cfg.scheme === 'ec' || pool_cfg.scheme === 'jerasure') &&
(pool_cfg.parity_chunks < 1 || pool_cfg.parity_chunks > pool_cfg.pg_size-2))
{
if (warn)
console.log('Pool '+pool_id+' has invalid parity_chunks (must be between 1 and pg_size-2)');
return false;
}
if (!pool_cfg.pg_count || pool_cfg.pg_count < 1)
{
if (warn)
console.log('Pool '+pool_id+' has invalid pg_count');
return false;
}
if (!pool_cfg.name)
{
if (warn)
console.log('Pool '+pool_id+' has empty name');
return false;
}
if (pool_cfg.max_osd_combinations < 100)
{
if (warn)
console.log('Pool '+pool_id+' has invalid max_osd_combinations (must be at least 100)');
return false;
}
if (pool_cfg.root_node && typeof(pool_cfg.root_node) != 'string')
{
if (warn)
console.log('Pool '+pool_id+' has invalid root_node (must be a string)');
return false;
}
if (pool_cfg.osd_tags && typeof(pool_cfg.osd_tags) != 'string' &&
(!(pool_cfg.osd_tags instanceof Array) || pool_cfg.osd_tags.filter(t => typeof t != 'string').length > 0))
{
if (warn)
console.log('Pool '+pool_id+' has invalid osd_tags (must be a string or array of strings)');
return false;
}
if (pool_cfg.primary_affinity_tags && typeof(pool_cfg.primary_affinity_tags) != 'string' &&
(!(pool_cfg.primary_affinity_tags instanceof Array) || pool_cfg.primary_affinity_tags.filter(t => typeof t != 'string').length > 0))
{
if (warn)
console.log('Pool '+pool_id+' has invalid primary_affinity_tags (must be a string or array of strings)');
return false;
}
if (!get_pg_rules(pool_id, pool_cfg, placement_levels, true))
{
return false;
}
return true;
}
function get_pg_rules(pool_id, pool_cfg, placement_levels, warn)
{
if (pool_cfg.level_placement)
{
const pg_size = (0|pool_cfg.pg_size);
let rules = pool_cfg.level_placement;
if (typeof rules === 'string')
{
rules = rules.split(/\s+/).map(s => s.split(/=/, 2)).reduce((a, c) => { a[c[0]] = c[1]; return a; }, {});
}
else
{
rules = { ...rules };
}
// Always add failure_domain to prevent rules from being totally incorrect
const all_diff = [];
for (let i = 1; i <= pg_size; i++)
{
all_diff.push(i);
}
rules[pool_cfg.failure_domain || 'host'] = all_diff;
placement_levels = placement_levels||{};
placement_levels.host = placement_levels.host || 100;
placement_levels.osd = placement_levels.osd || 101;
for (const k in rules)
{
if (!placement_levels[k] || typeof rules[k] !== 'string' &&
(!(rules[k] instanceof Array) ||
rules[k].filter(s => typeof s !== 'string' && typeof s !== 'number').length > 0))
{
if (warn)
console.log('Pool '+pool_id+' configuration is invalid: level_placement should be { [level]: string | (string|number)[] }');
return null;
}
else if (rules[k].length != pg_size)
{
if (warn)
console.log('Pool '+pool_id+' configuration is invalid: values in level_placement should contain exactly pg_size ('+pg_size+') items');
return null;
}
}
return parse_level_indexes(rules);
}
else if (typeof pool_cfg.raw_placement === 'string')
{
try
{
return parse_pg_dsl(pool_cfg.raw_placement);
}
catch (e)
{
if (warn)
console.log('Pool '+pool_id+' configuration is invalid: invalid raw_placement: '+e.message);
}
}
else
{
let rules = [ [] ];
let prev = [ 1 ];
for (let i = 1; i < pool_cfg.pg_size; i++)
{
rules.push([ [ pool_cfg.failure_domain||'host', '!=', prev ] ]);
prev = [ ...prev, i+1 ];
}
return rules;
}
}
module.exports = {
validate_pool_cfg,
get_pg_rules,
};

View File

@@ -38,7 +38,7 @@ async function run()
const st = await fs.stat(options.device);
options.device_block_size = st.blksize;
if (st.isBlockDevice())
device_size = Number(await system("/sbin/blockdev --getsize64 "+options.device))
device_size = Number(await system("/sbin/blockdev --getsize64 "+options.device));
else
device_size = st.size;
}
@@ -91,7 +91,7 @@ async function run()
function system(cmd)
{
return new Promise((ok, no) => child_process.exec(cmd, { maxBuffer: 64*1024*1024 }, (err, stdout, stderr) => (err ? no(err.message) : ok(stdout))));
return new Promise((ok, no) => child_process.exec(cmd, { maxBuffer: 64*1024*1024 }, (err, stdout/*, stderr*/) => (err ? no(err.message) : ok(stdout))));
}
run().catch(err => { console.error(err); process.exit(1); });

286
mon/stats.js Normal file
View File

@@ -0,0 +1,286 @@
// Copyright (c) Vitaliy Filippov, 2019+
// License: VNPL-1.1 (see README.md for details)
function derive_osd_stats(st, prev, prev_diff)
{
const diff = { op_stats: {}, subop_stats: {}, recovery_stats: {}, inode_stats: {} };
if (!st || !st.time || !prev || !prev.time || prev.time >= st.time)
{
return prev_diff || diff;
}
const timediff = BigInt(st.time*1000 - prev.time*1000);
for (const op in st.op_stats||{})
{
const pr = prev && prev.op_stats && prev.op_stats[op];
let c = st.op_stats[op];
c = { bytes: BigInt(c.bytes||0), usec: BigInt(c.usec||0), count: BigInt(c.count||0) };
const b = c.bytes - BigInt(pr && pr.bytes||0);
const us = c.usec - BigInt(pr && pr.usec||0);
const n = c.count - BigInt(pr && pr.count||0);
if (n > 0)
diff.op_stats[op] = { ...c, bps: b*1000n/timediff, iops: n*1000n/timediff, lat: us/n };
}
for (const op in st.subop_stats||{})
{
const pr = prev && prev.subop_stats && prev.subop_stats[op];
let c = st.subop_stats[op];
c = { usec: BigInt(c.usec||0), count: BigInt(c.count||0) };
const us = c.usec - BigInt(pr && pr.usec||0);
const n = c.count - BigInt(pr && pr.count||0);
if (n > 0)
diff.subop_stats[op] = { ...c, iops: n*1000n/timediff, lat: us/n };
}
for (const op in st.recovery_stats||{})
{
const pr = prev && prev.recovery_stats && prev.recovery_stats[op];
let c = st.recovery_stats[op];
c = { bytes: BigInt(c.bytes||0), count: BigInt(c.count||0) };
const b = c.bytes - BigInt(pr && pr.bytes||0);
const n = c.count - BigInt(pr && pr.count||0);
if (n > 0)
diff.recovery_stats[op] = { ...c, bps: b*1000n/timediff, iops: n*1000n/timediff };
}
for (const pool_id in st.inode_stats||{})
{
diff.inode_stats[pool_id] = {};
for (const inode_num in st.inode_stats[pool_id])
{
const inode_diff = diff.inode_stats[pool_id][inode_num] = {};
for (const op of [ 'read', 'write', 'delete' ])
{
const c = st.inode_stats[pool_id][inode_num][op];
const pr = prev && prev.inode_stats && prev.inode_stats[pool_id] &&
prev.inode_stats[pool_id][inode_num] && prev.inode_stats[pool_id][inode_num][op];
const n = BigInt(c.count||0) - BigInt(pr && pr.count||0);
inode_diff[op] = {
bps: (BigInt(c.bytes||0) - BigInt(pr && pr.bytes||0))*1000n/timediff,
iops: n*1000n/timediff,
lat: (BigInt(c.usec||0) - BigInt(pr && pr.usec||0))/(n || 1n),
};
}
}
}
return diff;
}
// sum_op_stats(this.state.osd, this.prev_stats)
function sum_op_stats(all_osd, prev_stats)
{
for (const osd in all_osd.stats)
{
const cur = { ...all_osd.stats[osd], inode_stats: all_osd.inodestats[osd]||{} };
prev_stats.osd_diff[osd] = derive_osd_stats(
cur, prev_stats.osd_stats[osd], prev_stats.osd_diff[osd]
);
prev_stats.osd_stats[osd] = cur;
}
const sum_diff = { op_stats: {}, subop_stats: {}, recovery_stats: {} };
// Sum derived values instead of deriving summed
for (const osd in all_osd.state)
{
const derived = prev_stats.osd_diff[osd];
if (!all_osd.state[osd] || !derived)
{
continue;
}
for (const type in sum_diff)
{
for (const op in derived[type]||{})
{
for (const k in derived[type][op])
{
sum_diff[type][op] = sum_diff[type][op] || {};
sum_diff[type][op][k] = (sum_diff[type][op][k] || 0n) + derived[type][op][k];
}
}
}
}
return sum_diff;
}
// sum_object_counts(this.state, this.config)
function sum_object_counts(state, global_config)
{
const object_counts = { object: 0n, clean: 0n, misplaced: 0n, degraded: 0n, incomplete: 0n };
const object_bytes = { object: 0n, clean: 0n, misplaced: 0n, degraded: 0n, incomplete: 0n };
for (const pool_id in state.pg.stats)
{
let object_size = 0;
for (const osd_num of state.pg.stats[pool_id].write_osd_set||[])
{
if (osd_num && state.osd.stats[osd_num] && state.osd.stats[osd_num].block_size)
{
object_size = state.osd.stats[osd_num].block_size;
break;
}
}
const pool_cfg = (state.config.pools[pool_id]||{});
if (!object_size)
{
object_size = pool_cfg.block_size || global_config.block_size || 131072;
}
if (pool_cfg.scheme !== 'replicated')
{
object_size *= ((pool_cfg.pg_size||0) - (pool_cfg.parity_chunks||0));
}
object_size = BigInt(object_size);
for (const pg_num in state.pg.stats[pool_id])
{
const st = state.pg.stats[pool_id][pg_num];
if (st)
{
for (const k in object_counts)
{
if (st[k+'_count'])
{
object_counts[k] += BigInt(st[k+'_count']);
object_bytes[k] += BigInt(st[k+'_count']) * object_size;
}
}
}
}
}
return { object_counts, object_bytes };
}
// sum_inode_stats(this.state, this.prev_stats)
function sum_inode_stats(state, prev_stats)
{
const inode_stats = {};
const inode_stub = () => ({
raw_used: 0n,
read: { count: 0n, usec: 0n, bytes: 0n, bps: 0n, iops: 0n, lat: 0n },
write: { count: 0n, usec: 0n, bytes: 0n, bps: 0n, iops: 0n, lat: 0n },
delete: { count: 0n, usec: 0n, bytes: 0n, bps: 0n, iops: 0n, lat: 0n },
});
const seen_pools = {};
for (const pool_id in state.config.pools)
{
seen_pools[pool_id] = true;
state.pool.stats[pool_id] = state.pool.stats[pool_id] || {};
state.pool.stats[pool_id].used_raw_tb = 0n;
}
for (const osd_num in state.osd.space)
{
for (const pool_id in state.osd.space[osd_num])
{
state.pool.stats[pool_id] = state.pool.stats[pool_id] || {};
if (!seen_pools[pool_id])
{
state.pool.stats[pool_id].used_raw_tb = 0n;
seen_pools[pool_id] = true;
}
inode_stats[pool_id] = inode_stats[pool_id] || {};
for (const inode_num in state.osd.space[osd_num][pool_id])
{
const u = BigInt(state.osd.space[osd_num][pool_id][inode_num]||0);
if (inode_num)
{
inode_stats[pool_id][inode_num] = inode_stats[pool_id][inode_num] || inode_stub();
inode_stats[pool_id][inode_num].raw_used += u;
}
state.pool.stats[pool_id].used_raw_tb += u;
}
}
}
for (const pool_id in seen_pools)
{
const used = state.pool.stats[pool_id].used_raw_tb;
state.pool.stats[pool_id].used_raw_tb = Number(used)/1024/1024/1024/1024;
}
for (const osd_num in state.osd.state)
{
const ist = state.osd.inodestats[osd_num];
if (!ist || !state.osd.state[osd_num])
{
continue;
}
for (const pool_id in ist)
{
inode_stats[pool_id] = inode_stats[pool_id] || {};
for (const inode_num in ist[pool_id])
{
inode_stats[pool_id][inode_num] = inode_stats[pool_id][inode_num] || inode_stub();
for (const op of [ 'read', 'write', 'delete' ])
{
inode_stats[pool_id][inode_num][op].count += BigInt(ist[pool_id][inode_num][op].count||0);
inode_stats[pool_id][inode_num][op].usec += BigInt(ist[pool_id][inode_num][op].usec||0);
inode_stats[pool_id][inode_num][op].bytes += BigInt(ist[pool_id][inode_num][op].bytes||0);
}
}
}
}
for (const osd in state.osd.state)
{
const osd_diff = prev_stats.osd_diff[osd];
if (!osd_diff || !state.osd.state[osd])
{
continue;
}
for (const pool_id in osd_diff.inode_stats)
{
for (const inode_num in prev_stats.osd_diff[osd].inode_stats[pool_id])
{
inode_stats[pool_id][inode_num] = inode_stats[pool_id][inode_num] || inode_stub();
for (const op of [ 'read', 'write', 'delete' ])
{
const op_diff = prev_stats.osd_diff[osd].inode_stats[pool_id][inode_num][op] || {};
const op_st = inode_stats[pool_id][inode_num][op];
op_st.bps += op_diff.bps;
op_st.iops += op_diff.iops;
op_st.lat += op_diff.lat;
op_st.n_osd = (op_st.n_osd || 0) + 1;
}
}
}
}
for (const pool_id in inode_stats)
{
for (const inode_num in inode_stats[pool_id])
{
let nonzero = inode_stats[pool_id][inode_num].raw_used > 0;
for (const op of [ 'read', 'write', 'delete' ])
{
const op_st = inode_stats[pool_id][inode_num][op];
if (op_st.n_osd)
{
op_st.lat /= BigInt(op_st.n_osd);
delete op_st.n_osd;
}
if (op_st.bps > 0 || op_st.iops > 0)
nonzero = true;
}
if (!nonzero && (!state.config.inode[pool_id] || !state.config.inode[pool_id][inode_num]))
{
// Deleted inode (no data, no I/O, no config)
delete inode_stats[pool_id][inode_num];
}
}
}
return { inode_stats, seen_pools };
}
function serialize_bigints(obj)
{
obj = { ...obj };
for (const k in obj)
{
if (typeof obj[k] == 'bigint')
{
obj[k] = ''+obj[k];
}
else if (typeof obj[k] == 'object')
{
obj[k] = serialize_bigints(obj[k]);
}
}
return obj;
}
module.exports = {
derive_osd_stats,
sum_op_stats,
sum_object_counts,
sum_inode_stats,
serialize_bigints,
};

View File

@@ -50,7 +50,7 @@ from cinder.volume import configuration
from cinder.volume import driver
from cinder.volume import volume_utils
VERSION = '1.6.0'
VERSION = '1.6.1'
LOG = logging.getLogger(__name__)
@@ -707,10 +707,10 @@ class VitastorDriver(driver.CloneableImageVD,
return ({}, True)
return ({}, False)
def copy_image_to_encrypted_volume(self, context, volume, image_service, image_id):
self.copy_image_to_volume(context, volume, image_service, image_id, encrypted = True)
def copy_image_to_encrypted_volume(self, context, volume, image_service, image_id, disable_sparse=False):
self.copy_image_to_volume(context, volume, image_service, image_id, encrypted = True, disable_sparse=False)
def copy_image_to_volume(self, context, volume, image_service, image_id, encrypted = False):
def copy_image_to_volume(self, context, volume, image_service, image_id, encrypted = False, disable_sparse=False):
tmp_dir = volume_utils.image_conversion_dir()
with tempfile.NamedTemporaryFile(dir = tmp_dir) as tmp:
image_utils.fetch_to_raw(

View File

@@ -0,0 +1,670 @@
From 571bde71268dcca6446454bb1e895e21bcc7b2a0 Mon Sep 17 00:00:00 2001
From: ace <ace@0xace.cc>
Date: Sat, 18 May 2024 19:45:49 +0300
Subject: [PATCH] Add Vitastor support
---
include/libvirt/libvirt-storage.h | 1 +
src/conf/domain_conf.c | 4 +-
src/conf/domain_validate.c | 10 +-
src/conf/schemas/domaincommon.rng | 30 +++++
src/conf/storage_conf.c | 20 ++-
src/conf/storage_conf.h | 2 +
src/conf/storage_source_conf.c | 2 +
src/conf/storage_source_conf.h | 1 +
src/conf/virstorageobj.c | 3 +
src/libvirt-storage.c | 1 +
src/libxl/libxl_conf.c | 1 +
src/libxl/xen_xl.c | 1 +
src/qemu/qemu_block.c | 45 +++++++
src/qemu/qemu_domain.c | 4 +-
src/qemu/qemu_snapshot.c | 2 +
src/storage/storage_driver.c | 1 +
.../storage_source_backingstore.c | 123 ++++++++++++++++++
src/test/test_driver.c | 1 +
.../storagepoolcapsschemadata/poolcaps-fs.xml | 7 +
.../poolcaps-full.xml | 7 +
tests/storagepoolxml2argvtest.c | 1 +
tools/virsh-pool.c | 3 +
22 files changed, 265 insertions(+), 5 deletions(-)
diff --git a/include/libvirt/libvirt-storage.h b/include/libvirt/libvirt-storage.h
index aaad4a3da1..5f5daa8341 100644
--- a/include/libvirt/libvirt-storage.h
+++ b/include/libvirt/libvirt-storage.h
@@ -326,6 +326,7 @@ typedef enum {
VIR_CONNECT_LIST_STORAGE_POOLS_ZFS = 1 << 17, /* (Since: 1.2.8) */
VIR_CONNECT_LIST_STORAGE_POOLS_VSTORAGE = 1 << 18, /* (Since: 3.1.0) */
VIR_CONNECT_LIST_STORAGE_POOLS_ISCSI_DIRECT = 1 << 19, /* (Since: 5.6.0) */
+ VIR_CONNECT_LIST_STORAGE_POOLS_VITASTOR = 1 << 20, /* (Since: 5.0.0) */
} virConnectListAllStoragePoolsFlags;
int virConnectListAllStoragePools(virConnectPtr conn,
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
index 52a5796ad2..089697b2a3 100644
--- a/src/conf/domain_conf.c
+++ b/src/conf/domain_conf.c
@@ -7191,7 +7191,8 @@ virDomainDiskSourceNetworkParse(xmlNodePtr node,
src->configFile = virXPathString("string(./config/@file)", ctxt);
if (src->protocol == VIR_STORAGE_NET_PROTOCOL_HTTP ||
- src->protocol == VIR_STORAGE_NET_PROTOCOL_HTTPS)
+ src->protocol == VIR_STORAGE_NET_PROTOCOL_HTTPS ||
+ src->protocol == VIR_STORAGE_NET_PROTOCOL_VITASTOR)
src->query = virXMLPropString(node, "query");
if (virDomainStorageNetworkParseHosts(node, ctxt, &src->hosts, &src->nhosts) < 0)
@@ -30657,6 +30658,7 @@ virDomainStorageSourceTranslateSourcePool(virStorageSource *src,
case VIR_STORAGE_POOL_MPATH:
case VIR_STORAGE_POOL_RBD:
+ case VIR_STORAGE_POOL_VITASTOR:
case VIR_STORAGE_POOL_SHEEPDOG:
case VIR_STORAGE_POOL_GLUSTER:
case VIR_STORAGE_POOL_LAST:
diff --git a/src/conf/domain_validate.c b/src/conf/domain_validate.c
index faa7659f07..01b907d60d 100644
--- a/src/conf/domain_validate.c
+++ b/src/conf/domain_validate.c
@@ -495,6 +495,7 @@ virDomainDiskDefValidateSourceChainOne(const virStorageSource *src)
case VIR_STORAGE_NET_PROTOCOL_RBD:
break;
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
case VIR_STORAGE_NET_PROTOCOL_NBD:
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
case VIR_STORAGE_NET_PROTOCOL_GLUSTER:
@@ -541,7 +542,7 @@ virDomainDiskDefValidateSourceChainOne(const virStorageSource *src)
}
}
- /* internal snapshots and config files are currently supported only with rbd: */
+ /* internal snapshots are currently supported only with rbd: */
if (virStorageSourceGetActualType(src) != VIR_STORAGE_TYPE_NETWORK &&
src->protocol != VIR_STORAGE_NET_PROTOCOL_RBD) {
if (src->snapshot) {
@@ -549,10 +550,15 @@ virDomainDiskDefValidateSourceChainOne(const virStorageSource *src)
_("<snapshot> element is currently supported only with 'rbd' disks"));
return -1;
}
+ }
+ /* config files are currently supported only with rbd and vitastor: */
+ if (virStorageSourceGetActualType(src) != VIR_STORAGE_TYPE_NETWORK &&
+ src->protocol != VIR_STORAGE_NET_PROTOCOL_RBD &&
+ src->protocol != VIR_STORAGE_NET_PROTOCOL_VITASTOR) {
if (src->configFile) {
virReportError(VIR_ERR_XML_ERROR, "%s",
- _("<config> element is currently supported only with 'rbd' disks"));
+ _("<config> element is currently supported only with 'rbd' and 'vitastor' disks"));
return -1;
}
}
diff --git a/src/conf/schemas/domaincommon.rng b/src/conf/schemas/domaincommon.rng
index df44cd9857..4bb72fc697 100644
--- a/src/conf/schemas/domaincommon.rng
+++ b/src/conf/schemas/domaincommon.rng
@@ -1997,6 +1997,35 @@
</element>
</define>
+ <define name="diskSourceNetworkProtocolVitastor">
+ <element name="source">
+ <interleave>
+ <attribute name="protocol">
+ <value>vitastor</value>
+ </attribute>
+ <ref name="diskSourceCommon"/>
+ <optional>
+ <attribute name="name"/>
+ </optional>
+ <optional>
+ <attribute name="query"/>
+ </optional>
+ <zeroOrMore>
+ <ref name="diskSourceNetworkHost"/>
+ </zeroOrMore>
+ <optional>
+ <element name="config">
+ <attribute name="file">
+ <ref name="absFilePath"/>
+ </attribute>
+ <empty/>
+ </element>
+ </optional>
+ <empty/>
+ </interleave>
+ </element>
+ </define>
+
<define name="diskSourceNetworkProtocolISCSI">
<element name="source">
<attribute name="protocol">
@@ -2347,6 +2376,7 @@
<ref name="diskSourceNetworkProtocolSimple"/>
<ref name="diskSourceNetworkProtocolVxHS"/>
<ref name="diskSourceNetworkProtocolNFS"/>
+ <ref name="diskSourceNetworkProtocolVitastor"/>
</choice>
</define>
diff --git a/src/conf/storage_conf.c b/src/conf/storage_conf.c
index 68842004b7..1d69a788b6 100644
--- a/src/conf/storage_conf.c
+++ b/src/conf/storage_conf.c
@@ -56,7 +56,7 @@ VIR_ENUM_IMPL(virStoragePool,
"logical", "disk", "iscsi",
"iscsi-direct", "scsi", "mpath",
"rbd", "sheepdog", "gluster",
- "zfs", "vstorage",
+ "zfs", "vstorage", "vitastor",
);
VIR_ENUM_IMPL(virStoragePoolFormatFileSystem,
@@ -242,6 +242,18 @@ static virStoragePoolTypeInfo poolTypeInfo[] = {
.formatToString = virStorageFileFormatTypeToString,
}
},
+ {.poolType = VIR_STORAGE_POOL_VITASTOR,
+ .poolOptions = {
+ .flags = (VIR_STORAGE_POOL_SOURCE_HOST |
+ VIR_STORAGE_POOL_SOURCE_NETWORK |
+ VIR_STORAGE_POOL_SOURCE_NAME),
+ },
+ .volOptions = {
+ .defaultFormat = VIR_STORAGE_FILE_RAW,
+ .formatFromString = virStorageVolumeFormatFromString,
+ .formatToString = virStorageFileFormatTypeToString,
+ }
+ },
{.poolType = VIR_STORAGE_POOL_SHEEPDOG,
.poolOptions = {
.flags = (VIR_STORAGE_POOL_SOURCE_HOST |
@@ -538,6 +550,11 @@ virStoragePoolDefParseSource(xmlXPathContextPtr ctxt,
_("element 'name' is mandatory for RBD pool"));
return -1;
}
+ if (pool_type == VIR_STORAGE_POOL_VITASTOR && source->name == NULL) {
+ virReportError(VIR_ERR_XML_ERROR, "%s",
+ _("element 'name' is mandatory for Vitastor pool"));
+ return -1;
+ }
if (options->formatFromString) {
g_autofree char *format = NULL;
@@ -1127,6 +1144,7 @@ virStoragePoolDefFormatBuf(virBuffer *buf,
/* RBD, Sheepdog, Gluster and Iscsi-direct devices are not local block devs nor
* files, so they don't have a target */
if (def->type != VIR_STORAGE_POOL_RBD &&
+ def->type != VIR_STORAGE_POOL_VITASTOR &&
def->type != VIR_STORAGE_POOL_SHEEPDOG &&
def->type != VIR_STORAGE_POOL_GLUSTER &&
def->type != VIR_STORAGE_POOL_ISCSI_DIRECT) {
diff --git a/src/conf/storage_conf.h b/src/conf/storage_conf.h
index fc67957cfe..720c07ef74 100644
--- a/src/conf/storage_conf.h
+++ b/src/conf/storage_conf.h
@@ -103,6 +103,7 @@ typedef enum {
VIR_STORAGE_POOL_GLUSTER, /* Gluster device */
VIR_STORAGE_POOL_ZFS, /* ZFS */
VIR_STORAGE_POOL_VSTORAGE, /* Virtuozzo Storage */
+ VIR_STORAGE_POOL_VITASTOR, /* Vitastor */
VIR_STORAGE_POOL_LAST,
} virStoragePoolType;
@@ -454,6 +455,7 @@ VIR_ENUM_DECL(virStoragePartedFs);
VIR_CONNECT_LIST_STORAGE_POOLS_SCSI | \
VIR_CONNECT_LIST_STORAGE_POOLS_MPATH | \
VIR_CONNECT_LIST_STORAGE_POOLS_RBD | \
+ VIR_CONNECT_LIST_STORAGE_POOLS_VITASTOR | \
VIR_CONNECT_LIST_STORAGE_POOLS_SHEEPDOG | \
VIR_CONNECT_LIST_STORAGE_POOLS_GLUSTER | \
VIR_CONNECT_LIST_STORAGE_POOLS_ZFS | \
diff --git a/src/conf/storage_source_conf.c b/src/conf/storage_source_conf.c
index 959ec5ed40..e751dd4d6a 100644
--- a/src/conf/storage_source_conf.c
+++ b/src/conf/storage_source_conf.c
@@ -88,6 +88,7 @@ VIR_ENUM_IMPL(virStorageNetProtocol,
"ssh",
"vxhs",
"nfs",
+ "vitastor",
);
@@ -1301,6 +1302,7 @@ virStorageSourceNetworkDefaultPort(virStorageNetProtocol protocol)
case VIR_STORAGE_NET_PROTOCOL_GLUSTER:
return 24007;
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
case VIR_STORAGE_NET_PROTOCOL_RBD:
/* we don't provide a default for RBD */
return 0;
diff --git a/src/conf/storage_source_conf.h b/src/conf/storage_source_conf.h
index 05b4bda16c..b5ed143c39 100644
--- a/src/conf/storage_source_conf.h
+++ b/src/conf/storage_source_conf.h
@@ -129,6 +129,7 @@ typedef enum {
VIR_STORAGE_NET_PROTOCOL_SSH,
VIR_STORAGE_NET_PROTOCOL_VXHS,
VIR_STORAGE_NET_PROTOCOL_NFS,
+ VIR_STORAGE_NET_PROTOCOL_VITASTOR,
VIR_STORAGE_NET_PROTOCOL_LAST
} virStorageNetProtocol;
diff --git a/src/conf/virstorageobj.c b/src/conf/virstorageobj.c
index 59fa5da372..4739167f5f 100644
--- a/src/conf/virstorageobj.c
+++ b/src/conf/virstorageobj.c
@@ -1438,6 +1438,7 @@ virStoragePoolObjSourceFindDuplicateCb(const void *payload,
return 1;
break;
+ case VIR_STORAGE_POOL_VITASTOR:
case VIR_STORAGE_POOL_ISCSI_DIRECT:
case VIR_STORAGE_POOL_RBD:
case VIR_STORAGE_POOL_LAST:
@@ -1921,6 +1922,8 @@ virStoragePoolObjMatch(virStoragePoolObj *obj,
(obj->def->type == VIR_STORAGE_POOL_MPATH)) ||
(MATCH(VIR_CONNECT_LIST_STORAGE_POOLS_RBD) &&
(obj->def->type == VIR_STORAGE_POOL_RBD)) ||
+ (MATCH(VIR_CONNECT_LIST_STORAGE_POOLS_VITASTOR) &&
+ (obj->def->type == VIR_STORAGE_POOL_VITASTOR)) ||
(MATCH(VIR_CONNECT_LIST_STORAGE_POOLS_SHEEPDOG) &&
(obj->def->type == VIR_STORAGE_POOL_SHEEPDOG)) ||
(MATCH(VIR_CONNECT_LIST_STORAGE_POOLS_GLUSTER) &&
diff --git a/src/libvirt-storage.c b/src/libvirt-storage.c
index db7660aac4..561df34709 100644
--- a/src/libvirt-storage.c
+++ b/src/libvirt-storage.c
@@ -94,6 +94,7 @@ virStoragePoolGetConnect(virStoragePoolPtr pool)
* VIR_CONNECT_LIST_STORAGE_POOLS_SCSI
* VIR_CONNECT_LIST_STORAGE_POOLS_MPATH
* VIR_CONNECT_LIST_STORAGE_POOLS_RBD
+ * VIR_CONNECT_LIST_STORAGE_POOLS_VITASTOR
* VIR_CONNECT_LIST_STORAGE_POOLS_SHEEPDOG
* VIR_CONNECT_LIST_STORAGE_POOLS_GLUSTER
* VIR_CONNECT_LIST_STORAGE_POOLS_ZFS
diff --git a/src/libxl/libxl_conf.c b/src/libxl/libxl_conf.c
index 62e1be6672..71a1d42896 100644
--- a/src/libxl/libxl_conf.c
+++ b/src/libxl/libxl_conf.c
@@ -979,6 +979,7 @@ libxlMakeNetworkDiskSrcStr(virStorageSource *src,
case VIR_STORAGE_NET_PROTOCOL_SSH:
case VIR_STORAGE_NET_PROTOCOL_VXHS:
case VIR_STORAGE_NET_PROTOCOL_NFS:
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
case VIR_STORAGE_NET_PROTOCOL_LAST:
case VIR_STORAGE_NET_PROTOCOL_NONE:
virReportError(VIR_ERR_NO_SUPPORT,
diff --git a/src/libxl/xen_xl.c b/src/libxl/xen_xl.c
index f175359307..8efcf4c329 100644
--- a/src/libxl/xen_xl.c
+++ b/src/libxl/xen_xl.c
@@ -1456,6 +1456,7 @@ xenFormatXLDiskSrcNet(virStorageSource *src)
case VIR_STORAGE_NET_PROTOCOL_SSH:
case VIR_STORAGE_NET_PROTOCOL_VXHS:
case VIR_STORAGE_NET_PROTOCOL_NFS:
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
case VIR_STORAGE_NET_PROTOCOL_LAST:
case VIR_STORAGE_NET_PROTOCOL_NONE:
virReportError(VIR_ERR_NO_SUPPORT,
diff --git a/src/qemu/qemu_block.c b/src/qemu/qemu_block.c
index c9f5cbbf29..dbbac36836 100644
--- a/src/qemu/qemu_block.c
+++ b/src/qemu/qemu_block.c
@@ -758,6 +758,38 @@ qemuBlockStorageSourceGetRBDProps(virStorageSource *src,
}
+static virJSONValue *
+qemuBlockStorageSourceGetVitastorProps(virStorageSource *src)
+{
+ virJSONValue *ret = NULL;
+ virStorageNetHostDef *host;
+ size_t i;
+ g_auto(virBuffer) buf = VIR_BUFFER_INITIALIZER;
+ g_autofree char *etcd = NULL;
+
+ for (i = 0; i < src->nhosts; i++) {
+ host = src->hosts + i;
+ if ((virStorageNetHostTransport)host->transport != VIR_STORAGE_NET_HOST_TRANS_TCP) {
+ return NULL;
+ }
+ virBufferAsprintf(&buf, i > 0 ? ",%s:%u" : "%s:%u", host->name, host->port);
+ }
+ if (src->nhosts > 0) {
+ etcd = virBufferContentAndReset(&buf);
+ }
+
+ if (virJSONValueObjectAdd(&ret,
+ "S:etcd-host", etcd,
+ "S:etcd-prefix", src->query,
+ "S:config-path", src->configFile,
+ "s:image", src->path,
+ NULL) < 0)
+ return NULL;
+
+ return ret;
+}
+
+
static virJSONValue *
qemuBlockStorageSourceGetSheepdogProps(virStorageSource *src)
{
@@ -1140,6 +1172,12 @@ qemuBlockStorageSourceGetBackendProps(virStorageSource *src,
return NULL;
break;
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
+ driver = "vitastor";
+ if (!(fileprops = qemuBlockStorageSourceGetVitastorProps(src)))
+ return NULL;
+ break;
+
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
driver = "sheepdog";
if (!(fileprops = qemuBlockStorageSourceGetSheepdogProps(src)))
@@ -2020,6 +2058,7 @@ qemuBlockGetBackingStoreString(virStorageSource *src,
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
case VIR_STORAGE_NET_PROTOCOL_RBD:
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
case VIR_STORAGE_NET_PROTOCOL_VXHS:
case VIR_STORAGE_NET_PROTOCOL_NFS:
case VIR_STORAGE_NET_PROTOCOL_SSH:
@@ -2400,6 +2439,12 @@ qemuBlockStorageSourceCreateGetStorageProps(virStorageSource *src,
return -1;
break;
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
+ driver = "vitastor";
+ if (!(location = qemuBlockStorageSourceGetVitastorProps(src)))
+ return -1;
+ break;
+
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
driver = "sheepdog";
if (!(location = qemuBlockStorageSourceGetSheepdogProps(src)))
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
index 341c543280..61b248fa2c 100644
--- a/src/qemu/qemu_domain.c
+++ b/src/qemu/qemu_domain.c
@@ -5207,7 +5207,8 @@ qemuDomainValidateStorageSource(virStorageSource *src,
if (src->query &&
(actualType != VIR_STORAGE_TYPE_NETWORK ||
(src->protocol != VIR_STORAGE_NET_PROTOCOL_HTTPS &&
- src->protocol != VIR_STORAGE_NET_PROTOCOL_HTTP))) {
+ src->protocol != VIR_STORAGE_NET_PROTOCOL_HTTP &&
+ src->protocol != VIR_STORAGE_NET_PROTOCOL_VITASTOR))) {
virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
_("query is supported only with HTTP(S) protocols"));
return -1;
@@ -10387,6 +10388,7 @@ qemuDomainPrepareStorageSourceTLS(virStorageSource *src,
break;
case VIR_STORAGE_NET_PROTOCOL_RBD:
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
case VIR_STORAGE_NET_PROTOCOL_GLUSTER:
case VIR_STORAGE_NET_PROTOCOL_ISCSI:
diff --git a/src/qemu/qemu_snapshot.c b/src/qemu/qemu_snapshot.c
index 0cac0c4146..4955ebd8d4 100644
--- a/src/qemu/qemu_snapshot.c
+++ b/src/qemu/qemu_snapshot.c
@@ -423,6 +423,7 @@ qemuSnapshotPrepareDiskExternalInactive(virDomainSnapshotDiskDef *snapdisk,
case VIR_STORAGE_NET_PROTOCOL_NONE:
case VIR_STORAGE_NET_PROTOCOL_NBD:
case VIR_STORAGE_NET_PROTOCOL_RBD:
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
case VIR_STORAGE_NET_PROTOCOL_GLUSTER:
case VIR_STORAGE_NET_PROTOCOL_ISCSI:
@@ -648,6 +649,7 @@ qemuSnapshotPrepareDiskInternal(virDomainDiskDef *disk,
case VIR_STORAGE_NET_PROTOCOL_NONE:
case VIR_STORAGE_NET_PROTOCOL_NBD:
case VIR_STORAGE_NET_PROTOCOL_RBD:
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
case VIR_STORAGE_NET_PROTOCOL_GLUSTER:
case VIR_STORAGE_NET_PROTOCOL_ISCSI:
diff --git a/src/storage/storage_driver.c b/src/storage/storage_driver.c
index 314fe930e0..fb615a8b4e 100644
--- a/src/storage/storage_driver.c
+++ b/src/storage/storage_driver.c
@@ -1626,6 +1626,7 @@ storageVolLookupByPathCallback(virStoragePoolObj *obj,
case VIR_STORAGE_POOL_GLUSTER:
case VIR_STORAGE_POOL_RBD:
+ case VIR_STORAGE_POOL_VITASTOR:
case VIR_STORAGE_POOL_SHEEPDOG:
case VIR_STORAGE_POOL_ZFS:
case VIR_STORAGE_POOL_LAST:
diff --git a/src/storage_file/storage_source_backingstore.c b/src/storage_file/storage_source_backingstore.c
index 80681924ea..8a3ade9ec0 100644
--- a/src/storage_file/storage_source_backingstore.c
+++ b/src/storage_file/storage_source_backingstore.c
@@ -287,6 +287,75 @@ virStorageSourceParseRBDColonString(const char *rbdstr,
}
+static int
+virStorageSourceParseVitastorColonString(const char *colonstr,
+ virStorageSource *src)
+{
+ char *p, *e, *next;
+ g_autofree char *options = NULL;
+
+ /* optionally skip the "vitastor:" prefix if provided */
+ if (STRPREFIX(colonstr, "vitastor:"))
+ colonstr += strlen("vitastor:");
+
+ options = g_strdup(colonstr);
+
+ p = options;
+ while (*p) {
+ /* find : delimiter or end of string */
+ for (e = p; *e && *e != ':'; ++e) {
+ if (*e == '\\') {
+ e++;
+ if (*e == '\0')
+ break;
+ }
+ }
+ if (*e == '\0') {
+ next = e; /* last kv pair */
+ } else {
+ next = e + 1;
+ *e = '\0';
+ }
+
+ if (STRPREFIX(p, "image=")) {
+ src->path = g_strdup(p + strlen("image="));
+ } else if (STRPREFIX(p, "etcd-prefix=")) {
+ src->query = g_strdup(p + strlen("etcd-prefix="));
+ } else if (STRPREFIX(p, "config-path=")) {
+ src->configFile = g_strdup(p + strlen("config-path="));
+ } else if (STRPREFIX(p, "etcd-host=")) {
+ char *h, *sep;
+
+ h = p + strlen("etcd-host=");
+ while (h < e) {
+ for (sep = h; sep < e; ++sep) {
+ if (*sep == '\\' && (sep[1] == ',' ||
+ sep[1] == ';' ||
+ sep[1] == ' ')) {
+ *sep = '\0';
+ sep += 2;
+ break;
+ }
+ }
+
+ if (virStorageSourceRBDAddHost(src, h) < 0)
+ return -1;
+
+ h = sep;
+ }
+ }
+
+ p = next;
+ }
+
+ if (!src->path) {
+ return -1;
+ }
+
+ return 0;
+}
+
+
static int
virStorageSourceParseNBDColonString(const char *nbdstr,
virStorageSource *src)
@@ -399,6 +468,11 @@ virStorageSourceParseBackingColon(virStorageSource *src,
return -1;
break;
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
+ if (virStorageSourceParseVitastorColonString(path, src) < 0)
+ return -1;
+ break;
+
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
case VIR_STORAGE_NET_PROTOCOL_LAST:
case VIR_STORAGE_NET_PROTOCOL_NONE:
@@ -975,6 +1049,54 @@ virStorageSourceParseBackingJSONRBD(virStorageSource *src,
return 0;
}
+static int
+virStorageSourceParseBackingJSONVitastor(virStorageSource *src,
+ virJSONValue *json,
+ const char *jsonstr G_GNUC_UNUSED,
+ int opaque G_GNUC_UNUSED)
+{
+ const char *filename;
+ const char *image = virJSONValueObjectGetString(json, "image");
+ const char *conf = virJSONValueObjectGetString(json, "config-path");
+ const char *etcd_prefix = virJSONValueObjectGetString(json, "etcd-prefix");
+ virJSONValue *servers = virJSONValueObjectGetArray(json, "server");
+ size_t nservers;
+ size_t i;
+
+ src->type = VIR_STORAGE_TYPE_NETWORK;
+ src->protocol = VIR_STORAGE_NET_PROTOCOL_VITASTOR;
+
+ /* legacy syntax passed via 'filename' option */
+ if ((filename = virJSONValueObjectGetString(json, "filename")))
+ return virStorageSourceParseVitastorColonString(filename, src);
+
+ if (!image) {
+ virReportError(VIR_ERR_INVALID_ARG, "%s",
+ _("missing image name in Vitastor backing volume "
+ "JSON specification"));
+ return -1;
+ }
+
+ src->path = g_strdup(image);
+ src->configFile = g_strdup(conf);
+ src->query = g_strdup(etcd_prefix);
+
+ if (servers) {
+ nservers = virJSONValueArraySize(servers);
+
+ src->hosts = g_new0(virStorageNetHostDef, nservers);
+ src->nhosts = nservers;
+
+ for (i = 0; i < nservers; i++) {
+ if (virStorageSourceParseBackingJSONInetSocketAddress(src->hosts + i,
+ virJSONValueArrayGet(servers, i)) < 0)
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
static int
virStorageSourceParseBackingJSONRaw(virStorageSource *src,
virJSONValue *json,
@@ -1152,6 +1274,7 @@ static const struct virStorageSourceJSONDriverParser jsonParsers[] = {
{"sheepdog", false, virStorageSourceParseBackingJSONSheepdog, 0},
{"ssh", false, virStorageSourceParseBackingJSONSSH, 0},
{"rbd", false, virStorageSourceParseBackingJSONRBD, 0},
+ {"vitastor", false, virStorageSourceParseBackingJSONVitastor, 0},
{"raw", true, virStorageSourceParseBackingJSONRaw, 0},
{"nfs", false, virStorageSourceParseBackingJSONNFS, 0},
{"vxhs", false, virStorageSourceParseBackingJSONVxHS, 0},
diff --git a/src/test/test_driver.c b/src/test/test_driver.c
index ed545848af..dbfdbe8476 100644
--- a/src/test/test_driver.c
+++ b/src/test/test_driver.c
@@ -7336,6 +7336,7 @@ testStorageVolumeTypeForPool(int pooltype)
case VIR_STORAGE_POOL_ISCSI_DIRECT:
case VIR_STORAGE_POOL_GLUSTER:
case VIR_STORAGE_POOL_RBD:
+ case VIR_STORAGE_POOL_VITASTOR:
return VIR_STORAGE_VOL_NETWORK;
case VIR_STORAGE_POOL_LOGICAL:
case VIR_STORAGE_POOL_DISK:
diff --git a/tests/storagepoolcapsschemadata/poolcaps-fs.xml b/tests/storagepoolcapsschemadata/poolcaps-fs.xml
index eee75af746..8bd0a57bdd 100644
--- a/tests/storagepoolcapsschemadata/poolcaps-fs.xml
+++ b/tests/storagepoolcapsschemadata/poolcaps-fs.xml
@@ -204,4 +204,11 @@
</enum>
</volOptions>
</pool>
+ <pool type='vitastor' supported='no'>
+ <volOptions>
+ <defaultFormat type='raw'/>
+ <enum name='targetFormatType'>
+ </enum>
+ </volOptions>
+ </pool>
</storagepoolCapabilities>
diff --git a/tests/storagepoolcapsschemadata/poolcaps-full.xml b/tests/storagepoolcapsschemadata/poolcaps-full.xml
index 805950a937..852df0de16 100644
--- a/tests/storagepoolcapsschemadata/poolcaps-full.xml
+++ b/tests/storagepoolcapsschemadata/poolcaps-full.xml
@@ -204,4 +204,11 @@
</enum>
</volOptions>
</pool>
+ <pool type='vitastor' supported='yes'>
+ <volOptions>
+ <defaultFormat type='raw'/>
+ <enum name='targetFormatType'>
+ </enum>
+ </volOptions>
+ </pool>
</storagepoolCapabilities>
diff --git a/tests/storagepoolxml2argvtest.c b/tests/storagepoolxml2argvtest.c
index e8e40d695e..db55fe5f3a 100644
--- a/tests/storagepoolxml2argvtest.c
+++ b/tests/storagepoolxml2argvtest.c
@@ -65,6 +65,7 @@ testCompareXMLToArgvFiles(bool shouldFail,
case VIR_STORAGE_POOL_GLUSTER:
case VIR_STORAGE_POOL_ZFS:
case VIR_STORAGE_POOL_VSTORAGE:
+ case VIR_STORAGE_POOL_VITASTOR:
case VIR_STORAGE_POOL_LAST:
default:
VIR_TEST_DEBUG("pool type '%s' has no xml2argv test", defTypeStr);
diff --git a/tools/virsh-pool.c b/tools/virsh-pool.c
index 36f00cf643..5f5bd3464e 100644
--- a/tools/virsh-pool.c
+++ b/tools/virsh-pool.c
@@ -1223,6 +1223,9 @@ cmdPoolList(vshControl *ctl, const vshCmd *cmd G_GNUC_UNUSED)
case VIR_STORAGE_POOL_VSTORAGE:
flags |= VIR_CONNECT_LIST_STORAGE_POOLS_VSTORAGE;
break;
+ case VIR_STORAGE_POOL_VITASTOR:
+ flags |= VIR_CONNECT_LIST_STORAGE_POOLS_VITASTOR;
+ break;
case VIR_STORAGE_POOL_LAST:
break;
}
--
2.43.0

View File

@@ -0,0 +1,643 @@
commit 1f7e90e36b2afca0312392979b96d31951a8d66b
Author: Vitaliy Filippov <vitalif@yourcmc.ru>
Date: Thu Jun 27 01:34:54 2024 +0300
Add Vitastor support
diff --git a/include/libvirt/libvirt-storage.h b/include/libvirt/libvirt-storage.h
index aaad4a3da1..5f5daa8341 100644
--- a/include/libvirt/libvirt-storage.h
+++ b/include/libvirt/libvirt-storage.h
@@ -326,6 +326,7 @@ typedef enum {
VIR_CONNECT_LIST_STORAGE_POOLS_ZFS = 1 << 17, /* (Since: 1.2.8) */
VIR_CONNECT_LIST_STORAGE_POOLS_VSTORAGE = 1 << 18, /* (Since: 3.1.0) */
VIR_CONNECT_LIST_STORAGE_POOLS_ISCSI_DIRECT = 1 << 19, /* (Since: 5.6.0) */
+ VIR_CONNECT_LIST_STORAGE_POOLS_VITASTOR = 1 << 20, /* (Since: 5.0.0) */
} virConnectListAllStoragePoolsFlags;
int virConnectListAllStoragePools(virConnectPtr conn,
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
index fde594f811..66537db3e3 100644
--- a/src/conf/domain_conf.c
+++ b/src/conf/domain_conf.c
@@ -7220,7 +7220,8 @@ virDomainDiskSourceNetworkParse(xmlNodePtr node,
src->configFile = virXPathString("string(./config/@file)", ctxt);
if (src->protocol == VIR_STORAGE_NET_PROTOCOL_HTTP ||
- src->protocol == VIR_STORAGE_NET_PROTOCOL_HTTPS)
+ src->protocol == VIR_STORAGE_NET_PROTOCOL_HTTPS ||
+ src->protocol == VIR_STORAGE_NET_PROTOCOL_VITASTOR)
src->query = virXMLPropString(node, "query");
if (virDomainStorageNetworkParseHosts(node, ctxt, &src->hosts, &src->nhosts) < 0)
@@ -30734,6 +30735,7 @@ virDomainStorageSourceTranslateSourcePool(virStorageSource *src,
case VIR_STORAGE_POOL_MPATH:
case VIR_STORAGE_POOL_RBD:
+ case VIR_STORAGE_POOL_VITASTOR:
case VIR_STORAGE_POOL_SHEEPDOG:
case VIR_STORAGE_POOL_GLUSTER:
case VIR_STORAGE_POOL_LAST:
diff --git a/src/conf/domain_validate.c b/src/conf/domain_validate.c
index 395e036e8f..8a0190f85b 100644
--- a/src/conf/domain_validate.c
+++ b/src/conf/domain_validate.c
@@ -495,6 +495,7 @@ virDomainDiskDefValidateSourceChainOne(const virStorageSource *src)
case VIR_STORAGE_NET_PROTOCOL_RBD:
break;
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
case VIR_STORAGE_NET_PROTOCOL_NBD:
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
case VIR_STORAGE_NET_PROTOCOL_GLUSTER:
@@ -541,7 +542,7 @@ virDomainDiskDefValidateSourceChainOne(const virStorageSource *src)
}
}
- /* internal snapshots and config files are currently supported only with rbd: */
+ /* internal snapshots are currently supported only with rbd: */
if (virStorageSourceGetActualType(src) != VIR_STORAGE_TYPE_NETWORK &&
src->protocol != VIR_STORAGE_NET_PROTOCOL_RBD) {
if (src->snapshot) {
@@ -549,10 +550,15 @@ virDomainDiskDefValidateSourceChainOne(const virStorageSource *src)
_("<snapshot> element is currently supported only with 'rbd' disks"));
return -1;
}
+ }
+ /* config files are currently supported only with rbd and vitastor: */
+ if (virStorageSourceGetActualType(src) != VIR_STORAGE_TYPE_NETWORK &&
+ src->protocol != VIR_STORAGE_NET_PROTOCOL_RBD &&
+ src->protocol != VIR_STORAGE_NET_PROTOCOL_VITASTOR) {
if (src->configFile) {
virReportError(VIR_ERR_XML_ERROR, "%s",
- _("<config> element is currently supported only with 'rbd' disks"));
+ _("<config> element is currently supported only with 'rbd' and 'vitastor' disks"));
return -1;
}
}
diff --git a/src/conf/schemas/domaincommon.rng b/src/conf/schemas/domaincommon.rng
index a46a824f88..4c5b720643 100644
--- a/src/conf/schemas/domaincommon.rng
+++ b/src/conf/schemas/domaincommon.rng
@@ -1997,6 +1997,35 @@
</element>
</define>
+ <define name="diskSourceNetworkProtocolVitastor">
+ <element name="source">
+ <interleave>
+ <attribute name="protocol">
+ <value>vitastor</value>
+ </attribute>
+ <ref name="diskSourceCommon"/>
+ <optional>
+ <attribute name="name"/>
+ </optional>
+ <optional>
+ <attribute name="query"/>
+ </optional>
+ <zeroOrMore>
+ <ref name="diskSourceNetworkHost"/>
+ </zeroOrMore>
+ <optional>
+ <element name="config">
+ <attribute name="file">
+ <ref name="absFilePath"/>
+ </attribute>
+ <empty/>
+ </element>
+ </optional>
+ <empty/>
+ </interleave>
+ </element>
+ </define>
+
<define name="diskSourceNetworkProtocolISCSI">
<element name="source">
<attribute name="protocol">
@@ -2347,6 +2376,7 @@
<ref name="diskSourceNetworkProtocolSimple"/>
<ref name="diskSourceNetworkProtocolVxHS"/>
<ref name="diskSourceNetworkProtocolNFS"/>
+ <ref name="diskSourceNetworkProtocolVitastor"/>
</choice>
</define>
diff --git a/src/conf/storage_conf.c b/src/conf/storage_conf.c
index 68842004b7..1d69a788b6 100644
--- a/src/conf/storage_conf.c
+++ b/src/conf/storage_conf.c
@@ -56,7 +56,7 @@ VIR_ENUM_IMPL(virStoragePool,
"logical", "disk", "iscsi",
"iscsi-direct", "scsi", "mpath",
"rbd", "sheepdog", "gluster",
- "zfs", "vstorage",
+ "zfs", "vstorage", "vitastor",
);
VIR_ENUM_IMPL(virStoragePoolFormatFileSystem,
@@ -242,6 +242,18 @@ static virStoragePoolTypeInfo poolTypeInfo[] = {
.formatToString = virStorageFileFormatTypeToString,
}
},
+ {.poolType = VIR_STORAGE_POOL_VITASTOR,
+ .poolOptions = {
+ .flags = (VIR_STORAGE_POOL_SOURCE_HOST |
+ VIR_STORAGE_POOL_SOURCE_NETWORK |
+ VIR_STORAGE_POOL_SOURCE_NAME),
+ },
+ .volOptions = {
+ .defaultFormat = VIR_STORAGE_FILE_RAW,
+ .formatFromString = virStorageVolumeFormatFromString,
+ .formatToString = virStorageFileFormatTypeToString,
+ }
+ },
{.poolType = VIR_STORAGE_POOL_SHEEPDOG,
.poolOptions = {
.flags = (VIR_STORAGE_POOL_SOURCE_HOST |
@@ -538,6 +550,11 @@ virStoragePoolDefParseSource(xmlXPathContextPtr ctxt,
_("element 'name' is mandatory for RBD pool"));
return -1;
}
+ if (pool_type == VIR_STORAGE_POOL_VITASTOR && source->name == NULL) {
+ virReportError(VIR_ERR_XML_ERROR, "%s",
+ _("element 'name' is mandatory for Vitastor pool"));
+ return -1;
+ }
if (options->formatFromString) {
g_autofree char *format = NULL;
@@ -1127,6 +1144,7 @@ virStoragePoolDefFormatBuf(virBuffer *buf,
/* RBD, Sheepdog, Gluster and Iscsi-direct devices are not local block devs nor
* files, so they don't have a target */
if (def->type != VIR_STORAGE_POOL_RBD &&
+ def->type != VIR_STORAGE_POOL_VITASTOR &&
def->type != VIR_STORAGE_POOL_SHEEPDOG &&
def->type != VIR_STORAGE_POOL_GLUSTER &&
def->type != VIR_STORAGE_POOL_ISCSI_DIRECT) {
diff --git a/src/conf/storage_conf.h b/src/conf/storage_conf.h
index fc67957cfe..720c07ef74 100644
--- a/src/conf/storage_conf.h
+++ b/src/conf/storage_conf.h
@@ -103,6 +103,7 @@ typedef enum {
VIR_STORAGE_POOL_GLUSTER, /* Gluster device */
VIR_STORAGE_POOL_ZFS, /* ZFS */
VIR_STORAGE_POOL_VSTORAGE, /* Virtuozzo Storage */
+ VIR_STORAGE_POOL_VITASTOR, /* Vitastor */
VIR_STORAGE_POOL_LAST,
} virStoragePoolType;
@@ -454,6 +455,7 @@ VIR_ENUM_DECL(virStoragePartedFs);
VIR_CONNECT_LIST_STORAGE_POOLS_SCSI | \
VIR_CONNECT_LIST_STORAGE_POOLS_MPATH | \
VIR_CONNECT_LIST_STORAGE_POOLS_RBD | \
+ VIR_CONNECT_LIST_STORAGE_POOLS_VITASTOR | \
VIR_CONNECT_LIST_STORAGE_POOLS_SHEEPDOG | \
VIR_CONNECT_LIST_STORAGE_POOLS_GLUSTER | \
VIR_CONNECT_LIST_STORAGE_POOLS_ZFS | \
diff --git a/src/conf/storage_source_conf.c b/src/conf/storage_source_conf.c
index 959ec5ed40..e751dd4d6a 100644
--- a/src/conf/storage_source_conf.c
+++ b/src/conf/storage_source_conf.c
@@ -88,6 +88,7 @@ VIR_ENUM_IMPL(virStorageNetProtocol,
"ssh",
"vxhs",
"nfs",
+ "vitastor",
);
@@ -1301,6 +1302,7 @@ virStorageSourceNetworkDefaultPort(virStorageNetProtocol protocol)
case VIR_STORAGE_NET_PROTOCOL_GLUSTER:
return 24007;
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
case VIR_STORAGE_NET_PROTOCOL_RBD:
/* we don't provide a default for RBD */
return 0;
diff --git a/src/conf/storage_source_conf.h b/src/conf/storage_source_conf.h
index 05b4bda16c..b5ed143c39 100644
--- a/src/conf/storage_source_conf.h
+++ b/src/conf/storage_source_conf.h
@@ -129,6 +129,7 @@ typedef enum {
VIR_STORAGE_NET_PROTOCOL_SSH,
VIR_STORAGE_NET_PROTOCOL_VXHS,
VIR_STORAGE_NET_PROTOCOL_NFS,
+ VIR_STORAGE_NET_PROTOCOL_VITASTOR,
VIR_STORAGE_NET_PROTOCOL_LAST
} virStorageNetProtocol;
diff --git a/src/conf/virstorageobj.c b/src/conf/virstorageobj.c
index 59fa5da372..4739167f5f 100644
--- a/src/conf/virstorageobj.c
+++ b/src/conf/virstorageobj.c
@@ -1438,6 +1438,7 @@ virStoragePoolObjSourceFindDuplicateCb(const void *payload,
return 1;
break;
+ case VIR_STORAGE_POOL_VITASTOR:
case VIR_STORAGE_POOL_ISCSI_DIRECT:
case VIR_STORAGE_POOL_RBD:
case VIR_STORAGE_POOL_LAST:
@@ -1921,6 +1922,8 @@ virStoragePoolObjMatch(virStoragePoolObj *obj,
(obj->def->type == VIR_STORAGE_POOL_MPATH)) ||
(MATCH(VIR_CONNECT_LIST_STORAGE_POOLS_RBD) &&
(obj->def->type == VIR_STORAGE_POOL_RBD)) ||
+ (MATCH(VIR_CONNECT_LIST_STORAGE_POOLS_VITASTOR) &&
+ (obj->def->type == VIR_STORAGE_POOL_VITASTOR)) ||
(MATCH(VIR_CONNECT_LIST_STORAGE_POOLS_SHEEPDOG) &&
(obj->def->type == VIR_STORAGE_POOL_SHEEPDOG)) ||
(MATCH(VIR_CONNECT_LIST_STORAGE_POOLS_GLUSTER) &&
diff --git a/src/libvirt-storage.c b/src/libvirt-storage.c
index db7660aac4..561df34709 100644
--- a/src/libvirt-storage.c
+++ b/src/libvirt-storage.c
@@ -94,6 +94,7 @@ virStoragePoolGetConnect(virStoragePoolPtr pool)
* VIR_CONNECT_LIST_STORAGE_POOLS_SCSI
* VIR_CONNECT_LIST_STORAGE_POOLS_MPATH
* VIR_CONNECT_LIST_STORAGE_POOLS_RBD
+ * VIR_CONNECT_LIST_STORAGE_POOLS_VITASTOR
* VIR_CONNECT_LIST_STORAGE_POOLS_SHEEPDOG
* VIR_CONNECT_LIST_STORAGE_POOLS_GLUSTER
* VIR_CONNECT_LIST_STORAGE_POOLS_ZFS
diff --git a/src/libxl/libxl_conf.c b/src/libxl/libxl_conf.c
index 62e1be6672..71a1d42896 100644
--- a/src/libxl/libxl_conf.c
+++ b/src/libxl/libxl_conf.c
@@ -979,6 +979,7 @@ libxlMakeNetworkDiskSrcStr(virStorageSource *src,
case VIR_STORAGE_NET_PROTOCOL_SSH:
case VIR_STORAGE_NET_PROTOCOL_VXHS:
case VIR_STORAGE_NET_PROTOCOL_NFS:
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
case VIR_STORAGE_NET_PROTOCOL_LAST:
case VIR_STORAGE_NET_PROTOCOL_NONE:
virReportError(VIR_ERR_NO_SUPPORT,
diff --git a/src/libxl/xen_xl.c b/src/libxl/xen_xl.c
index 53f6871efc..c34b8cee1a 100644
--- a/src/libxl/xen_xl.c
+++ b/src/libxl/xen_xl.c
@@ -1456,6 +1456,7 @@ xenFormatXLDiskSrcNet(virStorageSource *src)
case VIR_STORAGE_NET_PROTOCOL_SSH:
case VIR_STORAGE_NET_PROTOCOL_VXHS:
case VIR_STORAGE_NET_PROTOCOL_NFS:
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
case VIR_STORAGE_NET_PROTOCOL_LAST:
case VIR_STORAGE_NET_PROTOCOL_NONE:
virReportError(VIR_ERR_NO_SUPPORT,
diff --git a/src/qemu/qemu_block.c b/src/qemu/qemu_block.c
index 738b72d7ea..5dd082fc89 100644
--- a/src/qemu/qemu_block.c
+++ b/src/qemu/qemu_block.c
@@ -758,6 +758,38 @@ qemuBlockStorageSourceGetRBDProps(virStorageSource *src,
}
+static virJSONValue *
+qemuBlockStorageSourceGetVitastorProps(virStorageSource *src)
+{
+ virJSONValue *ret = NULL;
+ virStorageNetHostDef *host;
+ size_t i;
+ g_auto(virBuffer) buf = VIR_BUFFER_INITIALIZER;
+ g_autofree char *etcd = NULL;
+
+ for (i = 0; i < src->nhosts; i++) {
+ host = src->hosts + i;
+ if ((virStorageNetHostTransport)host->transport != VIR_STORAGE_NET_HOST_TRANS_TCP) {
+ return NULL;
+ }
+ virBufferAsprintf(&buf, i > 0 ? ",%s:%u" : "%s:%u", host->name, host->port);
+ }
+ if (src->nhosts > 0) {
+ etcd = virBufferContentAndReset(&buf);
+ }
+
+ if (virJSONValueObjectAdd(&ret,
+ "S:etcd-host", etcd,
+ "S:etcd-prefix", src->query,
+ "S:config-path", src->configFile,
+ "s:image", src->path,
+ NULL) < 0)
+ return NULL;
+
+ return ret;
+}
+
+
static virJSONValue *
qemuBlockStorageSourceGetSheepdogProps(virStorageSource *src)
{
@@ -1140,6 +1172,12 @@ qemuBlockStorageSourceGetBackendProps(virStorageSource *src,
return NULL;
break;
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
+ driver = "vitastor";
+ if (!(fileprops = qemuBlockStorageSourceGetVitastorProps(src)))
+ return NULL;
+ break;
+
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
driver = "sheepdog";
if (!(fileprops = qemuBlockStorageSourceGetSheepdogProps(src)))
@@ -2020,6 +2058,7 @@ qemuBlockGetBackingStoreString(virStorageSource *src,
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
case VIR_STORAGE_NET_PROTOCOL_RBD:
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
case VIR_STORAGE_NET_PROTOCOL_VXHS:
case VIR_STORAGE_NET_PROTOCOL_NFS:
case VIR_STORAGE_NET_PROTOCOL_SSH:
@@ -2400,6 +2439,12 @@ qemuBlockStorageSourceCreateGetStorageProps(virStorageSource *src,
return -1;
break;
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
+ driver = "vitastor";
+ if (!(location = qemuBlockStorageSourceGetVitastorProps(src)))
+ return -1;
+ break;
+
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
driver = "sheepdog";
if (!(location = qemuBlockStorageSourceGetSheepdogProps(src)))
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
index bda62f2e5c..84b4e5f2b8 100644
--- a/src/qemu/qemu_domain.c
+++ b/src/qemu/qemu_domain.c
@@ -5260,7 +5260,8 @@ qemuDomainValidateStorageSource(virStorageSource *src,
if (src->query &&
(actualType != VIR_STORAGE_TYPE_NETWORK ||
(src->protocol != VIR_STORAGE_NET_PROTOCOL_HTTPS &&
- src->protocol != VIR_STORAGE_NET_PROTOCOL_HTTP))) {
+ src->protocol != VIR_STORAGE_NET_PROTOCOL_HTTP &&
+ src->protocol != VIR_STORAGE_NET_PROTOCOL_VITASTOR))) {
virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
_("query is supported only with HTTP(S) protocols"));
return -1;
@@ -10514,6 +10515,7 @@ qemuDomainPrepareStorageSourceTLS(virStorageSource *src,
break;
case VIR_STORAGE_NET_PROTOCOL_RBD:
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
case VIR_STORAGE_NET_PROTOCOL_GLUSTER:
case VIR_STORAGE_NET_PROTOCOL_ISCSI:
diff --git a/src/qemu/qemu_snapshot.c b/src/qemu/qemu_snapshot.c
index f5260c4a22..2f9d8406fe 100644
--- a/src/qemu/qemu_snapshot.c
+++ b/src/qemu/qemu_snapshot.c
@@ -423,6 +423,7 @@ qemuSnapshotPrepareDiskExternalInactive(virDomainSnapshotDiskDef *snapdisk,
case VIR_STORAGE_NET_PROTOCOL_NONE:
case VIR_STORAGE_NET_PROTOCOL_NBD:
case VIR_STORAGE_NET_PROTOCOL_RBD:
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
case VIR_STORAGE_NET_PROTOCOL_GLUSTER:
case VIR_STORAGE_NET_PROTOCOL_ISCSI:
@@ -648,6 +649,7 @@ qemuSnapshotPrepareDiskInternal(virDomainDiskDef *disk,
case VIR_STORAGE_NET_PROTOCOL_NONE:
case VIR_STORAGE_NET_PROTOCOL_NBD:
case VIR_STORAGE_NET_PROTOCOL_RBD:
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
case VIR_STORAGE_NET_PROTOCOL_GLUSTER:
case VIR_STORAGE_NET_PROTOCOL_ISCSI:
diff --git a/src/storage/storage_driver.c b/src/storage/storage_driver.c
index 86c03762d2..630c6eff1a 100644
--- a/src/storage/storage_driver.c
+++ b/src/storage/storage_driver.c
@@ -1626,6 +1626,7 @@ storageVolLookupByPathCallback(virStoragePoolObj *obj,
case VIR_STORAGE_POOL_GLUSTER:
case VIR_STORAGE_POOL_RBD:
+ case VIR_STORAGE_POOL_VITASTOR:
case VIR_STORAGE_POOL_SHEEPDOG:
case VIR_STORAGE_POOL_ZFS:
case VIR_STORAGE_POOL_LAST:
diff --git a/src/storage_file/storage_source_backingstore.c b/src/storage_file/storage_source_backingstore.c
index 80681924ea..8a3ade9ec0 100644
--- a/src/storage_file/storage_source_backingstore.c
+++ b/src/storage_file/storage_source_backingstore.c
@@ -287,6 +287,75 @@ virStorageSourceParseRBDColonString(const char *rbdstr,
}
+static int
+virStorageSourceParseVitastorColonString(const char *colonstr,
+ virStorageSource *src)
+{
+ char *p, *e, *next;
+ g_autofree char *options = NULL;
+
+ /* optionally skip the "vitastor:" prefix if provided */
+ if (STRPREFIX(colonstr, "vitastor:"))
+ colonstr += strlen("vitastor:");
+
+ options = g_strdup(colonstr);
+
+ p = options;
+ while (*p) {
+ /* find : delimiter or end of string */
+ for (e = p; *e && *e != ':'; ++e) {
+ if (*e == '\\') {
+ e++;
+ if (*e == '\0')
+ break;
+ }
+ }
+ if (*e == '\0') {
+ next = e; /* last kv pair */
+ } else {
+ next = e + 1;
+ *e = '\0';
+ }
+
+ if (STRPREFIX(p, "image=")) {
+ src->path = g_strdup(p + strlen("image="));
+ } else if (STRPREFIX(p, "etcd-prefix=")) {
+ src->query = g_strdup(p + strlen("etcd-prefix="));
+ } else if (STRPREFIX(p, "config-path=")) {
+ src->configFile = g_strdup(p + strlen("config-path="));
+ } else if (STRPREFIX(p, "etcd-host=")) {
+ char *h, *sep;
+
+ h = p + strlen("etcd-host=");
+ while (h < e) {
+ for (sep = h; sep < e; ++sep) {
+ if (*sep == '\\' && (sep[1] == ',' ||
+ sep[1] == ';' ||
+ sep[1] == ' ')) {
+ *sep = '\0';
+ sep += 2;
+ break;
+ }
+ }
+
+ if (virStorageSourceRBDAddHost(src, h) < 0)
+ return -1;
+
+ h = sep;
+ }
+ }
+
+ p = next;
+ }
+
+ if (!src->path) {
+ return -1;
+ }
+
+ return 0;
+}
+
+
static int
virStorageSourceParseNBDColonString(const char *nbdstr,
virStorageSource *src)
@@ -399,6 +468,11 @@ virStorageSourceParseBackingColon(virStorageSource *src,
return -1;
break;
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
+ if (virStorageSourceParseVitastorColonString(path, src) < 0)
+ return -1;
+ break;
+
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
case VIR_STORAGE_NET_PROTOCOL_LAST:
case VIR_STORAGE_NET_PROTOCOL_NONE:
@@ -975,6 +1049,54 @@ virStorageSourceParseBackingJSONRBD(virStorageSource *src,
return 0;
}
+static int
+virStorageSourceParseBackingJSONVitastor(virStorageSource *src,
+ virJSONValue *json,
+ const char *jsonstr G_GNUC_UNUSED,
+ int opaque G_GNUC_UNUSED)
+{
+ const char *filename;
+ const char *image = virJSONValueObjectGetString(json, "image");
+ const char *conf = virJSONValueObjectGetString(json, "config-path");
+ const char *etcd_prefix = virJSONValueObjectGetString(json, "etcd-prefix");
+ virJSONValue *servers = virJSONValueObjectGetArray(json, "server");
+ size_t nservers;
+ size_t i;
+
+ src->type = VIR_STORAGE_TYPE_NETWORK;
+ src->protocol = VIR_STORAGE_NET_PROTOCOL_VITASTOR;
+
+ /* legacy syntax passed via 'filename' option */
+ if ((filename = virJSONValueObjectGetString(json, "filename")))
+ return virStorageSourceParseVitastorColonString(filename, src);
+
+ if (!image) {
+ virReportError(VIR_ERR_INVALID_ARG, "%s",
+ _("missing image name in Vitastor backing volume "
+ "JSON specification"));
+ return -1;
+ }
+
+ src->path = g_strdup(image);
+ src->configFile = g_strdup(conf);
+ src->query = g_strdup(etcd_prefix);
+
+ if (servers) {
+ nservers = virJSONValueArraySize(servers);
+
+ src->hosts = g_new0(virStorageNetHostDef, nservers);
+ src->nhosts = nservers;
+
+ for (i = 0; i < nservers; i++) {
+ if (virStorageSourceParseBackingJSONInetSocketAddress(src->hosts + i,
+ virJSONValueArrayGet(servers, i)) < 0)
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
static int
virStorageSourceParseBackingJSONRaw(virStorageSource *src,
virJSONValue *json,
@@ -1152,6 +1274,7 @@ static const struct virStorageSourceJSONDriverParser jsonParsers[] = {
{"sheepdog", false, virStorageSourceParseBackingJSONSheepdog, 0},
{"ssh", false, virStorageSourceParseBackingJSONSSH, 0},
{"rbd", false, virStorageSourceParseBackingJSONRBD, 0},
+ {"vitastor", false, virStorageSourceParseBackingJSONVitastor, 0},
{"raw", true, virStorageSourceParseBackingJSONRaw, 0},
{"nfs", false, virStorageSourceParseBackingJSONNFS, 0},
{"vxhs", false, virStorageSourceParseBackingJSONVxHS, 0},
diff --git a/src/test/test_driver.c b/src/test/test_driver.c
index d2d1bc43e3..31a92e4a01 100644
--- a/src/test/test_driver.c
+++ b/src/test/test_driver.c
@@ -7339,6 +7339,7 @@ testStorageVolumeTypeForPool(int pooltype)
case VIR_STORAGE_POOL_ISCSI_DIRECT:
case VIR_STORAGE_POOL_GLUSTER:
case VIR_STORAGE_POOL_RBD:
+ case VIR_STORAGE_POOL_VITASTOR:
return VIR_STORAGE_VOL_NETWORK;
case VIR_STORAGE_POOL_LOGICAL:
case VIR_STORAGE_POOL_DISK:
diff --git a/tests/storagepoolcapsschemadata/poolcaps-fs.xml b/tests/storagepoolcapsschemadata/poolcaps-fs.xml
index eee75af746..8bd0a57bdd 100644
--- a/tests/storagepoolcapsschemadata/poolcaps-fs.xml
+++ b/tests/storagepoolcapsschemadata/poolcaps-fs.xml
@@ -204,4 +204,11 @@
</enum>
</volOptions>
</pool>
+ <pool type='vitastor' supported='no'>
+ <volOptions>
+ <defaultFormat type='raw'/>
+ <enum name='targetFormatType'>
+ </enum>
+ </volOptions>
+ </pool>
</storagepoolCapabilities>
diff --git a/tests/storagepoolcapsschemadata/poolcaps-full.xml b/tests/storagepoolcapsschemadata/poolcaps-full.xml
index 805950a937..852df0de16 100644
--- a/tests/storagepoolcapsschemadata/poolcaps-full.xml
+++ b/tests/storagepoolcapsschemadata/poolcaps-full.xml
@@ -204,4 +204,11 @@
</enum>
</volOptions>
</pool>
+ <pool type='vitastor' supported='yes'>
+ <volOptions>
+ <defaultFormat type='raw'/>
+ <enum name='targetFormatType'>
+ </enum>
+ </volOptions>
+ </pool>
</storagepoolCapabilities>
diff --git a/tests/storagepoolxml2argvtest.c b/tests/storagepoolxml2argvtest.c
index e8e40d695e..db55fe5f3a 100644
--- a/tests/storagepoolxml2argvtest.c
+++ b/tests/storagepoolxml2argvtest.c
@@ -65,6 +65,7 @@ testCompareXMLToArgvFiles(bool shouldFail,
case VIR_STORAGE_POOL_GLUSTER:
case VIR_STORAGE_POOL_ZFS:
case VIR_STORAGE_POOL_VSTORAGE:
+ case VIR_STORAGE_POOL_VITASTOR:
case VIR_STORAGE_POOL_LAST:
default:
VIR_TEST_DEBUG("pool type '%s' has no xml2argv test", defTypeStr);
diff --git a/tools/virsh-pool.c b/tools/virsh-pool.c
index f9aad8ded0..64704b4288 100644
--- a/tools/virsh-pool.c
+++ b/tools/virsh-pool.c
@@ -1187,6 +1187,9 @@ cmdPoolList(vshControl *ctl, const vshCmd *cmd G_GNUC_UNUSED)
case VIR_STORAGE_POOL_VSTORAGE:
flags |= VIR_CONNECT_LIST_STORAGE_POOLS_VSTORAGE;
break;
+ case VIR_STORAGE_POOL_VITASTOR:
+ flags |= VIR_CONNECT_LIST_STORAGE_POOLS_VITASTOR;
+ break;
case VIR_STORAGE_POOL_LAST:
break;
}

288
patches/nova-28.diff Normal file
View File

@@ -0,0 +1,288 @@
diff --git a/nova/virt/image/model.py b/nova/virt/image/model.py
index 971f7e9c07..ec3fca72cb 100644
--- a/nova/virt/image/model.py
+++ b/nova/virt/image/model.py
@@ -129,3 +129,22 @@ class RBDImage(Image):
self.user = user
self.password = password
self.servers = servers
+
+
+class VitastorImage(Image):
+ """Class for images in a remote Vitastor cluster"""
+
+ def __init__(self, name, etcd_address = None, etcd_prefix = None, config_path = None):
+ """Create a new Vitastor image object
+
+ :param name: name of the image
+ :param etcd_address: etcd URL(s) (optional)
+ :param etcd_prefix: etcd prefix (optional)
+ :param config_path: path to the configuration (optional)
+ """
+ super(VitastorImage, self).__init__(FORMAT_RAW)
+
+ self.name = name
+ self.etcd_address = etcd_address
+ self.etcd_prefix = etcd_prefix
+ self.config_path = config_path
diff --git a/nova/virt/images.py b/nova/virt/images.py
index 5358f3766a..ebe3d6effb 100644
--- a/nova/virt/images.py
+++ b/nova/virt/images.py
@@ -41,7 +41,7 @@ IMAGE_API = glance.API()
def qemu_img_info(path, format=None):
"""Return an object containing the parsed output from qemu-img info."""
- if not os.path.exists(path) and not path.startswith('rbd:'):
+ if not os.path.exists(path) and not path.startswith('rbd:') and not path.startswith('vitastor:'):
raise exception.DiskNotFound(location=path)
info = nova.privsep.qemu.unprivileged_qemu_img_info(path, format=format)
@@ -50,7 +50,7 @@ def qemu_img_info(path, format=None):
def privileged_qemu_img_info(path, format=None, output_format='json'):
"""Return an object containing the parsed output from qemu-img info."""
- if not os.path.exists(path) and not path.startswith('rbd:'):
+ if not os.path.exists(path) and not path.startswith('rbd:') and not path.startswith('vitastor:'):
raise exception.DiskNotFound(location=path)
info = nova.privsep.qemu.privileged_qemu_img_info(path, format=format)
diff --git a/nova/virt/libvirt/config.py b/nova/virt/libvirt/config.py
index f9475776b3..a2e18aab67 100644
--- a/nova/virt/libvirt/config.py
+++ b/nova/virt/libvirt/config.py
@@ -1060,6 +1060,8 @@ class LibvirtConfigGuestDisk(LibvirtConfigGuestDevice):
self.driver_iommu = False
self.source_path = None
self.source_protocol = None
+ self.source_query = None
+ self.source_config = None
self.source_name = None
self.source_hosts = []
self.source_ports = []
@@ -1189,6 +1191,10 @@ class LibvirtConfigGuestDisk(LibvirtConfigGuestDevice):
source = etree.Element("source", protocol=self.source_protocol)
if self.source_name is not None:
source.set('name', self.source_name)
+ if self.source_query is not None:
+ source.set('query', self.source_query)
+ if self.source_config is not None:
+ source.append(etree.Element('config', file=self.source_config))
hosts_info = zip(self.source_hosts, self.source_ports)
for name, port in hosts_info:
host = etree.Element('host', name=name)
diff --git a/nova/virt/libvirt/driver.py b/nova/virt/libvirt/driver.py
index 391231c527..f38faa1608 100644
--- a/nova/virt/libvirt/driver.py
+++ b/nova/virt/libvirt/driver.py
@@ -179,6 +179,7 @@ VOLUME_DRIVERS = {
'local': 'nova.virt.libvirt.volume.volume.LibvirtVolumeDriver',
'fake': 'nova.virt.libvirt.volume.volume.LibvirtFakeVolumeDriver',
'rbd': 'nova.virt.libvirt.volume.net.LibvirtNetVolumeDriver',
+ 'vitastor': 'nova.virt.libvirt.volume.vitastor.LibvirtVitastorVolumeDriver',
'nfs': 'nova.virt.libvirt.volume.nfs.LibvirtNFSVolumeDriver',
'smbfs': 'nova.virt.libvirt.volume.smbfs.LibvirtSMBFSVolumeDriver',
'fibre_channel': 'nova.virt.libvirt.volume.fibrechannel.LibvirtFibreChannelVolumeDriver', # noqa:E501
@@ -385,10 +386,10 @@ class LibvirtDriver(driver.ComputeDriver):
# This prevents the risk of one test setting a capability
# which bleeds over into other tests.
- # LVM and RBD require raw images. If we are not configured to
+ # LVM, RBD, Vitastor require raw images. If we are not configured to
# force convert images into raw format, then we _require_ raw
# images only.
- raw_only = ('rbd', 'lvm')
+ raw_only = ('rbd', 'lvm', 'vitastor')
requires_raw_image = (CONF.libvirt.images_type in raw_only and
not CONF.force_raw_images)
requires_ploop_image = CONF.libvirt.virt_type == 'parallels'
@@ -775,12 +776,12 @@ class LibvirtDriver(driver.ComputeDriver):
# Some imagebackends are only able to import raw disk images,
# and will fail if given any other format. See the bug
# https://bugs.launchpad.net/nova/+bug/1816686 for more details.
- if CONF.libvirt.images_type in ('rbd',):
+ if CONF.libvirt.images_type in ('rbd', 'vitastor'):
if not CONF.force_raw_images:
msg = _("'[DEFAULT]/force_raw_images = False' is not "
- "allowed with '[libvirt]/images_type = rbd'. "
+ "allowed with '[libvirt]/images_type = rbd' or 'vitastor'. "
"Please check the two configs and if you really "
- "do want to use rbd as images_type, set "
+ "do want to use rbd or vitastor as images_type, set "
"force_raw_images to True.")
raise exception.InvalidConfiguration(msg)
@@ -2603,6 +2604,16 @@ class LibvirtDriver(driver.ComputeDriver):
if connection_info['data'].get('auth_enabled'):
username = connection_info['data']['auth_username']
path = f"rbd:{volume_name}:id={username}"
+ elif connection_info['driver_volume_type'] == 'vitastor':
+ volume_name = connection_info['data']['name']
+ path = 'vitastor:image='+volume_name.replace(':', '\\:')
+ for k in [ 'config_path', 'etcd_address', 'etcd_prefix' ]:
+ if k in connection_info['data']:
+ kk = k
+ if kk == 'etcd_address':
+ # FIXME use etcd_address in qemu driver
+ kk = 'etcd_host'
+ path += ":"+kk.replace('_', '-')+"="+connection_info['data'][k].replace(':', '\\:')
else:
path = 'unknown'
raise exception.DiskNotFound(location='unknown')
@@ -2827,8 +2838,8 @@ class LibvirtDriver(driver.ComputeDriver):
image_format = CONF.libvirt.snapshot_image_format or source_type
- # NOTE(bfilippov): save lvm and rbd as raw
- if image_format == 'lvm' or image_format == 'rbd':
+ # NOTE(bfilippov): save lvm and rbd and vitastor as raw
+ if image_format == 'lvm' or image_format == 'rbd' or image_format == 'vitastor':
image_format = 'raw'
metadata = self._create_snapshot_metadata(instance.image_meta,
@@ -2899,7 +2910,7 @@ class LibvirtDriver(driver.ComputeDriver):
expected_state=task_states.IMAGE_UPLOADING)
# TODO(nic): possibly abstract this out to the root_disk
- if source_type == 'rbd' and live_snapshot:
+ if (source_type == 'rbd' or source_type == 'vitastor') and live_snapshot:
# Standard snapshot uses qemu-img convert from RBD which is
# not safe to run with live_snapshot.
live_snapshot = False
@@ -4099,7 +4110,7 @@ class LibvirtDriver(driver.ComputeDriver):
# cleanup rescue volume
lvm.remove_volumes([lvmdisk for lvmdisk in self._lvm_disks(instance)
if lvmdisk.endswith('.rescue')])
- if CONF.libvirt.images_type == 'rbd':
+ if CONF.libvirt.images_type == 'rbd' or CONF.libvirt.images_type == 'vitastor':
filter_fn = lambda disk: (disk.startswith(instance.uuid) and
disk.endswith('.rescue'))
rbd_utils.RBDDriver().cleanup_volumes(filter_fn)
@@ -4356,6 +4367,8 @@ class LibvirtDriver(driver.ComputeDriver):
# TODO(mikal): there is a bug here if images_type has
# changed since creation of the instance, but I am pretty
# sure that this bug already exists.
+ if CONF.libvirt.images_type == 'vitastor':
+ return 'vitastor'
return 'rbd' if CONF.libvirt.images_type == 'rbd' else 'raw'
@staticmethod
@@ -4764,10 +4777,10 @@ class LibvirtDriver(driver.ComputeDriver):
finally:
# NOTE(mikal): if the config drive was imported into RBD,
# then we no longer need the local copy
- if CONF.libvirt.images_type == 'rbd':
+ if CONF.libvirt.images_type == 'rbd' or CONF.libvirt.images_type == 'vitastor':
LOG.info('Deleting local config drive %(path)s '
- 'because it was imported into RBD.',
- {'path': config_disk_local_path},
+ 'because it was imported into %(type).',
+ {'path': config_disk_local_path, 'type': CONF.libvirt.images_type},
instance=instance)
os.unlink(config_disk_local_path)
diff --git a/nova/virt/libvirt/utils.py b/nova/virt/libvirt/utils.py
index da2a6e8b8a..52c02e72f1 100644
--- a/nova/virt/libvirt/utils.py
+++ b/nova/virt/libvirt/utils.py
@@ -340,6 +340,10 @@ def find_disk(guest: libvirt_guest.Guest) -> ty.Tuple[str, ty.Optional[str]]:
disk_path = disk.source_name
if disk_path:
disk_path = 'rbd:' + disk_path
+ elif not disk_path and disk.source_protocol == 'vitastor':
+ disk_path = disk.source_name
+ if disk_path:
+ disk_path = 'vitastor:' + disk_path
if not disk_path:
raise RuntimeError(_("Can't retrieve root device path "
@@ -354,6 +358,8 @@ def get_disk_type_from_path(path: str) -> ty.Optional[str]:
return 'lvm'
elif path.startswith('rbd:'):
return 'rbd'
+ elif path.startswith('vitastor:'):
+ return 'vitastor'
elif (os.path.isdir(path) and
os.path.exists(os.path.join(path, "DiskDescriptor.xml"))):
return 'ploop'
diff --git a/nova/virt/libvirt/volume/vitastor.py b/nova/virt/libvirt/volume/vitastor.py
new file mode 100644
index 0000000000..0256df62c1
--- /dev/null
+++ b/nova/virt/libvirt/volume/vitastor.py
@@ -0,0 +1,75 @@
+# Copyright (c) 2021+, Vitaliy Filippov <vitalif@yourcmc.ru>
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+from os_brick import exception as os_brick_exception
+from os_brick import initiator
+from os_brick.initiator import connector
+from oslo_log import log as logging
+
+import nova.conf
+from nova import utils
+from nova.virt.libvirt.volume import volume as libvirt_volume
+
+
+CONF = nova.conf.CONF
+LOG = logging.getLogger(__name__)
+
+
+class LibvirtVitastorVolumeDriver(libvirt_volume.LibvirtBaseVolumeDriver):
+ """Driver to attach Vitastor volumes to libvirt."""
+ def __init__(self, host):
+ super(LibvirtVitastorVolumeDriver, self).__init__(host, is_block_dev=False)
+
+ def connect_volume(self, connection_info, instance):
+ pass
+
+ def disconnect_volume(self, connection_info, instance, force=False):
+ pass
+
+ def get_config(self, connection_info, disk_info):
+ """Returns xml for libvirt."""
+ conf = super(LibvirtVitastorVolumeDriver, self).get_config(connection_info, disk_info)
+ conf.source_type = 'network'
+ conf.source_protocol = 'vitastor'
+ conf.source_name = connection_info['data'].get('name')
+ conf.source_query = connection_info['data'].get('etcd_prefix') or None
+ conf.source_config = connection_info['data'].get('config_path') or None
+ conf.source_hosts = []
+ conf.source_ports = []
+ addresses = connection_info['data'].get('etcd_address', '')
+ if addresses:
+ if not isinstance(addresses, list):
+ addresses = addresses.split(',')
+ for addr in addresses:
+ if addr.startswith('https://'):
+ raise NotImplementedError('Vitastor block driver does not support SSL for etcd communication yet')
+ if addr.startswith('http://'):
+ addr = addr[7:]
+ addr = addr.rstrip('/')
+ if addr.endswith('/v3'):
+ addr = addr[0:-3]
+ p = addr.find('/')
+ if p > 0:
+ raise NotImplementedError('libvirt does not support custom URL paths for Vitastor etcd yet. Use /etc/vitastor/vitastor.conf')
+ p = addr.find(':')
+ port = '2379'
+ if p > 0:
+ port = addr[p+1:]
+ addr = addr[0:p]
+ conf.source_hosts.append(addr)
+ conf.source_ports.append(port)
+ return conf
+
+ def extend_volume(self, connection_info, instance, requested_size):
+ return requested_size

View File

@@ -0,0 +1,190 @@
diff --git a/block/meson.build b/block/meson.build
index 59ff6d380c..abde3715c2 100644
--- a/block/meson.build
+++ b/block/meson.build
@@ -109,6 +109,7 @@ foreach m : [
[libnfs, 'nfs', files('nfs.c')],
[libssh, 'ssh', files('ssh.c')],
[rbd, 'rbd', files('rbd.c')],
+ [vitastor, 'vitastor', files('vitastor.c')],
]
if m[0].found()
module_ss = ss.source_set()
diff --git a/meson.build b/meson.build
index 6c77d9687d..390683ee71 100644
--- a/meson.build
+++ b/meson.build
@@ -1295,6 +1295,26 @@ if not get_option('rbd').auto() or have_block
endif
endif
+vitastor = not_found
+if not get_option('vitastor').auto() or have_block
+ libvitastor_client = cc.find_library('vitastor_client', has_headers: ['vitastor_c.h'],
+ required: get_option('vitastor'))
+ if libvitastor_client.found()
+ if cc.links('''
+ #include <vitastor_c.h>
+ int main(void) {
+ vitastor_c_create_qemu(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ return 0;
+ }''', dependencies: libvitastor_client)
+ vitastor = declare_dependency(dependencies: libvitastor_client)
+ elif get_option('vitastor').enabled()
+ error('could not link libvitastor_client')
+ else
+ warning('could not link libvitastor_client, disabling')
+ endif
+ endif
+endif
+
glusterfs = not_found
glusterfs_ftruncate_has_stat = false
glusterfs_iocb_has_stat = false
@@ -2157,6 +2177,7 @@ endif
config_host_data.set('CONFIG_OPENGL', opengl.found())
config_host_data.set('CONFIG_PLUGIN', get_option('plugins'))
config_host_data.set('CONFIG_RBD', rbd.found())
+config_host_data.set('CONFIG_VITASTOR', vitastor.found())
config_host_data.set('CONFIG_RDMA', rdma.found())
config_host_data.set('CONFIG_RELOCATABLE', get_option('relocatable'))
config_host_data.set('CONFIG_SAFESTACK', get_option('safe_stack'))
@@ -4356,6 +4377,7 @@ summary_info += {'fdt support': fdt_opt == 'disabled' ? false : fdt_opt}
summary_info += {'libcap-ng support': libcap_ng}
summary_info += {'bpf support': libbpf}
summary_info += {'rbd support': rbd}
+summary_info += {'vitastor support': vitastor}
summary_info += {'smartcard support': cacard}
summary_info += {'U2F support': u2f}
summary_info += {'libusb': libusb}
diff --git a/meson_options.txt b/meson_options.txt
index c9baeda639..85e1df5a56 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -194,6 +194,8 @@ option('lzo', type : 'feature', value : 'auto',
description: 'lzo compression support')
option('rbd', type : 'feature', value : 'auto',
description: 'Ceph block device driver')
+option('vitastor', type : 'feature', value : 'auto',
+ description: 'Vitastor block device driver')
option('opengl', type : 'feature', value : 'auto',
description: 'OpenGL support')
option('rdma', type : 'feature', value : 'auto',
diff --git a/qapi/block-core.json b/qapi/block-core.json
index ca390c5700..8f11ae9fa5 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -3201,7 +3201,7 @@
'parallels', 'preallocate', 'qcow', 'qcow2', 'qed', 'quorum',
'raw', 'rbd',
{ 'name': 'replication', 'if': 'CONFIG_REPLICATION' },
- 'ssh', 'throttle', 'vdi', 'vhdx',
+ 'ssh', 'throttle', 'vdi', 'vhdx', 'vitastor',
{ 'name': 'virtio-blk-vfio-pci', 'if': 'CONFIG_BLKIO' },
{ 'name': 'virtio-blk-vhost-user', 'if': 'CONFIG_BLKIO' },
{ 'name': 'virtio-blk-vhost-vdpa', 'if': 'CONFIG_BLKIO' },
@@ -4255,6 +4255,28 @@
'*key-secret': 'str',
'*server': ['InetSocketAddressBase'] } }
+##
+# @BlockdevOptionsVitastor:
+#
+# Driver specific block device options for vitastor
+#
+# @image: Image name
+# @inode: Inode number
+# @pool: Pool ID
+# @size: Desired image size in bytes
+# @config-path: Path to Vitastor configuration
+# @etcd-host: etcd connection address(es)
+# @etcd-prefix: etcd key/value prefix
+##
+{ 'struct': 'BlockdevOptionsVitastor',
+ 'data': { '*inode': 'uint64',
+ '*pool': 'uint64',
+ '*size': 'uint64',
+ '*image': 'str',
+ '*config-path': 'str',
+ '*etcd-host': 'str',
+ '*etcd-prefix': 'str' } }
+
##
# @ReplicationMode:
#
@@ -4713,6 +4735,7 @@
'throttle': 'BlockdevOptionsThrottle',
'vdi': 'BlockdevOptionsGenericFormat',
'vhdx': 'BlockdevOptionsGenericFormat',
+ 'vitastor': 'BlockdevOptionsVitastor',
'virtio-blk-vfio-pci':
{ 'type': 'BlockdevOptionsVirtioBlkVfioPci',
'if': 'CONFIG_BLKIO' },
@@ -5148,6 +5171,17 @@
'*cluster-size' : 'size',
'*encrypt' : 'RbdEncryptionCreateOptions' } }
+##
+# @BlockdevCreateOptionsVitastor:
+#
+# Driver specific image creation options for Vitastor.
+#
+# @size: Size of the virtual disk in bytes
+##
+{ 'struct': 'BlockdevCreateOptionsVitastor',
+ 'data': { 'location': 'BlockdevOptionsVitastor',
+ 'size': 'size' } }
+
##
# @BlockdevVmdkSubformat:
#
@@ -5370,6 +5404,7 @@
'ssh': 'BlockdevCreateOptionsSsh',
'vdi': 'BlockdevCreateOptionsVdi',
'vhdx': 'BlockdevCreateOptionsVhdx',
+ 'vitastor': 'BlockdevCreateOptionsVitastor',
'vmdk': 'BlockdevCreateOptionsVmdk',
'vpc': 'BlockdevCreateOptionsVpc'
} }
diff --git a/scripts/ci/org.centos/stream/8/x86_64/configure b/scripts/ci/org.centos/stream/8/x86_64/configure
index 76781f17f4..ac5fe3aa08 100755
--- a/scripts/ci/org.centos/stream/8/x86_64/configure
+++ b/scripts/ci/org.centos/stream/8/x86_64/configure
@@ -30,7 +30,7 @@
--with-suffix="qemu-kvm" \
--firmwarepath=/usr/share/qemu-firmware \
--target-list="x86_64-softmmu" \
---block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
+--block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,vitastor,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
--audio-drv-list="" \
--block-drv-ro-whitelist="vmdk,vhdx,vpc,https,ssh" \
--with-coroutine=ucontext \
@@ -176,6 +176,7 @@
--enable-opengl \
--enable-pie \
--enable-rbd \
+--enable-vitastor \
--enable-rdma \
--enable-seccomp \
--enable-snappy \
diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh
index 680fa3f581..dab422bf04 100644
--- a/scripts/meson-buildoptions.sh
+++ b/scripts/meson-buildoptions.sh
@@ -168,6 +168,7 @@ meson_options_help() {
printf "%s\n" ' qed qed image format support'
printf "%s\n" ' qga-vss build QGA VSS support (broken with MinGW)'
printf "%s\n" ' rbd Ceph block device driver'
+ printf "%s\n" ' vitastor Vitastor block device driver'
printf "%s\n" ' rdma Enable RDMA-based migration'
printf "%s\n" ' replication replication support'
printf "%s\n" ' rutabaga-gfx rutabaga_gfx support'
@@ -445,6 +446,8 @@ _meson_option_parse() {
--disable-qom-cast-debug) printf "%s" -Dqom_cast_debug=false ;;
--enable-rbd) printf "%s" -Drbd=enabled ;;
--disable-rbd) printf "%s" -Drbd=disabled ;;
+ --enable-vitastor) printf "%s" -Dvitastor=enabled ;;
+ --disable-vitastor) printf "%s" -Dvitastor=disabled ;;
--enable-rdma) printf "%s" -Drdma=enabled ;;
--disable-rdma) printf "%s" -Drdma=disabled ;;
--enable-relocatable) printf "%s" -Drelocatable=true ;;

View File

@@ -0,0 +1,190 @@
diff --git a/block/meson.build b/block/meson.build
index e1f03fd773..db0cfb2321 100644
--- a/block/meson.build
+++ b/block/meson.build
@@ -114,6 +114,7 @@ foreach m : [
[libnfs, 'nfs', files('nfs.c')],
[libssh, 'ssh', files('ssh.c')],
[rbd, 'rbd', files('rbd.c')],
+ [vitastor, 'vitastor', files('vitastor.c')],
]
if m[0].found()
module_ss = ss.source_set()
diff --git a/meson.build b/meson.build
index 91a0aa64c6..e8bc710578 100644
--- a/meson.build
+++ b/meson.build
@@ -1452,6 +1452,26 @@ if not get_option('rbd').auto() or have_block
endif
endif
+vitastor = not_found
+if not get_option('vitastor').auto() or have_block
+ libvitastor_client = cc.find_library('vitastor_client', has_headers: ['vitastor_c.h'],
+ required: get_option('vitastor'))
+ if libvitastor_client.found()
+ if cc.links('''
+ #include <vitastor_c.h>
+ int main(void) {
+ vitastor_c_create_qemu(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ return 0;
+ }''', dependencies: libvitastor_client)
+ vitastor = declare_dependency(dependencies: libvitastor_client)
+ elif get_option('vitastor').enabled()
+ error('could not link libvitastor_client')
+ else
+ warning('could not link libvitastor_client, disabling')
+ endif
+ endif
+endif
+
glusterfs = not_found
glusterfs_ftruncate_has_stat = false
glusterfs_iocb_has_stat = false
@@ -2250,6 +2270,7 @@ endif
config_host_data.set('CONFIG_OPENGL', opengl.found())
config_host_data.set('CONFIG_PLUGIN', get_option('plugins'))
config_host_data.set('CONFIG_RBD', rbd.found())
+config_host_data.set('CONFIG_VITASTOR', vitastor.found())
config_host_data.set('CONFIG_RDMA', rdma.found())
config_host_data.set('CONFIG_RELOCATABLE', get_option('relocatable'))
config_host_data.set('CONFIG_SAFESTACK', get_option('safe_stack'))
@@ -4443,6 +4464,7 @@ summary_info += {'fdt support': fdt_opt == 'disabled' ? false : fdt_opt}
summary_info += {'libcap-ng support': libcap_ng}
summary_info += {'bpf support': libbpf}
summary_info += {'rbd support': rbd}
+summary_info += {'vitastor support': vitastor}
summary_info += {'smartcard support': cacard}
summary_info += {'U2F support': u2f}
summary_info += {'libusb': libusb}
diff --git a/meson_options.txt b/meson_options.txt
index 0a99a059ec..16dc440118 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -194,6 +194,8 @@ option('lzo', type : 'feature', value : 'auto',
description: 'lzo compression support')
option('rbd', type : 'feature', value : 'auto',
description: 'Ceph block device driver')
+option('vitastor', type : 'feature', value : 'auto',
+ description: 'Vitastor block device driver')
option('opengl', type : 'feature', value : 'auto',
description: 'OpenGL support')
option('rdma', type : 'feature', value : 'auto',
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 746d1694c2..fb7aa4423b 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -3203,7 +3203,7 @@
'parallels', 'preallocate', 'qcow', 'qcow2', 'qed', 'quorum',
'raw', 'rbd',
{ 'name': 'replication', 'if': 'CONFIG_REPLICATION' },
- 'ssh', 'throttle', 'vdi', 'vhdx',
+ 'ssh', 'throttle', 'vdi', 'vhdx', 'vitastor',
{ 'name': 'virtio-blk-vfio-pci', 'if': 'CONFIG_BLKIO' },
{ 'name': 'virtio-blk-vhost-user', 'if': 'CONFIG_BLKIO' },
{ 'name': 'virtio-blk-vhost-vdpa', 'if': 'CONFIG_BLKIO' },
@@ -4285,6 +4285,28 @@
'*key-secret': 'str',
'*server': ['InetSocketAddressBase'] } }
+##
+# @BlockdevOptionsVitastor:
+#
+# Driver specific block device options for vitastor
+#
+# @image: Image name
+# @inode: Inode number
+# @pool: Pool ID
+# @size: Desired image size in bytes
+# @config-path: Path to Vitastor configuration
+# @etcd-host: etcd connection address(es)
+# @etcd-prefix: etcd key/value prefix
+##
+{ 'struct': 'BlockdevOptionsVitastor',
+ 'data': { '*inode': 'uint64',
+ '*pool': 'uint64',
+ '*size': 'uint64',
+ '*image': 'str',
+ '*config-path': 'str',
+ '*etcd-host': 'str',
+ '*etcd-prefix': 'str' } }
+
##
# @ReplicationMode:
#
@@ -4741,6 +4763,7 @@
'throttle': 'BlockdevOptionsThrottle',
'vdi': 'BlockdevOptionsGenericFormat',
'vhdx': 'BlockdevOptionsGenericFormat',
+ 'vitastor': 'BlockdevOptionsVitastor',
'virtio-blk-vfio-pci':
{ 'type': 'BlockdevOptionsVirtioBlkVfioPci',
'if': 'CONFIG_BLKIO' },
@@ -5180,6 +5203,17 @@
'*cluster-size' : 'size',
'*encrypt' : 'RbdEncryptionCreateOptions' } }
+##
+# @BlockdevCreateOptionsVitastor:
+#
+# Driver specific image creation options for Vitastor.
+#
+# @size: Size of the virtual disk in bytes
+##
+{ 'struct': 'BlockdevCreateOptionsVitastor',
+ 'data': { 'location': 'BlockdevOptionsVitastor',
+ 'size': 'size' } }
+
##
# @BlockdevVmdkSubformat:
#
@@ -5402,6 +5436,7 @@
'ssh': 'BlockdevCreateOptionsSsh',
'vdi': 'BlockdevCreateOptionsVdi',
'vhdx': 'BlockdevCreateOptionsVhdx',
+ 'vitastor': 'BlockdevCreateOptionsVitastor',
'vmdk': 'BlockdevCreateOptionsVmdk',
'vpc': 'BlockdevCreateOptionsVpc'
} }
diff --git a/scripts/ci/org.centos/stream/8/x86_64/configure b/scripts/ci/org.centos/stream/8/x86_64/configure
index 76781f17f4..ac5fe3aa08 100755
--- a/scripts/ci/org.centos/stream/8/x86_64/configure
+++ b/scripts/ci/org.centos/stream/8/x86_64/configure
@@ -30,7 +30,7 @@
--with-suffix="qemu-kvm" \
--firmwarepath=/usr/share/qemu-firmware \
--target-list="x86_64-softmmu" \
---block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
+--block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,vitastor,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
--audio-drv-list="" \
--block-drv-ro-whitelist="vmdk,vhdx,vpc,https,ssh" \
--with-coroutine=ucontext \
@@ -176,6 +176,7 @@
--enable-opengl \
--enable-pie \
--enable-rbd \
+--enable-vitastor \
--enable-rdma \
--enable-seccomp \
--enable-snappy \
diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh
index 680fa3f581..dab422bf04 100644
--- a/scripts/meson-buildoptions.sh
+++ b/scripts/meson-buildoptions.sh
@@ -168,6 +168,7 @@ meson_options_help() {
printf "%s\n" ' qed qed image format support'
printf "%s\n" ' qga-vss build QGA VSS support (broken with MinGW)'
printf "%s\n" ' rbd Ceph block device driver'
+ printf "%s\n" ' vitastor Vitastor block device driver'
printf "%s\n" ' rdma Enable RDMA-based migration'
printf "%s\n" ' replication replication support'
printf "%s\n" ' rutabaga-gfx rutabaga_gfx support'
@@ -445,6 +446,8 @@ _meson_option_parse() {
--disable-qom-cast-debug) printf "%s" -Dqom_cast_debug=false ;;
--enable-rbd) printf "%s" -Drbd=enabled ;;
--disable-rbd) printf "%s" -Drbd=disabled ;;
+ --enable-vitastor) printf "%s" -Dvitastor=enabled ;;
+ --disable-vitastor) printf "%s" -Dvitastor=disabled ;;
--enable-rdma) printf "%s" -Drdma=enabled ;;
--disable-rdma) printf "%s" -Drdma=disabled ;;
--enable-relocatable) printf "%s" -Drelocatable=true ;;

View File

@@ -9,7 +9,7 @@ for i in "$DIR"/qemu-*-vitastor.patch "$DIR"/pve-qemu-*-vitastor.patch; do
echo '===================================================================' >> $i
echo '--- /dev/null' >> $i
echo '+++ a/block/vitastor.c' >> $i
echo '@@ -0,0 +1,'$(wc -l "$DIR"/../src/qemu_driver.c | cut -d ' ' -f 1)' @@' >> $i
cat "$DIR"/../src/qemu_driver.c | sed 's/^/+/' >> $i
echo '@@ -0,0 +1,'$(wc -l "$DIR"/../src/client/qemu_driver.c | cut -d ' ' -f 1)' @@' >> $i
cat "$DIR"/../src/client/qemu_driver.c | sed 's/^/+/' >> $i
fi
done

View File

@@ -24,4 +24,4 @@ rm fio
mv fio-copy fio
FIO=`rpm -qi fio | perl -e 'while(<>) { /^Epoch[\s:]+(\S+)/ && print "$1:"; /^Version[\s:]+(\S+)/ && print $1; /^Release[\s:]+(\S+)/ && print "-$1"; }'`
perl -i -pe 's/(Requires:\s*fio)([^\n]+)?/$1 = '$FIO'/' $VITASTOR/rpm/vitastor-el$EL.spec
tar --transform 's#^#vitastor-1.6.0/#' --exclude 'rpm/*.rpm' -czf $VITASTOR/../vitastor-1.6.0$(rpm --eval '%dist').tar.gz *
tar --transform 's#^#vitastor-1.6.1/#' --exclude 'rpm/*.rpm' -czf $VITASTOR/../vitastor-1.6.1$(rpm --eval '%dist').tar.gz *

View File

@@ -1,5 +1,5 @@
# This is an attempt to automatically build patched RPM specs
# More or less broken, better use *.spec.patch for now (and copy src/qemu_driver.c to SOURCES/qemu-vitastor.c)
# More or less broken, better use *.spec.patch for now (and copy src/client/qemu_driver.c to SOURCES/qemu-vitastor.c)
# Build packages for CentOS 8 inside a container
# cd ..; podman build -t qemu-el8 -v `pwd`/packages:/root/packages -f rpm/qemu-el8.Dockerfile .

View File

@@ -36,7 +36,7 @@ ADD . /root/vitastor
RUN set -e; \
cd /root/vitastor/rpm; \
sh build-tarball.sh; \
cp /root/vitastor-1.6.0.el7.tar.gz ~/rpmbuild/SOURCES; \
cp /root/vitastor-1.6.1.el7.tar.gz ~/rpmbuild/SOURCES; \
cp vitastor-el7.spec ~/rpmbuild/SPECS/vitastor.spec; \
cd ~/rpmbuild/SPECS/; \
rpmbuild -ba vitastor.spec; \

View File

@@ -1,11 +1,11 @@
Name: vitastor
Version: 1.6.0
Version: 1.6.1
Release: 1%{?dist}
Summary: Vitastor, a fast software-defined clustered block storage
License: Vitastor Network Public License 1.1
URL: https://vitastor.io/
Source0: vitastor-1.6.0.el7.tar.gz
Source0: vitastor-1.6.1.el7.tar.gz
BuildRequires: liburing-devel >= 0.6
BuildRequires: gperftools-devel
@@ -104,14 +104,15 @@ rm -rf $RPM_BUILD_ROOT
%make_install
. /opt/rh/rh-nodejs12/enable
cd mon
npm install
npm install --production
cd ..
mkdir -p %buildroot/usr/lib/vitastor
cp -r mon %buildroot/usr/lib/vitastor
mv %buildroot/usr/lib/vitastor/mon/scripts/make-etcd %buildroot/usr/lib/vitastor/mon/
mkdir -p %buildroot/lib/systemd/system
cp mon/vitastor.target mon/vitastor-mon.service mon/vitastor-osd@.service %buildroot/lib/systemd/system
cp mon/scripts/vitastor.target mon/scripts/vitastor-mon.service mon/scripts/vitastor-osd@.service %buildroot/lib/systemd/system
mkdir -p %buildroot/lib/udev/rules.d
cp mon/90-vitastor.rules %buildroot/lib/udev/rules.d
cp mon/scripts/90-vitastor.rules %buildroot/lib/udev/rules.d
%files

View File

@@ -35,7 +35,7 @@ ADD . /root/vitastor
RUN set -e; \
cd /root/vitastor/rpm; \
sh build-tarball.sh; \
cp /root/vitastor-1.6.0.el8.tar.gz ~/rpmbuild/SOURCES; \
cp /root/vitastor-1.6.1.el8.tar.gz ~/rpmbuild/SOURCES; \
cp vitastor-el8.spec ~/rpmbuild/SPECS/vitastor.spec; \
cd ~/rpmbuild/SPECS/; \
rpmbuild -ba vitastor.spec; \

View File

@@ -1,11 +1,11 @@
Name: vitastor
Version: 1.6.0
Version: 1.6.1
Release: 1%{?dist}
Summary: Vitastor, a fast software-defined clustered block storage
License: Vitastor Network Public License 1.1
URL: https://vitastor.io/
Source0: vitastor-1.6.0.el8.tar.gz
Source0: vitastor-1.6.1.el8.tar.gz
BuildRequires: liburing-devel >= 0.6
BuildRequires: gperftools-devel
@@ -101,14 +101,15 @@ Vitastor fio drivers for benchmarking.
rm -rf $RPM_BUILD_ROOT
%make_install
cd mon
npm install
npm install --production
cd ..
mkdir -p %buildroot/usr/lib/vitastor
cp -r mon %buildroot/usr/lib/vitastor
mv %buildroot/usr/lib/vitastor/mon/scripts/make-etcd %buildroot/usr/lib/vitastor/mon/
mkdir -p %buildroot/lib/systemd/system
cp mon/vitastor.target mon/vitastor-mon.service mon/vitastor-osd@.service %buildroot/lib/systemd/system
cp mon/scripts/vitastor.target mon/scripts/vitastor-mon.service mon/scripts/vitastor-osd@.service %buildroot/lib/systemd/system
mkdir -p %buildroot/lib/udev/rules.d
cp mon/90-vitastor.rules %buildroot/lib/udev/rules.d
cp mon/scripts/90-vitastor.rules %buildroot/lib/udev/rules.d
%files

View File

@@ -18,7 +18,7 @@ ADD . /root/vitastor
RUN set -e; \
cd /root/vitastor/rpm; \
sh build-tarball.sh; \
cp /root/vitastor-1.6.0.el9.tar.gz ~/rpmbuild/SOURCES; \
cp /root/vitastor-1.6.1.el9.tar.gz ~/rpmbuild/SOURCES; \
cp vitastor-el9.spec ~/rpmbuild/SPECS/vitastor.spec; \
cd ~/rpmbuild/SPECS/; \
rpmbuild -ba vitastor.spec; \

View File

@@ -1,11 +1,11 @@
Name: vitastor
Version: 1.6.0
Version: 1.6.1
Release: 1%{?dist}
Summary: Vitastor, a fast software-defined clustered block storage
License: Vitastor Network Public License 1.1
URL: https://vitastor.io/
Source0: vitastor-1.6.0.el9.tar.gz
Source0: vitastor-1.6.1.el9.tar.gz
BuildRequires: liburing-devel >= 0.6
BuildRequires: gperftools-devel
@@ -94,14 +94,15 @@ Vitastor fio drivers for benchmarking.
rm -rf $RPM_BUILD_ROOT
%cmake_install
cd mon
npm install
npm install --production
cd ..
mkdir -p %buildroot/usr/lib/vitastor
cp -r mon %buildroot/usr/lib/vitastor
mv %buildroot/usr/lib/vitastor/mon/scripts/make-etcd %buildroot/usr/lib/vitastor/mon/
mkdir -p %buildroot/lib/systemd/system
cp mon/vitastor.target mon/vitastor-mon.service mon/vitastor-osd@.service %buildroot/lib/systemd/system
cp mon/scripts/vitastor.target mon/scripts/vitastor-mon.service mon/scripts/vitastor-osd@.service %buildroot/lib/systemd/system
mkdir -p %buildroot/lib/udev/rules.d
cp mon/90-vitastor.rules %buildroot/lib/udev/rules.d
cp mon/scripts/90-vitastor.rules %buildroot/lib/udev/rules.d
%files

View File

@@ -19,7 +19,7 @@ if("${CMAKE_INSTALL_PREFIX}" MATCHES "^/usr/local/?$")
set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}")
endif()
add_definitions(-DVERSION="1.6.0")
add_definitions(-DVERSION="1.6.1")
add_definitions(-D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -Wall -Wno-sign-compare -Wno-comment -Wno-parentheses -Wno-pointer-arith -fdiagnostics-color=always -fno-omit-frame-pointer -I ${CMAKE_SOURCE_DIR}/src)
add_link_options(-fno-omit-frame-pointer)
if (${WITH_ASAN})
@@ -72,300 +72,28 @@ add_dependencies(test build_tests)
include_directories(
../
${CMAKE_SOURCE_DIR}/src/blockstore
${CMAKE_SOURCE_DIR}/src/cmd
${CMAKE_SOURCE_DIR}/src/client
${CMAKE_SOURCE_DIR}/src/disk_tool
${CMAKE_SOURCE_DIR}/src/kv
${CMAKE_SOURCE_DIR}/src/nfs
${CMAKE_SOURCE_DIR}/src/osd
${CMAKE_SOURCE_DIR}/src/test
${CMAKE_SOURCE_DIR}/src/util
/usr/include/jerasure
${LIBURING_INCLUDE_DIRS}
${IBVERBS_INCLUDE_DIRS}
)
# libvitastor_blk.so
add_library(vitastor_blk SHARED
allocator.cpp blockstore.cpp blockstore_impl.cpp blockstore_disk.cpp blockstore_init.cpp blockstore_open.cpp blockstore_journal.cpp blockstore_read.cpp
blockstore_write.cpp blockstore_sync.cpp blockstore_stable.cpp blockstore_rollback.cpp blockstore_flush.cpp crc32c.c ringloop.cpp
)
target_link_libraries(vitastor_blk
${LIBURING_LIBRARIES}
tcmalloc_minimal
# for timerfd_manager
vitastor_common
)
set_target_properties(vitastor_blk PROPERTIES VERSION ${VERSION} SOVERSION 0)
if (${WITH_FIO})
# libfio_vitastor_blk.so
add_library(fio_vitastor_blk SHARED
fio_engine.cpp
../json11/json11.cpp
)
target_link_libraries(fio_vitastor_blk
vitastor_blk
)
endif (${WITH_FIO})
# libvitastor_common.a
set(MSGR_RDMA "")
if (IBVERBS_LIBRARIES)
set(MSGR_RDMA "msgr_rdma.cpp")
endif (IBVERBS_LIBRARIES)
add_library(vitastor_common STATIC
epoll_manager.cpp etcd_state_client.cpp messenger.cpp addr_util.cpp
msgr_stop.cpp msgr_op.cpp msgr_send.cpp msgr_receive.cpp ringloop.cpp ../json11/json11.cpp
http_client.cpp osd_ops.cpp pg_states.cpp timerfd_manager.cpp str_util.cpp ${MSGR_RDMA}
)
target_compile_options(vitastor_common PUBLIC -fPIC)
# vitastor-osd
add_executable(vitastor-osd
osd_main.cpp osd.cpp osd_secondary.cpp osd_peering.cpp osd_flush.cpp osd_peering_pg.cpp
osd_primary.cpp osd_primary_chain.cpp osd_primary_sync.cpp osd_primary_write.cpp osd_primary_subops.cpp
osd_cluster.cpp osd_rmw.cpp osd_scrub.cpp osd_primary_describe.cpp
)
target_link_libraries(vitastor-osd
vitastor_common
vitastor_blk
Jerasure
${ISAL_LIBRARIES}
${IBVERBS_LIBRARIES}
)
if (${WITH_FIO})
# libfio_vitastor_sec.so
add_library(fio_vitastor_sec SHARED
fio_sec_osd.cpp
rw_blocking.cpp
addr_util.cpp
)
target_link_libraries(fio_vitastor_sec
tcmalloc_minimal
)
endif (${WITH_FIO})
# libvitastor_client.so
add_library(vitastor_client SHARED
cluster_client.cpp
cluster_client_list.cpp
cluster_client_wb.cpp
vitastor_c.cpp
cli_common.cpp
cli_alloc_osd.cpp
cli_status.cpp
cli_describe.cpp
cli_fix.cpp
cli_ls.cpp
cli_create.cpp
cli_modify.cpp
cli_flatten.cpp
cli_merge.cpp
cli_rm_data.cpp
cli_rm.cpp
cli_rm_osd.cpp
cli_pool_cfg.cpp
cli_pool_create.cpp
cli_pool_ls.cpp
cli_pool_modify.cpp
cli_pool_rm.cpp
)
set_target_properties(vitastor_client PROPERTIES PUBLIC_HEADER "vitastor_c.h")
target_link_libraries(vitastor_client
vitastor_common
${LIBURING_LIBRARIES}
${IBVERBS_LIBRARIES}
)
set_target_properties(vitastor_client PROPERTIES VERSION ${VERSION} SOVERSION 0)
if (${WITH_FIO})
# libfio_vitastor.so
add_library(fio_vitastor SHARED
fio_cluster.cpp
)
target_link_libraries(fio_vitastor
vitastor_client
)
endif (${WITH_FIO})
# vitastor-nbd
pkg_check_modules(NL3 libnl-3.0 libnl-genl-3.0)
add_executable(vitastor-nbd
nbd_proxy.cpp
)
target_include_directories(vitastor-nbd PUBLIC ${NL3_INCLUDE_DIRS})
target_link_libraries(vitastor-nbd vitastor_client ${NL3_LIBRARIES})
if (HAVE_NBD_NETLINK_H AND NL3_LIBRARIES)
target_compile_definitions(vitastor-nbd PUBLIC HAVE_NBD_NETLINK_H)
endif (HAVE_NBD_NETLINK_H AND NL3_LIBRARIES)
# libvitastor_kv.so
add_library(vitastor_kv SHARED
kv_db.cpp
kv_db.h
)
target_link_libraries(vitastor_kv
vitastor_client
)
set_target_properties(vitastor_kv PROPERTIES VERSION ${VERSION} SOVERSION 0)
# vitastor-kv
add_executable(vitastor-kv
kv_cli.cpp
)
target_link_libraries(vitastor-kv
vitastor_kv
)
add_executable(vitastor-kv-stress
kv_stress.cpp
)
target_link_libraries(vitastor-kv-stress
vitastor_kv
)
# vitastor-nfs
add_executable(vitastor-nfs
nfs_proxy.cpp
nfs_block.cpp
nfs_kv.cpp
nfs_kv_create.cpp
nfs_kv_getattr.cpp
nfs_kv_link.cpp
nfs_kv_lookup.cpp
nfs_kv_read.cpp
nfs_kv_readdir.cpp
nfs_kv_remove.cpp
nfs_kv_rename.cpp
nfs_kv_setattr.cpp
nfs_kv_write.cpp
nfs_fsstat.cpp
nfs_mount.cpp
nfs_portmap.cpp
sha256.c
nfs/xdr_impl.cpp
nfs/rpc_xdr.cpp
nfs/portmap_xdr.cpp
nfs/nfs_xdr.cpp
)
target_link_libraries(vitastor-nfs
vitastor_client
vitastor_kv
)
# vitastor-cli
add_executable(vitastor-cli
cli.cpp
)
target_link_libraries(vitastor-cli
vitastor_client
)
configure_file(vitastor.pc.in vitastor.pc @ONLY)
# vitastor-disk
add_executable(vitastor-disk
disk_tool.cpp disk_simple_offsets.cpp
disk_tool_journal.cpp disk_tool_meta.cpp disk_tool_prepare.cpp disk_tool_resize.cpp disk_tool_udev.cpp disk_tool_utils.cpp disk_tool_upgrade.cpp
crc32c.c str_util.cpp ../json11/json11.cpp rw_blocking.cpp allocator.cpp ringloop.cpp blockstore_disk.cpp
)
target_link_libraries(vitastor-disk
tcmalloc_minimal
${LIBURING_LIBRARIES}
)
if (${WITH_QEMU})
# qemu_driver.so
add_library(qemu_vitastor SHARED
qemu_driver.c
)
target_compile_options(qemu_vitastor PUBLIC -DVITASTOR_SOURCE_TREE)
target_include_directories(qemu_vitastor PUBLIC
../qemu/b/qemu
../qemu/include
${GLIB_INCLUDE_DIRS}
)
target_link_libraries(qemu_vitastor
vitastor_client
)
set_target_properties(qemu_vitastor PROPERTIES
PREFIX ""
OUTPUT_NAME "block-vitastor"
)
endif (${WITH_QEMU})
### Test stubs
# stub_osd, stub_bench, osd_test
add_executable(stub_osd stub_osd.cpp rw_blocking.cpp addr_util.cpp)
target_link_libraries(stub_osd tcmalloc_minimal)
add_executable(stub_bench stub_bench.cpp rw_blocking.cpp addr_util.cpp)
target_link_libraries(stub_bench tcmalloc_minimal)
add_executable(osd_test osd_test.cpp rw_blocking.cpp addr_util.cpp)
target_link_libraries(osd_test tcmalloc_minimal)
# osd_rmw_test
add_executable(osd_rmw_test EXCLUDE_FROM_ALL osd_rmw_test.cpp allocator.cpp)
target_link_libraries(osd_rmw_test Jerasure ${ISAL_LIBRARIES} tcmalloc_minimal)
add_dependencies(build_tests osd_rmw_test)
add_test(NAME osd_rmw_test COMMAND osd_rmw_test)
if (ISAL_LIBRARIES)
add_executable(osd_rmw_test_je EXCLUDE_FROM_ALL osd_rmw_test.cpp allocator.cpp)
target_compile_definitions(osd_rmw_test_je PUBLIC -DNO_ISAL)
target_link_libraries(osd_rmw_test_je Jerasure tcmalloc_minimal)
add_dependencies(build_tests osd_rmw_test_je)
add_test(NAME osd_rmw_test_jerasure COMMAND osd_rmw_test_je)
endif (ISAL_LIBRARIES)
# stub_uring_osd
add_executable(stub_uring_osd
stub_uring_osd.cpp
)
target_link_libraries(stub_uring_osd
vitastor_common
${LIBURING_LIBRARIES}
${IBVERBS_LIBRARIES}
tcmalloc_minimal
)
# osd_peering_pg_test
add_executable(osd_peering_pg_test EXCLUDE_FROM_ALL osd_peering_pg_test.cpp osd_peering_pg.cpp)
target_link_libraries(osd_peering_pg_test tcmalloc_minimal)
add_dependencies(build_tests osd_peering_pg_test)
add_test(NAME osd_peering_pg_test COMMAND osd_peering_pg_test)
# test_allocator
add_executable(test_allocator EXCLUDE_FROM_ALL test_allocator.cpp allocator.cpp)
add_dependencies(build_tests test_allocator)
add_test(NAME test_allocator COMMAND test_allocator)
# test_cas
add_executable(test_cas
test_cas.cpp
)
target_link_libraries(test_cas
vitastor_client
)
# test_crc32
add_executable(test_crc32
test_crc32.cpp
)
target_link_libraries(test_crc32
vitastor_blk
)
# test_cluster_client
add_executable(test_cluster_client
EXCLUDE_FROM_ALL
test_cluster_client.cpp
pg_states.cpp osd_ops.cpp cluster_client.cpp cluster_client_list.cpp cluster_client_wb.cpp msgr_op.cpp mock/messenger.cpp msgr_stop.cpp
etcd_state_client.cpp timerfd_manager.cpp str_util.cpp ../json11/json11.cpp
)
target_compile_definitions(test_cluster_client PUBLIC -D__MOCK__)
target_include_directories(test_cluster_client PUBLIC ${CMAKE_SOURCE_DIR}/src/mock)
add_dependencies(build_tests test_cluster_client)
add_test(NAME test_cluster_client COMMAND test_cluster_client)
## test_blockstore, test_shit
#add_executable(test_blockstore test_blockstore.cpp)
#target_link_libraries(test_blockstore blockstore)
#add_executable(test_shit test_shit.cpp osd_peering_pg.cpp)
#target_link_libraries(test_shit ${LIBURING_LIBRARIES} m)
add_subdirectory(blockstore)
add_subdirectory(cmd)
add_subdirectory(client)
add_subdirectory(disk_tool)
add_subdirectory(kv)
add_subdirectory(nfs)
add_subdirectory(osd)
add_subdirectory(test)
### Install
@@ -378,7 +106,7 @@ install(
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
)
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/vitastor.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig)
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/client/vitastor.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig)
if (${WITH_FIO})
install(TARGETS fio_vitastor fio_vitastor_blk fio_vitastor_sec LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR})
endif (${WITH_FIO})

View File

@@ -0,0 +1,27 @@
cmake_minimum_required(VERSION 2.8.12)
project(vitastor)
# libvitastor_blk.so
add_library(vitastor_blk SHARED
../util/allocator.cpp blockstore.cpp blockstore_impl.cpp blockstore_disk.cpp blockstore_init.cpp blockstore_open.cpp blockstore_journal.cpp blockstore_read.cpp
blockstore_write.cpp blockstore_sync.cpp blockstore_stable.cpp blockstore_rollback.cpp blockstore_flush.cpp ../util/crc32c.c ../util/ringloop.cpp
)
target_link_libraries(vitastor_blk
${LIBURING_LIBRARIES}
tcmalloc_minimal
# for timerfd_manager
vitastor_common
)
set_target_properties(vitastor_blk PROPERTIES VERSION ${VERSION} SOVERSION 0)
if (${WITH_FIO})
# libfio_vitastor_blk.so
add_library(fio_vitastor_blk SHARED
fio_engine.cpp
../../json11/json11.cpp
)
target_link_libraries(fio_vitastor_blk
vitastor_blk
)
endif (${WITH_FIO})

View File

@@ -366,6 +366,7 @@ resume_0:
!flusher->flush_queue.size() || !flusher->dequeuing)
{
stop_flusher:
flusher->dequeuing = false;
if (flusher->trim_wanted > 0 && try_trim)
{
// Attempt forced trim
@@ -373,7 +374,6 @@ stop_flusher:
flusher->active_flushers++;
goto trim_journal;
}
flusher->dequeuing = false;
wait_state = 0;
return true;
}

95
src/client/CMakeLists.txt Normal file
View File

@@ -0,0 +1,95 @@
cmake_minimum_required(VERSION 2.8.12)
project(vitastor)
# libvitastor_common.a
set(MSGR_RDMA "")
if (IBVERBS_LIBRARIES)
set(MSGR_RDMA "msgr_rdma.cpp")
endif (IBVERBS_LIBRARIES)
add_library(vitastor_common STATIC
../util/epoll_manager.cpp etcd_state_client.cpp messenger.cpp ../util/addr_util.cpp
msgr_stop.cpp msgr_op.cpp msgr_send.cpp msgr_receive.cpp ../util/ringloop.cpp ../../json11/json11.cpp
http_client.cpp osd_ops.cpp pg_states.cpp ../util/timerfd_manager.cpp ../util/str_util.cpp ${MSGR_RDMA}
)
target_compile_options(vitastor_common PUBLIC -fPIC)
# libvitastor_client.so
add_library(vitastor_client SHARED
cluster_client.cpp
cluster_client_list.cpp
cluster_client_wb.cpp
vitastor_c.cpp
)
set_target_properties(vitastor_client PROPERTIES PUBLIC_HEADER "client/vitastor_c.h")
target_link_libraries(vitastor_client
vitastor_common
vitastor_cli
${LIBURING_LIBRARIES}
${IBVERBS_LIBRARIES}
)
set_target_properties(vitastor_client PROPERTIES VERSION ${VERSION} SOVERSION 0)
configure_file(vitastor.pc.in vitastor.pc @ONLY)
if (${WITH_FIO})
# libfio_vitastor.so
add_library(fio_vitastor SHARED
fio_cluster.cpp
)
target_link_libraries(fio_vitastor
vitastor_client
)
# libfio_vitastor_sec.so
add_library(fio_vitastor_sec SHARED
fio_sec_osd.cpp
../util/rw_blocking.cpp
../util/addr_util.cpp
)
target_link_libraries(fio_vitastor_sec
tcmalloc_minimal
)
endif (${WITH_FIO})
# vitastor-nbd
pkg_check_modules(NL3 libnl-3.0 libnl-genl-3.0)
add_executable(vitastor-nbd
nbd_proxy.cpp
)
target_include_directories(vitastor-nbd PUBLIC ${NL3_INCLUDE_DIRS})
target_link_libraries(vitastor-nbd vitastor_client ${NL3_LIBRARIES})
if (HAVE_NBD_NETLINK_H AND NL3_LIBRARIES)
target_compile_definitions(vitastor-nbd PUBLIC HAVE_NBD_NETLINK_H)
endif (HAVE_NBD_NETLINK_H AND NL3_LIBRARIES)
if (${WITH_QEMU})
# qemu_driver.so
add_library(qemu_vitastor SHARED
qemu_driver.c
)
target_compile_options(qemu_vitastor PUBLIC -DVITASTOR_SOURCE_TREE)
target_include_directories(qemu_vitastor PUBLIC
../../qemu/b/qemu
../../qemu/include
${GLIB_INCLUDE_DIRS}
)
target_link_libraries(qemu_vitastor
vitastor_client
)
set_target_properties(qemu_vitastor PROPERTIES
PREFIX ""
OUTPUT_NAME "block-vitastor"
)
endif (${WITH_QEMU})
# test_cluster_client
add_executable(test_cluster_client
EXCLUDE_FROM_ALL
../test/test_cluster_client.cpp
pg_states.cpp osd_ops.cpp cluster_client.cpp cluster_client_list.cpp cluster_client_wb.cpp msgr_op.cpp ../test/mock/messenger.cpp msgr_stop.cpp
etcd_state_client.cpp ../util/timerfd_manager.cpp ../util/str_util.cpp ../../json11/json11.cpp
)
target_compile_definitions(test_cluster_client PUBLIC -D__MOCK__)
target_include_directories(test_cluster_client BEFORE PUBLIC ${CMAKE_SOURCE_DIR}/src/test/mock)
add_dependencies(build_tests test_cluster_client)
add_test(NAME test_cluster_client COMMAND test_cluster_client)

View File

@@ -34,7 +34,7 @@ cluster_client_t::cluster_client_t(ring_loop_t *ringloop, timerfd_manager_t *tfd
{
// peer_osd just dropped connection
// determine WHICH dirty_buffers are now obsolete and repeat them
if (wb->repeat_ops_for(this, peer_osd) > 0)
if (wb->repeat_ops_for(this, peer_osd, 0, 0) > 0)
{
continue_ops();
}
@@ -52,7 +52,8 @@ cluster_client_t::cluster_client_t(ring_loop_t *ringloop, timerfd_manager_t *tfd
st_cli.tfd = tfd;
st_cli.on_load_config_hook = [this](json11::Json::object & cfg) { on_load_config_hook(cfg); };
st_cli.on_change_osd_state_hook = [this](uint64_t peer_osd) { on_change_osd_state_hook(peer_osd); };
st_cli.on_change_hook = [this](std::map<std::string, etcd_kv_t> & changes) { on_change_hook(changes); };
st_cli.on_change_pool_config_hook = [this]() { on_change_pool_config_hook(); };
st_cli.on_change_pg_state_hook = [this](pool_id_t pool_id, pg_num_t pg_num, osd_num_t prev_primary) { on_change_pg_state_hook(pool_id, pg_num, prev_primary); };
st_cli.on_load_pgs_hook = [this](bool success) { on_load_pgs_hook(success); };
st_cli.on_reload_hook = [this]() { st_cli.load_global_config(); };
@@ -77,11 +78,6 @@ cluster_client_t::~cluster_client_t()
cluster_op_t::~cluster_op_t()
{
if (buf)
{
free(buf);
buf = NULL;
}
if (bitmap_buf)
{
free(bitmap_buf);
@@ -427,7 +423,7 @@ void cluster_client_t::on_load_pgs_hook(bool success)
continue_ops();
}
void cluster_client_t::on_change_hook(std::map<std::string, etcd_kv_t> & changes)
void cluster_client_t::on_change_pool_config_hook()
{
for (auto pool_item: st_cli.pool_config)
{
@@ -450,6 +446,19 @@ void cluster_client_t::on_change_hook(std::map<std::string, etcd_kv_t> & changes
continue_ops();
}
void cluster_client_t::on_change_pg_state_hook(pool_id_t pool_id, pg_num_t pg_num, osd_num_t prev_primary)
{
auto & pg_cfg = st_cli.pool_config[pool_id].pg_config[pg_num];
if (pg_cfg.cur_primary != prev_primary)
{
// Repeat this PG operations because an OSD which stopped being primary may not fsync operations
if (wb->repeat_ops_for(this, 0, pool_id, pg_num) > 0)
{
continue_ops();
}
}
}
bool cluster_client_t::get_immediate_commit(uint64_t inode)
{
if (enable_writeback)
@@ -570,6 +579,14 @@ void cluster_client_t::execute_internal(cluster_op_t *op)
{
op->cur_inode = op->inode;
op->retval = 0;
op->state = 0;
op->retry_after = 0;
op->inflight_count = 0;
op->done_count = 0;
op->part_bitmaps = NULL;
op->bitmap_buf_size = 0;
op->prev_wait = 0;
assert(!op->prev && !op->next);
// check alignment, readonly flag and so on
if (!check_rw(op))
{
@@ -600,7 +617,9 @@ void cluster_client_t::execute_internal(cluster_op_t *op)
{
if (!(op->flags & OP_FLUSH_BUFFER) && !op->version /* no CAS write-repeat */)
{
wb->copy_write(op, CACHE_WRITTEN);
uint64_t flush_id = ++wb->last_flush_id;
wb->copy_write(op, CACHE_REPEATING, flush_id);
op->flush_id = flush_id;
}
if (dirty_bytes >= client_max_dirty_bytes || dirty_ops >= client_max_dirty_ops)
{
@@ -816,6 +835,10 @@ resume_2:
auto & pool_cfg = st_cli.pool_config.at(INODE_POOL(op->inode));
op->retval = op->len / pool_cfg.bitmap_granularity;
}
if (op->flush_id)
{
wb->mark_flush_written(op->inode, op->offset, op->len, op->flush_id);
}
erase_op(op);
return 1;
}
@@ -988,6 +1011,29 @@ void cluster_client_t::slice_rw(cluster_op_t *op)
}
}
bool cluster_client_t::affects_pg(uint64_t inode, uint64_t offset, uint64_t len, pool_id_t pool_id, pg_num_t pg_num)
{
if (INODE_POOL(inode) != pool_id)
{
return false;
}
auto & pool_cfg = st_cli.pool_config.at(INODE_POOL(inode));
uint32_t pg_data_size = (pool_cfg.scheme == POOL_SCHEME_REPLICATED ? 1 : pool_cfg.pg_size-pool_cfg.parity_chunks);
uint64_t pg_block_size = pool_cfg.data_block_size * pg_data_size;
uint64_t first_stripe = (offset / pg_block_size) * pg_block_size;
uint64_t last_stripe = len > 0 ? ((offset + len - 1) / pg_block_size) * pg_block_size : first_stripe;
if ((last_stripe/pool_cfg.pg_stripe_size) - (first_stripe/pool_cfg.pg_stripe_size) + 1 >= pool_cfg.real_pg_count)
{
// All PGs are affected
return true;
}
pg_num_t first_pg_num = (first_stripe/pool_cfg.pg_stripe_size) % pool_cfg.real_pg_count + 1; // like map_to_pg()
pg_num_t last_pg_num = (last_stripe/pool_cfg.pg_stripe_size) % pool_cfg.real_pg_count + 1; // like map_to_pg()
return (first_pg_num <= last_pg_num
? (pg_num >= first_pg_num && pg_num <= last_pg_num)
: (pg_num >= first_pg_num || pg_num <= last_pg_num));
}
bool cluster_client_t::affects_osd(uint64_t inode, uint64_t offset, uint64_t len, osd_num_t osd)
{
auto & pool_cfg = st_cli.pool_config.at(INODE_POOL(inode));
@@ -1210,7 +1256,9 @@ void cluster_client_t::handle_op_part(cluster_op_part_t *part)
// So do all these things after modifying operation state, otherwise we may hit reenterability bugs
// FIXME postpone such things to set_immediate here to avoid bugs
// Set op->retry_after to retry operation after a short pause (not immediately)
if (!op->retry_after)
if (!op->retry_after && (op->retval == -EPIPE ||
op->retval == -EIO && client_eio_retry_interval ||
op->retval == -ENOSPC && client_retry_enospc))
{
op->retry_after = op->retval != -EPIPE ? client_eio_retry_interval : client_retry_interval;
}

View File

@@ -56,8 +56,6 @@ struct cluster_op_t
protected:
int state = 0;
uint64_t cur_inode; // for snapshot reads
void *buf = NULL;
cluster_op_t *orig_op = NULL;
bool needs_reslice = false;
int retry_after = 0;
int inflight_count = 0, done_count = 0;
@@ -66,6 +64,7 @@ protected:
unsigned bitmap_buf_size = 0;
cluster_op_t *prev = NULL, *next = NULL;
int prev_wait = 0;
uint64_t flush_id = 0;
friend class cluster_client_t;
friend class writeback_cache_t;
};
@@ -81,6 +80,7 @@ class cluster_client_t
ring_loop_t *ringloop;
std::map<pool_id_t, uint64_t> pg_counts;
std::map<pool_pg_num_t, osd_num_t> pg_primary;
// client_max_dirty_* is actually "max unsynced", for the case when immediate_commit is off
uint64_t client_max_dirty_bytes = 0;
uint64_t client_max_dirty_ops = 0;
@@ -146,9 +146,11 @@ public:
protected:
bool affects_osd(uint64_t inode, uint64_t offset, uint64_t len, osd_num_t osd);
bool affects_pg(uint64_t inode, uint64_t offset, uint64_t len, pool_id_t pool_id, pg_num_t pg_num);
void on_load_config_hook(json11::Json::object & config);
void on_load_pgs_hook(bool success);
void on_change_hook(std::map<std::string, etcd_kv_t> & changes);
void on_change_pool_config_hook();
void on_change_pg_state_hook(pool_id_t pool_id, pg_num_t pg_num, osd_num_t prev_primary);
void on_change_osd_state_hook(uint64_t peer_osd);
void execute_internal(cluster_op_t *op);
void unshift_op(cluster_op_t *op);

View File

@@ -46,11 +46,12 @@ public:
bool is_left_merged(dirty_buf_it_t dirty_it);
bool is_right_merged(dirty_buf_it_t dirty_it);
bool is_merged(const dirty_buf_it_t & dirty_it);
void copy_write(cluster_op_t *op, int state);
int repeat_ops_for(cluster_client_t *cli, osd_num_t peer_osd);
void copy_write(cluster_op_t *op, int state, uint64_t new_flush_id = 0);
int repeat_ops_for(cluster_client_t *cli, osd_num_t peer_osd, pool_id_t pool_id, pg_num_t pg_num);
void start_writebacks(cluster_client_t *cli, int count);
bool read_from_cache(cluster_op_t *op, uint32_t bitmap_granularity);
void flush_buffers(cluster_client_t *cli, dirty_buf_it_t from_it, dirty_buf_it_t to_it);
void mark_flush_written(uint64_t inode, uint64_t offset, uint64_t len, uint64_t flush_id);
void fsync_start();
void fsync_error();
void fsync_ok();

View File

@@ -71,7 +71,7 @@ bool writeback_cache_t::is_merged(const dirty_buf_it_t & dirty_it)
return is_left_merged(dirty_it) || is_right_merged(dirty_it);
}
void writeback_cache_t::copy_write(cluster_op_t *op, int state)
void writeback_cache_t::copy_write(cluster_op_t *op, int state, uint64_t new_flush_id)
{
// Save operation for replay when one of PGs goes out of sync
// (primary OSD drops our connection in this case)
@@ -180,6 +180,7 @@ void writeback_cache_t::copy_write(cluster_op_t *op, int state)
.buf = buf,
.len = op->len,
.state = state,
.flush_id = new_flush_id,
.refcnt = refcnt,
});
if (state == CACHE_DIRTY)
@@ -208,7 +209,7 @@ void writeback_cache_t::copy_write(cluster_op_t *op, int state)
}
}
int writeback_cache_t::repeat_ops_for(cluster_client_t *cli, osd_num_t peer_osd)
int writeback_cache_t::repeat_ops_for(cluster_client_t *cli, osd_num_t peer_osd, pool_id_t pool_id, pg_num_t pg_num)
{
int repeated = 0;
if (dirty_buffers.size())
@@ -218,8 +219,11 @@ int writeback_cache_t::repeat_ops_for(cluster_client_t *cli, osd_num_t peer_osd)
for (auto wr_it = dirty_buffers.begin(), flush_it = wr_it, last_it = wr_it; ; )
{
bool end = wr_it == dirty_buffers.end();
bool flush_this = !end && wr_it->second.state != CACHE_REPEATING &&
cli->affects_osd(wr_it->first.inode, wr_it->first.stripe, wr_it->second.len, peer_osd);
bool flush_this = !end && wr_it->second.state != CACHE_REPEATING;
if (peer_osd)
flush_this = flush_this && cli->affects_osd(wr_it->first.inode, wr_it->first.stripe, wr_it->second.len, peer_osd);
if (pool_id && pg_num)
flush_this = flush_this && cli->affects_pg(wr_it->first.inode, wr_it->first.stripe, wr_it->second.len, pool_id, pg_num);
if (flush_it != wr_it && (end || !flush_this ||
wr_it->first.inode != flush_it->first.inode ||
wr_it->first.stripe != last_it->first.stripe+last_it->second.len))
@@ -265,7 +269,7 @@ void writeback_cache_t::flush_buffers(cluster_client_t *cli, dirty_buf_it_t from
writebacks_active++;
op->callback = [this, flush_id](cluster_op_t* op)
{
// Buffer flushes should be always retried, regardless of the error,
// Buffer flushes are always retried, regardless of the error,
// so they should never result in an error here
assert(op->retval == op->len);
for (auto fl_it = flushed_buffers.find(flush_id);
@@ -277,16 +281,7 @@ void writeback_cache_t::flush_buffers(cluster_client_t *cli, dirty_buf_it_t from
}
flushed_buffers.erase(fl_it++);
}
for (auto dirty_it = find_dirty(op->inode, op->offset);
dirty_it != dirty_buffers.end() && dirty_it->first.inode == op->inode &&
dirty_it->first.stripe < op->offset+op->len; dirty_it++)
{
if (dirty_it->second.flush_id == flush_id && dirty_it->second.state == CACHE_REPEATING)
{
dirty_it->second.flush_id = 0;
dirty_it->second.state = CACHE_WRITTEN;
}
}
mark_flush_written(op->inode, op->offset, op->len, flush_id);
delete op;
writebacks_active--;
// We can't call execute_internal because it affects an invalid copy of the list here
@@ -304,6 +299,20 @@ void writeback_cache_t::flush_buffers(cluster_client_t *cli, dirty_buf_it_t from
}
}
void writeback_cache_t::mark_flush_written(uint64_t inode, uint64_t offset, uint64_t len, uint64_t flush_id)
{
for (auto dirty_it = find_dirty(inode, offset);
dirty_it != dirty_buffers.end() && dirty_it->first.inode == inode &&
dirty_it->first.stripe < offset+len; dirty_it++)
{
if (dirty_it->second.flush_id == flush_id && dirty_it->second.state == CACHE_REPEATING)
{
dirty_it->second.flush_id = 0;
dirty_it->second.state = CACHE_WRITTEN;
}
}
}
void writeback_cache_t::start_writebacks(cluster_client_t *cli, int count)
{
if (!writeback_queue.size())

View File

@@ -253,7 +253,7 @@ void etcd_state_client_t::parse_config(const json11::Json & config)
this->etcd_ws_keepalive_interval = config["etcd_ws_keepalive_interval"].uint64_value();
if (this->etcd_ws_keepalive_interval <= 0)
{
this->etcd_ws_keepalive_interval = 30;
this->etcd_ws_keepalive_interval = 5;
}
this->max_etcd_attempts = config["max_etcd_attempts"].uint64_value();
if (this->max_etcd_attempts <= 0)
@@ -890,6 +890,10 @@ void etcd_state_client_t::parse_state(const etcd_kv_t & kv)
}
}
}
if (on_change_pool_config_hook)
{
on_change_pool_config_hook();
}
}
else if (key == etcd_prefix+"/config/pgs")
{
@@ -1028,13 +1032,19 @@ void etcd_state_client_t::parse_state(const etcd_kv_t & kv)
else if (value.is_null())
{
auto & pg_cfg = this->pool_config[pool_id].pg_config[pg_num];
auto prev_primary = pg_cfg.cur_primary;
pg_cfg.state_exists = false;
pg_cfg.cur_primary = 0;
pg_cfg.cur_state = 0;
if (on_change_pg_state_hook)
{
on_change_pg_state_hook(pool_id, pg_num, prev_primary);
}
}
else
{
auto & pg_cfg = this->pool_config[pool_id].pg_config[pg_num];
auto prev_primary = pg_cfg.cur_primary;
pg_cfg.state_exists = true;
osd_num_t cur_primary = value["primary"].uint64_value();
int state = 0;
@@ -1065,6 +1075,10 @@ void etcd_state_client_t::parse_state(const etcd_kv_t & kv)
}
pg_cfg.cur_primary = cur_primary;
pg_cfg.cur_state = state;
if (on_change_pg_state_hook)
{
on_change_pg_state_hook(pool_id, pg_num, prev_primary);
}
}
}
else if (key.substr(0, etcd_prefix.length()+11) == etcd_prefix+"/osd/state/")

View File

@@ -20,9 +20,11 @@
#define MAX_DATA_BLOCK_SIZE 128*1024*1024
#define DEFAULT_BITMAP_GRANULARITY 4096
#ifndef IMMEDIATE_NONE
#define IMMEDIATE_NONE 0
#define IMMEDIATE_SMALL 1
#define IMMEDIATE_ALL 2
#endif
struct etcd_kv_t
{
@@ -101,7 +103,7 @@ protected:
void pick_next_etcd();
public:
int etcd_keepalive_timeout = 30;
int etcd_ws_keepalive_interval = 30;
int etcd_ws_keepalive_interval = 5;
int max_etcd_attempts = 5;
int etcd_quick_timeout = 1000;
int etcd_slow_timeout = 5000;
@@ -125,6 +127,8 @@ public:
std::function<void(json11::Json::object &)> on_load_config_hook;
std::function<json11::Json()> load_pgs_checks_hook;
std::function<void(bool)> on_load_pgs_hook;
std::function<void()> on_change_pool_config_hook;
std::function<void(pool_id_t, pg_num_t, osd_num_t)> on_change_pg_state_hook;
std::function<void(pool_id_t, pg_num_t)> on_change_pg_history_hook;
std::function<void(osd_num_t)> on_change_osd_state_hook;
std::function<void()> on_reload_hook;

View File

@@ -271,7 +271,7 @@ void http_co_t::close_connection()
}
if (peer_fd >= 0)
{
tfd->set_fd_handler(peer_fd, false, NULL);
tfd->set_fd_handler(peer_fd, 0, NULL);
close(peer_fd);
peer_fd = -1;
}
@@ -314,7 +314,7 @@ void http_co_t::start_connection()
stackout();
return;
}
tfd->set_fd_handler(peer_fd, true, [this](int peer_fd, int epoll_events)
tfd->set_fd_handler(peer_fd, EPOLLIN|EPOLLOUT, [this](int peer_fd, int epoll_events)
{
this->epoll_events |= epoll_events;
handle_events();
@@ -372,7 +372,7 @@ void http_co_t::handle_connect_result()
}
int one = 1;
setsockopt(peer_fd, SOL_TCP, TCP_NODELAY, &one, sizeof(one));
tfd->set_fd_handler(peer_fd, false, [this](int peer_fd, int epoll_events)
tfd->set_fd_handler(peer_fd, EPOLLIN, [this](int peer_fd, int epoll_events)
{
this->epoll_events |= epoll_events;
handle_events();

Some files were not shown because too many files have changed in this diff Show More