Compare commits

...

79 Commits

Author SHA1 Message Date
afcab324e1 Destroy qemu driver under lock (may fix crashes after attach/detach?)
All checks were successful
2025-03-07 13:13:27 +03:00
819f1125ae Support used_for_app instead of used_for_fs
All checks were successful
2025-03-07 01:03:43 +03:00
108df7329f Fix PG object count statistics on deletion of non-existing objects
All checks were successful
2025-03-04 00:40:56 +03:00
d32edf6cdf Fix deletion writeback 2025-03-04 00:40:35 +03:00
dca436d7e6 Trigger event loop automatically in libvitastor_c
All checks were successful
2025-03-03 00:57:09 +03:00
8129a0b4e3 Loop once after registering eventfd to prevent skipping previous events 2025-03-03 00:57:00 +03:00
704c87d512 Trigger initial epoll when adding an FD 2025-03-03 00:56:17 +03:00
10216a5fb5 Build node.js addon as a Debian package 2025-03-02 18:04:56 +03:00
3932eb7ff6 Trigger event loop once after each vitastor_c_* call
All checks were successful
2025-03-02 01:23:41 +03:00
69cbe7bbb2 Release 1.11.0
All checks were successful
New features:

- Support containerized Vitastor installations: http://vitastor.io/docs/installation/docker.html
- Add new functions to the node.js binding: delete(), get_immediate_commit(), on_ready(),
  get_min_io_size(), get_max_atomic_write_size()
- S3 (Zenko Cloudserver with Vitastor support) is coming shortly and will be released separately

Bug fixes:

- Use IP-derived etcd node names in make-etcd
- Set a short name for the OSD process to display in `top`
- Fix snap-create without pool_id failing when there are multiple pools
- Several bugs are fixed in the write-back cache; it should now be stable:
  - Fix incorrect snapshot reads from dirty write-back cache
  - Do not try to repeat pending writebacks on OSD reconnections
  - Fix client hangs with multiple SYNCs in the writeback queue
  - Fix client hangs due to incorrect calculation of the writeback queue size
- Several improvements for NBD mapping/unmapping:
  - Add a workaround for a race condition in the Linux kernel NBD driver leading
    to vitastor-nbd sometimes breaking a previously mapped device instead of
    setting up a new one
  - Check if the device is actually mapped in vitastor-nbd unmap
  - Fix device name/number validation in vitastor-nbd
- Fix OSD crashes after starting with corrupted metadata - from now on, the OSD will skip
  corrupted metadata entries and heal itself
- Fix scrubbing of misplaced objects and object state recalculation after
  vitastor-cli fix - previously, an OSD restart could be required to fix object states
- Make primary OSD distribution more stable by using murmur3 hash instead of the old pseudo-rng
- Fix monitor sometimes racing with itself - do not touch /pool/stats from stats
  aggregation if PG recheck is active
- Sort vitastor-cli ls output by name by default
- Update antietcd to 1.1.2
2025-03-01 13:39:42 +03:00
4950a1636c Allow "infinite" startup for clients if explicitly requested 2025-03-01 13:39:42 +03:00
2eb20dff28 Do not crash on io_uring initialization failure in node-vitastor 2025-03-01 13:29:48 +03:00
59f0b0427c Support containerized Vitastor installations
All checks were successful
2025-02-27 20:06:15 +03:00
124162ad38 Use IP-derived etcd node names in make-etcd 2025-02-26 11:54:37 +03:00
391c92af1a Set OSD process name 2025-02-26 11:54:37 +03:00
c3d8fdd855 Fix snap-create without pool_id ID generation with multiple pools
All checks were successful
2025-02-26 11:54:28 +03:00
9ccf3af97b Add qemu-block-extra and qemu-utils 2025-02-23 15:08:16 +03:00
568a209f0d Update docker image to debian bookworm 2025-02-23 13:27:32 +03:00
b151013201 Fix snapshot reads from a dirty write-back cache
All checks were successful
2025-02-23 02:31:19 +03:00
4a763725fe Add free() to bindiff.c 2025-02-22 16:52:19 +03:00
b8d83cd7f4 No, it's not a good idea to destroy client in the child nbd process
Some checks failed
Test / test_rebalance_verify_ec_imm (push) Failing after 59s
Test / test_rebalance_verify (push) Failing after 52s
Should probably have been an obvious side effect :-)

The child process inherits open file descriptors for the parent's epoll/timerfd,
and it's totally OK to just close() all of them, but it's absolutely NOT
OK to run destructors - they modify the kernel state of epoll/timerfd
before destroying them. So, basically, when we destroy the client in the child
process, we break it in the parent too. This also means that cluster_client_t
doesn't support fork(). :-)
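For illustration, here is a minimal C sketch of the failure mode, assuming inherited epoll and timerfd descriptors (illustrative only, not Vitastor code; both helper functions are hypothetical):

```c
#include <sys/epoll.h>
#include <sys/timerfd.h>
#include <unistd.h>

// After fork(), the child shares the SAME kernel epoll instance and timerfd
// with the parent, so modifying them in the child also affects the parent.

void child_close_only(int epfd, int tfd)
{
    // Safe: only drops the child's references; the parent's epoll
    // registrations and armed timers are untouched.
    close(epfd);
    close(tfd);
}

void child_run_destructors(int epfd, int tfd)
{
    // Unsafe: destructor-style teardown. This removes the watch from the
    // shared epoll instance and disarms the timer for the parent as well,
    // silently breaking the parent's event loop.
    epoll_ctl(epfd, EPOLL_CTL_DEL, tfd, NULL);
    struct itimerspec zero = {0};
    timerfd_settime(tfd, 0, &zero, NULL);
    close(epfd);
    close(tfd);
}
```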
2025-02-22 15:10:27 +03:00
2e9ee2fe20 Do not try to repeat pending writebacks
Some checks reported warnings
2025-02-22 14:16:44 +03:00
508ae852e4 Fix trap in test_rebalance_verify
Some checks failed
Test / test_rebalance_verify_ec_imm (push) Failing after 6m26s
2025-02-22 02:18:41 +03:00
97ee400505 Add a workaround for race condition in the Linux kernel NBD driver
Some checks failed
Test / test_rebalance_verify_ec_imm (push) Failing after 6m22s
Test / test_rebalance_verify_ec (push) Failing after 6m32s
Test / test_rebalance_verify (push) Failing after 18s
Do all NBD configuration in the child process, after the last fork.
Why? It's needed because there is a race condition in the Linux kernel nbd driver,
in nbd_add_socket(): it saves the `current` task pointer as `nbd->task_setup` and
then rechecks whether the new `current` is the same. The problem is that if that process
is already dead, `current` may be freed and then reused by another process
with the same pointer value. So the check passes and NBD allows a different process
to set up a device which is already set up. A proper fix would have to be made in the
kernel code, but the obvious workaround is to perform NBD setup from the process
which will then actually call NBD_DO_IT. That process stays alive for the whole
lifetime of the NBD device, so the (nbd->task_setup != current) check always
works correctly and we don't accidentally break previous NBD devices while setting
up a new one. Forking to check every device is of course rather slow, so we also
do an additional check by calling list_mapped() before searching for a free NBD device.
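A minimal C sketch of this pattern, with error handling omitted and the function name chosen for illustration (a simplified sketch, not the actual vitastor-nbd code):

```c
#include <fcntl.h>
#include <linux/nbd.h>
#include <sys/ioctl.h>
#include <sys/types.h>
#include <unistd.h>

// Configure the NBD device in the same (child) process that calls NBD_DO_IT,
// so the kernel's nbd->task_setup pointer refers to a live process for the
// whole lifetime of the mapping.
int serve_nbd(const char *dev, int sock, unsigned long size_blocks)
{
    pid_t pid = fork();
    if (pid != 0)
        return pid;                               // parent: the child now owns the device
    int nbd = open(dev, O_RDWR);
    ioctl(nbd, NBD_SET_BLKSIZE, 4096UL);          // all setup happens here,
    ioctl(nbd, NBD_SET_SIZE_BLOCKS, size_blocks); // after the last fork()
    ioctl(nbd, NBD_SET_SOCK, (unsigned long)sock);
    ioctl(nbd, NBD_DO_IT);                        // blocks until disconnect
    ioctl(nbd, NBD_CLEAR_SOCK);
    close(nbd);
    _exit(0);
}
```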
2025-02-21 13:17:37 +03:00
5ee4894fab Check if mapped in vitastor-nbd unmap
Some checks failed
Test / test_rebalance_verify_ec (push) Failing after 3m31s
2025-02-21 01:28:06 +03:00
125dcafb11 Prevent OSD crashes when metadata is corrupted
All checks were successful
2025-02-20 02:19:32 +03:00
9f44cf71df Fix device name/number validation in vitastor-nbd
All checks were successful
2025-02-20 01:33:11 +03:00
df3c63ca7f Sort vitastor-cli ls by name by default 2025-02-20 01:32:49 +03:00
be66edd09f Prevent infinite loops on syncs in writeback_overflow
All checks were successful
2025-02-19 01:44:12 +03:00
ccbc0c5928 Add assert !writeback_bytes
All checks were successful
2025-02-19 01:15:46 +03:00
78ca4538bf Fix qemu docker build for ubuntu
All checks were successful
2025-02-18 23:44:16 +03:00
86b5760ec1 Fix writeback incorrectly calculating queue size which was leading to client hangs
All checks were successful
2025-02-18 23:42:55 +03:00
27f3803d2f Add vitastor_c_delete() and delete() to the node.js binding
All checks were successful
2025-02-15 18:27:17 +03:00
2ead06e126 Add ubuntu jammy to docs 2025-02-12 15:32:35 +03:00
a5d5559f8e Add get_immediate_commit() to the node.js binding 2025-02-06 01:35:48 +03:00
e8e7ba8fde Add FIXME for CAS in non-immediate_commit mode 2025-02-06 01:35:48 +03:00
6fd831a299 Add on_ready(), get_min_io_size(), get_max_atomic_write_size() to the node.js binding 2025-02-06 01:35:48 +03:00
069808dfce Fix --config_path option in docs
All checks were successful
2025-01-24 17:21:11 +03:00
bcefa42bc0 Scrub all chunks, not just 1 chunk per position
Some checks failed
Test / test_heal_csum_32k_dj (push) Failing after 2m30s
2025-01-23 02:02:55 +03:00
4636e02d43 Remove scheme, pg_size, pg_data_size from op_data 2025-01-23 01:20:31 +03:00
e4c7d1c147 s/3/4/ 2025-01-23 01:20:31 +03:00
a4677f3e69 Mention P5530 2025-01-23 01:20:31 +03:00
7cbf207d65 Use murmur3 to select primary OSD instead of old pseudo-rng
Some checks failed
Test / test_scrub_xor (push) Failing after 17s
Test / test_scrub_zero_osd_2 (push) Failing after 19s
2025-01-18 12:28:54 +03:00
7c9711af20 Do not touch /pool/stats from stats aggregation if PG recheck is active
All checks were successful
2025-01-16 20:41:16 +03:00
33ef701464 Update antietcd to 1.1.2
All checks were successful
2025-01-04 02:13:36 +03:00
61ededa230 Release 1.10.1
All checks were successful
New features:

- Add "deleted" image flag which is set when vitastor-cli rm starts to delete an image,
  but can't delete it fully due to inactive PGs or stopped OSDs
- Support JSON output in vitastor-disk prepare and purge
- Show backfillfull pools in vitastor-cli status
- Make object listings consistent (used in vitastor-cli rm/rm-data/merge/etc).
  This means that there is now a guarantee that if a data block is present when you invoke rm,
  rm will attempt to delete it, even if a PG switches state while rm is running. Previously, in
  such cases rm could skip some objects and leave them behind as garbage, and merge could
  probably have moved data between snapshots incorrectly.
- Make deletions (rm/rm-data) consistent. This means that rm/rm-data will either complete
  successfully and delete all requested image data, or complete with an error if some objects
  could not be deleted or if there is a possibility that some data is left on stopped OSDs.
  Previously, when some PGs or OSDs were inactive at the moment of deletion, rm-data
  behaved incorrectly: it didn't retry deletions that failed due to dropped OSD connections,
  it could hang waiting for PGs to activate, and it could return a successful exit code
  while some garbage was possibly still left on some OSDs. Deletions are not yet fully atomic
  cluster-wide, which means that you still have to repeat the deletion request after you
  bring stopped OSDs back, but now you always know for sure whether you have to repeat it.

Bug fixes:

- Fix vitastor-cli rm --exact / --matching command not working
- Finally fix "Unexpected status" in the Proxmox plugin
- Fix vitastor-cli create-snap incorrectly linking multiple snapshots in a different pool
- Fix incomplete image parent_id loop check in OSD
- Fix reads from snapshots in a different pool not working if there are more than 2 snapshots
- Fix append of VITASTOR_CONF to cmdline in the opennebula prebackup script
- Fix OSDs crashing again when the cluster is full with EC (was meant to work since 1.6.0 but didn't)
- Improve logging of subop failures
2025-01-03 16:22:09 +03:00
d9d90d3183 Fix build for debian buster 2025-01-03 16:21:56 +03:00
9dbcdbcec9 Return left_on_dead OSD list in DELETE replies and use it in rm-data
All checks were successful
2025-01-03 15:57:09 +03:00
a147f7e7dc Copy & repeat deletions too
All checks were successful
2025-01-03 00:21:52 +03:00
0e6bf66734 Add bindiff for tests
All checks were successful
2025-01-02 19:59:04 +03:00
ab822d3050 Support consistent listings in client (rm-data, merge and etc)
All checks were successful
2025-01-02 18:07:12 +03:00
d5366a0767 Support listings from primary OSDs (for consistent deletions) 2025-01-02 11:07:24 +03:00
40b8a8b0da Add wait_up_timeout support to cluster_client and use it in vitastor-cli rm-data & merge
All checks were successful
2025-01-01 17:57:58 +03:00
5c5119aba4 Pass min_offset/max_offset to list_inode()
All checks were successful
2025-01-01 15:40:12 +03:00
4edda88903 Wait infinitely for OSDs to either connect or stop during listing, instead of waiting for peer_connect_timeout
All checks were successful
2025-01-01 15:29:42 +03:00
80dda3ca94 Remove separate list_inode_next()
All checks were successful
2025-01-01 14:19:18 +03:00
c8decb32e8 Rename to client_wait_up_timeout
All checks were successful
2025-01-01 11:26:57 +03:00
4995592e61 Retry listings on broken OSD connections
Some checks reported warnings
2025-01-01 11:14:36 +03:00
d9f9b0bca5 Start listings consistently with the current PG state, add wait_up_timeout
Some checks reported warnings
This still doesn't make listings 100% consistent yet: for fully consistent
listings we have to receive them only from the primary OSD, not from all
peer OSDs, but that issue will be fixed separately.
2025-01-01 10:58:22 +03:00
d0396267d0 Clear retry_timeout when the client is destroyed 2025-01-01 10:58:22 +03:00
b46d5db115 Support JSON output in vitastor-disk prepare and purge
Some checks failed
Test / test_heal_ec (push) Failing after 10m10s
2024-12-29 15:19:44 +03:00
ecd92655fe Fix rm --exact / --matching not removing one uppermost image in each chain
All checks were successful
2024-12-28 21:53:49 +03:00
383712148b Fix rm --exact / --matching not being invoked at all O_o
All checks were successful
2024-12-28 21:47:00 +03:00
42d40153ff Do not intercept STDERR in Proxmox plugin (finally fixes "unexpected status"!) 2024-12-28 21:18:49 +03:00
561b36a4c1 Use revision from txn response header, not from put subresponse
Some checks failed
Test / test_heal_ec (push) Failing after 2m22s
2024-12-28 21:01:15 +03:00
685af019f5 Allow :: and 0.0.0.0 as local IPs in antietcd_adapter 2024-12-28 20:52:27 +03:00
a31592d131 Print sizes in "Auto-selecting" as "4K", not "4 K"
All checks were successful
2024-12-28 19:15:23 +03:00
28b0a2597d Add a test for multiple snapshots in a second pool
All checks were successful
2024-12-28 18:57:30 +03:00
de6b345473 Fix create-snap taking parent_pool from incorrect key parent_pool_id 2024-12-28 18:53:29 +03:00
8bf52d6e96 Fix inode parent_id loop check 2024-12-28 18:40:17 +03:00
5623dca02c Fix vitastor client passing incorrect mod_revision for snapshotted images
All checks were successful
This led to reads working only for the image itself and its latest snapshot
2024-12-28 16:01:35 +03:00
abdc207297 Fix append of VITASTOR_CONF to cmdline in the opennebula prebackup script 2024-12-28 13:33:24 +03:00
044e621b62 Add test_rm_degraded to CI
All checks were successful
2024-12-27 18:31:58 +03:00
ba9aabf187 Return listing errors from list_inode_start(), abort merging and fail deletion on unsuccessful listings 2024-12-27 18:31:21 +03:00
5c890e4a12 Fix rm-data hanging when some OSDs are inactive, add a test for it
All checks were successful
There's also another case that needs to be fixed: deletions shouldn't be retried
indefinitely if an OSD is stopped during deletion
2024-12-27 16:29:33 +03:00
0b0c2afbce Implement "deleted" flag
All checks were successful
2024-12-27 01:18:55 +03:00
651c055bd9 Show backfillfull pools in vitastor-cli status
Some checks reported warnings
2024-12-26 12:17:47 +03:00
42eebfc1bd Fix OSDs still crashing when the cluster is full with EC
All checks were successful
ENOSPC handling was introduced in 1.6.0 but it was not complete; now it is

P.S.: See also client_retry_enospc (true by default)
2024-12-26 01:56:33 +03:00
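
For illustration only (not part of the commit above): client_retry_enospc is a regular client option, so, assuming the standard /etc/vitastor/vitastor.conf path, opting out of the retry behaviour could look roughly like this (a sketch; the etcd address is a placeholder):

# Sketch: prefer immediate ENOSPC errors over blocked writes.
cat > /etc/vitastor/vitastor.conf <<'EOF'
{
  "etcd_address": ["10.0.0.1:2379"],
  "client_retry_enospc": false
}
EOF
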
cef98052f5 Improve logging of subop failures 2024-12-26 01:54:40 +03:00
141 changed files with 3308 additions and 1292 deletions

View File

@@ -414,6 +414,24 @@ jobs:
echo ""
done
test_rm_degraded:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_rm_degraded.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_snapshot_chain:
runs-on: ubuntu-latest
needs: build

View File

@@ -2,6 +2,6 @@ cmake_minimum_required(VERSION 2.8.12)
project(vitastor)
set(VITASTOR_VERSION "1.10.0")
set(VITASTOR_VERSION "1.11.0")
add_subdirectory(src)

View File

@@ -41,6 +41,7 @@ Vitastor поддерживает QEMU-драйвер, протоколы NBD и
- [Автор и лицензия](docs/intro/author.ru.md)
- Установка
- [Пакеты](docs/installation/packages.ru.md)
- [Docker](docs/installation/docker.ru.md)
- [Proxmox](docs/installation/proxmox.ru.md)
- [OpenNebula](docs/installation/opennebula.ru.md)
- [OpenStack](docs/installation/openstack.ru.md)

View File

@@ -41,6 +41,7 @@ Read more details in the documentation. You can start from here: [Quick Start](d
- [Author and license](docs/intro/author.en.md)
- Installation
- [Packages](docs/installation/packages.en.md)
- [Docker](docs/installation/docker.en.md)
- [Proxmox](docs/installation/proxmox.en.md)
- [OpenNebula](docs/installation/opennebula.en.md)
- [OpenStack](docs/installation/openstack.en.md)

View File

@@ -1,4 +1,4 @@
VITASTOR_VERSION ?= v1.10.0
VITASTOR_VERSION ?= v1.11.0
all: build push

View File

@@ -49,7 +49,7 @@ spec:
capabilities:
add: ["SYS_ADMIN"]
allowPrivilegeEscalation: true
image: vitalif/vitastor-csi:v1.10.0
image: vitalif/vitastor-csi:v1.11.0
args:
- "--node=$(NODE_ID)"
- "--endpoint=$(CSI_ENDPOINT)"

View File

@@ -121,7 +121,7 @@ spec:
privileged: true
capabilities:
add: ["SYS_ADMIN"]
image: vitalif/vitastor-csi:v1.10.0
image: vitalif/vitastor-csi:v1.11.0
args:
- "--node=$(NODE_ID)"
- "--endpoint=$(CSI_ENDPOINT)"

View File

@@ -5,7 +5,7 @@ package vitastor
const (
vitastorCSIDriverName = "csi.vitastor.io"
vitastorCSIDriverVersion = "1.10.0"
vitastorCSIDriverVersion = "1.11.0"
)
// Config struct fills the parameters of request or user input

2
debian/changelog vendored
View File

@@ -1,4 +1,4 @@
vitastor (1.10.0-1) unstable; urgency=medium
vitastor (1.11.0-1) unstable; urgency=medium
* Bugfixes

11
debian/control vendored
View File

@@ -2,7 +2,10 @@ Source: vitastor
Section: admin
Priority: optional
Maintainer: Vitaliy Filippov <vitalif@yourcmc.ru>
Build-Depends: debhelper, liburing-dev (>= 0.6), g++ (>= 8), libstdc++6 (>= 8), linux-libc-dev, libgoogle-perftools-dev, libjerasure-dev, libgf-complete-dev, libibverbs-dev, libisal-dev, cmake, pkg-config, libnl-3-dev, libnl-genl-3-dev
Build-Depends: debhelper, liburing-dev (>= 0.6), g++ (>= 8), libstdc++6 (>= 8),
linux-libc-dev, libgoogle-perftools-dev, libjerasure-dev, libgf-complete-dev,
libibverbs-dev, libisal-dev, cmake, pkg-config, libnl-3-dev, libnl-genl-3-dev,
node-bindings <!nocheck>, node-gyp, node-nan
Standards-Version: 4.5.0
Homepage: https://vitastor.io/
Rules-Requires-Root: no
@@ -59,3 +62,9 @@ Architecture: amd64
Depends: ${shlibs:Depends}, ${misc:Depends}, vitastor-client, patch, python3, jq
Description: Vitastor OpenNebula storage plugin
Vitastor storage plugin for OpenNebula.
Package: node-vitastor
Architecture: amd64
Depends: ${shlibs:Depends}, ${misc:Depends}, node-bindings
Description: Node.js bindings for Vitastor client
Node.js native bindings for the Vitastor client library (vitastor-client).

1
debian/node-vitastor.install vendored Normal file
View File

@@ -0,0 +1 @@
usr/lib/x86_64-linux-gnu/nodejs/vitastor
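
A quick way to sanity-check the new package could be the following; only the install path comes from the node-vitastor.install file above, the rest is an assumption:

# Install the binding from the repository packages and load it once as a smoke test.
apt-get install -y node-vitastor
node -e 'require("/usr/lib/x86_64-linux-gnu/nodejs/vitastor"); console.log("binding loaded")'
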

View File

@@ -1,8 +1,10 @@
# Build patched QEMU for Debian inside a container
# cd ..; podman build --build-arg REL=bullseye -v `pwd`/packages:/root/packages -f debian/patched-qemu.Dockerfile .
ARG DISTRO=debian
ARG REL=
FROM debian:$REL
FROM $DISTRO:$REL
ARG DISTRO=debian
ARG REL=
WORKDIR /root
@@ -20,8 +22,8 @@ RUN if [ "$REL" = "buster" -o "$REL" = "bullseye" -o "$REL" = "bookworm" ]; then
echo 'APT::Install-Suggests false;' >> /etc/apt/apt.conf
RUN apt-get update
RUN apt-get -y install fio liburing-dev libgoogle-perftools-dev devscripts
RUN apt-get -y build-dep qemu
RUN DEBIAN_FRONTEND=noninteractive TZ=Europe/Moscow apt-get -y install fio liburing-dev libgoogle-perftools-dev devscripts
RUN DEBIAN_FRONTEND=noninteractive TZ=Europe/Moscow apt-get -y build-dep qemu
# To build a custom version
#RUN cp /root/packages/qemu-orig/* /root
RUN apt-get --download-only source qemu
@@ -38,9 +40,9 @@ ADD src/client/qemu_driver.c /root/qemu_driver.c
# apt-get install -y vitastor-client vitastor-client-dev quilt
RUN set -e; \
dpkg -i /root/packages/vitastor-$REL/vitastor-client_*.deb /root/packages/vitastor-$REL/vitastor-client-dev_*.deb; \
DEBIAN_FRONTEND=noninteractive TZ=Europe/Moscow apt-get -y install /root/packages/vitastor-$REL/vitastor-client_*.deb /root/packages/vitastor-$REL/vitastor-client-dev_*.deb; \
apt-get update; \
apt-get install -y quilt; \
DEBIAN_FRONTEND=noninteractive TZ=Europe/Moscow apt-get -y install quilt; \
mkdir -p /root/packages/qemu-$REL; \
rm -rf /root/packages/qemu-$REL/*; \
cd /root/packages/qemu-$REL; \

8
debian/rules vendored
View File

@@ -4,6 +4,14 @@ export DH_VERBOSE = 1
%:
dh $@
override_dh_install:
perl -pe 's!prefix=/usr!prefix='`pwd`'/debian/tmp/usr!' < obj-x86_64-linux-gnu/src/client/vitastor.pc > node-binding/vitastor.pc
cd node-binding && PKG_CONFIG_PATH=./ PKG_CONFIG_ALLOW_SYSTEM_CFLAGS=1 npm install --unsafe-perm || exit 1
mkdir -p debian/tmp/usr/lib/x86_64-linux-gnu/nodejs/vitastor/build/Release
cp -v node-binding/package.json node-binding/index.js node-binding/addon.cc node-binding/addon.h node-binding/client.cc node-binding/client.h debian/tmp/usr/lib/x86_64-linux-gnu/nodejs/vitastor
cp -v node-binding/build/Release/addon.node debian/tmp/usr/lib/x86_64-linux-gnu/nodejs/vitastor/build/Release
dh_install
override_dh_installdeb:
cat debian/fio_version >> debian/vitastor-fio.substvars
[ -f debian/qemu_version ] && (cat debian/qemu_version >> debian/vitastor-qemu.substvars) || true
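
With the override above, the Node.js addon should be built as part of a normal package build. A sketch using standard Debian tooling, assuming the build dependencies from debian/control are installed:

# Build binary packages, including node-vitastor, from the source tree.
dpkg-buildpackage -us -uc -b
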

View File

@@ -22,7 +22,8 @@ RUN set -e -x; \
echo 'APT::Install-Suggests false;' >> /etc/apt/apt.conf
RUN apt-get update && \
apt-get -y install fio liburing-dev libgoogle-perftools-dev devscripts libjerasure-dev cmake libibverbs-dev librdmacm-dev libisal-dev libnl-3-dev libnl-genl-3-dev curl && \
apt-get -y install fio liburing-dev libgoogle-perftools-dev devscripts libjerasure-dev cmake \
libibverbs-dev librdmacm-dev libisal-dev libnl-3-dev libnl-genl-3-dev curl nodejs npm node-nan node-bindings && \
apt-get -y build-dep fio && \
apt-get --download-only source fio

View File

@@ -1,9 +1,11 @@
# Build Docker image with Vitastor packages
FROM debian:bullseye
FROM debian:bookworm
ADD vitastor.list /etc/apt/sources.list.d
ADD vitastor.gpg /etc/apt/trusted.gpg.d
ADD vitastor.pref /etc/apt/preferences.d
ADD apt.conf /etc/apt/
RUN apt-get update && apt-get -y install vitastor qemu-system-x86 qemu-system-common && apt-get clean
ADD etc/apt /etc/apt/
RUN apt-get update && apt-get -y install vitastor qemu-system-x86 qemu-system-common qemu-block-extra qemu-utils jq nfs-common && apt-get clean
ADD sleep.sh /usr/bin/
ADD install.sh /usr/bin/
ADD scripts /opt/scripts/
ADD etc /etc/
RUN ln -s /usr/lib/vitastor/mon/make-etcd /usr/bin/make-etcd

9
docker/Makefile Normal file
View File

@@ -0,0 +1,9 @@
VITASTOR_VERSION ?= v1.11.0
all: build push
build:
@docker build --rm -t vitalif/vitastor:$(VITASTOR_VERSION) .
push:
@docker push vitalif/vitastor:$(VITASTOR_VERSION)
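
Presumably the new Makefile is driven like this (targets and the version variable are taken from the file itself, the directory name from its header):

# Build and publish the release image; override VITASTOR_VERSION to tag another release.
make -C docker build
make -C docker push VITASTOR_VERSION=v1.11.0
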

View File

@@ -0,0 +1 @@
deb http://vitastor.io/debian bookworm main

View File

@@ -0,0 +1,27 @@
[Unit]
Description=Containerized etcd for Vitastor
After=network-online.target local-fs.target time-sync.target docker.service vitastor-host.service
Wants=network-online.target local-fs.target time-sync.target docker.service vitastor-host.service
PartOf=vitastor.target
[Service]
Restart=always
Environment=GOGC=50
EnvironmentFile=/etc/vitastor/docker.conf
EnvironmentFile=/etc/vitastor/etcd.conf
SyslogIdentifier=etcd
ExecStart=bash -c 'docker run --rm -i -v /var/lib/vitastor/etcd:/data \
--log-driver none --network host $CONTAINER_OPTIONS --name vitastor-etcd \
$ETCD_IMAGE /usr/local/bin/etcd --name "$ETCD_NAME" --data-dir /data \
--snapshot-count 10000 --advertise-client-urls http://$ETCD_IP:2379 --listen-client-urls http://$ETCD_IP:2379 \
--initial-advertise-peer-urls http://$ETCD_IP:2380 --listen-peer-urls http://$ETCD_IP:2380 \
--initial-cluster-token vitastor-etcd-1 --initial-cluster "$ETCD_INITIAL_CLUSTER" \
--initial-cluster-state new --max-txn-ops=100000 --max-request-bytes=104857600 \
--auto-compaction-retention=10 --auto-compaction-mode=revision'
ExecStop=docker stop vitastor-etcd
Restart=always
StartLimitInterval=0
RestartSec=10
[Install]
WantedBy=multi-user.target

View File

@@ -0,0 +1,23 @@
[Unit]
Description=Empty container for running Vitastor commands
After=network-online.target local-fs.target time-sync.target docker.service
Wants=network-online.target local-fs.target time-sync.target docker.service
PartOf=vitastor.target
[Service]
Restart=always
EnvironmentFile=/etc/vitastor/docker.conf
ExecStart=bash -c 'docker run --rm -i -v /etc/vitastor:/etc/vitastor -v /dev:/dev \
--privileged --log-driver none --network host --name vitastor vitastor:$VITASTOR_VERSION \
sleep.sh'
ExecStartPost=udevadm trigger
ExecStop=docker stop vitastor
WorkingDirectory=/
PrivateTmp=false
TasksMax=infinity
Restart=always
StartLimitInterval=0
RestartSec=10
[Install]
WantedBy=multi-user.target

View File

@@ -0,0 +1,23 @@
[Unit]
Description=Containerized Vitastor monitor
After=network-online.target local-fs.target time-sync.target docker.service
Wants=network-online.target local-fs.target time-sync.target docker.service
PartOf=vitastor.target
[Service]
Restart=always
EnvironmentFile=/etc/vitastor/docker.conf
SyslogIdentifier=vitastor-mon
ExecStart=bash -c 'docker run --rm -i -v /etc/vitastor:/etc/vitastor -v /var/lib/vitastor:/var/lib/vitastor -v /dev:/dev \
--log-driver none --network host $CONTAINER_OPTIONS --name vitastor-mon vitastor:$VITASTOR_VERSION \
node /usr/lib/vitastor/mon/mon-main.js'
ExecStop=docker stop vitastor-mon
WorkingDirectory=/
PrivateTmp=false
TasksMax=infinity
Restart=always
StartLimitInterval=0
RestartSec=10
[Install]
WantedBy=multi-user.target

View File

@@ -0,0 +1,27 @@
[Unit]
Description=Containerized Vitastor object storage daemon osd.%i
After=network-online.target local-fs.target time-sync.target docker.service vitastor-host.service
Wants=network-online.target local-fs.target time-sync.target docker.service vitastor-host.service
PartOf=vitastor.target
[Service]
LimitNOFILE=1048576
LimitNPROC=1048576
LimitMEMLOCK=infinity
EnvironmentFile=/etc/vitastor/docker.conf
SyslogIdentifier=vitastor-osd%i
ExecStart=bash -c 'docker run --rm -i -v /etc/vitastor:/etc/vitastor -v /dev:/dev \
$(for i in $(ls /dev/vitastor/osd%i-*); do echo --device $i:$i; done) \
--log-driver none --network host --ulimit nofile=1048576 --ulimit memlock=-1 $CONTAINER_OPTIONS --name vitastor-osd%i \
vitastor:$VITASTOR_VERSION vitastor-disk exec-osd /dev/vitastor/osd%i-data'
ExecStartPre=+docker exec vitastor vitastor-disk pre-exec /dev/vitastor/osd%i-data
ExecStop=docker stop vitastor-etcd%i
WorkingDirectory=/
PrivateTmp=false
TasksMax=infinity
Restart=always
StartLimitInterval=0
RestartSec=10
[Install]
WantedBy=vitastor.target

View File

@@ -0,0 +1,4 @@
[Unit]
Description=vitastor target
[Install]
WantedBy=multi-user.target

View File

@@ -0,0 +1,7 @@
SUBSYSTEM=="block", ENV{ID_PART_ENTRY_TYPE}=="e7009fac-a5a1-4d72-af72-53de13059903", \
OWNER="vitastor", GROUP="vitastor", \
IMPORT{program}="/usr/bin/docker exec vitastor vitastor-disk udev $devnode", \
SYMLINK+="vitastor/$env{VITASTOR_ALIAS}"
ENV{VITASTOR_OSD_NUM}!="", ACTION=="add", RUN{program}+="/usr/bin/systemctl enable --now --no-block vitastor-osd@$env{VITASTOR_OSD_NUM}"
ENV{VITASTOR_OSD_NUM}!="", ACTION=="remove", RUN{program}+="/usr/bin/systemctl disable --now --no-block vitastor-osd@$env{VITASTOR_OSD_NUM}"
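
Taken together, the units and the udev rule above suggest a host bring-up roughly like the following. Only vitastor-osd@ and vitastor.target are named explicitly; the other unit names are guesses based on the container names, so treat this as a sketch:

# Enable the always-on containers; OSD units are normally started by the udev rule
# when a partition with the Vitastor GPT partition type appears.
systemctl enable --now vitastor.target vitastor-host vitastor-mon
systemctl enable --now vitastor-etcd      # only on hosts that should run etcd
systemctl start vitastor-osd@1            # manual start of a single OSD, e.g. for debugging
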

View File

@@ -0,0 +1,11 @@
#
# Configuration file for containerized Vitastor installation
# (non-Kubernetes, with systemd and udev-based orchestration)
#
# Desired Vitastor version
VITASTOR_VERSION=1.11.0
# Additional arguments for all containers
# For example, you may want to specify a custom logging driver here
CONTAINER_OPTIONS=""

View File

@@ -0,0 +1,4 @@
ETCD_IMAGE=quay.io/coreos/etcd:v3.5.18
ETCD_NAME=""
ETCD_IP=""
ETCD_INITIAL_CLUSTER=""
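
These variables are consumed by the containerized etcd unit above. On the first of, say, three hosts they might be filled in like this (names and addresses are placeholders):

# /etc/vitastor/etcd.conf on host 10.0.0.1
ETCD_IMAGE=quay.io/coreos/etcd:v3.5.18
ETCD_NAME="etcd0"
ETCD_IP="10.0.0.1"
ETCD_INITIAL_CLUSTER="etcd0=http://10.0.0.1:2380,etcd1=http://10.0.0.2:2380,etcd2=http://10.0.0.3:2380"
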

View File

@@ -0,0 +1,2 @@
{
}

9
docker/install.sh Executable file
View File

@@ -0,0 +1,9 @@
#!/bin/bash
set -e
cp -urv /etc/default /host-etc/
cp -urv /etc/systemd /host-etc/
cp -urv /etc/udev /host-etc/
cp -urnv /etc/vitastor /host-etc/
cp -urnv /opt/scripts/* /host-bin/
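
Judging by the /host-etc and /host-bin paths, install.sh is meant to be run once from the image with the host directories mounted in. A hedged sketch (the officially documented invocation may differ):

# Copy systemd units, udev rules, default configs and wrapper scripts onto the host.
docker run --rm -v /etc:/host-etc -v /usr/bin:/host-bin vitalif/vitastor:v1.11.0 install.sh
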

3
docker/scripts/vitastor-cli Executable file
View File

@@ -0,0 +1,3 @@
#!/bin/bash
docker exec -it vitastor vitastor-cli "$@"

3
docker/scripts/vitastor-disk Executable file
View File

@@ -0,0 +1,3 @@
#!/bin/bash
docker exec -it vitastor vitastor-disk "$@"

3
docker/scripts/vitastor-fio Executable file
View File

@@ -0,0 +1,3 @@
#!/bin/bash
docker exec -it vitastor fio "$@"

3
docker/scripts/vitastor-nbd Executable file
View File

@@ -0,0 +1,3 @@
#!/bin/bash
docker exec -it vitastor vitastor-nbd "$@"
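
With these wrappers copied to the host, day-to-day commands look the same as in a package-based installation, for example (image name and size are made up):

# All of these run inside the long-lived "vitastor" container via docker exec.
vitastor-cli status
vitastor-cli create -s 10G testimg
vitastor-nbd map --image testimg
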

3
docker/sleep.sh Executable file
View File

@@ -0,0 +1,3 @@
#!/bin/bash
while :; do sleep infinity; done

View File

@@ -1 +0,0 @@
deb http://vitastor.io/debian bullseye main

View File

@@ -13,7 +13,7 @@ Vitastor configuration consists of:
- [Separate OSD settings](config/pool.en.md#osd-settings)
- [Inode configuration](config/inode.en.md) i.e. image metadata like name, size and parent reference
Configuration parameters can be set in 3 places:
Configuration parameters can be set in 4 places:
- Configuration file (`/etc/vitastor/vitastor.conf` or other path)
- etcd key `/vitastor/config/global`. Most variables can be set there, but etcd
connection parameters should obviously be set in the configuration file.

View File

@@ -14,7 +14,7 @@
- [Настроек инодов](config/inode.ru.md), т.е. метаданных образов, таких, как имя, размер и ссылки на
родительский образ
Параметры конфигурации могут задаваться в 3 местах:
Параметры конфигурации могут задаваться в 4 местах:
- Файле конфигурации (`/etc/vitastor/vitastor.conf` или по другому пути)
- Ключе в etcd `/vitastor/config/global`. Большая часть параметров может
задаваться там, кроме, естественно, самих параметров соединения с etcd,

View File

@@ -13,6 +13,7 @@ affect their interaction with the cluster.
- [client_retry_interval](#client_retry_interval)
- [client_eio_retry_interval](#client_eio_retry_interval)
- [client_retry_enospc](#client_retry_enospc)
- [client_wait_up_timeout](#client_wait_up_timeout)
- [client_max_dirty_bytes](#client_max_dirty_bytes)
- [client_max_dirty_ops](#client_max_dirty_ops)
- [client_enable_writeback](#client_enable_writeback)
@@ -70,6 +71,19 @@ and clients are not blocked and just get EIO error code instead.
Retry writes on out of space errors to wait until some space is freed on
OSDs.
## client_wait_up_timeout
- Type: seconds
- Default: 16
- Can be changed online: yes
Wait for this number of seconds until PGs are up when doing operations
which require all PGs to be up. Currently only used by object listings
in delete and merge-based commands ([vitastor-cli rm](../usage/cli.en.md#rm), merge and so on).
The default value is calculated as `1 + OSD lease timeout`, which is
`1 + etcd_report_interval + max_etcd_attempts*2*etcd_quick_timeout`.
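
As a minimal sketch of the new option, assuming the standard configuration file path (the etcd address is a placeholder):

# /etc/vitastor/vitastor.conf: give rm/merge listings more time to wait for PGs.
cat > /etc/vitastor/vitastor.conf <<'EOF'
{
  "etcd_address": ["10.0.0.1:2379"],
  "client_wait_up_timeout": 30
}
EOF
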
## client_max_dirty_bytes
- Type: integer

View File

@@ -13,6 +13,7 @@
- [client_retry_interval](#client_retry_interval)
- [client_eio_retry_interval](#client_eio_retry_interval)
- [client_retry_enospc](#client_retry_enospc)
- [client_wait_up_timeout](#client_wait_up_timeout)
- [client_max_dirty_bytes](#client_max_dirty_bytes)
- [client_max_dirty_ops](#client_max_dirty_ops)
- [client_enable_writeback](#client_enable_writeback)
@@ -72,6 +73,19 @@ RDMA и хотите повысить пиковую производитель
Повторять запросы записи, завершившиеся с ошибками нехватки места, т.е.
ожидать, пока на OSD не освободится место.
## client_wait_up_timeout
- Тип: секунды
- Значение по умолчанию: 16
- Можно менять на лету: да
Время ожидания поднятия PG при операциях, требующих активности всех PG.
В данный момент используется листингами объектов в командах, использующих
удаление и слияние ([vitastor-cli rm](../usage/cli.ru.md#rm), merge и подобные).
Значение по умолчанию вычисляется как `1 + время lease OSD`, равное
`1 + etcd_report_interval + max_etcd_attempts*2*etcd_quick_timeout`.
## client_max_dirty_bytes
- Тип: целое число

View File

@@ -316,7 +316,7 @@ for hot data and slower disks - HDDs and maybe SATA SSDs - but will slightly
decrease write performance for fast disks because page cache is an overhead
itself.
Choose "directsync" to use [immediate_commit](layout-cluster.ru.md#immediate_commit)
Choose "directsync" to use [immediate_commit](layout-cluster.en.md#immediate_commit)
(which requires disable_data_fsync) with drives having write-back cache
which can't be turned off, for example, Intel Optane. Also note that *some*
desktop SSDs (for example, HP EX950) may ignore O_SYNC thus making

View File

@@ -43,7 +43,7 @@ Parameters:
- [osd_tags](#osd_tags)
- [primary_affinity_tags](#primary_affinity_tags)
- [scrub_interval](#scrub_interval)
- [used_for_fs](#used_for_fs)
- [used_for_app](#used_for_app)
Examples:
@@ -377,24 +377,37 @@ of the OSDs containing a data chunk for a PG.
Automatic scrubbing interval for this pool. Overrides
[global scrub_interval setting](osd.en.md#scrub_interval).
## used_for_fs
## used_for_app
- Type: string
If non-empty, the pool is marked as used for VitastorFS with metadata stored
in block image (regular Vitastor volume) named as the value of this pool parameter.
If non-empty, the pool is marked as used for a separate application, for example,
VitastorFS or S3, which allocates Vitastor volume IDs by itself and does not use
image/inode metadata in etcd.
When a pool is marked as used for VitastorFS, regular block volume creation in it
When a pool is marked as used for such app, regular block volume creation in it
is disabled (vitastor-cli refuses to create images without --force) to protect
the user from block volume and FS file ID collisions and data loss.
the user from block volume and FS/S3 volume ID collisions and data loss.
[vitastor-nfs](../usage/nfs.ru.md), in its turn, refuses to use pools not marked
Also such pools do not calculate per-inode space usage statistics in etcd because
using it for an external application implies that it may contain a very large
number of volumes and their statistics may take too much space in etcd.
Setting used_for_app to `fs:<name>` tells Vitastor that the pool is used for VitastorFS
with VitastorKV metadata base stored in a block image (regular Vitastor volume) named
`<name>`.
[vitastor-nfs](../usage/nfs.en.md), in its turn, refuses to use pools not marked
for the corresponding FS when starting. This also implies that you can use one
pool only for one VitastorFS.
The second thing that is disabled for VitastorFS pools is reporting per-inode space
usage statistics in etcd because a FS pool may store a very large number of files
and statistics for them all would take a lot of space in etcd.
If you plan to use the pool for S3, set its used_for_app to `s3:<name>`. `<name>` may
be basically anything you want (for example, `s3:standard`) - it's not validated
by Vitastor S3 components in any way.
All other values except prefixed with `fs:` or `s3:` may be used freely and don't
mean anything special for Vitastor core components. For now, you can use them as
you wish.
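
For illustration, pool definitions with the new field might look roughly like this in etcd; the /vitastor/config/pools key and every value except used_for_app are assumptions or placeholders here:

# Inspect pool definitions; one pool marked for VitastorFS, one for S3.
etcdctl get /vitastor/config/pools --print-value-only
# {
#   "2": { "name": "fs-meta", "scheme": "replicated", "pg_size": 3, "pg_count": 128,
#          "used_for_app": "fs:myfs" },
#   "3": { "name": "s3-data", "scheme": "ec", "pg_size": 5, "parity_chunks": 2, "pg_count": 256,
#          "used_for_app": "s3:standard" }
# }
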
# Examples

View File

@@ -42,7 +42,7 @@
- [osd_tags](#osd_tags)
- [primary_affinity_tags](#primary_affinity_tags)
- [scrub_interval](#scrub_interval)
- [used_for_fs](#used_for_fs)
- [used_for_app](#used_for_app)
Примеры:
@@ -256,7 +256,7 @@ PG в Vitastor эферемерны, то есть вы можете менят
## raw_placement
- Type: string
- Тип: строка
Низкоуровневые правила генерации PG в форме DSL (доменно-специфичного языка).
Используйте, только если действительно знаете, зачем вам это надо :)
@@ -383,26 +383,39 @@ OSD с "all".
Интервал скраба, то есть, автоматической фоновой проверки данных для данного пула.
Переопределяет [глобальную настройку scrub_interval](osd.ru.md#scrub_interval).
## used_for_fs
## used_for_app
- Type: string
- Тип: строка
Если непусто, пул помечается как используемый для файловой системы VitastorFS с
метаданными, хранимыми в блочном образе Vitastor с именем, равным значению
этого параметра.
Если непусто, пул помечается как используемый для отдельного приложения, например,
для VitastorFS или S3, которое распределяет ID образов в пуле само и не использует
метаданные образов/инодов в etcd.
Когда пул помечается как используемый для VitastorFS, создание обычных блочных
образов в нём отключается (vitastor-cli отказывается создавать образы без --force),
чтобы защитить пользователя от коллизий ID файлов и блочных образов и, таким
образом, от потери данных.
Когда пул помечается используемым для такого приложения, создание обычных блочных
образов в нём запрещается (vitastor-cli отказывается создавать образы без --force),
чтобы защитить пользователя от коллизий ID блочных образов и томов ФС/S3, и,
таким образом, от потери данных.
Также для таких пулов отключается передача статистики в etcd по отдельным инодам,
так как использование для внешнего приложения подразумевает, что пул может содержать
очень много томов и их статистика может занять слишком много места в etcd.
Установка used_for_app в значение `fs:<name>` сообщает о том, что пул используется
для VitastorFS с базой метаданных VitastorKV, хранимой в блочном образе с именем
`<name>`.
[vitastor-nfs](../usage/nfs.ru.md), в свою очередь, при запуске отказывается
использовать для ФС пулы, не выделенные для неё. Это также означает, что один
пул может использоваться только для одной VitastorFS.
использовать для ФС пулы, не помеченные, как используемые для неё. Это также
означает, что один пул может использоваться только для одной VitastorFS.
Также для ФС-пулов отключается передача статистики в etcd по отдельным инодам,
так как ФС-пул может содержать очень много файлов и статистика по ним всем
заняла бы очень много места в etcd.
Если же вы планируете использовать пул для данных S3, установите его used_for_app
в значение `s3:<name>`, где `<name>` - любое название по вашему усмотрению
(например, `s3:standard`) - конкретное содержимое `<name>` пока никак не проверяется
компонентами Vitastor S3.
Все остальные значения used_for_app, кроме начинающихся на `fs:` или `s3:`, не
означают ничего особенного для основных компонентов Vitastor. Поэтому сейчас вы
можете использовать их свободно любым желаемым способом.
# Примеры

View File

@@ -61,6 +61,24 @@
info_ru: |
Повторять запросы записи, завершившиеся с ошибками нехватки места, т.е.
ожидать, пока на OSD не освободится место.
- name: client_wait_up_timeout
type: sec
default: 16
online: true
info: |
Wait for this number of seconds for PGs to come up when performing operations
which require all PGs to be up. Currently this is only used by object listings
in delete and merge-based commands ([vitastor-cli rm](../usage/cli.en.md#rm), merge and so on).
The default value is calculated as `1 + OSD lease timeout`, which is
`1 + etcd_report_interval + max_etcd_attempts*2*etcd_quick_timeout`.
info_ru: |
Время ожидания поднятия PG при операциях, требующих активности всех PG.
В данный момент используется листингами объектов в командах, использующих
удаление и слияние ([vitastor-cli rm](../usage/cli.ru.md#rm), merge и подобные).
Значение по умолчанию вычисляется как `1 + время lease OSD`, равное
`1 + etcd_report_interval + max_etcd_attempts*2*etcd_quick_timeout`.
- name: client_max_dirty_bytes
type: int
default: 33554432

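As a worked example for the `client_wait_up_timeout` default above: assuming the usual defaults of `etcd_report_interval` = 5 s, `max_etcd_attempts` = 5 and `etcd_quick_timeout` = 1000 ms (these particular default values are an assumption here), the formula gives 1 + 5 + 5*2*1 = 16 seconds, which matches the default of 16.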
View File

@@ -14,8 +14,12 @@
{{../../installation/packages.en.md}}
{{../../installation/docker.en.md}}
{{../../installation/proxmox.en.md}}
{{../../installation/opennebula.en.md}}
{{../../installation/openstack.en.md}}
{{../../installation/kubernetes.en.md}}

View File

@@ -14,8 +14,12 @@
{{../../installation/packages.ru.md}}
{{../../installation/docker.ru.md}}
{{../../installation/proxmox.ru.md}}
{{../../installation/opennebula.ru.md}}
{{../../installation/openstack.ru.md}}
{{../../installation/kubernetes.ru.md}}

View File

@@ -315,7 +315,7 @@
decrease write performance for fast disks because page cache is an overhead
itself.
Choose "directsync" to use [immediate_commit](layout-cluster.ru.md#immediate_commit)
Choose "directsync" to use [immediate_commit](layout-cluster.en.md#immediate_commit)
(which requires disable_data_fsync) with drives having write-back cache
which can't be turned off, for example, Intel Optane. Also note that *some*
desktop SSDs (for example, HP EX950) may ignore O_SYNC thus making

View File

@@ -0,0 +1,60 @@
[Documentation](../../README.md#documentation) → Installation → Dockerized Installation
-----
[Читать на русском](docker.ru.md)
# Dockerized Installation
Vitastor may be installed in Docker/Podman. In such setups etcd, monitors and OSDs
all run in containers, but everything else looks as close as possible to a usual
setup with packages:
- host network is used
- auto-start is implemented through udev and systemd
- logs are written to journald (not docker json log files)
- command-line wrapper scripts are installed to the host system to call vitastor-disk,
vitastor-cli and others through the container
Such installations may be useful when it's impossible or inconvenient to install
Vitastor from packages, for example, in exotic Linux distributions.
If you want more than just a simple containerized installation, you can also take a look
at the Vitastor Kubernetes operator: https://github.com/Antilles7227/vitastor-operator
## Installing Containers
The installation procedure is very simple.
1. Download a Docker image of the desired version: \
`docker pull vitastor:1.10.2`
2. Install scripts to the host system: \
`docker run --rm -it -v /etc:/host-etc -v /usr/bin:/host-bin vitastor:1.10.2 install.sh`
3. Reload udev rules: \
`udevadm control --reload-rules`
And you can return to [Quick Start](../intro/quickstart.en.md).
## Upgrading Containers
First make sure to check the topic [Upgrading Vitastor](../usage/admin.en.md#upgrading-vitastor)
to figure out if you need any additional steps.
Then, to upgrade a containerized installation, you just need to change the `VITASTOR_VERSION`
option in `/etc/vitastor/docker.conf` and restart all Vitastor services:
`systemctl restart vitastor.target`
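For example, to move to a newer image you would presumably change the line with `VITASTOR_VERSION` (e.g. `VITASTOR_VERSION=1.10.2` to `VITASTOR_VERSION=1.11.0`; the exact layout of `/etc/vitastor/docker.conf` is an assumption here) and then run the restart command above.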
## QEMU
The Vitastor Docker image also contains QEMU, qemu-img and qemu-storage-daemon built with Vitastor support.
However, running QEMU in Docker is harder to set up and the method depends on the virtualization UI in use
(OpenNebula, Proxmox and so on). Some of them also require a patched Libvirt.
That's why the containerized installation of Vitastor doesn't include a ready-made QEMU setup, and it's
recommended to install QEMU from packages or build it manually.
## fio
The Vitastor Docker image also contains fio and installs a wrapper called `vitastor-fio` so that it can be
run from the host system.

View File

@@ -0,0 +1,60 @@
[Документация](../../README-ru.md#документация) → Установка → Установка в Docker
-----
[Read in English](docker.en.md)
# Установка в Docker
Vitastor можно установить в Docker/Podman. При этом etcd, мониторы и OSD запускаются
в контейнерах, но всё остальное выглядит максимально приближенно к установке из пакетов:
- используется сеть хост-системы
- для автозапуска используются udev и systemd
- журналы записываются в journald (не в json-файлы журналов docker)
- в хост-систему устанавливаются обёртки для вызова консольных инструментов vitastor-disk,
vitastor-cli и других через контейнер
Такая установка полезна тогда, когда установка из пакетов невозможна или неудобна,
например, в нестандартных Linux-дистрибутивах.
Если вам нужна не просто контейнеризованная инсталляция, вы также можете обратить внимание
на Vitastor Kubernetes-оператор: https://github.com/Antilles7227/vitastor-operator
## Установка контейнеров
Инструкция по установке максимально простая.
1. Скачайте Docker-образ желаемой версии: \
`docker pull vitastor:1.10.2`
2. Установите скрипты в хост-систему командой: \
`docker run --rm -it -v /etc:/host-etc -v /usr/bin:/host-bin vitastor:1.10.2 install.sh`
3. Перезагрузите правила udev: \
`udevadm control --reload-rules`
После этого вы можете возвращаться к разделу [Быстрый старт](../intro/quickstart.ru.md).
## Обновление контейнеров
Сначала обязательно проверьте раздел [Обновление Vitastor](../usage/admin.ru.md#обновление-vitastor),
чтобы понять, не требуются ли вам какие-то дополнительные действия.
После этого для обновления Docker-инсталляции вам нужно просто поменять опцию `VITASTOR_VERSION`
в файле `/etc/vitastor/docker.conf` и перезапустить все сервисы Vitastor командой:
`systemctl restart vitastor.target`
## QEMU
В Docker-образ также входят QEMU, qemu-img и qemu-storage-daemon, собранные с поддержкой Vitastor.
Однако настроить запуск QEMU в Docker сложнее и способ запуска зависит от используемого интерфейса
виртуализации (OpenNebula, Proxmox и т.п.). Также для OpenNebula, например, требуется патченый
Libvirt.
Поэтому по умолчанию Docker-сборка пока что не включает в себя готового способа запуска QEMU
и QEMU рекомендуется устанавливать из пакетов или собирать самостоятельно.
## fio
fio также входит в Docker-контейнер vitastor, и в хост-систему устанавливается обёртка `vitastor-fio`
для запуска fio в контейнер.

View File

@@ -14,6 +14,7 @@
- Debian 12 (Bookworm/Sid): `deb https://vitastor.io/debian bookworm main`
- Debian 11 (Bullseye): `deb https://vitastor.io/debian bullseye main`
- Debian 10 (Buster): `deb https://vitastor.io/debian buster main`
- Ubuntu 22.04 (Jammy): `deb https://vitastor.io/debian jammy main`
- Add `-oldstable` to bookworm/bullseye/buster in this line to install the last
stable version from 0.9.x branch instead of 1.x
- Install packages: `apt update; apt install vitastor lp-solve etcd linux-image-amd64 qemu-system-x86`

View File

@@ -14,6 +14,7 @@
- Debian 12 (Bookworm/Sid): `deb https://vitastor.io/debian bookworm main`
- Debian 11 (Bullseye): `deb https://vitastor.io/debian bullseye main`
- Debian 10 (Buster): `deb https://vitastor.io/debian buster main`
- Ubuntu 22.04 (Jammy): `deb https://vitastor.io/debian jammy main`
- Добавьте `-oldstable` к слову bookworm/bullseye/buster в этой строке, чтобы
установить последнюю стабильную версию из ветки 0.9.x вместо 1.x
- Установите пакеты: `apt update; apt install vitastor lp-solve etcd linux-image-amd64 qemu-system-x86`

View File

@@ -26,13 +26,13 @@
you also need small SSDs for journal and metadata (even 2 GB per 1 TB of HDD space is enough).
- Get a fast network (at least 10 Gbit/s). Something like Mellanox ConnectX-4 with RoCEv2 is ideal.
- Disable CPU powersaving: `cpupower idle-set -D 0 && cpupower frequency-set -g performance`.
- [Install Vitastor packages](../installation/packages.en.md).
- Either [install Vitastor packages](../installation/packages.en.md) or [install Vitastor in Docker](../installation/docker.en.md).
## Recommended drives
- SATA SSD: Micron 5100/5200/5300/5400, Samsung PM863/PM883/PM893, Intel D3-S4510/4520/4610/4620, Kingston DC500M
- NVMe: Micron 9100/9200/9300/9400, Micron 7300/7450, Samsung PM983/PM9A3, Samsung PM1723/1735/1743,
Intel DC-P3700/P4500/P4600, Intel D5-P4320, Intel D7-P5500/P5600, Intel Optane, Kingston DC1000B/DC1500M
Intel DC-P3700/P4500/P4600, Intel D5-P4320/P5530, Intel D7-P5500/P5600, Intel Optane, Kingston DC1000B/DC1500M
- HDD: HGST Ultrastar, Toshiba MG, Seagate EXOS
## Configure monitors
@@ -45,7 +45,8 @@ On the monitor hosts:
}
```
- Create systemd units for etcd by running: `/usr/lib/vitastor/mon/make-etcd`
- Start etcd and monitors: `systemctl enable --now etcd vitastor-mon`
Or, if you installed Vitastor in Docker, run `systemctl start vitastor-host; docker exec vitastor make-etcd`.
- Start etcd and monitors: `systemctl enable --now vitastor-etcd vitastor-mon`
## Configure OSDs

View File

@@ -26,13 +26,13 @@
обязательно возьмите SSD под метаданные и журнал (маленькие, буквально 2 ГБ на 1 ТБ HDD-места).
- Возьмите быструю сеть, минимум 10 гбит/с. Идеал - что-то вроде Mellanox ConnectX-4 с RoCEv2.
- Для лучшей производительности отключите энергосбережение CPU: `cpupower idle-set -D 0 && cpupower frequency-set -g performance`.
- [Установите пакеты Vitastor](../installation/packages.ru.md).
- Либо [установите пакеты Vitastor](../installation/packages.ru.md), либо [установите Vitastor в Docker](../installation/docker.ru.md).
## Рекомендуемые диски
- SATA SSD: Micron 5100/5200/5300/5400, Samsung PM863/PM883/PM893, Intel D3-S4510/4520/4610/4620, Kingston DC500M
- NVMe: Micron 9100/9200/9300/9400, Micron 7300/7450, Samsung PM983/PM9A3, Samsung PM1723/1735/1743,
Intel DC-P3700/P4500/P4600, Intel D5-P4320, Intel D7-P5500/P5600, Intel Optane, Kingston DC1000B/DC1500M
Intel DC-P3700/P4500/P4600, Intel D5-P4320/P5530, Intel D7-P5500/P5600, Intel Optane, Kingston DC1000B/DC1500M
- HDD: HGST Ultrastar, Toshiba MG, Seagate EXOS
## Настройте мониторы
@@ -44,8 +44,9 @@
"etcd_address": ["10.200.1.10:2379","10.200.1.11:2379","10.200.1.12:2379"]
}
```
- Инициализируйте сервисы etcd, запустив `/usr/lib/vitastor/mon/make-etcd`
- Запустите etcd и мониторы: `systemctl enable --now etcd vitastor-mon`
- Инициализируйте сервисы etcd, запустив `/usr/lib/vitastor/mon/make-etcd`.\
Либо, если вы установили Vitastor в Docker, запустите `systemctl start vitastor-host; docker exec vitastor make-etcd`.
- Запустите etcd и мониторы: `systemctl enable --now vitastor-etcd vitastor-mon`
## Настройте OSD

View File

@@ -37,7 +37,7 @@ It supports the following commands:
Global options:
```
--config_file FILE Path to Vitastor configuration file
--config_path FILE Path to Vitastor configuration file
--etcd_address URL Etcd connection address
--iodepth N Send N operations in parallel to each OSD when possible (default 32)
--parallel_osds M Work with M osds in parallel when possible (default 4)
@@ -146,6 +146,7 @@ Rename, resize image or change its readonly status. Images with children can't b
If the new size is smaller than the old size, extra data will be purged.
You should resize the file system in the image, if present, before shrinking it.
* `--deleted 1|0` - Set/clear 'deleted image' flag (set automatically during unfinished deletes).
* `-f|--force` - Proceed with shrinking or setting readwrite flag even if the image has children.
* `--down-ok` - Proceed with shrinking even if some data will be left on unavailable OSDs.
@@ -221,6 +222,7 @@ Remove inode data without changing metadata.
Requires more memory, but allows showing correct removal progress.
--min-offset Purge only data starting with specified offset.
--max-offset Purge only data before specified offset.
--client_wait_up_timeout 16 Timeout for waiting until PGs are up in seconds.
```
## merge-data
@@ -396,7 +398,8 @@ Optional parameters:
| `--raw_placement <rules>` | Specify raw PG generation rules ([details](../config/pool.en.md#raw_placement)) |
| `--primary_affinity_tags tags` | Prefer to put primary copies on OSDs with all specified tags |
| `--scrub_interval <time>` | Enable regular scrubbing for this pool. Format: number + unit s/m/h/d/M/y |
| `--used_for_fs <name>` | Mark pool as used for VitastorFS with metadata in image <name> |
| `--used_for_app fs:<name>` | Mark pool as used for VitastorFS with metadata in image `<name>` |
| `--used_for_app s3:<name>` | Mark pool as used for S3 location with name `<name>` |
| `--pg_stripe_size <number>` | Increase object grouping stripe |
| `--max_osd_combinations 10000` | Maximum number of random combinations for LP solver input |
| `--wait` | Wait for the new pool to come online |
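For instance, creating a pool pre-marked for a VitastorFS might look like `vitastor-cli create-pool fs-data --pg_size 2 --pg_count 256 --used_for_app fs:testfs --wait` (the pool name, PG numbers and FS name here are purely illustrative).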

View File

@@ -36,7 +36,7 @@ vitastor-cli - интерфейс командной строки для адм
Глобальные опции:
```
--config_file FILE Путь к файлу конфигурации Vitastor
--config_path FILE Путь к файлу конфигурации Vitastor
--etcd_address URL Адрес соединения с etcd
--iodepth N Отправлять параллельно N операций на каждый OSD (по умолчанию 32)
--parallel_osds M Работать параллельно с M OSD (по умолчанию 4)
@@ -149,6 +149,7 @@ vitastor-cli snap-create [-p|--pool <id|name>] <image>@<snapshot>
Если новый размер меньше старого, "лишние" данные будут удалены, поэтому перед уменьшением
образа сначала уменьшите файловую систему в нём.
* `--deleted 1|0` - Установить/снять флаг "образ удалён" (устанавливается при незавершённом удалении).
* `-f|--force` - Разрешить уменьшение или перевод в чтение-запись образа, у которого есть клоны.
* `--down-ok` - Разрешить уменьшение, даже если часть данных останется неудалённой на недоступных OSD.
@@ -226,6 +227,7 @@ vitastor-cli dd [iimg=<image> | if=<file>] [oimg=<image> | of=<file>] [bs=1M] \
Требует больше памяти, но позволяет правильно печатать прогресс удаления.
--min-offset Удалять только данные, начиная с заданного смещения.
--max-offset Удалять только данные до (исключительно) заданного смещения.
--client_wait_up_timeout 16 Время ожидания поднятия PG в секундах.
```
## merge-data

View File

@@ -36,7 +36,7 @@ It will output a block device name like /dev/nbd0 which you can then use as a no
You can also use `--pool <POOL> --inode <INODE> --size <SIZE>` instead of `--image <IMAGE>` if you want.
vitastor-nbd supports all usual Vitastor configuration options like `--config_file <path_to_config>` plus NBD-specific:
vitastor-nbd supports all usual Vitastor configuration options like `--config_path <path_to_config>` plus NBD-specific:
* `--nbd_timeout 0` \
Timeout for I/O operations in seconds after exceeding which the kernel stops the device.
@@ -54,16 +54,18 @@ vitastor-nbd supports all usual Vitastor configuration options like `--config_fi
Stay in foreground, do not daemonize.
Note that `nbd_timeout`, `nbd_max_devices` and `nbd_max_part` options may also be specified
in `/etc/vitastor/vitastor.conf` or in other configuration file specified with `--config_file`.
in `/etc/vitastor/vitastor.conf` or in other configuration file specified with `--config_path`.
## unmap
To unmap the device run:
```
vitastor-nbd unmap /dev/nbd0
vitastor-nbd unmap [--force] /dev/nbd0
```
If `--force` is specified, `vitastor-nbd` doesn't check if the device is actually mapped.
## ls
```

View File

@@ -41,7 +41,7 @@ vitastor-nbd map [/dev/nbdN] --image testimg
Для обращения по номеру инода, аналогично другим командам, можно использовать опции
`--pool <POOL> --inode <INODE> --size <SIZE>` вместо `--image testimg`.
vitastor-nbd поддерживает все обычные опции Vitastor, например, `--config_file <path_to_config>`,
vitastor-nbd поддерживает все обычные опции Vitastor, например, `--config_path <path_to_config>`,
плюс специфичные для NBD:
* `--nbd_timeout 0` \
@@ -62,16 +62,19 @@ vitastor-nbd поддерживает все обычные опции Vitastor,
Обратите внимание, что опции `nbd_timeout`, `nbd_max_devices` и `nbd_max_part` можно
также задавать в `/etc/vitastor/vitastor.conf` или в другом файле конфигурации,
заданном опцией `--config_file`.
заданном опцией `--config_path`.
## unmap
Для отключения устройства выполните:
```
vitastor-nbd unmap /dev/nbd0
vitastor-nbd unmap [--force] /dev/nbd0
```
Если задана опция `--force`, `vitastor-nbd` не проверяет, подключено ли устройство,
перед попыткой его отключить.
## ls
```

View File

@@ -58,7 +58,7 @@ To use VitastorFS:
2. Create an image for FS metadata, preferably in a faster (SSD or replica-HDD) pool,
but you can create it in the data pool too if you want (image size doesn't matter):
`vitastor-cli create -s 10G -p fastpool testfs`
3. Mark data pool as an FS pool: `vitastor-cli modify-pool --used-for-fs testfs data-pool`
3. Mark data pool as an FS pool: `vitastor-cli modify-pool --used-for-app fs:testfs data-pool`
4. Either mount the FS: `vitastor-nfs mount --fs testfs --pool data-pool /mnt/vita`
5. Or start the NFS server: `vitastor-nfs start --fs testfs --pool data-pool`

View File

@@ -60,7 +60,7 @@ JSON-формате :-). Для инспекции содержимого БД
или по крайней мере на HDD, но без EC), но можно и в том же пуле, что данные
(размер образа значения не имеет):
`vitastor-cli create -s 10G -p fastpool testfs`
3. Пометьте пул данных как ФС-пул: `vitastor-cli modify-pool --used-for-fs testfs data-pool`
3. Пометьте пул данных как ФС-пул: `vitastor-cli modify-pool --used-for-app fs:testfs data-pool`
4. Либо примонтируйте ФС: `vitastor-nfs mount --fs testfs --pool data-pool /mnt/vita`
5. Либо запустите сетевой NFS-сервер: `vitastor-nfs start --fs testfs --pool data-pool`

View File

@@ -23,6 +23,9 @@ class AntiEtcdAdapter
}, {}));
const cfg_port = config.antietcd_port;
const is_local = local_ips(true).reduce((a, c) => { a[c] = true; return a; }, {});
is_local['0.0.0.0'] = true;
is_local['::'] = true;
is_local[''] = true;
const selected = cluster.map(s => s.split(':', 2)).filter(ip => is_local[ip[0]] && (!cfg_port || ip[1] == cfg_port));
if (selected.length > 1)
{

View File

@@ -216,6 +216,7 @@ const etcd_tree = {
parent_pool?: <pool_id>,
parent_id?: <inode_t>,
readonly?: boolean,
deleted?: boolean,
}
}
}, */

View File

@@ -773,23 +773,27 @@ class Mon
}
}
}
for (const pool_id in this.state.pool.stats)
if (!this.recheck_pgs_active)
{
if (!seen_pools[pool_id])
// PG recheck also modifies /pool/stats, so don't touch it here if it's active
for (const pool_id in this.state.pool.stats)
{
txn.push({ requestDeleteRange: {
key: b64(this.config.etcd_prefix+'/pool/stats/'+pool_id),
} });
delete this.state.pool.stats[pool_id];
}
else
{
const pool_stats = { ...this.state.pool.stats[pool_id] };
serialize_bigints(pool_stats);
txn.push({ requestPut: {
key: b64(this.config.etcd_prefix+'/pool/stats/'+pool_id),
value: b64(JSON.stringify(pool_stats)),
} });
if (!seen_pools[pool_id])
{
txn.push({ requestDeleteRange: {
key: b64(this.config.etcd_prefix+'/pool/stats/'+pool_id),
} });
delete this.state.pool.stats[pool_id];
}
else
{
const pool_stats = { ...this.state.pool.stats[pool_id] };
serialize_bigints(pool_stats);
txn.push({ requestPut: {
key: b64(this.config.etcd_prefix+'/pool/stats/'+pool_id),
value: b64(JSON.stringify(pool_stats)),
} });
}
}
}
if (txn.length)

View File

@@ -1,6 +1,6 @@
{
"name": "vitastor-mon",
"version": "1.10.0",
"version": "1.11.0",
"description": "Vitastor SDS monitor service",
"main": "mon-main.js",
"scripts": {
@@ -9,7 +9,7 @@
"author": "Vitaliy Filippov",
"license": "UNLICENSED",
"dependencies": {
"antietcd": "^1.1.0",
"antietcd": "^1.1.2",
"sprintf-js": "^1.1.2",
"ws": "^7.2.5"
},

View File

@@ -8,23 +8,9 @@ const LPOptimizer = require('./lp_optimizer/lp_optimizer.js');
const { scale_pg_count } = require('./pg_utils.js');
const { make_hier_tree, filter_osds_by_root_node,
filter_osds_by_tags, filter_osds_by_block_layout, get_affinity_osds } = require('./osd_tree.js');
const { select_murmur3 } = require('./lp_optimizer/murmur3.js');
let seed;
function reset_rng()
{
seed = 0x5f020e43;
}
function rng()
{
seed ^= seed << 13;
seed ^= seed >> 17;
seed ^= seed << 5;
return seed + 2147483648;
}
function pick_primary(pool_config, osd_set, up_osds, aff_osds)
function pick_primary(pool_id, pg_num, pool_config, osd_set, up_osds, aff_osds)
{
let alive_set;
if (pool_config.scheme === 'replicated')
@@ -52,7 +38,7 @@ function pick_primary(pool_config, osd_set, up_osds, aff_osds)
{
return 0;
}
return alive_set[rng() % alive_set.length];
return alive_set[select_murmur3(alive_set.length, osd_num => pool_id+'/'+pg_num+'/'+osd_num)];
}
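The change above replaces a shared xorshift RNG, whose output depended on the order in which PGs were processed, with a deterministic per-PG choice. A minimal sketch of the idea (illustration only - the real code uses `select_murmur3` from `lp_optimizer/murmur3.js`, not md5):

```js
const crypto = require('crypto');

// Deterministically map a string key to one of n candidates, so that the same
// (pool, PG) always resolves to the same primary for a given alive set.
function pick_deterministic(key, n)
{
    const h = crypto.createHash('md5').update(key).digest();
    return h.readUInt32LE(0) % n;
}

// e.g.: primary = alive_set[pick_deterministic(pool_id+'/'+pg_num, alive_set.length)]
```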
function recheck_primary(state, global_config, up_osds, osd_tree)
@@ -66,7 +52,6 @@ function recheck_primary(state, global_config, up_osds, osd_tree)
continue;
}
const aff_osds = get_affinity_osds(pool_cfg, up_osds, osd_tree);
reset_rng();
for (let pg_num = 1; pg_num <= pool_cfg.pg_count; pg_num++)
{
if (!state.pg.config.items[pool_id])
@@ -76,7 +61,7 @@ function recheck_primary(state, global_config, up_osds, osd_tree)
const pg_cfg = state.pg.config.items[pool_id][pg_num];
if (pg_cfg)
{
const new_primary = pick_primary(state.config.pools[pool_id], pg_cfg.osd_set, up_osds, aff_osds);
const new_primary = pick_primary(pool_id, pg_num, state.config.pools[pool_id], pg_cfg.osd_set, up_osds, aff_osds);
if (pg_cfg.primary != new_primary)
{
if (!new_pg_config)
@@ -99,13 +84,12 @@ function save_new_pgs_txn(save_to, request, state, etcd_prefix, etcd_watch_revis
{
const aff_osds = get_affinity_osds(state.config.pools[pool_id] || {}, up_osds, osd_tree);
const pg_items = {};
reset_rng();
new_pgs.map((osd_set, i) =>
{
osd_set = osd_set.map(osd_num => osd_num === LPOptimizer.NO_OSD ? 0 : osd_num);
pg_items[i+1] = {
osd_set,
primary: pick_primary(state.config.pools[pool_id], osd_set, up_osds, aff_osds),
primary: pick_primary(pool_id, i+1, state.config.pools[pool_id], osd_set, up_osds, aff_osds),
};
if (prev_pgs[i] && prev_pgs[i].join(' ') != osd_set.join(' ') &&
prev_pgs[i].filter(osd_num => osd_num).length > 0)

View File

@@ -33,9 +33,11 @@ async function run()
console.log(config_path+' is missing');
process.exit(1);
}
if (fs.existsSync("/etc/systemd/system/etcd.service"))
const in_docker = fs.existsSync("/etc/vitastor/etcd.conf") &&
fs.existsSync("/etc/vitastor/docker.conf");
if (!in_docker && fs.existsSync("/etc/systemd/system/vitastor-etcd.service"))
{
console.log("/etc/systemd/system/etcd.service already exists");
console.log("/etc/systemd/system/vitastor-etcd.service already exists");
process.exit(1);
}
const config = JSON.parse(fs.readFileSync(config_path, { encoding: 'utf-8' }));
@@ -52,10 +54,21 @@ async function run()
console.log('No matching IPs in etcd_address from '+config_path);
process.exit(0);
}
const etcd_cluster = etcds.map((e, i) => `etcd${i}=http://${e}:2380`).join(',');
await system(`mkdir -p /var/lib/etcd${num}.etcd`);
const etcd_name = 'etcd'+etcds[num].replace(/[^0-9a-z_]/ig, '_');
const etcd_cluster = etcds.map(e => `etcd${e.replace(/[^0-9a-z_]/ig, '_')}=http://${e}:2380`).join(',');
if (in_docker)
{
let etcd_conf = fs.readFileSync("/etc/vitastor/etcd.conf", { encoding: 'utf-8' });
etcd_conf = replace_env(etcd_conf, 'ETCD_NAME', etcd_name);
etcd_conf = replace_env(etcd_conf, 'ETCD_IP', etcds[num]);
etcd_conf = replace_env(etcd_conf, 'ETCD_INITIAL_CLUSTER', etcd_cluster);
fs.writeFileSync("/etc/vitastor/etcd.conf", etcd_conf);
console.log('etcd for Vitastor configured. Run `systemctl enable --now vitastor-etcd` to start etcd');
process.exit(0);
}
await system(`mkdir -p /var/lib/etcd`);
fs.writeFileSync(
"/etc/systemd/system/etcd.service",
"/etc/systemd/system/vitastor-etcd.service",
`[Unit]
Description=etcd for vitastor
After=network-online.target local-fs.target time-sync.target
@@ -64,14 +77,14 @@ Wants=network-online.target local-fs.target time-sync.target
[Service]
Restart=always
Environment=GOGC=50
ExecStart=etcd -name etcd${num} --data-dir /var/lib/etcd${num}.etcd \\
ExecStart=etcd --name ${etcd_name} --data-dir /var/lib/etcd \\
--snapshot-count 10000 --advertise-client-urls http://${etcds[num]}:2379 --listen-client-urls http://${etcds[num]}:2379 \\
--initial-advertise-peer-urls http://${etcds[num]}:2380 --listen-peer-urls http://${etcds[num]}:2380 \\
--initial-cluster-token vitastor-etcd-1 --initial-cluster ${etcd_cluster} \\
--initial-cluster-state new --max-txn-ops=100000 --max-request-bytes=104857600 \\
--auto-compaction-retention=10 --auto-compaction-mode=revision
WorkingDirectory=/var/lib/etcd${num}.etcd
ExecStartPre=+chown -R etcd /var/lib/etcd${num}.etcd
WorkingDirectory=/var/lib/etcd
ExecStartPre=+chown -R etcd /var/lib/etcd
User=etcd
PrivateTmp=false
TasksMax=infinity
@@ -89,6 +102,13 @@ WantedBy=multi-user.target
process.exit(0);
}
function replace_env(text, key, value)
{
let found = false;
text = text.replace(new RegExp('^'+key+'\\s*=.*', 'm'), () => { found = true; return key+'='+value; });
return found ? text : text.replace(/\s*$/, '\n')+key+'='+value+'\n';
}
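For reference, `replace_env` above either rewrites an existing `KEY=value` line or appends one; a quick sketch with made-up inputs:

```js
replace_env('ETCD_NAME=old\nETCD_IP=10.0.0.1\n', 'ETCD_NAME', 'etcd_10_0_0_2');
// -> 'ETCD_NAME=etcd_10_0_0_2\nETCD_IP=10.0.0.1\n' (existing assignment rewritten)

replace_env('ETCD_IP=10.0.0.1\n', 'ETCD_NAME', 'etcd_10_0_0_2');
// -> 'ETCD_IP=10.0.0.1\nETCD_NAME=etcd_10_0_0_2\n' (missing key appended at the end)
```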
function select_local_etcd(etcds)
{
const ifaces = os.networkInterfaces();

View File

@@ -5,6 +5,7 @@ Wants=network-online.target local-fs.target time-sync.target
[Service]
Restart=always
SyslogIdentifier=vitastor-mon
ExecStart=node /usr/lib/vitastor/mon/mon-main.js
WorkingDirectory=/
User=vitastor

View File

@@ -8,6 +8,7 @@ PartOf=vitastor.target
LimitNOFILE=1048576
LimitNPROC=1048576
LimitMEMLOCK=infinity
SyslogIdentifier=vitastor-osd%i
# Use the following for direct logs to files
#ExecStart=bash -c 'exec vitastor-disk exec-osd /dev/vitastor/osd%i-data >>/var/log/vitastor/osd%i.log 2>&1'
ExecStart=vitastor-disk exec-osd /dev/vitastor/osd%i-data

View File

@@ -14,8 +14,13 @@ NAN_MODULE_INIT(InitAddon)
Nan::SetPrototypeMethod(tpl, "read", NodeVitastor::Read);
Nan::SetPrototypeMethod(tpl, "write", NodeVitastor::Write);
Nan::SetPrototypeMethod(tpl, "delete", NodeVitastor::Delete);
Nan::SetPrototypeMethod(tpl, "sync", NodeVitastor::Sync);
Nan::SetPrototypeMethod(tpl, "read_bitmap", NodeVitastor::ReadBitmap);
Nan::SetPrototypeMethod(tpl, "on_ready", NodeVitastor::OnReady);
Nan::SetPrototypeMethod(tpl, "get_min_io_size", NodeVitastor::GetMinIoSize);
Nan::SetPrototypeMethod(tpl, "get_max_atomic_write_size", NodeVitastor::GetMaxAtomicWriteSize);
Nan::SetPrototypeMethod(tpl, "get_immediate_commit", NodeVitastor::GetImmediateCommit);
//Nan::SetPrototypeMethod(tpl, "destroy", NodeVitastor::Destroy);
Nan::Set(target, Nan::New("Client").ToLocalChecked(), Nan::GetFunction(tpl).ToLocalChecked());
@@ -63,6 +68,10 @@ NAN_MODULE_INIT(InitAddon)
Nan::Set(target, Nan::New("ENOSYS").ToLocalChecked(), Nan::New<v8::Int32>(-ENOSYS));
Nan::Set(target, Nan::New("EAGAIN").ToLocalChecked(), Nan::New<v8::Int32>(-EAGAIN));
Nan::Set(target, Nan::New("IMMEDIATE_NONE").ToLocalChecked(), Nan::New<v8::Int32>(IMMEDIATE_NONE));
Nan::Set(target, Nan::New("IMMEDIATE_SMALL").ToLocalChecked(), Nan::New<v8::Int32>(IMMEDIATE_SMALL));
Nan::Set(target, Nan::New("IMMEDIATE_ALL").ToLocalChecked(), Nan::New<v8::Int32>(IMMEDIATE_ALL));
// Listing handle
tpl = Nan::New<v8::FunctionTemplate>(NodeVitastorKVListing::Create);

View File

@@ -5,9 +5,10 @@
#define NODE_VITASTOR_READ 1
#define NODE_VITASTOR_WRITE 2
#define NODE_VITASTOR_SYNC 3
#define NODE_VITASTOR_READ_BITMAP 4
#define NODE_VITASTOR_GET_INFO 5
#define NODE_VITASTOR_DELETE 3
#define NODE_VITASTOR_SYNC 4
#define NODE_VITASTOR_READ_BITMAP 5
#define NODE_VITASTOR_GET_INFO 6
#ifndef INODE_POOL
#define INODE_POOL(inode) (uint32_t)((inode) >> (64 - POOL_ID_BITS))
@@ -80,6 +81,11 @@ NAN_METHOD(NodeVitastor::Create)
NodeVitastor* cli = new NodeVitastor();
cli->c = vitastor_c_create_uring_json(c_cfg, cfg.size());
delete[] c_cfg;
if (!cli->c)
{
Nan::ThrowError("failed to initialize io_uring (old kernel or insufficient ulimit -l?)");
return;
}
int res = vitastor_c_uring_register_eventfd(cli->c);
if (res >= 0)
@@ -133,12 +139,12 @@ NodeVitastorRequest* NodeVitastor::get_read_request(const Nan::FunctionCallbackI
return req;
}
// read(pool, inode, offset, len, callback(err, buffer, version))
// read(pool, inode, offset, length, callback(err, buffer, version))
NAN_METHOD(NodeVitastor::Read)
{
TRACE("NodeVitastor::Read");
if (info.Length() < 5)
Nan::ThrowError("Not enough arguments to read(pool, inode, offset, len, callback(err, buffer, version))");
Nan::ThrowError("Not enough arguments to read(pool, inode, offset, length, callback(err, buffer, version))");
NodeVitastor* self = Nan::ObjectWrap::Unwrap<NodeVitastor>(info.This());
@@ -149,6 +155,9 @@ NAN_METHOD(NodeVitastor::Read)
self->Ref();
vitastor_c_read(self->c, ((pool << (64-POOL_ID_BITS)) | inode), req->offset, req->len, &req->iov, 1, on_read_finish, req);
#if !defined VITASTOR_C_API_VERSION || VITASTOR_C_API_VERSION < 5
vitastor_c_uring_handle_events(self->c);
#endif
}
NodeVitastorRequest* NodeVitastor::get_write_request(const Nan::FunctionCallbackInfo<v8::Value> & info, int argpos)
@@ -217,6 +226,58 @@ NAN_METHOD(NodeVitastor::Write)
req->iov_list.size() ? req->iov_list.data() : &req->iov,
req->iov_list.size() ? req->iov_list.size() : 1,
on_write_finish, req);
#if !defined VITASTOR_C_API_VERSION || VITASTOR_C_API_VERSION < 5
vitastor_c_uring_handle_events(self->c);
#endif
}
NodeVitastorRequest* NodeVitastor::get_delete_request(const Nan::FunctionCallbackInfo<v8::Value> & info, int argpos)
{
uint64_t offset = get_ui64(info[argpos+0]);
uint64_t len = get_ui64(info[argpos+1]);
uint64_t version = 0;
if (!info[argpos+2].IsEmpty() &&
!info[argpos+2]->IsFunction() &&
info[argpos+2]->IsObject())
{
auto key = Nan::New<v8::String>("version").ToLocalChecked();
auto params = info[argpos+2].As<v8::Object>();
auto versionObj = Nan::Get(params, key).ToLocalChecked();
if (!versionObj.IsEmpty())
version = get_ui64(versionObj);
argpos++;
}
v8::Local<v8::Function> callback = info[argpos+2].As<v8::Function>();
auto req = new NodeVitastorRequest(this, callback);
req->offset = offset;
req->len = len;
req->version = version;
return req;
}
// delete(pool, inode, offset, length, { version }?, callback(err))
NAN_METHOD(NodeVitastor::Delete)
{
TRACE("NodeVitastor::Delete");
if (info.Length() < 5)
Nan::ThrowError("Not enough arguments to delete(pool, inode, offset, length, { version }?, callback(err))");
NodeVitastor* self = Nan::ObjectWrap::Unwrap<NodeVitastor>(info.This());
uint64_t pool = get_ui64(info[0]);
uint64_t inode = get_ui64(info[1]);
auto req = self->get_delete_request(info, 2);
self->Ref();
vitastor_c_delete(self->c, ((pool << (64-POOL_ID_BITS)) | inode), req->offset, req->len, req->version,
on_write_finish, req);
#if !defined VITASTOR_C_API_VERSION || VITASTOR_C_API_VERSION < 5
vitastor_c_uring_handle_events(self->c);
#endif
}
// sync(callback(err))
@@ -233,14 +294,17 @@ NAN_METHOD(NodeVitastor::Sync)
self->Ref();
vitastor_c_sync(self->c, on_write_finish, req);
#if !defined VITASTOR_C_API_VERSION || VITASTOR_C_API_VERSION < 5
vitastor_c_uring_handle_events(self->c);
#endif
}
// read_bitmap(pool, inode, offset, len, with_parents, callback(err, bitmap_buffer))
// read_bitmap(pool, inode, offset, length, with_parents, callback(err, bitmap_buffer))
NAN_METHOD(NodeVitastor::ReadBitmap)
{
TRACE("NodeVitastor::ReadBitmap");
if (info.Length() < 6)
Nan::ThrowError("Not enough arguments to read_bitmap(pool, inode, offset, len, with_parents, callback(err, bitmap_buffer))");
Nan::ThrowError("Not enough arguments to read_bitmap(pool, inode, offset, length, with_parents, callback(err, bitmap_buffer))");
NodeVitastor* self = Nan::ObjectWrap::Unwrap<NodeVitastor>(info.This());
@@ -254,6 +318,9 @@ NAN_METHOD(NodeVitastor::ReadBitmap)
auto req = new NodeVitastorRequest(self, callback);
self->Ref();
vitastor_c_read_bitmap(self->c, ((pool << (64-POOL_ID_BITS)) | inode), offset, len, with_parents, on_read_bitmap_finish, req);
#if !defined VITASTOR_C_API_VERSION || VITASTOR_C_API_VERSION < 5
vitastor_c_uring_handle_events(self->c);
#endif
}
static void on_error(NodeVitastorRequest *req, Nan::Callback & nanCallback, long retval)
@@ -267,6 +334,67 @@ static void on_error(NodeVitastorRequest *req, Nan::Callback & nanCallback, long
nanCallback.Call(1, args, req);
}
// on_ready(callback(err))
NAN_METHOD(NodeVitastor::OnReady)
{
TRACE("NodeVitastor::OnReady");
if (info.Length() < 1)
Nan::ThrowError("Not enough arguments to on_ready(callback(err))");
NodeVitastor* self = Nan::ObjectWrap::Unwrap<NodeVitastor>(info.This());
v8::Local<v8::Function> callback = info[0].As<v8::Function>();
auto req = new NodeVitastorRequest(self, callback);
self->Ref();
vitastor_c_on_ready(self->c, on_ready_finish, req);
#if !defined VITASTOR_C_API_VERSION || VITASTOR_C_API_VERSION < 5
vitastor_c_uring_handle_events(self->c);
#endif
}
void NodeVitastor::on_ready_finish(void *opaque, long retval)
{
TRACE("NodeVitastor::on_ready_finish");
auto req = (NodeVitastorRequest*)opaque;
auto self = req->cli;
Nan::HandleScope scope;
Nan::Callback nanCallback(Nan::New(req->callback));
nanCallback.Call(0, NULL, req);
self->Unref();
delete req;
}
// get_min_io_size(pool_id)
NAN_METHOD(NodeVitastor::GetMinIoSize)
{
TRACE("NodeVitastor::GetMinIoSize");
if (info.Length() < 1)
Nan::ThrowError("Not enough arguments to get_min_io_size(pool_id)");
NodeVitastor* self = Nan::ObjectWrap::Unwrap<NodeVitastor>(info.This());
uint64_t pool = get_ui64(info[0]);
info.GetReturnValue().Set(Nan::New<v8::Number>(vitastor_c_inode_get_bitmap_granularity(self->c, INODE_WITH_POOL(pool, 1))));
}
// get_max_atomic_write_size(pool_id)
NAN_METHOD(NodeVitastor::GetMaxAtomicWriteSize)
{
TRACE("NodeVitastor::GetMaxAtomicWriteSize");
if (info.Length() < 1)
Nan::ThrowError("Not enough arguments to get_max_atomic_write_size(pool_id)");
NodeVitastor* self = Nan::ObjectWrap::Unwrap<NodeVitastor>(info.This());
uint64_t pool = get_ui64(info[0]);
info.GetReturnValue().Set(Nan::New<v8::Number>(vitastor_c_inode_get_block_size(self->c, INODE_WITH_POOL(pool, 1))));
}
// get_immediate_commit(pool_id)
NAN_METHOD(NodeVitastor::GetImmediateCommit)
{
TRACE("NodeVitastor::GetImmediateCommit");
if (info.Length() < 1)
Nan::ThrowError("Not enough arguments to get_immediate_commit(pool_id)");
NodeVitastor* self = Nan::ObjectWrap::Unwrap<NodeVitastor>(info.This());
uint64_t pool = get_ui64(info[0]);
info.GetReturnValue().Set(Nan::New<v8::Number>(vitastor_c_inode_get_immediate_commit(self->c, INODE_WITH_POOL(pool, 1))));
}
void NodeVitastor::on_read_finish(void *opaque, long retval, uint64_t version)
{
TRACE("NodeVitastor::on_read_finish");
@@ -364,6 +492,9 @@ NAN_METHOD(NodeVitastorImage::Create)
img->Ref();
cli->Ref();
vitastor_c_watch_inode(cli->c, (char*)img->name.c_str(), on_watch_start, img);
#if !defined VITASTOR_C_API_VERSION || VITASTOR_C_API_VERSION < 5
vitastor_c_uring_handle_events(cli->c);
#endif
info.GetReturnValue().Set(info.This());
}
@@ -378,12 +509,12 @@ NodeVitastorImage::~NodeVitastorImage()
cli->Unref();
}
// read(offset, len, callback(err, buffer, version))
// read(offset, length, callback(err, buffer, version))
NAN_METHOD(NodeVitastorImage::Read)
{
TRACE("NodeVitastorImage::Read");
if (info.Length() < 3)
Nan::ThrowError("Not enough arguments to read(offset, len, callback(err, buffer, version))");
Nan::ThrowError("Not enough arguments to read(offset, length, callback(err, buffer, version))");
NodeVitastorImage* img = Nan::ObjectWrap::Unwrap<NodeVitastorImage>(info.This());
@@ -394,12 +525,12 @@ NAN_METHOD(NodeVitastorImage::Read)
img->exec_or_wait(req);
}
// write(offset, buffer, { version }?, callback(err))
// write(offset, buf: Buffer | Buffer[], { version }?, callback(err))
NAN_METHOD(NodeVitastorImage::Write)
{
TRACE("NodeVitastorImage::Write");
if (info.Length() < 3)
Nan::ThrowError("Not enough arguments to write(offset, buffer, { version }?, callback(err))");
Nan::ThrowError("Not enough arguments to write(offset, buf: Buffer | Buffer[], { version }?, callback(err))");
NodeVitastorImage* img = Nan::ObjectWrap::Unwrap<NodeVitastorImage>(info.This());
@@ -410,6 +541,22 @@ NAN_METHOD(NodeVitastorImage::Write)
img->exec_or_wait(req);
}
// delete(offset, length, { version }?, callback(err))
NAN_METHOD(NodeVitastorImage::Delete)
{
TRACE("NodeVitastorImage::Delete");
if (info.Length() < 3)
Nan::ThrowError("Not enough arguments to delete(offset, length, { version }?, callback(err))");
NodeVitastorImage* img = Nan::ObjectWrap::Unwrap<NodeVitastorImage>(info.This());
auto req = img->cli->get_delete_request(info, 0);
req->img = img;
req->op = NODE_VITASTOR_DELETE;
img->exec_or_wait(req);
}
// sync(callback(err))
NAN_METHOD(NodeVitastorImage::Sync)
{
@@ -427,12 +574,12 @@ NAN_METHOD(NodeVitastorImage::Sync)
img->exec_or_wait(req);
}
// read_bitmap(offset, len, with_parents, callback(err, bitmap_buffer))
// read_bitmap(offset, length, with_parents, callback(err, bitmap_buffer))
NAN_METHOD(NodeVitastorImage::ReadBitmap)
{
TRACE("NodeVitastorImage::ReadBitmap");
if (info.Length() < 4)
Nan::ThrowError("Not enough arguments to read_bitmap(offset, len, with_parents, callback(err, bitmap_buffer))");
Nan::ThrowError("Not enough arguments to read_bitmap(offset, length, with_parents, callback(err, bitmap_buffer))");
NodeVitastorImage* img = Nan::ObjectWrap::Unwrap<NodeVitastorImage>(info.This());
@@ -488,6 +635,9 @@ void NodeVitastorImage::exec_request(NodeVitastorRequest *req)
uint64_t ino = vitastor_c_inode_get_num(watch);
cli->Ref();
vitastor_c_read(cli->c, ino, req->offset, req->len, &req->iov, 1, NodeVitastor::on_read_finish, req);
#if !defined VITASTOR_C_API_VERSION || VITASTOR_C_API_VERSION < 5
vitastor_c_uring_handle_events(cli->c);
#endif
}
else if (req->op == NODE_VITASTOR_WRITE)
{
@@ -497,6 +647,19 @@ void NodeVitastorImage::exec_request(NodeVitastorRequest *req)
req->iov_list.size() ? req->iov_list.data() : &req->iov,
req->iov_list.size() ? req->iov_list.size() : 1,
NodeVitastor::on_write_finish, req);
#if !defined VITASTOR_C_API_VERSION || VITASTOR_C_API_VERSION < 5
vitastor_c_uring_handle_events(cli->c);
#endif
}
else if (req->op == NODE_VITASTOR_DELETE)
{
uint64_t ino = vitastor_c_inode_get_num(watch);
cli->Ref();
vitastor_c_delete(cli->c, ino, req->offset, req->len, req->version,
NodeVitastor::on_write_finish, req);
#if !defined VITASTOR_C_API_VERSION || VITASTOR_C_API_VERSION < 5
vitastor_c_uring_handle_events(cli->c);
#endif
}
else if (req->op == NODE_VITASTOR_SYNC)
{
@@ -506,6 +669,9 @@ void NodeVitastorImage::exec_request(NodeVitastorRequest *req)
if (imm != IMMEDIATE_ALL)
{
vitastor_c_sync(cli->c, NodeVitastor::on_write_finish, req);
#if !defined VITASTOR_C_API_VERSION || VITASTOR_C_API_VERSION < 5
vitastor_c_uring_handle_events(cli->c);
#endif
}
else
{
@@ -517,6 +683,9 @@ void NodeVitastorImage::exec_request(NodeVitastorRequest *req)
uint64_t ino = vitastor_c_inode_get_num(watch);
cli->Ref();
vitastor_c_read_bitmap(cli->c, ino, req->offset, req->len, req->with_parents, NodeVitastor::on_read_bitmap_finish, req);
#if !defined VITASTOR_C_API_VERSION || VITASTOR_C_API_VERSION < 5
vitastor_c_uring_handle_events(cli->c);
#endif
}
else if (req->op == NODE_VITASTOR_GET_INFO)
{
@@ -648,6 +817,9 @@ NAN_METHOD(NodeVitastorKV::Open)
delete req;
kv->Unref();
});
#if !defined VITASTOR_C_API_VERSION || VITASTOR_C_API_VERSION < 5
vitastor_c_uring_handle_events(kv->cli->c);
#endif
}
// close(callback(err))
@@ -671,6 +843,9 @@ NAN_METHOD(NodeVitastorKV::Close)
delete req;
kv->Unref();
});
#if !defined VITASTOR_C_API_VERSION || VITASTOR_C_API_VERSION < 5
vitastor_c_uring_handle_events(kv->cli->c);
#endif
}
// set_config({ ...config })
@@ -729,6 +904,9 @@ void NodeVitastorKV::get_impl(const Nan::FunctionCallbackInfo<v8::Value> & info,
delete req;
kv->Unref();
}, allow_cache);
#if !defined VITASTOR_C_API_VERSION || VITASTOR_C_API_VERSION < 5
vitastor_c_uring_handle_events(kv->cli->c);
#endif
}
// get(key, callback(err, value))
@@ -801,6 +979,9 @@ NAN_METHOD(NodeVitastorKV::Set)
delete cas_req;
kv->Unref();
}, cas_cb);
#if !defined VITASTOR_C_API_VERSION || VITASTOR_C_API_VERSION < 5
vitastor_c_uring_handle_events(kv->cli->c);
#endif
}
// del(key, callback(err), cas_compare(old_value)?)
@@ -839,6 +1020,9 @@ NAN_METHOD(NodeVitastorKV::Del)
delete cas_req;
kv->Unref();
}, cas_cb);
#if !defined VITASTOR_C_API_VERSION || VITASTOR_C_API_VERSION < 5
vitastor_c_uring_handle_events(kv->cli->c);
#endif
}
// list(start_key?)
@@ -909,7 +1093,7 @@ NodeVitastorKVListing::~NodeVitastorKVListing()
kv->Unref();
}
// next(callback(err, value)?)
// next(callback(err, key, value)?)
NAN_METHOD(NodeVitastorKVListing::Next)
{
TRACE("NodeVitastorKVListing::Next");
@@ -959,6 +1143,9 @@ NAN_METHOD(NodeVitastorKVListing::Next)
list->iter = req;
list->kv->Unref();
});
#if !defined VITASTOR_C_API_VERSION || VITASTOR_C_API_VERSION < 5
vitastor_c_uring_handle_events(list->kv->cli->c);
#endif
}
// close()

View File

@@ -15,14 +15,24 @@ class NodeVitastor: public Nan::ObjectWrap
public:
// constructor({ ...config })
static NAN_METHOD(Create);
// read(pool, inode, offset, len, callback(err, buffer, version))
// read(pool_id, inode, offset, length, callback(err, buffer, version))
static NAN_METHOD(Read);
// write(pool, inode, offset, buf: Buffer | Buffer[], { version }?, callback(err))
// write(pool_id, inode, offset, buf: Buffer | Buffer[], { version }?, callback(err))
static NAN_METHOD(Write);
// delete(pool_id, inode, offset, length, { version }?, callback(err))
static NAN_METHOD(Delete);
// sync(callback(err))
static NAN_METHOD(Sync);
// read_bitmap(pool, inode, offset, len, with_parents, callback(err, bitmap_buffer))
// read_bitmap(pool_id, inode, offset, length, with_parents, callback(err, bitmap_buffer))
static NAN_METHOD(ReadBitmap);
// on_ready(callback(err))
static NAN_METHOD(OnReady);
// get_min_io_size(pool_id)
static NAN_METHOD(GetMinIoSize);
// get_max_atomic_write_size(pool_id)
static NAN_METHOD(GetMaxAtomicWriteSize);
// get_immediate_commit(pool_id)
static NAN_METHOD(GetImmediateCommit);
// // destroy()
// static NAN_METHOD(Destroy);
@@ -37,11 +47,13 @@ private:
static void on_io_readable(uv_poll_t* handle, int status, int revents);
static void on_read_finish(void *opaque, long retval, uint64_t version);
static void on_ready_finish(void *opaque, long retval);
static void on_write_finish(void *opaque, long retval);
static void on_read_bitmap_finish(void *opaque, long retval, uint8_t *bitmap);
NodeVitastorRequest* get_read_request(const Nan::FunctionCallbackInfo<v8::Value> & info, int argpos);
NodeVitastorRequest* get_write_request(const Nan::FunctionCallbackInfo<v8::Value> & info, int argpos);
NodeVitastorRequest* get_delete_request(const Nan::FunctionCallbackInfo<v8::Value> & info, int argpos);
friend class NodeVitastorImage;
friend class NodeVitastorKV;
@@ -53,13 +65,15 @@ class NodeVitastorImage: public Nan::ObjectWrap
public:
// constructor(node_vitastor, name)
static NAN_METHOD(Create);
// read(offset, len, callback(err, buffer, version))
// read(offset, length, callback(err, buffer, version))
static NAN_METHOD(Read);
// write(offset, buf: Buffer | Buffer[], { version }?, callback(err))
static NAN_METHOD(Write);
// delete(offset, length, { version }?, callback(err))
static NAN_METHOD(Delete);
// sync(callback(err))
static NAN_METHOD(Sync);
// read_bitmap(offset, len, with_parents, callback(err, bitmap_buffer))
// read_bitmap(offset, length, with_parents, callback(err, bitmap_buffer))
static NAN_METHOD(ReadBitmap);
// get_info(callback({ num, name, size, parent_id?, readonly?, meta?, mod_revision, block_size, bitmap_granularity, immediate_commit }))
static NAN_METHOD(GetInfo);
@@ -120,7 +134,7 @@ class NodeVitastorKVListing: public Nan::ObjectWrap
public:
// constructor(node_vitastor_kv, start_key?)
static NAN_METHOD(Create);
// next(callback(err, value)?)
// next(callback(err, key, value)?)
static NAN_METHOD(Next);
// close()
static NAN_METHOD(Close);
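A hedged usage sketch of the methods declared above, based only on the signatures in these comments (how `index.js` re-exports the native `Client` class, and the pool/inode numbers, are assumptions):

```js
const { Client } = require('vitastor'); // assumes index.js re-exports the addon's Client
const cli = new Client({ config_path: '/etc/vitastor/vitastor.conf' });
cli.on_ready(() =>
{
    const pool_id = 1;
    // pool geometry helpers added in this change (synchronous calls)
    console.log('min io size:', cli.get_min_io_size(pool_id));
    console.log('max atomic write size:', cli.get_max_atomic_write_size(pool_id));
    console.log('immediate_commit:', cli.get_immediate_commit(pool_id));
    // delete(pool_id, inode, offset, length, { version }?, callback(err))
    cli.delete(pool_id, 2, 0, 131072, (err) =>
    {
        if (err)
            console.error('delete failed:', err);
        else
            cli.sync((err2) => console.log(err2 ? 'sync failed' : 'deleted and synced'));
    });
});
```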

View File

@@ -1,6 +1,6 @@
{
"name": "vitastor",
"version": "1.7.0",
"version": "1.11.0",
"description": "Low-level native bindings to Vitastor client library",
"main": "index.js",
"keywords": [

View File

@@ -98,8 +98,8 @@ vm.elements.each 'TEMPLATE/DISK' do |d|
cmd = 'vitastor-cli'
qemu_arg = ''
if d.elements['VITASTOR_CONF']
cmd = cmd + ' --config_path ' + d.elements['VITASTOR_CONF']
qemu_arg += 'config_path='+d.elements['VITASTOR_CONF']+':'
cmd = cmd + ' --config_path ' + d.elements['VITASTOR_CONF'].text
qemu_arg += 'config_path=' + d.elements['VITASTOR_CONF'].text + ':'
end
draw = "#{bck_dir}/disk.#{did}.raw"

View File

@@ -37,16 +37,6 @@ sub run_cli
$json = 1 if !defined $json;
my $binary = delete $args{binary};
$binary = '/usr/bin/vitastor-cli' if !defined $binary;
if (!exists($args{errfunc}))
{
$args{errfunc} = sub
{
my $line = shift;
print STDERR $line;
*STDERR->flush();
$stderr .= $line;
};
}
if (!exists($args{outfunc}))
{
$retval = '';

View File

@@ -50,7 +50,7 @@ from cinder.volume import configuration
from cinder.volume import driver
from cinder.volume import volume_utils
VITASTOR_VERSION = '1.10.0'
VITASTOR_VERSION = '1.11.0'
LOG = logging.getLogger(__name__)

View File

@@ -1,11 +1,11 @@
Name: vitastor
Version: 1.10.0
Version: 1.11.0
Release: 1%{?dist}
Summary: Vitastor, a fast software-defined clustered block storage
License: Vitastor Network Public License 1.1
URL: https://vitastor.io/
Source0: vitastor-1.10.0.el7.tar.gz
Source0: vitastor-1.11.0.el7.tar.gz
BuildRequires: liburing-devel >= 0.6
BuildRequires: gperftools-devel

View File

@@ -1,11 +1,11 @@
Name: vitastor
Version: 1.10.0
Version: 1.11.0
Release: 1%{?dist}
Summary: Vitastor, a fast software-defined clustered block storage
License: Vitastor Network Public License 1.1
URL: https://vitastor.io/
Source0: vitastor-1.10.0.el8.tar.gz
Source0: vitastor-1.11.0.el8.tar.gz
BuildRequires: liburing-devel >= 0.6
BuildRequires: gperftools-devel

View File

@@ -1,11 +1,11 @@
Name: vitastor
Version: 1.10.0
Version: 1.11.0
Release: 1%{?dist}
Summary: Vitastor, a fast software-defined clustered block storage
License: Vitastor Network Public License 1.1
URL: https://vitastor.io/
Source0: vitastor-1.10.0.el9.tar.gz
Source0: vitastor-1.11.0.el9.tar.gz
BuildRequires: liburing-devel >= 0.6
BuildRequires: gperftools-devel

View File

@@ -19,7 +19,7 @@ if("${CMAKE_INSTALL_PREFIX}" MATCHES "^/usr/local/?$")
set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}")
endif()
add_definitions(-DVITASTOR_VERSION="1.10.0")
add_definitions(-DVITASTOR_VERSION="1.11.0")
add_definitions(-D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -Wall -Wno-sign-compare -Wno-comment -Wno-parentheses -Wno-pointer-arith -fdiagnostics-color=always -fno-omit-frame-pointer -I ${CMAKE_SOURCE_DIR}/src)
add_link_options(-fno-omit-frame-pointer)
if (${WITH_ASAN})

View File

@@ -256,6 +256,7 @@ resume_2:
}
if (entries_to_zero.size() && !bs->inmemory_meta && !bs->readonly)
{
std::sort(entries_to_zero.begin(), entries_to_zero.end());
// we have to zero out additional entries
for (i = 0; i < entries_to_zero.size(); )
{
@@ -338,6 +339,15 @@ bool blockstore_init_meta::handle_meta_block(uint8_t *buf, uint64_t entries_per_
if (*entry_csum != crc32c(0, entry, bs->dsk.clean_entry_size - 4))
{
printf("Metadata entry %ju is corrupt (checksum mismatch), skipping\n", done_cnt+i);
// zero out the invalid entry, otherwise we'll hit "tried to overwrite non-zero metadata entry" later
if (bs->inmemory_meta)
{
memset(entry, 0, bs->dsk.clean_entry_size);
}
else
{
entries_to_zero.push_back(done_cnt+i);
}
continue;
}
}

View File

@@ -6,6 +6,10 @@
#include "cluster_client_impl.h"
#include "json_util.h"
#define TRY_SEND_OFFLINE 0
#define TRY_SEND_CONNECTING 1
#define TRY_SEND_OK 2
cluster_client_t::cluster_client_t(ring_loop_t *ringloop, timerfd_manager_t *tfd, json11::Json config)
{
wb = new writeback_cache_t();
@@ -59,6 +63,10 @@ cluster_client_t::cluster_client_t(ring_loop_t *ringloop, timerfd_manager_t *tfd
st_cli.parse_config(config);
st_cli.infinite_start = false;
if (!config["client_infinite_start"].is_null())
{
st_cli.infinite_start = config["client_infinite_start"].bool_value();
}
st_cli.load_global_config();
scrap_buffer_size = SCRAP_BUFFER_SIZE;
@@ -67,6 +75,18 @@ cluster_client_t::cluster_client_t(ring_loop_t *ringloop, timerfd_manager_t *tfd
cluster_client_t::~cluster_client_t()
{
if (retry_timeout_id >= 0)
{
tfd->clear_timer(retry_timeout_id);
retry_timeout_duration = 0;
retry_timeout_id = -1;
}
if (list_retry_timeout_id >= 0)
{
tfd->clear_timer(list_retry_timeout_id);
list_retry_timeout_id = -1;
list_retry_time = {};
}
msgr.repeer_pgs = [](osd_num_t){};
if (ringloop)
{
@@ -87,6 +107,46 @@ cluster_op_t::~cluster_op_t()
}
}
bool cluster_op_t::support_left_on_dead()
{
if (!parts.size())
{
return false;
}
for (auto & part: parts)
{
if (!(part.flags & PART_DONE) ||
part.op.reply.hdr.opcode != OSD_OP_DELETE ||
part.op.reply.hdr.retval != 0 ||
!(part.op.reply.del.flags & OSD_DEL_SUPPORT_LEFT_ON_DEAD))
{
return false;
}
}
return true;
}
std::vector<osd_num_t> cluster_op_t::get_left_on_dead()
{
std::set<osd_num_t> osds;
for (auto & part: parts)
{
if ((part.flags & PART_DONE) ||
part.op.reply.hdr.opcode == OSD_OP_DELETE &&
part.op.reply.hdr.retval == 0 &&
(part.op.reply.del.flags & OSD_DEL_LEFT_ON_DEAD) != 0)
{
int del_count = (OSD_PACKET_SIZE-sizeof(part.op.reply.del)) / sizeof(uint32_t);
if (del_count > part.op.reply.del.left_on_dead_count)
del_count = part.op.reply.del.left_on_dead_count;
uint32_t *left_on_dead = (uint32_t*)((&part.op.reply.del) + 1);
for (int i = 0; i < del_count; i++)
osds.insert(left_on_dead[i]);
}
}
return std::vector<osd_num_t>(osds.begin(), osds.end());
}
void cluster_client_t::continue_raw_ops(osd_num_t peer_osd)
{
auto it = raw_ops.find(peer_osd);
@@ -134,12 +194,12 @@ void cluster_client_t::unshift_op(cluster_op_t *op)
void cluster_client_t::calc_wait(cluster_op_t *op)
{
op->prev_wait = 0;
if (op->opcode == OSD_OP_WRITE)
if (op->opcode == OSD_OP_WRITE || op->opcode == OSD_OP_DELETE)
{
for (auto prev = op->prev; prev; prev = prev->prev)
{
if (prev->opcode == OSD_OP_SYNC ||
prev->opcode == OSD_OP_WRITE && !(op->flags & OP_FLUSH_BUFFER) && (prev->flags & OP_FLUSH_BUFFER))
(prev->opcode == OSD_OP_WRITE || prev->opcode == OSD_OP_DELETE) && !(op->flags & OP_FLUSH_BUFFER) && (prev->flags & OP_FLUSH_BUFFER))
{
op->prev_wait++;
}
@@ -151,7 +211,8 @@ void cluster_client_t::calc_wait(cluster_op_t *op)
{
for (auto prev = op->prev; prev; prev = prev->prev)
{
if (prev->opcode == OSD_OP_SYNC || prev->opcode == OSD_OP_WRITE && (!(prev->flags & OP_IMMEDIATE_COMMIT) || enable_writeback))
if (prev->opcode == OSD_OP_SYNC || (prev->opcode == OSD_OP_WRITE || prev->opcode == OSD_OP_DELETE) &&
(!(prev->flags & OP_IMMEDIATE_COMMIT) || enable_writeback))
{
op->prev_wait++;
}
@@ -167,7 +228,7 @@ void cluster_client_t::calc_wait(cluster_op_t *op)
void cluster_client_t::inc_wait(uint64_t opcode, uint64_t flags, cluster_op_t *next, int inc)
{
if (opcode != OSD_OP_WRITE && opcode != OSD_OP_SYNC)
if (opcode != OSD_OP_WRITE && opcode != OSD_OP_DELETE && opcode != OSD_OP_SYNC)
{
return;
}
@@ -176,10 +237,10 @@ void cluster_client_t::inc_wait(uint64_t opcode, uint64_t flags, cluster_op_t *n
while (next)
{
auto n2 = next->next;
if (opcode == OSD_OP_WRITE
if ((opcode == OSD_OP_WRITE || opcode == OSD_OP_DELETE)
? (next->opcode == OSD_OP_SYNC && (!(flags & OP_IMMEDIATE_COMMIT) || enable_writeback) ||
next->opcode == OSD_OP_WRITE && (flags & OP_FLUSH_BUFFER) && !(next->flags & OP_FLUSH_BUFFER))
: (next->opcode == OSD_OP_SYNC || next->opcode == OSD_OP_WRITE))
(next->opcode == OSD_OP_WRITE || next->opcode == OSD_OP_DELETE) && (flags & OP_FLUSH_BUFFER) && !(next->flags & OP_FLUSH_BUFFER))
: (next->opcode == OSD_OP_SYNC || next->opcode == OSD_OP_WRITE || next->opcode == OSD_OP_DELETE))
{
next->prev_wait += inc;
assert(next->prev_wait >= 0);
@@ -252,16 +313,17 @@ void cluster_client_t::erase_op(cluster_op_t *op)
}
if (flags & OP_FLUSH_BUFFER)
{
auto overflow = std::move(wb->writeback_overflow);
int i = 0;
while (i < wb->writeback_overflow.size() && wb->writebacks_active < client_max_writeback_iodepth)
while (i < overflow.size() && wb->writebacks_active < client_max_writeback_iodepth)
{
execute_internal(wb->writeback_overflow[i]);
execute_internal(overflow[i]);
i++;
}
if (i > 0)
{
wb->writeback_overflow.erase(wb->writeback_overflow.begin(), wb->writeback_overflow.begin()+i);
}
overflow.erase(overflow.begin(), overflow.begin()+i);
assert(!wb->writeback_overflow.size());
wb->writeback_overflow.swap(overflow);
}
}
@@ -314,7 +376,7 @@ void cluster_client_t::reset_retry_timer(int new_duration)
{
return;
}
if (retry_timeout_id)
if (retry_timeout_id >= 0)
{
tfd->clear_timer(retry_timeout_id);
}
@@ -322,7 +384,7 @@ void cluster_client_t::reset_retry_timer(int new_duration)
retry_timeout_id = tfd->set_timer(retry_timeout_duration, false, [this](int)
{
int time_passed = retry_timeout_duration;
retry_timeout_id = 0;
retry_timeout_id = -1;
retry_timeout_duration = 0;
continue_ops(time_passed);
});
@@ -397,6 +459,16 @@ void cluster_client_t::on_load_config_hook(json11::Json::object & etcd_global_co
}
// client_retry_enospc
client_retry_enospc = config["client_retry_enospc"].is_null() ? true : config["client_retry_enospc"].bool_value();
// client_wait_up_timeout
if (!config["client_wait_up_timeout"].is_null())
client_wait_up_timeout = config["client_wait_up_timeout"].uint64_value();
else
{
auto etcd_report_interval = config["etcd_report_interval"].uint64_value();
if (!etcd_report_interval)
etcd_report_interval = 5;
client_wait_up_timeout = 1+etcd_report_interval+(st_cli.max_etcd_attempts*(2*st_cli.etcd_quick_timeout)+999)/1000;
}
// log_level
log_level = config["log_level"].uint64_value();
msgr.parse_config(config);
@@ -434,7 +506,7 @@ void cluster_client_t::on_change_pool_config_hook()
// And now they have to be resliced!
for (auto op = op_queue_head; op; op = op->next)
{
if ((op->opcode == OSD_OP_WRITE || op->opcode == OSD_OP_READ ||
if ((op->opcode == OSD_OP_WRITE || op->opcode == OSD_OP_DELETE || op->opcode == OSD_OP_READ ||
op->opcode == OSD_OP_READ_BITMAP || op->opcode == OSD_OP_READ_CHAIN_BITMAP) &&
INODE_POOL(op->cur_inode) == pool_item.first)
{
@@ -457,6 +529,7 @@ void cluster_client_t::on_change_pg_state_hook(pool_id_t pool_id, pg_num_t pg_nu
}
// Always continue to resume operations hung because of lack of the primary OSD
continue_ops();
continue_lists();
}
bool cluster_client_t::get_immediate_commit(uint64_t inode)
@@ -477,6 +550,7 @@ void cluster_client_t::on_change_osd_state_hook(uint64_t peer_osd)
if (msgr.wanted_peers.find(peer_osd) != msgr.wanted_peers.end())
{
msgr.connect_peer(peer_osd, st_cli.peer_states[peer_osd]);
continue_lists();
}
}
@@ -559,7 +633,8 @@ bool cluster_client_t::flush()
void cluster_client_t::execute(cluster_op_t *op)
{
if (op->opcode != OSD_OP_SYNC && op->opcode != OSD_OP_READ &&
op->opcode != OSD_OP_READ_BITMAP && op->opcode != OSD_OP_READ_CHAIN_BITMAP && op->opcode != OSD_OP_WRITE)
op->opcode != OSD_OP_READ_BITMAP && op->opcode != OSD_OP_READ_CHAIN_BITMAP &&
op->opcode != OSD_OP_WRITE && op->opcode != OSD_OP_DELETE)
{
op->retval = -EINVAL;
auto cb = std::move(op->callback);
@@ -571,7 +646,7 @@ void cluster_client_t::execute(cluster_op_t *op)
offline_ops.push_back(op);
return;
}
op->flags = op->flags & OSD_OP_IGNORE_READONLY; // the only allowed flag
op->flags = op->flags & (OSD_OP_IGNORE_READONLY | OSD_OP_WAIT_UP_TIMEOUT); // allowed client flags
execute_internal(op);
}
@@ -592,7 +667,7 @@ void cluster_client_t::execute_internal(cluster_op_t *op)
{
return;
}
if (op->opcode == OSD_OP_WRITE && enable_writeback && !(op->flags & OP_FLUSH_BUFFER) &&
if ((op->opcode == OSD_OP_WRITE || op->opcode == OSD_OP_DELETE) && enable_writeback && !(op->flags & OP_FLUSH_BUFFER) &&
!op->version /* no CAS writeback */)
{
if (wb->writebacks_active >= client_max_writeback_iodepth)
@@ -603,7 +678,7 @@ void cluster_client_t::execute_internal(cluster_op_t *op)
}
// Just copy and acknowledge the operation
wb->copy_write(op, CACHE_DIRTY);
while (wb->writeback_bytes + op->len > client_max_buffered_bytes || wb->writeback_queue_size > client_max_buffered_ops)
while (wb->writeback_bytes > client_max_buffered_bytes || wb->writeback_queue_size > client_max_buffered_ops)
{
// Initiate some writeback (asynchronously)
wb->start_writebacks(this, 1);
@@ -613,7 +688,7 @@ void cluster_client_t::execute_internal(cluster_op_t *op)
cb(op);
return;
}
if (op->opcode == OSD_OP_WRITE && !(op->flags & OP_IMMEDIATE_COMMIT))
if ((op->opcode == OSD_OP_WRITE || op->opcode == OSD_OP_DELETE) && !(op->flags & OP_IMMEDIATE_COMMIT))
{
if (!(op->flags & OP_FLUSH_BUFFER) && !op->version /* no CAS write-repeat */)
{
@@ -633,7 +708,10 @@ void cluster_client_t::execute_internal(cluster_op_t *op)
};
execute_internal(sync_op);
}
dirty_bytes += op->len;
if (op->opcode != OSD_OP_DELETE)
{
dirty_bytes += op->len;
}
dirty_ops++;
}
else if (op->opcode == OSD_OP_SYNC)
@@ -718,6 +796,36 @@ bool cluster_client_t::check_rw(cluster_op_t *op)
return false;
}
}
op->deoptimise_snapshot = false;
if (enable_writeback && (op->opcode == OSD_OP_READ || op->opcode == OSD_OP_READ_BITMAP || op->opcode == OSD_OP_READ_CHAIN_BITMAP))
{
auto ino_it = st_cli.inode_config.find(op->inode);
if (ino_it != st_cli.inode_config.end())
{
int chain_size = 0;
while (ino_it != st_cli.inode_config.end() && ino_it->second.parent_id)
{
// Check for loops - FIXME check it in etcd_state_client
if (ino_it->second.parent_id == op->inode ||
chain_size > st_cli.inode_config.size())
{
op->retval = -EINVAL;
auto cb = std::move(op->callback);
cb(op);
return false;
}
if (INODE_POOL(ino_it->second.parent_id) == INODE_POOL(ino_it->first) &&
wb->has_inode(ino_it->second.parent_id))
{
// Deoptimise reads - we have dirty data for one of the parent layer(s).
op->deoptimise_snapshot = true;
break;
}
chain_size++;
ino_it = st_cli.inode_config.find(ino_it->second.parent_id);
}
}
}
return true;
}
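The deoptimise_snapshot flag computed above changes how snapshot chains are read when client writeback is enabled; informally (an interpretation of this diff, not an authoritative description):
// Hypothetical chain: child I3 -> snapshot I2 -> base I1
// If the writeback cache holds dirty data for I2 (wb->has_inode(I2) is true),
// reads of I3 are "deoptimised": parent layers in the same pool are no longer skipped,
// each layer is read separately so the buffered data for I2 can be merged on the client.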
@@ -777,9 +885,48 @@ resume_1:
{
if (!(op->parts[i].flags & PART_SENT))
{
if (!try_send(op, i))
int is_ok = try_send(op, i);
if (is_ok != TRY_SEND_OK)
{
// We'll need to retry again
if (op->flags & OSD_OP_WAIT_UP_TIMEOUT)
{
if (is_ok != TRY_SEND_OFFLINE)
{
// Reset "wait_up" timer
op->wait_up_until = {};
}
else if (!op->wait_up_until.tv_sec && !client_wait_up_timeout)
{
// client_wait_up_timeout is zero - don't wait for the PG to come up, fail immediately
op->parts[i].flags |= PART_ERROR;
if (!op->retval)
op->retval = -ETIMEDOUT;
break;
}
else if (!op->wait_up_until.tv_sec)
{
// Set "wait_up" timer
clock_gettime(CLOCK_REALTIME, &op->wait_up_until);
op->wait_up_until.tv_sec += client_wait_up_timeout;
}
else
{
// Check if the timeout expired
timespec tv;
clock_gettime(CLOCK_REALTIME, &tv);
if (tv.tv_sec > op->wait_up_until.tv_sec ||
tv.tv_sec == op->wait_up_until.tv_sec &&
tv.tv_nsec > op->wait_up_until.tv_nsec)
{
// Fail
op->parts[i].flags |= PART_ERROR;
if (!op->retval)
op->retval = -ETIMEDOUT;
break;
}
}
}
if (op->parts[i].flags & PART_RETRY)
{
op->retry_after = client_retry_interval;
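The expiry checks above (and in the listing code later in this diff) compare timespecs lexicographically; a minimal standalone helper expressing the same ordering (a sketch, not part of the patch) would be:

#include <time.h>

// true when a is strictly later than b
static inline bool timespec_after(const timespec & a, const timespec & b)
{
    return a.tv_sec > b.tv_sec || (a.tv_sec == b.tv_sec && a.tv_nsec > b.tv_nsec);
}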
@@ -810,12 +957,21 @@ resume_2:
{
// Check parent inode
auto ino_it = st_cli.inode_config.find(op->cur_inode);
while (ino_it != st_cli.inode_config.end() && ino_it->second.parent_id &&
INODE_POOL(ino_it->second.parent_id) == INODE_POOL(op->cur_inode) &&
// Check for loops
ino_it->second.parent_id != op->inode)
// Skip parents from the same pool
int skipped = 0;
while (!op->deoptimise_snapshot &&
ino_it != st_cli.inode_config.end() && ino_it->second.parent_id &&
INODE_POOL(ino_it->second.parent_id) == INODE_POOL(op->cur_inode))
{
// Skip parents from the same pool
// Check for loops - FIXME check it in etcd_state_client
if (ino_it->second.parent_id == op->inode ||
skipped > st_cli.inode_config.size())
{
op->retval = -EINVAL;
erase_op(op);
return 1;
}
skipped++;
ino_it = st_cli.inode_config.find(ino_it->second.parent_id);
}
if (ino_it != st_cli.inode_config.end() &&
@@ -994,7 +1150,7 @@ void cluster_client_t::slice_rw(cluster_op_t *op)
if (end == begin)
{
op->done_count++;
op->parts[i].flags = PART_DONE;
op->parts[i].flags = PART_SENT|PART_DONE;
}
}
else if (op->opcode != OSD_OP_READ_BITMAP && op->opcode != OSD_OP_READ_CHAIN_BITMAP && op->opcode != OSD_OP_DELETE)
@@ -1053,7 +1209,7 @@ bool cluster_client_t::affects_osd(uint64_t inode, uint64_t offset, uint64_t len
return false;
}
bool cluster_client_t::try_send(cluster_op_t *op, int i)
int cluster_client_t::try_send(cluster_op_t *op, int i)
{
if (!msgr_initialized)
{
@@ -1077,9 +1233,9 @@ bool cluster_client_t::try_send(cluster_op_t *op, int i)
pool_cfg.scheme == POOL_SCHEME_REPLICATED ? 1 : pool_cfg.pg_size-pool_cfg.parity_chunks
);
uint64_t meta_rev = 0;
if (op->opcode != OSD_OP_READ_BITMAP && op->opcode != OSD_OP_DELETE)
if (op->opcode != OSD_OP_READ_BITMAP && op->opcode != OSD_OP_DELETE && !op->deoptimise_snapshot)
{
auto ino_it = st_cli.inode_config.find(op->inode);
auto ino_it = st_cli.inode_config.find(op->cur_inode);
if (ino_it != st_cli.inode_config.end())
meta_rev = ino_it->second.mod_revision;
}
@@ -1109,14 +1265,15 @@ bool cluster_client_t::try_send(cluster_op_t *op, int i)
};
part->op.iov = part->iov;
msgr.outbox_push(&part->op);
return true;
return TRY_SEND_OK;
}
else if (msgr.wanted_peers.find(primary_osd) == msgr.wanted_peers.end())
{
msgr.connect_peer(primary_osd, st_cli.peer_states[primary_osd]);
return TRY_SEND_CONNECTING;
}
}
return false;
return TRY_SEND_OFFLINE;
}
int cluster_client_t::continue_sync(cluster_op_t *op)
@@ -1188,13 +1345,12 @@ resume_1:
void cluster_client_t::send_sync(cluster_op_t *op, cluster_op_part_t *part)
{
auto peer_it = msgr.osd_peer_fds.find(part->osd_num);
assert(peer_it != msgr.osd_peer_fds.end());
auto peer_fd = msgr.osd_peer_fds.at(part->osd_num);
part->flags |= PART_SENT;
op->inflight_count++;
part->op = (osd_op_t){
.op_type = OSD_OP_OUT,
.peer_fd = peer_it->second,
.peer_fd = peer_fd,
.req = {
.hdr = {
.magic = SECONDARY_OSD_OP_MAGIC,
@@ -1228,9 +1384,11 @@ void cluster_client_t::handle_op_part(cluster_op_part_t *part)
{
// Operation failed, retry
part->flags |= PART_ERROR;
if (!op->retval || op->retval == -EPIPE || part->op.reply.hdr.retval == -EIO)
if (!op->retval || op->retval == -EPIPE ||
part->op.reply.hdr.retval == -ENOSPC && op->retval == -ETIMEDOUT ||
part->op.reply.hdr.retval == -EIO)
{
// Error priority: EIO > ENOSPC > EPIPE
// Error priority: EIO > ENOSPC > ETIMEDOUT > EPIPE
op->retval = part->op.reply.hdr.retval;
}
int stop_fd = -1;
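The retval merging condition above follows the priority listed in the comment; as a rough mental model (a sketch that approximates, but does not exactly reproduce, the condition), each error can be ranked and the higher-ranked one wins:

// Higher value = higher priority when several parts of one operation fail
static int error_priority(int err)
{
    switch (err)
    {
        case -EIO:       return 4;
        case -ENOSPC:    return 3;
        case -ETIMEDOUT: return 2;
        case -EPIPE:     return 1;
        default:         return 0;
    }
}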
@@ -1293,7 +1451,7 @@ void cluster_client_t::handle_op_part(cluster_op_part_t *part)
op->version = op->parts.size() == 1 ? part->op.reply.rw.version : 0;
}
}
else if (op->opcode == OSD_OP_WRITE)
else if (op->opcode == OSD_OP_WRITE || op->opcode == OSD_OP_DELETE)
{
op->version = op->parts.size() == 1 ? part->op.reply.rw.version : 0;
}


@@ -11,12 +11,11 @@
#define DEFAULT_CLIENT_MAX_BUFFERED_BYTES 32*1024*1024
#define DEFAULT_CLIENT_MAX_BUFFERED_OPS 1024
#define DEFAULT_CLIENT_MAX_WRITEBACK_IODEPTH 256
#define INODE_LIST_DONE 1
#define INODE_LIST_HAS_UNSTABLE 2
#define OSD_OP_READ_BITMAP OSD_OP_SEC_READ_BMP
#define OSD_OP_READ_CHAIN_BITMAP 0x102
#define OSD_OP_IGNORE_READONLY 0x08
#define OSD_OP_WAIT_UP_TIMEOUT 0x10
struct cluster_op_t;
@@ -41,7 +40,8 @@ struct cluster_op_t
// for reads and writes within a single object (stripe),
// reads can return current version and writes can use "CAS" semantics
uint64_t version = 0;
// now only OSD_OP_IGNORE_READONLY is supported
// flags: OSD_OP_IGNORE_READONLY - ignore inode readonly flag
// OSD_OP_WAIT_UP_TIMEOUT - do not retry the operation infinitely if the PG is inactive, only wait for up to <client_wait_up_timeout>
uint64_t flags = 0;
// negative retval is an error number
// write and read return len on success
@@ -53,12 +53,18 @@ struct cluster_op_t
void *bitmap_buf = NULL;
std::function<void(cluster_op_t*)> callback;
~cluster_op_t();
// for deletions, remove after 'atomic delete':
bool support_left_on_dead();
std::vector<osd_num_t> get_left_on_dead();
protected:
int state = 0;
uint64_t cur_inode; // for snapshot reads
bool needs_reslice = false;
bool needs_reslice: 1;
bool deoptimise_snapshot: 1;
int retry_after = 0;
int inflight_count = 0, done_count = 0;
timespec wait_up_until = {};
std::vector<cluster_op_part_t> parts;
void *part_bitmaps = NULL;
unsigned bitmap_buf_size = 0;
@@ -71,6 +77,7 @@ protected:
struct inode_list_t;
struct inode_list_osd_t;
struct inode_list_pg_t;
class writeback_cache_t;
// FIXME: Split into public and private interfaces
@@ -95,8 +102,9 @@ class cluster_client_t
int client_retry_interval = 50; // ms
int client_eio_retry_interval = 1000; // ms
bool client_retry_enospc = true;
int client_wait_up_timeout = 16; // sec (for listings)
int retry_timeout_id = 0;
int retry_timeout_id = -1;
int retry_timeout_duration = 0;
std::vector<cluster_op_t*> offline_ops;
cluster_op_t *op_queue_head = NULL, *op_queue_tail = NULL;
@@ -110,6 +118,8 @@ class cluster_client_t
bool pgs_loaded = false;
ring_consumer_t consumer;
std::vector<std::function<void(void)>> on_ready_hooks;
int list_retry_timeout_id = -1;
timespec list_retry_time;
std::vector<inode_list_t*> lists;
std::multimap<osd_num_t, osd_op_t*> raw_ops;
int continuing_ops = 0;
@@ -135,11 +145,10 @@ public:
bool get_immediate_commit(uint64_t inode);
void continue_ops(int time_passed = 0);
inode_list_t *list_inode_start(inode_t inode,
std::function<void(inode_list_t* lst, std::set<object_id>&& objects, pg_num_t pg_num, osd_num_t primary_osd, int status)> callback);
int list_pg_count(inode_list_t *lst);
const std::vector<osd_num_t> & list_inode_get_inactive_osds(inode_list_t *lst);
void list_inode_next(inode_list_t *lst, int next_pgs);
void list_inode(inode_t inode, uint64_t min_offset, uint64_t max_offset, int max_parallel_pgs, std::function<void(
int status, int pgs_left, pg_num_t pg_num, std::set<object_id>&& objects)> pg_callback);
//inline uint32_t get_bs_bitmap_granularity() { return st_cli.global_bitmap_granularity; }
//inline uint64_t get_bs_block_size() { return st_cli.global_block_size; }
uint64_t next_op_id();
@@ -158,7 +167,7 @@ protected:
bool check_rw(cluster_op_t *op);
void slice_rw(cluster_op_t *op);
void reset_retry_timer(int new_duration);
bool try_send(cluster_op_t *op, int i);
int try_send(cluster_op_t *op, int i);
int continue_sync(cluster_op_t *op);
void send_sync(cluster_op_t *op, cluster_op_part_t *part);
void handle_op_part(cluster_op_part_t *part);
@@ -167,8 +176,14 @@ protected:
void calc_wait(cluster_op_t *op);
void inc_wait(uint64_t opcode, uint64_t flags, cluster_op_t *next, int inc);
void continue_lists();
void continue_listing(inode_list_t *lst);
bool continue_listing(inode_list_t *lst);
bool restart_listing(inode_list_t* lst);
void retry_start_pg_listing(inode_list_pg_t *pg);
int start_pg_listing(inode_list_pg_t *pg);
void send_list(inode_list_osd_t *cur_list);
void set_list_retry_timeout(int ms, timespec new_time);
void finish_list_pg(inode_list_pg_t *pg, bool retry_epipe);
bool check_finish_listing(inode_list_t *lst);
void continue_raw_ops(osd_num_t peer_osd);
friend class writeback_cache_t;


@@ -42,10 +42,10 @@ public:
std::multimap<uint64_t, uint64_t*> flushed_buffers; // flush_id => refcnt
~writeback_cache_t();
bool has_inode(uint64_t inode);
dirty_buf_it_t find_dirty(uint64_t inode, uint64_t offset);
bool is_left_merged(dirty_buf_it_t dirty_it);
bool is_right_merged(dirty_buf_it_t dirty_it);
bool is_merged(const dirty_buf_it_t & dirty_it);
void copy_write(cluster_op_t *op, int state, uint64_t new_flush_id = 0);
int repeat_ops_for(cluster_client_t *cli, osd_num_t peer_osd, pool_id_t pool_id, pg_num_t pg_num);
void start_writebacks(cluster_client_t *cli, int count);


@@ -2,9 +2,17 @@
// License: VNPL-1.1 or GNU GPL-2.0+ (see README.md for details)
#include <algorithm>
#include "assert.h"
#include "pg_states.h"
#include "cluster_client.h"
#define LIST_PG_INIT 0
#define LIST_PG_WAIT_ACTIVE 1
#define LIST_PG_WAIT_CONNECT 2
#define LIST_PG_WAIT_RETRY 3
#define LIST_PG_SENT 4
#define LIST_PG_DONE 5
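Informally, a PG being listed moves through these states roughly as follows (a summary inferred from the functions below, not an exhaustive transition table):
// LIST_PG_INIT         - nothing attempted yet, retry_start_pg_listing() will be called
// LIST_PG_WAIT_ACTIVE  - PG paused / no primary / not active; wait up to client_wait_up_timeout
// LIST_PG_WAIT_CONNECT - peers known but not all connected; retried on OSD state changes
// LIST_PG_WAIT_RETRY   - previous attempt ended with -EPIPE; retried after client_retry_interval ms
// LIST_PG_SENT         - OSD_OP_SEC_LIST requests are in flight
// LIST_PG_DONE         - pg_callback() has been invoked for this PG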
struct inode_list_t;
struct inode_list_pg_t;
@@ -13,20 +21,22 @@ struct inode_list_osd_t
{
inode_list_pg_t *pg = NULL;
osd_num_t osd_num = 0;
bool sent = false;
};
struct inode_list_pg_t
{
inode_list_t *lst = NULL;
int pos = 0;
pg_num_t pg_num;
osd_num_t cur_primary;
bool has_unstable = false;
int sent = 0;
int done = 0;
int errcode = 0;
pg_num_t pg_num = 0;
osd_num_t cur_primary = 0;
int state = 0;
int inflight_ops = 0;
timespec wait_until;
std::vector<inode_list_osd_t> list_osds;
bool has_unstable = false;
std::set<object_id> objects;
std::vector<osd_num_t> inactive_osds;
};
struct inode_list_t
@@ -34,175 +44,295 @@ struct inode_list_t
cluster_client_t *cli = NULL;
pool_id_t pool_id = 0;
inode_t inode = 0;
uint64_t min_offset = 0;
uint64_t max_offset = 0;
int max_parallel_pgs = 16;
bool fallback = false;
int inflight_pgs = 0;
std::map<osd_num_t, int> inflight_per_osd;
int done_pgs = 0;
int want = 0;
std::vector<osd_num_t> inactive_osds;
int onstack = 0;
std::vector<inode_list_pg_t*> pgs;
std::function<void(inode_list_t* lst, std::set<object_id>&& objects, pg_num_t pg_num, osd_num_t primary_osd, int status)> callback;
pg_num_t real_pg_count = 0;
std::function<void(int status, int pgs_left, pg_num_t pg_num, std::set<object_id>&& objects)> callback;
};
inode_list_t* cluster_client_t::list_inode_start(inode_t inode,
std::function<void(inode_list_t* lst, std::set<object_id>&& objects, pg_num_t pg_num, osd_num_t primary_osd, int status)> callback)
void cluster_client_t::list_inode(inode_t inode, uint64_t min_offset, uint64_t max_offset, int max_parallel_pgs, std::function<void(
int status, int pgs_left, pg_num_t pg_num, std::set<object_id>&& objects)> pg_callback)
{
init_msgr();
int skipped_pgs = 0;
pool_id_t pool_id = INODE_POOL(inode);
if (!pool_id || st_cli.pool_config.find(pool_id) == st_cli.pool_config.end())
{
if (log_level > 0)
{
fprintf(stderr, "Pool %u does not exist\n", pool_id);
}
return NULL;
pg_callback(-EINVAL, 0, 0, std::set<object_id>());
return;
}
auto pg_stripe_size = st_cli.pool_config.at(pool_id).pg_stripe_size;
if (min_offset)
min_offset = (min_offset/pg_stripe_size) * pg_stripe_size;
inode_list_t *lst = new inode_list_t();
lst->cli = this;
lst->pool_id = pool_id;
lst->inode = inode;
lst->callback = callback;
auto pool_cfg = st_cli.pool_config[pool_id];
std::set<osd_num_t> inactive_osd_set;
for (auto & pg_item: pool_cfg.pg_config)
{
auto & pg = pg_item.second;
if (pg.pause || !pg.cur_primary || !(pg.cur_state & PG_ACTIVE))
{
skipped_pgs++;
if (log_level > 0)
{
fprintf(stderr, "PG %u is inactive, skipping\n", pg_item.first);
}
continue;
}
inode_list_pg_t *r = new inode_list_pg_t();
r->lst = lst;
r->pg_num = pg_item.first;
r->cur_primary = pg.cur_primary;
if (pg.cur_state != PG_ACTIVE)
{
// Not clean
std::set<osd_num_t> all_peers;
for (osd_num_t pg_osd: pg.target_set)
{
if (pg_osd != 0)
{
all_peers.insert(pg_osd);
}
}
for (osd_num_t pg_osd: pg.all_peers)
{
if (pg_osd != 0)
{
all_peers.insert(pg_osd);
}
}
for (auto & hist_item: pg.target_history)
{
for (auto pg_osd: hist_item)
{
if (pg_osd != 0)
{
all_peers.insert(pg_osd);
}
}
}
for (osd_num_t peer_osd: all_peers)
{
if (st_cli.peer_states.find(peer_osd) != st_cli.peer_states.end())
{
r->list_osds.push_back((inode_list_osd_t){
.pg = r,
.osd_num = peer_osd,
.sent = false,
});
}
else
{
inactive_osd_set.insert(peer_osd);
}
}
}
else
{
// Clean
r->list_osds.push_back((inode_list_osd_t){
.pg = r,
.osd_num = pg.cur_primary,
.sent = false,
});
}
lst->pgs.push_back(r);
}
std::sort(lst->pgs.begin(), lst->pgs.end(), [](inode_list_pg_t *a, inode_list_pg_t *b)
{
return a->cur_primary < b->cur_primary ? true : false;
});
for (int i = 0; i < lst->pgs.size(); i++)
{
lst->pgs[i]->pos = i;
}
lst->inactive_osds.insert(lst->inactive_osds.end(), inactive_osd_set.begin(), inactive_osd_set.end());
lst->min_offset = min_offset;
lst->max_offset = max_offset;
lst->callback = pg_callback;
lst->max_parallel_pgs = max_parallel_pgs <= 0 ? 16 : max_parallel_pgs;
lists.push_back(lst);
return lst;
}
int cluster_client_t::list_pg_count(inode_list_t *lst)
{
return lst->pgs.size();
}
const std::vector<osd_num_t> & cluster_client_t::list_inode_get_inactive_osds(inode_list_t *lst)
{
return lst->inactive_osds;
}
void cluster_client_t::list_inode_next(inode_list_t *lst, int next_pgs)
{
if (next_pgs >= 0)
{
lst->want += next_pgs;
}
continue_listing(lst);
}
void cluster_client_t::continue_listing(inode_list_t *lst)
bool cluster_client_t::continue_listing(inode_list_t *lst)
{
if (lst->done_pgs >= lst->pgs.size())
if (lst->onstack > 0)
{
return;
return true;
}
if (lst->want <= 0)
lst->onstack++;
if (restart_listing(lst))
{
return;
}
for (int i = 0; i < lst->pgs.size(); i++)
{
if (lst->pgs[i] && lst->pgs[i]->sent < lst->pgs[i]->list_osds.size())
for (int i = 0; i < lst->pgs.size() && lst->inflight_pgs < lst->max_parallel_pgs; i++)
{
for (int j = 0; j < lst->pgs[i]->list_osds.size(); j++)
retry_start_pg_listing(lst->pgs[i]);
}
}
if (check_finish_listing(lst))
{
// Do not change lst->onstack because it's already freed
return false;
}
lst->onstack--;
return true;
}
bool cluster_client_t::restart_listing(inode_list_t* lst)
{
auto pool_it = st_cli.pool_config.find(lst->pool_id);
// We want listing to be consistent. To achieve it we should:
// 1) retry listing of each PG if its state changes
// 2) abort listing if PG count changes during listing
// 3) ideally, only talk to the primary OSD - this will be done separately
// So first we add all PGs without checking their state
if (pool_it == st_cli.pool_config.end() ||
lst->real_pg_count != pool_it->second.real_pg_count)
{
for (auto pg: lst->pgs)
{
if (pg->inflight_ops > 0)
{
send_list(&lst->pgs[i]->list_osds[j]);
if (lst->want <= 0)
{
return;
}
// Wait until all in-progress listings complete or fail
return false;
}
}
for (auto pg: lst->pgs)
{
delete pg;
}
if (log_level > 0 && lst->real_pg_count)
{
fprintf(stderr, "PG count in pool %u changed during listing\n", lst->pool_id);
}
lst->pgs.clear();
if (pool_it == st_cli.pool_config.end())
{
// Unknown pool
lst->callback(-EINVAL, 0, 0, std::set<object_id>());
return false;
}
else if (lst->done_pgs)
{
// PG count changed during listing, it should fail
lst->callback(-EAGAIN, 0, 0, std::set<object_id>());
return false;
}
else
{
lst->real_pg_count = pool_it->second.real_pg_count;
for (pg_num_t pg_num = 1; pg_num <= lst->real_pg_count; pg_num++)
{
inode_list_pg_t *pg = new inode_list_pg_t();
pg->lst = lst;
pg->pg_num = pg_num;
lst->pgs.push_back(pg);
}
}
}
return true;
}
void cluster_client_t::retry_start_pg_listing(inode_list_pg_t *pg)
{
if (pg->state == LIST_PG_SENT || pg->state == LIST_PG_DONE)
{
return;
}
if (pg->state == LIST_PG_WAIT_RETRY)
{
// Check if the timeout expired
timespec tv;
clock_gettime(CLOCK_REALTIME, &tv);
if (tv.tv_sec < pg->wait_until.tv_sec ||
tv.tv_sec == pg->wait_until.tv_sec && tv.tv_nsec < pg->wait_until.tv_nsec)
{
return;
}
}
int new_st = start_pg_listing(pg);
if (new_st == LIST_PG_SENT || new_st == LIST_PG_WAIT_CONNECT)
{
// sent => wait for completion
// not connected, but OSD state exists => wait for PG or OSD state change infinitely
pg->state = new_st;
return;
}
if (new_st == LIST_PG_WAIT_ACTIVE && pg->state != LIST_PG_WAIT_ACTIVE)
{
if (!client_wait_up_timeout)
{
fprintf(stderr, "PG %u/%u is inactive, skipping listing\n", pg->lst->pool_id, pg->pg_num);
pg->errcode = -EPIPE;
pg->list_osds.clear();
pg->objects.clear();
finish_list_pg(pg, false);
return;
}
pg->state = new_st;
clock_gettime(CLOCK_REALTIME, &pg->wait_until);
pg->wait_until.tv_sec += client_wait_up_timeout;
if (log_level > 1)
{
fprintf(stderr, "Waiting for PG %u/%u to become active for %d seconds\n", pg->lst->pool_id, pg->pg_num, client_wait_up_timeout);
}
set_list_retry_timeout(client_wait_up_timeout*1000, pg->wait_until);
return;
}
assert(pg->state == LIST_PG_WAIT_ACTIVE);
// Check if the timeout expired
timespec tv;
clock_gettime(CLOCK_REALTIME, &tv);
if (tv.tv_sec > pg->wait_until.tv_sec ||
tv.tv_sec == pg->wait_until.tv_sec && tv.tv_nsec >= pg->wait_until.tv_nsec)
{
fprintf(stderr, "Failed to wait for PG %u/%u to become active, skipping listing\n", pg->lst->pool_id, pg->pg_num);
pg->errcode = -EPIPE;
pg->list_osds.clear();
pg->objects.clear();
finish_list_pg(pg, false);
}
}
void cluster_client_t::set_list_retry_timeout(int ms, timespec new_time)
{
if (!list_retry_time.tv_sec || list_retry_time.tv_sec > new_time.tv_sec ||
list_retry_time.tv_sec == new_time.tv_sec && list_retry_time.tv_nsec > new_time.tv_nsec)
{
list_retry_time = new_time;
if (list_retry_timeout_id >= 0)
{
tfd->clear_timer(list_retry_timeout_id);
}
list_retry_timeout_id = tfd->set_timer(ms, false, [this](int timer_id)
{
list_retry_timeout_id = -1;
list_retry_time = {};
continue_lists();
});
}
}
int cluster_client_t::start_pg_listing(inode_list_pg_t *pg)
{
auto & pool_cfg = st_cli.pool_config.at(pg->lst->pool_id);
auto pg_it = pool_cfg.pg_config.find(pg->pg_num);
assert(pg->lst->real_pg_count == pool_cfg.real_pg_count);
if (pg_it == pool_cfg.pg_config.end() ||
pg_it->second.pause ||
!pg_it->second.cur_primary ||
!(pg_it->second.cur_state & PG_ACTIVE))
{
// PG is (temporarily?) unavailable
return LIST_PG_WAIT_ACTIVE;
}
pg->inactive_osds.clear();
std::set<osd_num_t> all_peers;
if (pg_it->second.cur_state != PG_ACTIVE && pg->lst->fallback)
{
// Not clean and OSDs don't support listing from primary
for (osd_num_t pg_osd: pg_it->second.target_set)
all_peers.insert(pg_osd);
for (osd_num_t pg_osd: pg_it->second.all_peers)
all_peers.insert(pg_osd);
for (auto & hist_item: pg_it->second.target_history)
for (auto pg_osd: hist_item)
all_peers.insert(pg_osd);
// Remove zero OSD number
all_peers.erase(0);
// Remove unconnectable peers except cur_primary
for (auto peer_it = all_peers.begin(); peer_it != all_peers.end(); )
{
if (*peer_it != pg_it->second.cur_primary &&
st_cli.peer_states[*peer_it].is_null())
{
pg->inactive_osds.push_back(*peer_it);
all_peers.erase(peer_it++);
}
else
peer_it++;
}
}
else
{
// Clean
all_peers.insert(pg_it->second.cur_primary);
}
// Check that we're connected to all PG OSDs
bool conn = true;
for (osd_num_t peer_osd: all_peers)
{
if (msgr.osd_peer_fds.find(peer_osd) == msgr.osd_peer_fds.end())
{
// Initiate connection
if (st_cli.peer_states[peer_osd].is_null())
{
return LIST_PG_WAIT_ACTIVE;
}
msgr.connect_peer(peer_osd, st_cli.peer_states[peer_osd]);
conn = false;
}
}
if (!conn)
{
return LIST_PG_WAIT_CONNECT;
}
// Send all listings at once as the simplest way to guarantee that we connect
// to the exact same OSDs that are listed in PG state
pg->errcode = 0;
pg->list_osds.clear();
pg->has_unstable = false;
pg->objects.clear();
pg->cur_primary = pg_it->second.cur_primary;
for (osd_num_t peer_osd: all_peers)
{
pg->list_osds.push_back((inode_list_osd_t){
.pg = pg,
.osd_num = peer_osd,
});
}
for (auto & list_osd: pg->list_osds)
{
send_list(&list_osd);
}
return LIST_PG_SENT;
}
void cluster_client_t::send_list(inode_list_osd_t *cur_list)
{
if (cur_list->sent)
{
return;
}
if (msgr.osd_peer_fds.find(cur_list->osd_num) == msgr.osd_peer_fds.end())
{
// Initiate connection
msgr.connect_peer(cur_list->osd_num, st_cli.peer_states[cur_list->osd_num]);
return;
}
if (!cur_list->pg->inflight_ops)
cur_list->pg->lst->inflight_pgs++;
cur_list->pg->inflight_ops++;
auto & pool_cfg = st_cli.pool_config[cur_list->pg->lst->pool_id];
osd_op_t *op = new osd_op_t();
op->op_type = OSD_OP_OUT;
@@ -220,6 +350,9 @@ void cluster_client_t::send_list(inode_list_osd_t *cur_list)
.pg_stripe_size = pool_cfg.pg_stripe_size,
.min_inode = cur_list->pg->lst->inode,
.max_inode = cur_list->pg->lst->inode,
.min_stripe = cur_list->pg->lst->min_offset,
.max_stripe = cur_list->pg->lst->max_offset,
.flags = (uint64_t)(cur_list->pg->lst->fallback ? 0 : OSD_LIST_PRIMARY),
},
};
op->callback = [this, cur_list](osd_op_t *op)
@@ -228,6 +361,29 @@ void cluster_client_t::send_list(inode_list_osd_t *cur_list)
{
fprintf(stderr, "Failed to get PG %u/%u object list from OSD %ju (retval=%jd), skipping\n",
cur_list->pg->lst->pool_id, cur_list->pg->pg_num, cur_list->osd_num, op->reply.hdr.retval);
if (!cur_list->pg->errcode ||
cur_list->pg->errcode == -EPIPE ||
op->reply.hdr.retval != -EPIPE)
{
cur_list->pg->errcode = op->reply.hdr.retval;
}
}
else if ((op->req.sec_list.flags & OSD_LIST_PRIMARY) &&
!(op->reply.sec_list.flags & OSD_LIST_PRIMARY))
{
// OSD is old and doesn't support listing from primary
if (log_level > 0)
{
fprintf(
stderr, "[PG %u/%u] Primary OSD doesn't support consistent listings, falling back to listings from all peers\n",
cur_list->pg->lst->pool_id, cur_list->pg->pg_num
);
}
cur_list->pg->lst->fallback = true;
if (!cur_list->pg->errcode)
{
cur_list->pg->errcode = -EPIPE;
}
}
else
{
@@ -256,55 +412,66 @@ void cluster_client_t::send_list(inode_list_osd_t *cur_list)
}
}
delete op;
auto lst = cur_list->pg->lst;
auto pg = cur_list->pg;
pg->done++;
if (pg->done >= pg->list_osds.size())
{
int status = 0;
lst->done_pgs++;
if (lst->done_pgs >= lst->pgs.size())
{
status |= INODE_LIST_DONE;
}
if (pg->has_unstable)
{
status |= INODE_LIST_HAS_UNSTABLE;
}
lst->callback(lst, std::move(pg->objects), pg->pg_num, pg->cur_primary, status);
lst->pgs[pg->pos] = NULL;
delete pg;
if (lst->done_pgs >= lst->pgs.size())
{
// All done
for (int i = 0; i < lists.size(); i++)
{
if (lists[i] == lst)
{
lists.erase(lists.begin()+i, lists.begin()+i+1);
break;
}
}
delete lst;
return;
}
}
else
{
lst->want++;
}
continue_listing(lst);
cur_list->pg->inflight_ops--;
if (!cur_list->pg->inflight_ops)
cur_list->pg->lst->inflight_pgs--;
finish_list_pg(cur_list->pg, true);
continue_listing(cur_list->pg->lst);
};
msgr.outbox_push(op);
cur_list->sent = true;
cur_list->pg->sent++;
cur_list->pg->lst->want--;
}
void cluster_client_t::finish_list_pg(inode_list_pg_t *pg, bool retry_epipe)
{
auto lst = pg->lst;
if (pg->inflight_ops == 0)
{
if (pg->errcode == -EPIPE && retry_epipe)
{
// Retry listing after <client_retry_interval> ms on EPIPE
pg->state = LIST_PG_WAIT_RETRY;
clock_gettime(CLOCK_REALTIME, &pg->wait_until);
pg->wait_until.tv_nsec += client_retry_interval*1000000;
pg->wait_until.tv_sec += (pg->wait_until.tv_nsec / 1000000000);
pg->wait_until.tv_nsec = (pg->wait_until.tv_nsec % 1000000000);
set_list_retry_timeout(client_retry_interval, pg->wait_until);
return;
}
lst->done_pgs++;
pg->state = LIST_PG_DONE;
lst->callback(pg->errcode, lst->pgs.size()-lst->done_pgs, pg->pg_num, std::move(pg->objects));
pg->objects.clear();
pg->inactive_osds.clear();
}
}
void cluster_client_t::continue_lists()
{
for (auto lst: lists)
for (int i = lists.size()-1; i >= 0; i--)
{
continue_listing(lst);
continue_listing(lists[i]);
}
}
bool cluster_client_t::check_finish_listing(inode_list_t *lst)
{
if (lst->done_pgs >= lst->pgs.size())
{
for (auto pg: lst->pgs)
{
delete pg;
}
lst->pgs.clear();
for (int i = 0; i < lists.size(); i++)
{
if (lists[i] == lst)
{
lists.erase(lists.begin()+i, lists.begin()+i+1);
break;
}
}
delete lst;
return true;
}
return false;
}


@@ -9,7 +9,7 @@ writeback_cache_t::~writeback_cache_t()
{
for (auto & bp: dirty_buffers)
{
if (!--(*bp.second.refcnt))
if (bp.second.buf && !--(*bp.second.refcnt))
{
free(bp.second.refcnt); // refcnt is allocated with the buffer
}
@@ -17,6 +17,15 @@ writeback_cache_t::~writeback_cache_t()
dirty_buffers.clear();
}
bool writeback_cache_t::has_inode(uint64_t inode)
{
auto dirty_it = dirty_buffers.lower_bound((object_id){
.inode = inode,
.stripe = 0,
});
return dirty_it != dirty_buffers.end() && dirty_it->first.inode == inode;
}
dirty_buf_it_t writeback_cache_t::find_dirty(uint64_t inode, uint64_t offset)
{
auto dirty_it = dirty_buffers.lower_bound((object_id){
@@ -33,7 +42,11 @@ dirty_buf_it_t writeback_cache_t::find_dirty(uint64_t inode, uint64_t offset)
break;
}
}
return dirty_it;
if (dirty_it != dirty_buffers.end() && dirty_it->first.inode == inode)
{
return dirty_it;
}
return dirty_buffers.end();
}
bool writeback_cache_t::is_left_merged(dirty_buf_it_t dirty_it)
@@ -43,6 +56,7 @@ bool writeback_cache_t::is_left_merged(dirty_buf_it_t dirty_it)
auto prev_it = dirty_it;
prev_it--;
if (prev_it->first.inode == dirty_it->first.inode &&
(prev_it->second.buf != NULL) == (dirty_it->second.buf != NULL) &&
prev_it->first.stripe+prev_it->second.len == dirty_it->first.stripe &&
prev_it->second.state == CACHE_DIRTY)
{
@@ -58,6 +72,7 @@ bool writeback_cache_t::is_right_merged(dirty_buf_it_t dirty_it)
next_it++;
if (next_it != dirty_buffers.end() &&
next_it->first.inode == dirty_it->first.inode &&
(next_it->second.buf != NULL) == (dirty_it->second.buf != NULL) &&
next_it->first.stripe == dirty_it->first.stripe+dirty_it->second.len &&
next_it->second.state == CACHE_DIRTY)
{
@@ -66,11 +81,6 @@ bool writeback_cache_t::is_right_merged(dirty_buf_it_t dirty_it)
return false;
}
bool writeback_cache_t::is_merged(const dirty_buf_it_t & dirty_it)
{
return is_left_merged(dirty_it) || is_right_merged(dirty_it);
}
void writeback_cache_t::copy_write(cluster_op_t *op, int state, uint64_t new_flush_id)
{
// Save operation for replay when one of PGs goes out of sync
@@ -99,16 +109,22 @@ void writeback_cache_t::copy_write(cluster_op_t *op, int state, uint64_t new_flu
.inode = op->inode,
.stripe = new_end,
}, (cluster_buffer_t){
.buf = dirty_it->second.buf + new_end - dirty_it->first.stripe,
.buf = dirty_it->second.buf ? dirty_it->second.buf + new_end - dirty_it->first.stripe : NULL,
.len = old_end - new_end,
.state = dirty_it->second.state,
.flush_id = dirty_it->second.flush_id,
.refcnt = dirty_it->second.refcnt,
});
(*dirty_it->second.refcnt)++;
if (dirty_it->second.buf)
{
(*dirty_it->second.refcnt)++;
}
if (dirty_it->second.state == CACHE_DIRTY)
{
writeback_bytes -= op->len;
if (dirty_it->second.buf)
{
writeback_bytes -= op->len;
}
writeback_queue_size++;
}
break;
@@ -118,8 +134,11 @@ void writeback_cache_t::copy_write(cluster_op_t *op, int state, uint64_t new_flu
// Only leave the beginning
if (dirty_it->second.state == CACHE_DIRTY)
{
writeback_bytes -= old_end - op->offset;
if (is_left_merged(dirty_it) && !is_right_merged(dirty_it))
if (dirty_it->second.buf)
{
writeback_bytes -= old_end - op->offset;
}
if (is_right_merged(dirty_it))
{
writeback_queue_size++;
}
@@ -133,8 +152,11 @@ void writeback_cache_t::copy_write(cluster_op_t *op, int state, uint64_t new_flu
// Only leave the end
if (dirty_it->second.state == CACHE_DIRTY)
{
writeback_bytes -= new_end - dirty_it->first.stripe;
if (!is_left_merged(dirty_it) && is_right_merged(dirty_it))
if (dirty_it->second.buf)
{
writeback_bytes -= new_end - dirty_it->first.stripe;
}
if (is_left_merged(dirty_it))
{
writeback_queue_size++;
}
@@ -143,7 +165,7 @@ void writeback_cache_t::copy_write(cluster_op_t *op, int state, uint64_t new_flu
.inode = op->inode,
.stripe = new_end,
}, (cluster_buffer_t){
.buf = dirty_it->second.buf + new_end - dirty_it->first.stripe,
.buf = dirty_it->second.buf ? dirty_it->second.buf + new_end - dirty_it->first.stripe : NULL,
.len = old_end - new_end,
.state = dirty_it->second.state,
.flush_id = dirty_it->second.flush_id,
@@ -156,13 +178,25 @@ void writeback_cache_t::copy_write(cluster_op_t *op, int state, uint64_t new_flu
else
{
// Remove the whole buffer
if (dirty_it->second.state == CACHE_DIRTY && !is_merged(dirty_it))
if (dirty_it->second.state == CACHE_DIRTY)
{
writeback_bytes -= dirty_it->second.len;
assert(writeback_queue_size > 0);
writeback_queue_size--;
if (dirty_it->second.buf)
{
writeback_bytes -= dirty_it->second.len;
}
bool lm = is_left_merged(dirty_it);
bool rm = is_right_merged(dirty_it);
if (!lm && !rm)
{
assert(writeback_queue_size > 0);
writeback_queue_size--;
}
else if (lm && rm)
{
writeback_queue_size++;
}
}
if (!--(*dirty_it->second.refcnt))
if (dirty_it->second.buf && !--(*dirty_it->second.refcnt))
{
free(dirty_it->second.refcnt);
}
@@ -170,9 +204,13 @@ void writeback_cache_t::copy_write(cluster_op_t *op, int state, uint64_t new_flu
}
}
// Overlapping buffers are removed, just insert the new one
uint64_t *refcnt = (uint64_t*)malloc_or_die(sizeof(uint64_t) + op->len);
uint8_t *buf = (uint8_t*)refcnt + sizeof(uint64_t);
*refcnt = 1;
bool is_del = op->opcode == OSD_OP_DELETE;
uint64_t *refcnt = is_del ? NULL : (uint64_t*)malloc_or_die(sizeof(uint64_t) + op->len);
uint8_t *buf = is_del ? NULL : ((uint8_t*)refcnt + sizeof(uint64_t));
if (!is_del)
{
*refcnt = 1;
}
dirty_it = dirty_buffers.emplace_hint(dirty_it, (object_id){
.inode = op->inode,
.stripe = op->offset,
@@ -185,9 +223,11 @@ void writeback_cache_t::copy_write(cluster_op_t *op, int state, uint64_t new_flu
});
if (state == CACHE_DIRTY)
{
writeback_bytes += op->len;
writeback_bytes += is_del ? 0 : op->len;
// Track consecutive write-back operations
if (!is_merged(dirty_it))
bool lm = is_left_merged(dirty_it);
bool rm = is_right_merged(dirty_it);
if (!lm && !rm)
{
// <writeback_queue> is OK to contain more than actual number of consecutive
// requests as long as it doesn't miss anything. But <writeback_queue_size>
@@ -198,14 +238,22 @@ void writeback_cache_t::copy_write(cluster_op_t *op, int state, uint64_t new_flu
.stripe = op->offset,
});
}
else if (lm && rm)
{
assert(writeback_queue_size > 0);
writeback_queue_size--;
}
}
uint64_t pos = 0, len = op->len, iov_idx = 0;
while (len > 0 && iov_idx < op->iov.count)
if (!is_del)
{
auto & iov = op->iov.buf[iov_idx];
memcpy(buf + pos, iov.iov_base, iov.iov_len);
pos += iov.iov_len;
iov_idx++;
uint64_t pos = 0, len = op->len, iov_idx = 0;
while (len > 0 && iov_idx < op->iov.count)
{
auto & iov = op->iov.buf[iov_idx];
memcpy(buf + pos, iov.iov_base, iov.iov_len);
pos += iov.iov_len;
iov_idx++;
}
}
}
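With these changes the writeback cache represents buffered deletions as dirty buffers whose payload pointer is NULL; roughly (an informal invariant inferred from the code above and below):
// buf != NULL - buffered OSD_OP_WRITE: refcounted payload, counted in writeback_bytes
// buf == NULL - buffered OSD_OP_DELETE: no payload, not counted in writeback_bytes,
//               flushed as OSD_OP_DELETE and read back as zeroes (see copy_to_op below)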
@@ -219,7 +267,7 @@ int writeback_cache_t::repeat_ops_for(cluster_client_t *cli, osd_num_t peer_osd,
for (auto wr_it = dirty_buffers.begin(), flush_it = wr_it, last_it = wr_it; ; )
{
bool end = wr_it == dirty_buffers.end();
bool flush_this = !end && wr_it->second.state != CACHE_REPEATING;
bool flush_this = !end && wr_it->second.state != CACHE_REPEATING && wr_it->second.state != CACHE_DIRTY;
if (peer_osd)
flush_this = flush_this && cli->affects_osd(wr_it->first.inode, wr_it->first.stripe, wr_it->second.len, peer_osd);
if (pool_id && pg_num)
@@ -250,7 +298,7 @@ void writeback_cache_t::flush_buffers(cluster_client_t *cli, dirty_buf_it_t from
bool is_writeback = from_it->second.state == CACHE_DIRTY;
cluster_op_t *op = new cluster_op_t;
op->flags = OSD_OP_IGNORE_READONLY|OP_FLUSH_BUFFER;
op->opcode = OSD_OP_WRITE;
op->opcode = from_it->second.buf ? OSD_OP_WRITE : OSD_OP_DELETE;
op->cur_inode = op->inode = from_it->first.inode;
op->offset = from_it->first.stripe;
op->len = prev_it->first.stripe + prev_it->second.len - from_it->first.stripe;
@@ -260,9 +308,12 @@ void writeback_cache_t::flush_buffers(cluster_client_t *cli, dirty_buf_it_t from
{
it->second.state = CACHE_REPEATING;
it->second.flush_id = flush_id;
(*it->second.refcnt)++;
flushed_buffers.emplace(flush_id, it->second.refcnt);
op->iov.push_back(it->second.buf, it->second.len);
if (it->second.buf)
{
(*it->second.refcnt)++;
flushed_buffers.emplace(flush_id, it->second.refcnt);
op->iov.push_back(it->second.buf, it->second.len);
}
calc_len += it->second.len;
}
assert(calc_len == op->len);
@@ -334,10 +385,12 @@ void writeback_cache_t::start_writebacks(cluster_client_t *cli, int count)
}
auto from_it = dirty_it;
uint64_t off = dirty_it->first.stripe;
bool is_del = (dirty_it->second.buf == NULL);
while (from_it != dirty_buffers.begin())
{
from_it--;
if (from_it->second.state != CACHE_DIRTY ||
(from_it->second.buf == NULL) != is_del ||
from_it->first.inode != req.inode ||
from_it->first.stripe+from_it->second.len != off)
{
@@ -352,6 +405,7 @@ void writeback_cache_t::start_writebacks(cluster_client_t *cli, int count)
while (to_it != dirty_buffers.end())
{
if (to_it->second.state != CACHE_DIRTY ||
(to_it->second.buf == NULL) != is_del ||
to_it->first.inode != req.inode ||
to_it->first.stripe != off)
{
@@ -364,6 +418,7 @@ void writeback_cache_t::start_writebacks(cluster_client_t *cli, int count)
assert(writeback_queue_size > 0);
writeback_queue_size--;
writeback_bytes -= off - from_it->first.stripe;
assert(writeback_queue_size > 0 || !writeback_bytes);
flush_buffers(cli, from_it, to_it);
}
queue_copy.erase(queue_copy.begin(), queue_copy.begin()+i);
@@ -391,15 +446,27 @@ static void copy_to_op(cluster_op_t *op, uint64_t offset, uint8_t *buf, uint64_t
auto & v = op->iov.buf[iov_idx];
auto begin = (cur_offset < offset ? offset : cur_offset);
auto end = (cur_offset+v.iov_len > offset+len ? offset+len : cur_offset+v.iov_len);
memcpy(
(uint8_t*)v.iov_base + begin - cur_offset,
buf + (cur_offset <= offset ? 0 : cur_offset-offset),
end - begin
);
if (!buf)
{
memset((uint8_t*)v.iov_base + begin - cur_offset, 0, end - begin);
}
else
{
memcpy(
(uint8_t*)v.iov_base + begin - cur_offset,
buf + (cur_offset <= offset ? 0 : cur_offset-offset),
end - begin
);
}
cur_offset += v.iov_len;
iov_idx++;
}
}
if (!buf)
{
// Bitmap is initially zero, don't set it
return;
}
// Set bitmap bits
int start_bit = (offset-op->offset)/bitmap_granularity;
int end_bit = (offset-op->offset+len)/bitmap_granularity;
@@ -449,7 +516,8 @@ bool writeback_cache_t::read_from_cache(cluster_op_t *op, uint32_t bitmap_granul
{
// Copy data
dirty_copied = true;
copy_to_op(op, prev, dirty_it->second.buf + prev - dirty_it->first.stripe, cur-prev, bitmap_granularity);
copy_to_op(op, prev, dirty_it->second.buf ? (dirty_it->second.buf + prev - dirty_it->first.stripe) : NULL,
cur-prev, bitmap_granularity);
}
skip_prev = skip;
prev = cur;
@@ -461,7 +529,8 @@ bool writeback_cache_t::read_from_cache(cluster_op_t *op, uint32_t bitmap_granul
{
// Copy data
dirty_copied = true;
copy_to_op(op, prev, dirty_it->second.buf + prev - dirty_it->first.stripe, cur-prev, bitmap_granularity);
copy_to_op(op, prev, dirty_it->second.buf ? (dirty_it->second.buf + prev - dirty_it->first.stripe) : NULL,
cur-prev, bitmap_granularity);
}
dirty_it++;
}
@@ -497,8 +566,10 @@ void writeback_cache_t::fsync_ok()
{
if (uw_it->second.state == CACHE_FLUSHING)
{
if (!--(*uw_it->second.refcnt))
if (uw_it->second.buf && !--(*uw_it->second.refcnt))
{
free(uw_it->second.refcnt);
}
dirty_buffers.erase(uw_it++);
}
else


@@ -889,7 +889,11 @@ void etcd_state_client_t::parse_state(const etcd_kv_t & kv)
if (!pc.scrub_interval)
pc.scrub_interval = 0;
// Mark pool as VitastorFS pool (disable per-inode stats and block volume creation)
pc.used_for_fs = pool_item.second["used_for_fs"].as_string();
pc.used_for_app = pool_item.second["used_for_fs"].as_string();
if (pc.used_for_app != "")
pc.used_for_app = "fs:"+pc.used_for_app;
else
pc.used_for_app = pool_item.second["used_for_app"].as_string();
// Immediate Commit Mode
pc.immediate_commit = pool_item.second["immediate_commit"].is_string()
? parse_immediate_commit(pool_item.second["immediate_commit"].string_value(), IMMEDIATE_ALL)
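The compatibility shim above keeps old pool configurations working; for example (hypothetical pool parameters):
// Old style: "used_for_fs": "fsmeta"        -> pc.used_for_app == "fs:fsmeta"
// New style: "used_for_app": "s3:backups"   -> pc.used_for_app == "s3:backups"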
@@ -1217,6 +1221,7 @@ void etcd_state_client_t::parse_state(const etcd_kv_t & kv)
.size = value["size"].uint64_value(),
.parent_id = parent_inode_num,
.readonly = value["readonly"].bool_value(),
.deleted = value["deleted"].bool_value(),
.meta = value["meta"],
.mod_revision = kv.mod_revision,
});
@@ -1305,6 +1310,10 @@ json11::Json::object etcd_state_client_t::serialize_inode_cfg(inode_config_t *cf
{
new_cfg["readonly"] = true;
}
if (cfg->deleted)
{
new_cfg["deleted"] = true;
}
if (cfg->meta.is_object())
{
new_cfg["meta"] = cfg->meta;


@@ -61,7 +61,7 @@ struct pool_config_t
uint64_t pg_stripe_size;
std::map<pg_num_t, pg_config_t> pg_config;
uint64_t scrub_interval;
std::string used_for_fs;
std::string used_for_app;
int backfillfull;
};
@@ -72,6 +72,7 @@ struct inode_config_t
uint64_t size = 0;
inode_t parent_id = 0;
bool readonly = false;
bool deleted = false;
// Arbitrary metadata
json11::Json meta;
// Change revision of the metadata in etcd


@@ -275,8 +275,8 @@ const char *help_text =
" --foreground 1\n"
" Stay in foreground, do not daemonize.\n"
"\n"
"vitastor-nbd unmap /dev/nbdN\n"
" Unmap an ioctl-mapped NBD device.\n"
"vitastor-nbd unmap [--force] /dev/nbdN\n"
" Unmap an ioctl-mapped NBD device. Do not check if it's actually mapped if --force is specified.\n"
"\n"
"vitastor-nbd ls [--json]\n"
" List ioctl-mapped Vitastor NBD devices, optionally in JSON format.\n"
@@ -313,7 +313,7 @@ const char *help_text =
#endif
"Use vitastor-nbd --help <command> for command details or vitastor-nbd --help --all for all details.\n"
"\n"
"All usual Vitastor config options like --config_file <path_to_config> may also be specified in CLI.\n"
"All usual Vitastor config options like --config_path <path_to_config> may also be specified in CLI.\n"
;
class nbd_proxy
@@ -372,7 +372,8 @@ public:
else if (args[i][0] == '-' && args[i][1] == '-')
{
const char *opt = args[i]+2;
cfg[opt] = !strcmp(opt, "json") || !strcmp(opt, "all") || i == narg-1 ? "1" : args[++i];
cfg[opt] = !strcmp(opt, "json") || !strcmp(opt, "all") ||
!strcmp(opt, "force") || i == narg-1 ? "1" : args[++i];
}
else if (pos == 0)
{
@@ -381,8 +382,9 @@ public:
}
else if (pos == 1)
{
char c = 0;
int n = 0;
if (sscanf(args[i], "/dev/nbd%d", &n) > 0)
if (sscanf(args[i], "/dev/nbd%d%c", &n, &c) == 1)
cfg["dev_num"] = n;
else
cfg["dev_num"] = args[i];
@@ -404,18 +406,14 @@ public:
}
else if (cfg["command"] == "unmap")
{
if (cfg["dev_num"].is_null())
if (!cfg["dev_num"].is_number() &&
cfg["dev_num"].string_value() != "0" &&
!cfg["dev_num"].uint64_value())
{
fprintf(stderr, "device name or number is missing\n");
exit(1);
}
if (cfg["netlink"].is_null())
{
ioctl_unmap(cfg["dev_num"].uint64_value());
}
else
{
}
ioctl_unmap(cfg["dev_num"].uint64_value(), cfg["force"].bool_value());
}
#ifdef HAVE_NBD_NETLINK_H
else if (cfg["command"] == "netlink-map")
@@ -444,9 +442,18 @@ help:
}
}
void ioctl_unmap(int dev_num)
void ioctl_unmap(int dev_num, bool force)
{
char path[64] = { 0 };
// Check if mapped
sprintf(path, "/sys/block/nbd%d/pid", dev_num);
if (access(path, F_OK) != 0)
{
fprintf(stderr, "/dev/nbd%d is not mapped: /sys/block/nbd%d/pid does not exist\n", dev_num, dev_num);
if (!force)
exit(1);
}
// Run unmap
sprintf(path, "/dev/nbd%d", dev_num);
int r, nbd = open(path, O_RDWR);
if (nbd < 0)
@@ -610,36 +617,43 @@ help:
{
if (!cfg["dev_num"].is_null())
{
if (run_nbd(sockfd, cfg["dev_num"].int64_value(), device_size, NBD_FLAG_SEND_FLUSH, nbd_timeout, bg) < 0)
int r;
if ((r = run_nbd(sockfd, cfg["dev_num"].int64_value(), device_size, NBD_FLAG_SEND_FLUSH, nbd_timeout, bg)) != 0)
{
perror("run_nbd");
fprintf(stderr, "run_nbd: %s\n", strerror(-r));
exit(1);
}
}
else
{
// Find an unused device
auto mapped = list_mapped();
int i = 0;
while (true)
{
if (mapped.find("/dev/nbd"+std::to_string(i)) != mapped.end())
{
i++;
continue;
}
int r = run_nbd(sockfd, i, device_size, NBD_FLAG_SEND_FLUSH, nbd_timeout, bg);
if (r == 0)
{
printf("/dev/nbd%d\n", i);
break;
}
else if (r == -1 && errno == ENOENT)
else if (r == -ENOENT)
{
fprintf(stderr, "No free NBD devices found\n");
exit(1);
}
else if (r == -2 && errno == EBUSY)
else if (r == -EBUSY)
{
i++;
}
else
{
perror("run_nbd");
fprintf(stderr, "run_nbd: %s\n", strerror(-r));
exit(1);
}
}
@@ -869,81 +883,114 @@ protected:
// Check handle size
assert(sizeof(cur_req.handle) == 8);
char path[64] = { 0 };
sprintf(path, "/dev/nbd%d", dev_num);
int r, nbd = open(path, O_RDWR), qd_fd;
if (nbd < 0)
int notifyfd[2] = { 0 };
if (socketpair(AF_UNIX, SOCK_STREAM, 0, notifyfd) < 0)
{
return -1;
return -errno;
}
r = ioctl(nbd, NBD_SET_SOCK, sockfd[1]);
if (r < 0)
if (!fork())
{
goto end_close;
}
r = ioctl(nbd, NBD_SET_BLKSIZE, 4096);
if (r < 0)
{
goto end_unmap;
}
r = ioctl(nbd, NBD_SET_SIZE, size);
if (r < 0)
{
goto end_unmap;
}
ioctl(nbd, NBD_SET_FLAGS, flags);
if (timeout > 0)
{
r = ioctl(nbd, NBD_SET_TIMEOUT, (unsigned long)timeout);
// Do all NBD configuration in the child process, after the last fork.
// Why? It's needed because there is a race condition in the Linux kernel nbd driver
// in nbd_add_socket() - it saves `current` task pointer as `nbd->task_setup` and
// then rechecks if the new `current` is the same. Problem is that if that process
// is already dead, `current` may be freed and then replaced by another process
// with the same pointer value. So the check passes and NBD allows a different process
// to set up a device which is already set up. Proper fix would have to be done in the
// kernel code, but the workaround is obviously to perform NBD setup from the process
// which will then actually call NBD_DO_IT. That process stays alive during the whole
// time of NBD device execution and the (nbd->task_setup != current) check always
// works correctly, and we don't accidentally break previous NBD devices while setting
// up a new device. Forking to check every device is of course rather slow, so we also
// do an additional check by calling list_mapped() before searching for a free NBD device.
if (bg)
{
daemonize_fork();
}
close(notifyfd[0]);
sprintf(path, "/dev/nbd%d", dev_num);
int r, nbd = open(path, O_RDWR), qd_fd;
if (nbd < 0)
{
write(notifyfd[1], &errno, sizeof(errno));
exit(1);
}
r = ioctl(nbd, NBD_SET_SOCK, sockfd[1]);
if (r < 0)
{
goto end_close;
}
r = ioctl(nbd, NBD_SET_BLKSIZE, 4096);
if (r < 0)
{
goto end_unmap;
}
}
// Configure request size
sprintf(path, "/sys/block/nbd%d/queue/max_sectors_kb", dev_num);
qd_fd = open(path, O_WRONLY);
if (qd_fd < 0)
{
goto end_unmap;
}
r = write(qd_fd, "32768", 5);
if (r != 5)
{
fprintf(stderr, "Warning: Failed to configure max_sectors_kb\n");
}
close(qd_fd);
if (!fork())
{
// Run in child
r = ioctl(nbd, NBD_SET_SIZE, size);
if (r < 0)
{
goto end_unmap;
}
ioctl(nbd, NBD_SET_FLAGS, flags);
if (timeout > 0)
{
r = ioctl(nbd, NBD_SET_TIMEOUT, (unsigned long)timeout);
if (r < 0)
{
goto end_unmap;
}
}
// Configure request size
sprintf(path, "/sys/block/nbd%d/queue/max_sectors_kb", dev_num);
qd_fd = open(path, O_WRONLY);
if (qd_fd < 0)
{
goto end_unmap;
}
r = write(qd_fd, "32768", 5);
if (r != 5)
{
fprintf(stderr, "Warning: Failed to configure max_sectors_kb\n");
}
close(qd_fd);
// Notify parent
errno = 0;
write(notifyfd[1], &errno, sizeof(errno));
close(notifyfd[1]);
close(sockfd[0]);
if (bg)
{
daemonize();
daemonize_reopen_stdio();
}
r = ioctl(nbd, NBD_DO_IT);
if (r < 0)
{
fprintf(stderr, "NBD device terminated with error: %s\n", strerror(errno));
fprintf(stderr, "NBD device /dev/nbd%d terminated with error: %s\n", dev_num, strerror(errno));
}
close(sockfd[1]);
ioctl(nbd, NBD_CLEAR_QUE);
ioctl(nbd, NBD_CLEAR_SOCK);
exit(0);
}
close(sockfd[1]);
close(nbd);
return 0;
end_close:
r = errno;
close(nbd);
errno = r;
return -2;
write(notifyfd[1], &errno, sizeof(errno));
close(nbd);
exit(2);
end_unmap:
r = errno;
ioctl(nbd, NBD_CLEAR_SOCK);
close(nbd);
errno = r;
return -3;
write(notifyfd[1], &errno, sizeof(errno));
ioctl(nbd, NBD_CLEAR_SOCK);
close(nbd);
exit(3);
}
// Parent - check status
close(notifyfd[1]);
int child_errno = 0;
int ok = read(notifyfd[0], &child_errno, sizeof(child_errno));
close(notifyfd[0]);
if (ok && !child_errno)
{
close(sockfd[1]);
return 0;
}
return -child_errno;
}
void submit_send()
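The parent/child handshake added above reduces to a single int passed over the socketpair; schematically (a simplification of the code above):
// child:  errno = 0 on success, or the errno of the failing open()/ioctl();
//         write(notifyfd[1], &errno, sizeof(errno));
// parent: read(notifyfd[0], &child_errno, sizeof(child_errno));
//         return child_errno ? -child_errno : 0;   // run_nbd() now reports errors as -errno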


@@ -51,6 +51,11 @@
#define LOC_CORRUPTED 2
#define LOC_INCONSISTENT 4
#define OSD_LIST_PRIMARY 1
#define OSD_DEL_SUPPORT_LEFT_ON_DEAD 1
#define OSD_DEL_LEFT_ON_DEAD 2
// common request and reply headers
struct __attribute__((__packed__)) osd_op_header_t
{
@@ -196,6 +201,9 @@ struct __attribute__((__packed__)) osd_op_sec_list_t
uint64_t min_stripe, max_stripe;
// max stable object count
uint32_t stable_limit;
// flags - OSD_LIST_PRIMARY or 0
// for OSD_LIST_PRIMARY, only a single-PG listing is allowed
uint64_t flags;
};
struct __attribute__((__packed__)) osd_reply_sec_list_t
@@ -204,9 +212,11 @@ struct __attribute__((__packed__)) osd_reply_sec_list_t
// stable object version count. header.retval = total object version count
// FIXME: maybe change to the number of bytes in the reply...
uint64_t stable_count;
// flags - OSD_LIST_PRIMARY or 0
uint64_t flags;
};
// read or write to the primary OSD (must be within individual stripe)
// read, write or delete command for the primary OSD (must be within individual stripe)
struct __attribute__((__packed__)) osd_op_rw_t
{
osd_op_header_t header;
@@ -235,6 +245,20 @@ struct __attribute__((__packed__)) osd_reply_rw_t
uint64_t version;
};
struct __attribute__((__packed__)) osd_reply_del_t
{
osd_reply_header_t header;
// OSD_DEL_SUPPORT_LEFT_ON_DEAD and/or OSD_DEL_LEFT_ON_DEAD or 0
uint32_t flags;
// for deletes, if flags & OSD_DEL_LEFT_ON_DEAD:
// count of OSDs from which the objects could not be deleted
// their OSD numbers follow directly after this structure as uint32_t[] (left_on_dead_count entries)
// FIXME it's kind of a hack and will be removed in the future, when Vitastor will
// have 'atomic deletions', i.e. when it will be able to remember deleted objects
// and complete deletions automatically after extra OSDs are started
uint32_t left_on_dead_count;
};
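On the client side this reply maps onto the cluster_op_t helpers declared earlier (support_left_on_dead() / get_left_on_dead()); a hypothetical consumer of a finished delete might look like this sketch:

op->callback = [](cluster_op_t *op)
{
    if (op->retval >= 0 && op->support_left_on_dead())
    {
        for (osd_num_t osd: op->get_left_on_dead())
            fprintf(stderr, "Warning: deleted objects may be left on stopped OSD %ju\n", osd);
    }
    delete op;
};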
// sync to the primary OSD
struct __attribute__((__packed__)) osd_op_sync_t
{
@@ -307,6 +331,7 @@ union osd_any_reply_t
osd_reply_sec_list_t sec_list;
osd_reply_show_config_t show_conf;
osd_reply_rw_t rw;
osd_reply_del_t del;
osd_reply_sync_t sync;
osd_reply_describe_t describe;
uint8_t buf[OSD_PACKET_SIZE];


@@ -294,7 +294,9 @@ static void coroutine_fn vitastor_co_get_metadata(VitastorRPC *task)
qemu_mutex_lock(&client->mutex);
vitastor_c_watch_inode(client->proxy, client->image, vitastor_co_generic_cb, task);
#if !defined VITASTOR_C_API_VERSION || VITASTOR_C_API_VERSION < 5
vitastor_schedule_uring_handler(client);
#endif
qemu_mutex_unlock(&client->mutex);
while (!task->complete)
@@ -566,6 +568,7 @@ static int vitastor_file_open(BlockDriverState *bs, QDict *options, int flags, E
static void vitastor_close(BlockDriverState *bs)
{
VitastorClient *client = bs->opaque;
qemu_mutex_lock(&client->mutex);
vitastor_c_destroy(client->proxy);
if (client->fds)
{
@@ -573,7 +576,6 @@ static void vitastor_close(BlockDriverState *bs)
client->fds = NULL;
client->fd_alloc = client->fd_count = 0;
}
qemu_mutex_destroy(&client->mutex);
if (client->config_path)
g_free(client->config_path);
if (client->etcd_host)
@@ -584,6 +586,8 @@ static void vitastor_close(BlockDriverState *bs)
g_free(client->image);
free(client->last_bitmap);
client->last_bitmap = NULL;
qemu_mutex_unlock(&client->mutex);
qemu_mutex_destroy(&client->mutex);
}
#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR > 2
@@ -749,7 +753,9 @@ static int coroutine_fn vitastor_co_preadv(BlockDriverState *bs,
uint64_t inode = client->watch ? vitastor_c_inode_get_num(client->watch) : client->inode;
qemu_mutex_lock(&client->mutex);
vitastor_c_read(client->proxy, inode, offset, bytes, iov->iov, iov->niov, vitastor_co_read_cb, &task);
#if !defined VITASTOR_C_API_VERSION || VITASTOR_C_API_VERSION < 5
vitastor_schedule_uring_handler(client);
#endif
qemu_mutex_unlock(&client->mutex);
while (!task.complete)
@@ -783,7 +789,9 @@ static int coroutine_fn vitastor_co_pwritev(BlockDriverState *bs,
uint64_t inode = client->watch ? vitastor_c_inode_get_num(client->watch) : client->inode;
qemu_mutex_lock(&client->mutex);
vitastor_c_write(client->proxy, inode, offset, bytes, 0, iov->iov, iov->niov, vitastor_co_generic_cb, &task);
#if !defined VITASTOR_C_API_VERSION || VITASTOR_C_API_VERSION < 5
vitastor_schedule_uring_handler(client);
#endif
qemu_mutex_unlock(&client->mutex);
while (!task.complete)
@@ -863,7 +871,9 @@ static int coroutine_fn vitastor_co_block_status(
task.bitmap = client->last_bitmap = NULL;
qemu_mutex_lock(&client->mutex);
vitastor_c_read_bitmap(client->proxy, task.inode, task.offset, task.len, !client->skip_parents, vitastor_co_read_bitmap_cb, &task);
#if !defined VITASTOR_C_API_VERSION || VITASTOR_C_API_VERSION < 5
vitastor_schedule_uring_handler(client);
#endif
qemu_mutex_unlock(&client->mutex);
while (!task.complete)
{
@@ -950,7 +960,9 @@ static int coroutine_fn vitastor_co_flush(BlockDriverState *bs)
qemu_mutex_lock(&client->mutex);
vitastor_c_sync(client->proxy, vitastor_co_generic_cb, &task);
#if !defined VITASTOR_C_API_VERSION || VITASTOR_C_API_VERSION < 5
vitastor_schedule_uring_handler(client);
#endif
qemu_mutex_unlock(&client->mutex);
while (!task.complete)


@@ -6,7 +6,7 @@ includedir=${prefix}/@CMAKE_INSTALL_INCLUDEDIR@
Name: Vitastor
Description: Vitastor client library
Version: 1.10.0
Version: 1.11.0
Libs: -L${libdir} -lvitastor_client
Cflags: -I${includedir}


@@ -127,6 +127,7 @@ vitastor_c *vitastor_c_create_qemu_uring(QEMUSetFDHandler *aio_set_fd_handler, v
auto self = vitastor_c_create_qemu_common(aio_set_fd_handler, aio_context);
self->ringloop = ringloop;
self->cli = new cluster_client_t(self->ringloop, self->tfd, cfg_json);
ringloop->loop();
return self;
}
@@ -150,6 +151,7 @@ vitastor_c *vitastor_c_create_uring(const char *config_path, const char *etcd_ho
self->ringloop = ringloop;
self->epmgr = new epoll_manager_t(self->ringloop);
self->cli = new cluster_client_t(self->ringloop, self->epmgr->tfd, cfg_json);
ringloop->loop();
return self;
}
@@ -183,6 +185,7 @@ vitastor_c *vitastor_c_create_uring_json(const char **options, int options_len)
self->ringloop = ringloop;
self->epmgr = new epoll_manager_t(self->ringloop);
self->cli = new cluster_client_t(self->ringloop, self->epmgr->tfd, cfg_json);
ringloop->loop();
return self;
}
@@ -222,6 +225,18 @@ int vitastor_c_is_ready(vitastor_c *client)
return client->cli->is_ready();
}
void vitastor_c_on_ready(vitastor_c *client, VitastorIOHandler cb, void *opaque)
{
client->cli->on_ready([=]()
{
cb(opaque, 0);
});
if (client->ringloop)
{
client->ringloop->loop();
}
}
void vitastor_c_uring_wait_ready(vitastor_c *client)
{
while (!client->cli->is_ready())
@@ -276,6 +291,10 @@ void vitastor_c_read(vitastor_c *client, uint64_t inode, uint64_t offset, uint64
delete op;
};
client->cli->execute(op);
if (client->ringloop)
{
client->ringloop->loop();
}
}
void vitastor_c_write(vitastor_c *client, uint64_t inode, uint64_t offset, uint64_t len, uint64_t check_version,
@@ -297,6 +316,31 @@ void vitastor_c_write(vitastor_c *client, uint64_t inode, uint64_t offset, uint6
delete op;
};
client->cli->execute(op);
if (client->ringloop)
{
client->ringloop->loop();
}
}
void vitastor_c_delete(vitastor_c *client, uint64_t inode, uint64_t offset, uint64_t len, uint64_t check_version,
VitastorIOHandler cb, void *opaque)
{
cluster_op_t *op = new cluster_op_t;
op->opcode = OSD_OP_DELETE;
op->inode = inode;
op->offset = offset;
op->len = len;
op->version = check_version;
op->callback = [cb, opaque](cluster_op_t *op)
{
cb(opaque, op->retval);
delete op;
};
client->cli->execute(op);
if (client->ringloop)
{
client->ringloop->loop();
}
}
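A minimal caller of the new entry point might look like this (a hypothetical example; the handler signature is assumed to match the VitastorIOHandler typedef used by vitastor_c_write):

static void on_delete_done(void *opaque, long retval)
{
    // retval < 0 is a negative errno, otherwise the deletion has been executed
}

// Delete 1 MiB at offset 0 of <inode> without CAS (check_version = 0);
// a vitastor_c_sync() may still be needed afterwards to commit it, as for writes.
vitastor_c_delete(client, inode, 0, 1048576, 0, on_delete_done, NULL);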
void vitastor_c_read_bitmap(vitastor_c *client, uint64_t inode, uint64_t offset, uint64_t len,
@@ -319,6 +363,10 @@ void vitastor_c_read_bitmap(vitastor_c *client, uint64_t inode, uint64_t offset,
delete op;
};
client->cli->execute(op);
if (client->ringloop)
{
client->ringloop->loop();
}
}
void vitastor_c_sync(vitastor_c *client, VitastorIOHandler cb, void *opaque)
@@ -331,6 +379,10 @@ void vitastor_c_sync(vitastor_c *client, VitastorIOHandler cb, void *opaque)
delete op;
};
client->cli->execute(op);
if (client->ringloop)
{
client->ringloop->loop();
}
}
void vitastor_c_watch_inode(vitastor_c *client, char *image, VitastorIOHandler cb, void *opaque)
@@ -340,6 +392,10 @@ void vitastor_c_watch_inode(vitastor_c *client, char *image, VitastorIOHandler c
auto watch = client->cli->st_cli.watch_inode(std::string(image));
cb(opaque, (long)watch);
});
if (client->ringloop)
{
client->ringloop->loop();
}
}
void vitastor_c_close_watch(vitastor_c *client, void *handle)


@@ -7,7 +7,7 @@
#define VITASTOR_QEMU_PROXY_H
// C API wrapper version
#define VITASTOR_C_API_VERSION 4
#define VITASTOR_C_API_VERSION 5
#ifndef POOL_ID_BITS
#define POOL_ID_BITS 16
@@ -51,6 +51,7 @@ vitastor_c *vitastor_c_create_epoll_json(const char **options, int options_len);
void* vitastor_c_get_internal_client(vitastor_c *client);
void vitastor_c_destroy(vitastor_c *client);
int vitastor_c_is_ready(vitastor_c *client);
void vitastor_c_on_ready(vitastor_c *client, VitastorIOHandler cb, void *opaque);
int vitastor_c_uring_register_eventfd(vitastor_c *client);
void vitastor_c_uring_wait_ready(vitastor_c *client);
void vitastor_c_uring_handle_events(vitastor_c *client);
@@ -62,6 +63,8 @@ void vitastor_c_read(vitastor_c *client, uint64_t inode, uint64_t offset, uint64
struct iovec *iov, int iovcnt, VitastorReadHandler cb, void *opaque);
void vitastor_c_write(vitastor_c *client, uint64_t inode, uint64_t offset, uint64_t len, uint64_t check_version,
struct iovec *iov, int iovcnt, VitastorIOHandler cb, void *opaque);
void vitastor_c_delete(vitastor_c *client, uint64_t inode, uint64_t offset, uint64_t len, uint64_t check_version,
VitastorIOHandler cb, void *opaque);
void vitastor_c_read_bitmap(vitastor_c *client, uint64_t inode, uint64_t offset, uint64_t len,
int with_parents, VitastorReadBitmapHandler cb, void *opaque);
void vitastor_c_sync(vitastor_c *client, VitastorIOHandler cb, void *opaque);


@@ -51,8 +51,9 @@ static const char* help_text =
" Rename, resize image or change its readonly status. Images with children can't be made read-write.\n"
" If the new size is smaller than the old size, extra data will be purged.\n"
" You should resize file system in the image, if present, before shrinking it.\n"
" -f|--force Proceed with shrinking or setting readwrite flag even if the image has children.\n"
" --down-ok Proceed with shrinking even if some data will be left on unavailable OSDs.\n"
" --deleted 1|0 Set/clear 'deleted image' flag (set automatically during unfinished deletes).\n"
" -f|--force Proceed with shrinking or setting readwrite flag even if the image has children.\n"
" --down-ok Proceed with shrinking even if some data will be left on unavailable OSDs.\n"
"\n"
"vitastor-cli dd [iimg=<image> | if=<file>] [oimg=<image> | of=<file>] [bs=1M]\n"
" [count=N] [seek/oseek=N] [skip/iseek=M] [iodepth=N] [status=progress]\n"
@@ -101,6 +102,7 @@ static const char* help_text =
" Requires more memory, but allows to show correct removal progress.\n"
" --min-offset Purge only data starting with specified offset.\n"
" --max-offset Purge only data before specified offset.\n"
" --client_wait_up_timeout 16 Timeout for waiting until PGs are up in seconds.\n"
"\n"
"vitastor-cli merge-data <from> <to> [--target <target>]\n"
" Merge layer data without changing metadata. Merge <from>..<to> to <target>.\n"
@@ -184,7 +186,8 @@ static const char* help_text =
" --raw_placement <rules> Specify raw PG generation rules (see documentation for details)\n"
" --primary_affinity_tags tags Prefer to put primary copies on OSDs with all specified tags\n"
" --scrub_interval <time> Enable regular scrubbing for this pool. Format: number + unit s/m/h/d/M/y\n"
" --used_for_fs <name> Mark pool as used for VitastorFS with metadata in image <name>\n"
" --used_for_app fs:<name> Mark pool as used for VitastorFS with metadata in image <name>\n"
" --used_for_app s3:<name> Mark pool as used for S3 location with name <name>\n"
" --pg_stripe_size <number> Increase object grouping stripe\n"
" --max_osd_combinations 10000 Maximum number of random combinations for LP solver input\n"
" --wait Wait for the new pool to come online\n"
@@ -196,7 +199,7 @@ static const char* help_text =
"vitastor-cli modify-pool|pool-modify <id|name> [--name <new_name>] [PARAMETERS...]\n"
" Modify an existing pool. Modifiable parameters:\n"
" [-s|--pg_size <number>] [--pg_minsize <number>] [-n|--pg_count <count>]\n"
" [--failure_domain <level>] [--root_node <node>] [--osd_tags <tags>] [--used_for_fs <name>]\n"
" [--failure_domain <level>] [--root_node <node>] [--osd_tags <tags>] [--used_for_app <type>:<name>]\n"
" [--max_osd_combinations <number>] [--primary_affinity_tags <tags>] [--scrub_interval <time>]\n"
" [--level_placement <rules>] [--raw_placement <rules>]\n"
" Non-modifiable parameters (changing them WILL lead to data loss):\n"
@@ -222,7 +225,7 @@ static const char* help_text =
"Use vitastor-cli --help <command> for command details or vitastor-cli --help --all for all details.\n"
"\n"
"GLOBAL OPTIONS:\n"
" --config_file FILE Path to Vitastor configuration file\n"
" --config_path FILE Path to Vitastor configuration file\n"
" --etcd_address URL Etcd connection address\n"
" --iodepth N Send N operations in parallel to each OSD when possible (default 32)\n"
" --parallel_osds M Work with M osds in parallel when possible (default 4)\n"
@@ -430,13 +433,22 @@ static int run(cli_tool_t *p, json11::Json::object cfg)
else if (cmd[0] == "rm")
{
// Remove multiple snapshots and rebase their children
if (cmd.size() > 1)
if (cfg["exact"].bool_value() || cfg["matching"].bool_value())
{
cfg["from"] = cmd[1];
if (cmd.size() > 2)
cfg["to"] = cmd[2];
cmd.erase(cmd.begin(), cmd.begin()+1);
cfg["globs"] = cmd;
action_cb = p->start_rm_wildcard(cfg);
}
else
{
if (cmd.size() > 1)
{
cfg["from"] = cmd[1];
if (cmd.size() > 2)
cfg["to"] = cmd[2];
}
action_cb = p->start_rm(cfg);
}
action_cb = p->start_rm(cfg);
}
else if (cmd[0] == "describe")
{

View File

@@ -30,7 +30,7 @@ struct cli_result_t
class cli_tool_t
{
public:
uint64_t iodepth = 4, parallel_osds = 32;
uint64_t iodepth = 32, parallel_osds = 4;
bool progress = false;
bool list_first = false;
bool json_output = false;
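
The old initializers had the two defaults swapped relative to the help text above (--iodepth defaults to 32, --parallel_osds to 4); this hunk simply lines the code up with the documented values.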

View File

@@ -92,12 +92,12 @@ struct image_creator_t
{
new_pool_id = pools.begin()->first;
}
if (new_pool_id && !pools.at(new_pool_id).used_for_fs.empty() && !force)
if (new_pool_id && !pools.at(new_pool_id).used_for_app.empty() && !force)
{
result = (cli_result_t){
.err = EINVAL,
.text = "Pool "+pools.at(new_pool_id).name+
" is used for VitastorFS "+pools.at(new_pool_id).used_for_fs+
" is used for application "+pools.at(new_pool_id).used_for_app+
". Use --force if you really know what you are doing",
};
state = 100;
@@ -192,7 +192,7 @@ resume_3:
}
} while (!parent->etcd_result["succeeded"].bool_value());
// Save into inode_config for library users to be able to take it from there immediately
new_cfg.mod_revision = parent->etcd_result["responses"][0]["response_put"]["header"]["revision"].uint64_value();
new_cfg.mod_revision = parent->etcd_result["header"]["revision"].uint64_value();
parent->cli->st_cli.insert_inode_config(new_cfg);
result = (cli_result_t){
.err = 0,
@@ -215,6 +215,7 @@ resume_3:
goto resume_3;
else if (state == 4)
goto resume_4;
// FIXME: take all info from etcd requests, not mixed with st_cli.inode_config
for (auto & ic: parent->cli->st_cli.inode_config)
{
if (ic.second.name == image_name+"@"+new_snap)
@@ -269,7 +270,7 @@ resume_4:
}
} while (!parent->etcd_result["succeeded"].bool_value());
// Save into inode_config for library users to be able to take it from there immediately
new_cfg.mod_revision = parent->etcd_result["responses"][0]["response_put"]["header"]["revision"].uint64_value();
new_cfg.mod_revision = parent->etcd_result["header"]["revision"].uint64_value();
parent->cli->st_cli.insert_inode_config(new_cfg);
result = (cli_result_t){
.err = 0,
@@ -286,6 +287,7 @@ resume_4:
json11::Json::object get_next_id()
{
assert(new_pool_id);
return json11::Json::object {
{ "request_range", json11::Json::object {
{ "key", base64_encode(
@@ -321,6 +323,17 @@ resume_4:
goto resume_2;
else if (state == 3)
goto resume_3;
if (!new_pool_id)
{
for (auto & ic: parent->cli->st_cli.inode_config)
{
if (ic.second.name == image_name)
{
new_pool_id = INODE_POOL(ic.first);
break;
}
}
}
parent->etcd_txn(json11::Json::object { { "success", json11::Json::array {
get_next_id(),
json11::Json::object {
@@ -401,7 +414,7 @@ resume_3:
auto kv = parent->cli->st_cli.parse_etcd_kv(parent->etcd_result["responses"][0]["response_range"]["kvs"][0]);
size = kv.value["size"].uint64_value();
new_parent_id = kv.value["parent_id"].uint64_value();
uint64_t parent_pool_id = kv.value["parent_pool_id"].uint64_value();
uint64_t parent_pool_id = kv.value["parent_pool"].uint64_value();
if (new_parent_id)
{
new_parent_id = INODE_WITH_POOL(parent_pool_id ? parent_pool_id : old_pool_id, new_parent_id);
@@ -413,7 +426,7 @@ resume_3:
void attempt_create()
{
new_cfg = {
new_cfg = (inode_config_t){
.num = INODE_WITH_POOL(new_pool_id, new_id),
.name = image_name,
.size = size,

View File

@@ -68,6 +68,7 @@ struct image_lister_t
{ "pool_name", good_pool ? pool_it->second.name : "? (ID:"+std::to_string(INODE_POOL(ic.second.num))+")" },
{ "inode_num", INODE_NO_POOL(ic.second.num) },
{ "inode_id", ic.second.num },
{ "deleted", ic.second.deleted },
};
if (ic.second.parent_id)
{
@@ -371,7 +372,8 @@ resume_1:
kv.second["delete_q"] = format_q(kv.second["delete_queue"].number_value());
}
kv.second["size_fmt"] = format_size(kv.second["size"].uint64_value());
kv.second["ro"] = kv.second["readonly"].bool_value() ? "RO" : "-";
kv.second["ro"] = kv.second["deleted"].bool_value() ? "DEL" :
(kv.second["readonly"].bool_value() ? "RO" : "-");
}
result.text = print_table(to_list(), cols, parent->color);
state = 100;
@@ -544,7 +546,7 @@ std::function<bool(cli_result_t &)> cli_tool_t::start_ls(json11::Json cfg)
lister->list_pool_name = lister->list_pool_id ? "" : cfg["pool"].as_string();
lister->show_stats = cfg["long"].bool_value();
lister->show_delete = cfg["del"].bool_value();
lister->sort_field = cfg["sort"].string_value();
lister->sort_field = cfg["sort"].string_value() != "" ? cfg["sort"].string_value() : "name";
lister->reverse = cfg["reverse"].bool_value();
lister->max_count = cfg["count"].uint64_value();
for (auto & item: cfg["names"].array_items())

View File

@@ -51,6 +51,7 @@ struct snap_merger_t
btree::safe_btree_set<uint64_t> merge_offsets;
btree::safe_btree_set<uint64_t>::iterator oit;
std::map<inode_t, std::vector<uint64_t>> layer_lists;
std::map<inode_t, int> list_errcode;
std::map<inode_t, uint64_t> layer_block_size;
std::map<inode_t, uint64_t> layer_list_pos;
std::vector<snap_rw_op_t*> continue_rwo, continue_rwo2;
@@ -251,6 +252,7 @@ struct snap_merger_t
// Get parents and so on
start_merge();
// First list lower layers
list_errcode.clear();
list_layers(true);
state = 1;
resume_1:
@@ -259,6 +261,15 @@ struct snap_merger_t
// Wait for lists
return;
}
if (list_errcode.size())
{
result = (cli_result_t){
.err = EIO,
.text = "Failed to list lower layer(s) in some PGs, merging would be incorrect",
};
state = 100;
return;
}
if (merge_offsets.size() > 0)
{
state = 2;
@@ -295,6 +306,7 @@ struct snap_merger_t
state = 3;
resume_3:
// Then list upper layers
list_errcode.clear();
list_layers(false);
state = 4;
resume_4:
@@ -303,6 +315,15 @@ struct snap_merger_t
// Wait for lists
return;
}
if (list_errcode.size() > 0)
{
result = (cli_result_t){
.err = EIO,
.text = "Failed to list upper layer(s) in some PGs, merging would be incorrect",
};
state = 100;
return;
}
state = 5;
processed = 0;
to_process = merge_offsets.size();
@@ -368,9 +389,13 @@ struct snap_merger_t
if (lower ? (sp.second < target_rank) : (sp.second > target_rank))
{
lists_todo++;
inode_list_t* lst = parent->cli->list_inode_start(src, [this, src](
inode_list_t *lst, std::set<object_id>&& objects, pg_num_t pg_num, osd_num_t primary_osd, int status)
parent->cli->list_inode(src, 0, 0, parent->parallel_osds, [this, src](
int errcode, int pgs_left, pg_num_t pg_num, std::set<object_id>&& objects)
{
if (errcode)
{
list_errcode[src] = errcode;
}
uint64_t layer_block = layer_block_size.at(src);
for (object_id obj: objects)
{
@@ -391,12 +416,18 @@ struct snap_merger_t
layer_list[pos++] = obj.stripe;
}
}
if (status & INODE_LIST_DONE)
if (!pgs_left)
{
auto & name = parent->cli->st_cli.inode_config.at(src).name;
if (parent->progress)
if (list_errcode.find(src) != list_errcode.end())
{
printf("Got listing of layer %s (inode %ju in pool %u)\n", name.c_str(), INODE_NO_POOL(src), INODE_POOL(src));
fprintf(stderr, "Failed to get listing of layer %s (inode %ju in pool %u): %s (code %d)\n",
name.c_str(), INODE_NO_POOL(src), INODE_POOL(src), strerror(-list_errcode[src]), list_errcode[src]);
}
else if (parent->progress)
{
fprintf(stderr, "Got listing of layer %s (inode %ju in pool %u)\n",
name.c_str(), INODE_NO_POOL(src), INODE_POOL(src));
}
if (delete_source)
{
@@ -406,12 +437,7 @@ struct snap_merger_t
lists_todo--;
continue_merge_reent();
}
else
{
parent->cli->list_inode_next(lst, 1);
}
});
parent->cli->list_inode_next(lst, parent->parallel_osds);
}
}
}
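
For readers following this API change: the old list_inode_start()/list_inode_next() pair is replaced by a single callback-driven list_inode() call. A self-contained sketch of the contract as this diff appears to use it; the meaning of the two zero arguments (an offset range, i.e. "list everything") and of the last argument (how many PGs to list in parallel) are assumptions, and the helper name is hypothetical.

#include <cstdio>
#include <cstring>
#include <cstdint>
#include <set>
#include "cluster_client.h"

// Hypothetical helper showing the assumed list_inode() contract: the callback fires once
// per PG listing, pgs_left drops to 0 with the last one, and a non-zero errcode is a
// negative errno for a PG that could not be listed.
static void list_whole_inode(cluster_client_t *cli, inode_t inode, int parallel_pgs)
{
    cli->list_inode(inode, 0 /* min offset (assumed) */, 0 /* max offset (assumed) */, parallel_pgs,
        [](int errcode, int pgs_left, pg_num_t pg_num, std::set<object_id> && objects)
    {
        if (errcode)
            fprintf(stderr, "Listing PG %ju failed: %s\n", (uintmax_t)pg_num, strerror(-errcode));
        for (const object_id & oid: objects)
        {
            (void)oid; // oid.stripe is the object offset inside the inode, as used above to fill merge_offsets
        }
        if (!pgs_left)
        {
            // final callback: every PG has reported, successfully or not
        }
    });
}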
@@ -428,7 +454,7 @@ struct snap_merger_t
{
if (op->retval < 0)
{
fprintf(stderr, "error reading target bitmap at offset %jx: %s\n", op->offset, strerror(-op->retval));
fprintf(stderr, "Warning: failed to read target bitmap at offset %jx: %s\n", op->offset, strerror(-op->retval));
}
else
{
@@ -585,7 +611,7 @@ struct snap_merger_t
subop->inode = inode_num;
subop->offset = offset;
subop->len = 0;
subop->flags = OSD_OP_IGNORE_READONLY;
subop->flags = OSD_OP_IGNORE_READONLY | OSD_OP_WAIT_UP_TIMEOUT;
subop->callback = [](cluster_op_t *subop)
{
if (subop->retval != 0)

View File

@@ -4,6 +4,7 @@
#include "cli.h"
#include "cluster_client.h"
#include "str_util.h"
#include "json_util.h"
// Rename, resize image (and purge extra data on shrink) or change its readonly status
struct image_changer_t
@@ -15,6 +16,7 @@ struct image_changer_t
uint64_t new_size = 0;
bool force_size = false, inc_size = false;
bool set_readonly = false, set_readwrite = false, force = false;
bool set_deleted = false, new_deleted = false;
bool down_ok = false;
// interval between fsyncs
int fsync_interval = 128;
@@ -82,6 +84,7 @@ struct image_changer_t
}
if ((!set_readwrite || !cfg.readonly) &&
(!set_readonly || cfg.readonly) &&
(!set_deleted || cfg.deleted == new_deleted) &&
(!new_size && !force_size || cfg.size == new_size || cfg.size >= new_size && inc_size) &&
(new_name == "" || new_name == image_name))
{
@@ -141,6 +144,10 @@ resume_1:
return;
}
}
if (set_deleted)
{
cfg.deleted = new_deleted;
}
if (new_name != "")
{
cfg.name = new_name;
@@ -219,7 +226,7 @@ resume_2:
return;
}
// Save into inode_config for library users to be able to take it from there immediately
cfg.mod_revision = parent->etcd_result["responses"][0]["response_put"]["header"]["revision"].uint64_value();
cfg.mod_revision = parent->etcd_result["header"]["revision"].uint64_value();
if (new_name != "")
{
parent->cli->st_cli.inode_by_name.erase(image_name);
@@ -251,6 +258,8 @@ std::function<bool(cli_result_t &)> cli_tool_t::start_modify(json11::Json cfg)
changer->force = cfg["force"].bool_value();
changer->set_readonly = cfg["readonly"].bool_value();
changer->set_readwrite = cfg["readwrite"].bool_value();
changer->set_deleted = !cfg["deleted"].is_null();
changer->new_deleted = json_is_true(cfg["deleted"]);
changer->fsync_interval = cfg["fsync_interval"].uint64_value();
if (!changer->fsync_interval)
changer->fsync_interval = 128;
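
Taken together with the new "--deleted 1|0" line in the help text above, this wiring suggests the flag is toggled with something like "vitastor-cli modify <image> --deleted 1" and cleared with "--deleted 0" (json_is_true() presumably accepting the usual 1/true spellings); images with the flag set are the ones vitastor-cli ls now marks as DEL.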

View File

@@ -90,8 +90,8 @@ std::string validate_pool_config(json11::Json::object & new_cfg, json11::Json ol
value = sz;
}
else if (key == "name" || key == "scheme" || key == "immediate_commit" ||
key == "failure_domain" || key == "root_node" || key == "scrub_interval" || key == "used_for_fs" ||
key == "raw_placement")
key == "failure_domain" || key == "root_node" || key == "scrub_interval" || key == "used_for_app" ||
key == "used_for_fs" || key == "raw_placement")
{
if (!value.is_string())
{
@@ -156,8 +156,13 @@ std::string validate_pool_config(json11::Json::object & new_cfg, json11::Json ol
{
new_cfg.erase("parity_chunks");
}
if (new_cfg.find("used_for_fs") != new_cfg.end() && new_cfg["used_for_fs"].string_value() == "")
if (new_cfg.find("used_for_app") != new_cfg.end() && new_cfg["used_for_app"].string_value() == "")
{
new_cfg.erase("used_for_app");
}
if (new_cfg.find("used_for_app") == new_cfg.end() && new_cfg.find("used_for_fs") != new_cfg.end())
{
new_cfg["used_for_app"] = "fs:"+new_cfg["used_for_fs"].string_value();
new_cfg.erase("used_for_fs");
}
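
The net effect, as far as this hunk shows: an empty used_for_app value is dropped, and a legacy used_for_fs value (say, "fsmeta") is rewritten to used_for_app = "fs:fsmeta", matching the new --used_for_app fs:<name> / s3:<name> syntax documented in the help text above.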

View File

@@ -460,7 +460,7 @@ resume_8:
}
if (osd_bs && osd_bs != UINT32_MAX && osd_bs != parent->cli->st_cli.global_block_size)
{
fprintf(stderr, "Auto-selecting block_size=%s because all pool OSDs use it\n", format_size(osd_bs).c_str());
fprintf(stderr, "Auto-selecting block_size=%s because all pool OSDs use it\n", format_size(osd_bs, false, true).c_str());
upd["block_size"] = osd_bs;
}
}
@@ -479,7 +479,7 @@ resume_8:
}
if (osd_bg && osd_bg != UINT32_MAX && osd_bg != parent->cli->st_cli.global_bitmap_granularity)
{
fprintf(stderr, "Auto-selecting bitmap_granularity=%s because all pool OSDs use it\n", format_size(osd_bg).c_str());
fprintf(stderr, "Auto-selecting bitmap_granularity=%s because all pool OSDs use it\n", format_size(osd_bg, false, true).c_str());
upd["bitmap_granularity"] = osd_bg;
}
}

Some files were not shown because too many files have changed in this diff.