Compare commits

...

23 Commits

Author SHA1 Message Date
0d1b6d0760 OpenSSL support in http_client.cpp
All checks were successful
Test / test_change_pg_count_ec (push) Has been skipped
Test / test_change_pg_size (push) Has been skipped
Test / test_create_nomaxid (push) Has been skipped
Test / test_etcd_fail (push) Has been skipped
Test / test_failure_domain (push) Has been skipped
Test / test_interrupted_rebalance (push) Has been skipped
Test / test_interrupted_rebalance_imm (push) Has been skipped
Test / test_interrupted_rebalance_ec (push) Has been skipped
Test / test_interrupted_rebalance_ec_imm (push) Has been skipped
Test / test_minsize_1 (push) Has been skipped
Test / test_move_reappear (push) Has been skipped
Test / test_rebalance_verify (push) Has been skipped
Test / test_rebalance_verify_imm (push) Has been skipped
Test / test_rebalance_verify_ec (push) Has been skipped
Test / test_rebalance_verify_ec_imm (push) Has been skipped
Test / test_rm (push) Has been skipped
Test / test_snapshot (push) Has been skipped
Test / test_snapshot_ec (push) Has been skipped
Test / test_splitbrain (push) Has been skipped
Test / test_write (push) Has been skipped
Test / test_write_xor (push) Has been skipped
Test / test_write_no_same (push) Has been skipped
Test / test_heal_pg_size_2 (push) Has been skipped
Test / test_heal_ec (push) Has been skipped
Test / test_scrub (push) Has been skipped
Test / test_scrub_zero_osd_2 (push) Has been skipped
Test / test_scrub_xor (push) Has been skipped
Test / test_scrub_pg_size_3 (push) Has been skipped
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Has been skipped
Test / test_scrub_ec (push) Has been skipped
2023-06-19 02:17:45 +03:00
83cacba226 Fix patched-qemu build 2023-06-19 01:47:55 +03:00
2c8f0bc6d5 Add a note about Debian 12 2023-06-19 01:08:41 +03:00
7ae5b0e368 Add patch for libvirt 9.0 2023-06-19 01:07:08 +03:00
926be372fd Release 0.9.2
All checks were successful
Test / test_change_pg_count (push) Successful in 35s
Test / test_change_pg_count_ec (push) Successful in 34s
Test / test_change_pg_size (push) Successful in 9s
Test / test_create_nomaxid (push) Successful in 7s
Test / test_etcd_fail (push) Successful in 1m1s
Test / test_failure_domain (push) Successful in 9s
Test / test_interrupted_rebalance (push) Successful in 1m26s
Test / test_interrupted_rebalance_imm (push) Successful in 2m15s
Test / test_interrupted_rebalance_ec (push) Successful in 1m31s
Test / test_interrupted_rebalance_ec_imm (push) Successful in 1m32s
Test / test_minsize_1 (push) Successful in 13s
Test / test_rebalance_verify (push) Successful in 2m49s
Test / test_rebalance_verify_imm (push) Successful in 2m45s
Test / test_rebalance_verify_ec (push) Successful in 3m9s
Test / test_rebalance_verify_ec_imm (push) Successful in 3m3s
Test / test_rm (push) Successful in 12s
Test / test_snapshot (push) Successful in 24s
Test / test_snapshot_ec (push) Successful in 35s
Test / test_splitbrain (push) Successful in 21s
Test / test_write (push) Successful in 1m10s
Test / test_write_xor (push) Successful in 2m23s
Test / test_write_no_same (push) Successful in 19s
Test / test_heal_pg_size_2 (push) Successful in 3m58s
Test / test_heal_ec (push) Successful in 4m8s
Test / test_scrub (push) Successful in 1m3s
Test / test_scrub_zero_osd_2 (push) Successful in 36s
Test / test_scrub_xor (push) Successful in 34s
Test / test_scrub_pg_size_3 (push) Successful in 51s
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 32s
Test / test_scrub_ec (push) Successful in 43s
- Measure and report scrub I/O statistics in vitastor-cli status
- Make aggregated statistics in vitastor-cli status much smoother
  (first derive, then sum instead of first summing and then deriving)
- Fix an old rare bug leading to journal corruption
  (try to use scrub if you think you're affected...)
- Do not start EC PGs without at least <data chunks> OSDs in each old set
  (prevents spurious read errors with EC during reconnections/restarts)
- Fix failed assert(!scrub_list_op) on OSD restart with pending scrubs
- Fix future planned scrubs not starting because of incorrect time comparison
- Build packages for Debian 12 (Bookworm)
2023-06-18 19:44:33 +03:00
6222779b52 Support debian bookworm (12) build 2023-06-18 19:44:33 +03:00
a4186e20aa First derive, then sum per-OSD statistics instead of first summing and then deriving
All checks were successful
Test / test_change_pg_count (push) Successful in 43s
Test / test_change_pg_count_ec (push) Successful in 37s
Test / test_change_pg_size (push) Successful in 8s
Test / test_create_nomaxid (push) Successful in 8s
Test / test_failure_domain (push) Successful in 16s
Test / test_interrupted_rebalance (push) Successful in 1m49s
Test / test_interrupted_rebalance_imm (push) Successful in 1m38s
Test / test_interrupted_rebalance_ec (push) Successful in 1m49s
Test / test_interrupted_rebalance_ec_imm (push) Successful in 1m23s
Test / test_minsize_1 (push) Successful in 13s
Test / test_move_reappear (push) Successful in 16s
Test / test_rebalance_verify (push) Successful in 3m2s
Test / test_rebalance_verify_imm (push) Successful in 2m53s
Test / test_rebalance_verify_ec (push) Successful in 3m9s
Test / test_rebalance_verify_ec_imm (push) Successful in 5m27s
Test / test_rm (push) Successful in 17s
Test / test_snapshot (push) Successful in 34s
Test / test_snapshot_ec (push) Successful in 29s
Test / test_splitbrain (push) Successful in 22s
Test / test_write (push) Successful in 37s
Test / test_write_xor (push) Successful in 44s
Test / test_write_no_same (push) Successful in 16s
Test / test_heal_pg_size_2 (push) Successful in 3m31s
Test / test_heal_ec (push) Successful in 4m20s
Test / test_scrub (push) Successful in 38s
Test / test_scrub_zero_osd_2 (push) Successful in 30s
Test / test_scrub_xor (push) Successful in 32s
Test / test_scrub_pg_size_3 (push) Successful in 42s
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 37s
Test / test_scrub_ec (push) Successful in 34s
This makes statistics reported by vitastor-cli status much smoother
2023-06-18 01:32:24 +03:00
c74a424930 Report scrub I/O in vitastor-cli status 2023-06-17 21:11:21 +03:00
32f2c4dd27 Measure scrub statistics 2023-06-17 20:56:26 +03:00
3ad16b9a1a Fix auto_scrubs not starting because of < vs <= =))
All checks were successful
Test / test_change_pg_count (push) Successful in 41s
Test / test_change_pg_count_ec (push) Successful in 36s
Test / test_change_pg_size (push) Successful in 9s
Test / test_create_nomaxid (push) Successful in 9s
Test / test_etcd_fail (push) Successful in 1m20s
Test / test_failure_domain (push) Successful in 12s
Test / test_interrupted_rebalance (push) Successful in 2m1s
Test / test_interrupted_rebalance_imm (push) Successful in 1m55s
Test / test_interrupted_rebalance_ec (push) Successful in 1m48s
Test / test_interrupted_rebalance_ec_imm (push) Successful in 1m32s
Test / test_move_reappear (push) Successful in 51s
Test / test_rebalance_verify (push) Successful in 3m19s
Test / test_rebalance_verify_imm (push) Successful in 3m9s
Test / test_rebalance_verify_ec (push) Successful in 3m21s
Test / test_rebalance_verify_ec_imm (push) Successful in 3m4s
Test / test_rm (push) Successful in 17s
Test / test_snapshot (push) Successful in 23s
Test / test_snapshot_ec (push) Successful in 26s
Test / test_splitbrain (push) Successful in 14s
Test / test_write (push) Successful in 1m35s
Test / test_write_xor (push) Successful in 2m29s
Test / test_write_no_same (push) Successful in 29s
Test / test_heal_pg_size_2 (push) Successful in 4m11s
Test / test_heal_ec (push) Successful in 5m4s
Test / test_scrub (push) Successful in 55s
Test / test_scrub_zero_osd_2 (push) Successful in 41s
Test / test_scrub_xor (push) Successful in 37s
Test / test_scrub_pg_size_3 (push) Successful in 57s
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 46s
Test / test_scrub_ec (push) Successful in 31s
2023-06-17 17:32:21 +03:00
1c2df841c2 Fix failed assert(!scrub_list_op) on OSD restart with pending scrubs
All checks were successful
Test / test_change_pg_count (push) Successful in 38s
Test / test_change_pg_count_ec (push) Successful in 38s
Test / test_change_pg_size (push) Successful in 9s
Test / test_create_nomaxid (push) Successful in 8s
Test / test_etcd_fail (push) Successful in 53s
Test / test_failure_domain (push) Successful in 15s
Test / test_interrupted_rebalance (push) Successful in 2m3s
Test / test_interrupted_rebalance_imm (push) Successful in 2m55s
Test / test_interrupted_rebalance_ec (push) Successful in 2m43s
Test / test_interrupted_rebalance_ec_imm (push) Successful in 1m31s
Test / test_minsize_1 (push) Successful in 14s
Test / test_move_reappear (push) Successful in 17s
Test / test_rebalance_verify (push) Successful in 3m9s
Test / test_rebalance_verify_imm (push) Successful in 3m10s
Test / test_rebalance_verify_ec (push) Successful in 3m15s
Test / test_rebalance_verify_ec_imm (push) Successful in 4m45s
Test / test_rm (push) Successful in 18s
Test / test_snapshot (push) Successful in 35s
Test / test_snapshot_ec (push) Successful in 18s
Test / test_splitbrain (push) Successful in 15s
Test / test_write (push) Successful in 35s
Test / test_write_xor (push) Successful in 1m12s
Test / test_write_no_same (push) Successful in 15s
Test / test_heal_pg_size_2 (push) Successful in 3m41s
Test / test_scrub (push) Successful in 33s
Test / test_scrub_zero_osd_2 (push) Successful in 29s
Test / test_scrub_xor (push) Successful in 30s
Test / test_scrub_pg_size_3 (push) Successful in 51s
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 31s
Test / test_scrub_ec (push) Successful in 26s
2023-06-17 17:02:54 +03:00
aa5dacc7a9 Do not start EC PGs without at least pg_data_size connections to old OSDs from each set
All checks were successful
Test / test_change_pg_count (push) Successful in 36s
Test / test_change_pg_count_ec (push) Successful in 38s
Test / test_change_pg_size (push) Successful in 8s
Test / test_create_nomaxid (push) Successful in 9s
Test / test_etcd_fail (push) Successful in 1m13s
Test / test_failure_domain (push) Successful in 11s
Test / test_interrupted_rebalance (push) Successful in 1m51s
Test / test_interrupted_rebalance_imm (push) Successful in 1m43s
Test / test_interrupted_rebalance_ec (push) Successful in 1m47s
Test / test_minsize_1 (push) Successful in 43s
Test / test_move_reappear (push) Successful in 43s
Test / test_rebalance_verify (push) Successful in 3m16s
Test / test_rebalance_verify_imm (push) Successful in 3m9s
Test / test_rebalance_verify_ec (push) Successful in 3m8s
Test / test_rebalance_verify_ec_imm (push) Successful in 3m10s
Test / test_rm (push) Successful in 14s
Test / test_snapshot (push) Successful in 22s
Test / test_snapshot_ec (push) Successful in 25s
Test / test_splitbrain (push) Successful in 15s
Test / test_write (push) Successful in 1m44s
Test / test_write_xor (push) Successful in 2m29s
Test / test_write_no_same (push) Successful in 22s
Test / test_heal_pg_size_2 (push) Successful in 4m37s
Test / test_heal_ec (push) Successful in 4m4s
Test / test_scrub (push) Successful in 48s
Test / test_scrub_zero_osd_2 (push) Successful in 41s
Test / test_scrub_xor (push) Successful in 39s
Test / test_scrub_pg_size_3 (push) Successful in 47s
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 41s
Test / test_scrub_ec (push) Successful in 34s
2023-06-17 02:16:30 +03:00
affe8fc270 Raise timeout also for add_osd and rebalance_verify 2023-06-17 00:29:03 +03:00
4fdc49bdc7 Add another assert-type check (it does not fire, just as a safety measure for the future) 2023-06-17 00:07:22 +03:00
86b4682975 Put get_trim_pos into the "critical section". Fixes rare journal corruption issue
The consequence of this issue was that in some very rare cases (only reproduced
under load in CI when running 4+ tests in parallel) small write data written to
journal could overwrite journal entries.

Also add an assert-type safety check to be able to catch this issue in the
future again in case of a regression.
2023-06-17 00:06:42 +03:00
bdd48e4cf1 Release 0.9.1
All checks were successful
Test / test_change_pg_count (push) Successful in 35s
Test / test_change_pg_count_ec (push) Successful in 32s
Test / test_change_pg_size (push) Successful in 8s
Test / test_create_nomaxid (push) Successful in 6s
Test / test_etcd_fail (push) Successful in 49s
Test / test_failure_domain (push) Successful in 9s
Test / test_interrupted_rebalance (push) Successful in 1m4s
Test / test_interrupted_rebalance_imm (push) Successful in 57s
Test / test_interrupted_rebalance_ec (push) Successful in 1m2s
Test / test_interrupted_rebalance_ec_imm (push) Successful in 53s
Test / test_minsize_1 (push) Successful in 14s
Test / test_move_reappear (push) Successful in 16s
Test / test_rebalance_verify (push) Successful in 1m45s
Test / test_rebalance_verify_imm (push) Successful in 1m41s
Test / test_rebalance_verify_ec (push) Successful in 1m53s
Test / test_rebalance_verify_ec_imm (push) Successful in 1m48s
Test / test_rm (push) Successful in 10s
Test / test_snapshot (push) Successful in 14s
Test / test_snapshot_ec (push) Successful in 16s
Test / test_splitbrain (push) Successful in 14s
Test / test_write (push) Successful in 49s
Test / test_write_xor (push) Successful in 1m1s
Test / test_write_no_same (push) Successful in 11s
Test / test_heal_pg_size_2 (push) Successful in 3m12s
Test / test_scrub (push) Successful in 28s
Test / test_scrub_zero_osd_2 (push) Successful in 22s
Test / test_scrub_xor (push) Successful in 34s
Test / test_scrub_pg_size_3 (push) Successful in 25s
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 26s
Test / test_scrub_ec (push) Successful in 24s
- Fix "Client XX command out of sync" messages sometimes happening on OSD reconnections
- Fix a bug where EC reads parallel with writes to the same object failed with -ERANGE error
- Slightly reduce the amount of metadata writes during journal flushing
- Correctly unmap NBD volumes when Proxmox forces map_volume use (with SWTPM and maybe some other cases)
2023-06-10 11:42:49 +03:00
af8c3411cd Correctly unmap NBD when Proxmox forces map_volume use (with SWTPM and maybe something else) 2023-06-08 01:31:49 +03:00
9c405009f3 Use randrw in test_heal
All checks were successful
Test / test_change_pg_count (push) Successful in 33s
Test / test_change_pg_count_ec (push) Successful in 32s
Test / test_change_pg_size (push) Successful in 8s
Test / test_create_nomaxid (push) Successful in 9s
Test / test_etcd_fail (push) Successful in 51s
Test / test_failure_domain (push) Successful in 12s
Test / test_interrupted_rebalance (push) Successful in 1m4s
Test / test_interrupted_rebalance_imm (push) Successful in 1m0s
Test / test_interrupted_rebalance_ec (push) Successful in 1m20s
Test / test_interrupted_rebalance_ec_imm (push) Successful in 55s
Test / test_minsize_1 (push) Successful in 22s
Test / test_move_reappear (push) Successful in 22s
Test / test_rebalance_verify (push) Successful in 1m47s
Test / test_rebalance_verify_imm (push) Successful in 1m45s
Test / test_rebalance_verify_ec (push) Successful in 2m3s
Test / test_rebalance_verify_ec_imm (push) Successful in 1m52s
Test / test_rm (push) Successful in 11s
Test / test_snapshot (push) Successful in 15s
Test / test_snapshot_ec (push) Successful in 16s
Test / test_splitbrain (push) Successful in 12s
Test / test_write (push) Successful in 31s
Test / test_write_xor (push) Successful in 47s
Test / test_write_no_same (push) Successful in 11s
Test / test_heal_pg_size_2 (push) Successful in 3m12s
Test / test_scrub (push) Successful in 24s
Test / test_scrub_zero_osd_2 (push) Successful in 26s
Test / test_scrub_xor (push) Successful in 26s
Test / test_scrub_pg_size_3 (push) Successful in 25s
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 42s
Test / test_scrub_ec (push) Successful in 33s
2023-06-03 00:49:53 +03:00
f9fbea25a4 Remove double write when old and new locations are in the same metadata block
Also add another metadata entry fool-safety check which, ideally, will never fire %)
2023-06-03 00:47:10 +03:00
2c9a10d081 Fix an idiotic bug leading to failed reads with -ERANGE with EC :D 2023-06-03 00:44:52 +03:00
150968070f Slightly improve some debug prints
All checks were successful
Test / test_change_pg_count (push) Successful in 30s
Test / test_change_pg_count_ec (push) Successful in 31s
Test / test_change_pg_size (push) Successful in 7s
Test / test_create_nomaxid (push) Successful in 7s
Test / test_etcd_fail (push) Successful in 45s
Test / test_failure_domain (push) Successful in 8s
Test / test_interrupted_rebalance (push) Successful in 1m3s
Test / test_interrupted_rebalance_imm (push) Successful in 55s
Test / test_interrupted_rebalance_ec (push) Successful in 1m30s
Test / test_interrupted_rebalance_ec_imm (push) Successful in 57s
Test / test_minsize_1 (push) Successful in 20s
Test / test_move_reappear (push) Successful in 16s
Test / test_rebalance_verify (push) Successful in 1m49s
Test / test_rebalance_verify_imm (push) Successful in 1m40s
Test / test_rebalance_verify_ec (push) Successful in 2m4s
Test / test_rebalance_verify_ec_imm (push) Successful in 1m51s
Test / test_rm (push) Successful in 14s
Test / test_snapshot (push) Successful in 16s
Test / test_snapshot_ec (push) Successful in 17s
Test / test_splitbrain (push) Successful in 14s
Test / test_write (push) Successful in 41s
Test / test_write_xor (push) Successful in 49s
Test / test_write_no_same (push) Successful in 10s
Test / test_heal_pg_size_2 (push) Successful in 2m58s
Test / test_scrub (push) Successful in 23s
Test / test_scrub_zero_osd_2 (push) Successful in 19s
Test / test_scrub_xor (push) Successful in 17s
Test / test_scrub_pg_size_3 (push) Successful in 24s
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 28s
Test / test_scrub_ec (push) Successful in 25s
2023-05-29 01:04:16 +03:00
cdfc74665b Close client FDs only when destroying the client, after handling all async reads/writes
All checks were successful
Test / test_change_pg_count (push) Successful in 50s
Test / test_change_pg_count_ec (push) Successful in 58s
Test / test_change_pg_size (push) Successful in 17s
Test / test_create_nomaxid (push) Successful in 19s
Test / test_etcd_fail (push) Successful in 58s
Test / test_failure_domain (push) Successful in 14s
Test / test_interrupted_rebalance (push) Successful in 1m31s
Test / test_interrupted_rebalance_imm (push) Successful in 1m0s
Test / test_interrupted_rebalance_ec (push) Successful in 1m23s
Test / test_interrupted_rebalance_ec_imm (push) Successful in 1m16s
Test / test_minsize_1 (push) Successful in 38s
Test / test_move_reappear (push) Successful in 54s
Test / test_rebalance_verify (push) Successful in 2m26s
Test / test_rebalance_verify_imm (push) Successful in 2m7s
Test / test_rebalance_verify_ec (push) Successful in 2m51s
Test / test_rebalance_verify_ec_imm (push) Successful in 2m15s
Test / test_rm (push) Successful in 22s
Test / test_snapshot (push) Successful in 40s
Test / test_snapshot_ec (push) Successful in 34s
Test / test_splitbrain (push) Successful in 23s
Test / test_write (push) Successful in 1m7s
Test / test_write_xor (push) Successful in 2m13s
Test / test_write_no_same (push) Successful in 18s
Test / test_heal_pg_size_2 (push) Successful in 5m14s
Test / test_scrub (push) Successful in 36s
Test / test_scrub_zero_osd_2 (push) Successful in 40s
Test / test_scrub_xor (push) Successful in 54s
Test / test_scrub_pg_size_3 (push) Successful in 54s
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 1m12s
Test / test_scrub_ec (push) Successful in 1m10s
Fixes "Client XX command out of sync" sometimes happening on reconnections
2023-05-25 00:52:43 +03:00
3f60fecd7c Fix typo 2023-05-21 18:37:01 +03:00
49 changed files with 1177 additions and 219 deletions

View File

@@ -71,7 +71,7 @@ jobs:
steps:
- name: Run test
id: test
timeout-minutes: 3
timeout-minutes: 10
run: /root/vitastor/tests/test_add_osd.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
@@ -323,7 +323,7 @@ jobs:
steps:
- name: Run test
id: test
timeout-minutes: 3
timeout-minutes: 10
run: /root/vitastor/tests/test_rebalance_verify.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
@@ -341,7 +341,7 @@ jobs:
steps:
- name: Run test
id: test
timeout-minutes: 3
timeout-minutes: 10
run: IMMEDIATE_COMMIT=1 /root/vitastor/tests/test_rebalance_verify.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
@@ -359,7 +359,7 @@ jobs:
steps:
- name: Run test
id: test
timeout-minutes: 3
timeout-minutes: 10
run: SCHEME=ec /root/vitastor/tests/test_rebalance_verify.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
@@ -377,7 +377,7 @@ jobs:
steps:
- name: Run test
id: test
timeout-minutes: 3
timeout-minutes: 10
run: SCHEME=ec IMMEDIATE_COMMIT=1 /root/vitastor/tests/test_rebalance_verify.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'

View File

@@ -9,7 +9,8 @@ for my $line (<>)
chomp $line;
my $test_name = $1;
my $timeout = 3;
if ($test_name eq 'test_etcd_fail' || $test_name eq 'test_heal' || $test_name eq 'test_interrupted_rebalance')
if ($test_name eq 'test_etcd_fail' || $test_name eq 'test_heal' || $test_name eq 'test_add_osd' ||
$test_name eq 'test_interrupted_rebalance' || $test_name eq 'test_rebalance_verify')
{
$timeout = 10;
}

View File

@@ -2,6 +2,6 @@ cmake_minimum_required(VERSION 2.8.12)
project(vitastor)
set(VERSION "0.9.0")
set(VERSION "0.9.2")
add_subdirectory(src)

View File

@@ -1,4 +1,4 @@
VERSION ?= v0.9.0
VERSION ?= v0.9.2
all: build push

View File

@@ -49,7 +49,7 @@ spec:
capabilities:
add: ["SYS_ADMIN"]
allowPrivilegeEscalation: true
image: vitalif/vitastor-csi:v0.9.0
image: vitalif/vitastor-csi:v0.9.2
args:
- "--node=$(NODE_ID)"
- "--endpoint=$(CSI_ENDPOINT)"

View File

@@ -116,7 +116,7 @@ spec:
privileged: true
capabilities:
add: ["SYS_ADMIN"]
image: vitalif/vitastor-csi:v0.9.0
image: vitalif/vitastor-csi:v0.9.2
args:
- "--node=$(NODE_ID)"
- "--endpoint=$(CSI_ENDPOINT)"

View File

@@ -5,7 +5,7 @@ package vitastor
const (
vitastorCSIDriverName = "csi.vitastor.io"
vitastorCSIDriverVersion = "0.9.0"
vitastorCSIDriverVersion = "0.9.2"
)
// Config struct fills the parameters of request or user input

7
debian/build-vitastor-bookworm.sh vendored Executable file
View File

@@ -0,0 +1,7 @@
#!/bin/bash
cat < vitastor.Dockerfile > ../Dockerfile
cd ..
mkdir -p packages
sudo podman build --build-arg REL=bookworm -v `pwd`/packages:/root/packages -f Dockerfile .
rm Dockerfile

4
debian/changelog vendored
View File

@@ -1,10 +1,10 @@
vitastor (0.9.0-1) unstable; urgency=medium
vitastor (0.9.2-1) unstable; urgency=medium
* Bugfixes
-- Vitaliy Filippov <vitalif@yourcmc.ru> Fri, 03 Jun 2022 02:09:44 +0300
vitastor (0.9.0-1) unstable; urgency=medium
vitastor (0.9.2-1) unstable; urgency=medium
* Implement NFS proxy
* Add documentation

View File

@@ -1,4 +1,4 @@
# Build patched QEMU for Debian Buster or Bullseye/Sid inside a container
# Build patched QEMU for Debian inside a container
# cd ..; podman build --build-arg REL=bullseye -v `pwd`/packages:/root/packages -f debian/patched-qemu.Dockerfile .
ARG REL=
@@ -15,17 +15,19 @@ RUN if [ "$REL" = "buster" -o "$REL" = "bullseye" ]; then \
echo 'Pin-Priority: 500' >> /etc/apt/preferences; \
fi; \
grep '^deb ' /etc/apt/sources.list | perl -pe 's/^deb/deb-src/' >> /etc/apt/sources.list; \
perl -i -pe 's/Types: deb$/Types: deb deb-src/' /etc/apt/sources.list.d/debian.sources || true; \
echo 'APT::Install-Recommends false;' >> /etc/apt/apt.conf; \
echo 'APT::Install-Suggests false;' >> /etc/apt/apt.conf
RUN apt-get update
RUN apt-get -y install qemu fio liburing1 liburing-dev libgoogle-perftools-dev devscripts
RUN apt-get -y install fio liburing-dev libgoogle-perftools-dev devscripts
RUN apt-get -y build-dep qemu
# To build a custom version
#RUN cp /root/packages/qemu-orig/* /root
RUN apt-get --download-only source qemu
ADD patches/qemu-5.0-vitastor.patch patches/qemu-5.1-vitastor.patch patches/qemu-6.1-vitastor.patch src/qemu_driver.c /root/vitastor/patches/
ADD patches /root/vitastor/patches
ADD src/qemu_driver.c /root/vitastor/src/qemu_driver.c
RUN set -e; \
apt-get install -y wget; \
wget -q -O /etc/apt/trusted.gpg.d/vitastor.gpg https://vitastor.io/debian/pubkey.gpg; \
@@ -37,23 +39,14 @@ RUN set -e; \
rm -rf /root/packages/qemu-$REL/*; \
cd /root/packages/qemu-$REL; \
dpkg-source -x /root/qemu*.dsc; \
if ls -d /root/packages/qemu-$REL/qemu-5.0*; then \
D=$(ls -d /root/packages/qemu-$REL/qemu-5.0*); \
cp /root/vitastor/patches/qemu-5.0-vitastor.patch $D/debian/patches; \
echo qemu-5.0-vitastor.patch >> $D/debian/patches/series; \
elif ls /root/packages/qemu-$REL/qemu-6.1*; then \
D=$(ls -d /root/packages/qemu-$REL/qemu-6.1*); \
cp /root/vitastor/patches/qemu-6.1-vitastor.patch $D/debian/patches; \
echo qemu-6.1-vitastor.patch >> $D/debian/patches/series; \
else \
cp /root/vitastor/patches/qemu-5.1-vitastor.patch /root/packages/qemu-$REL/qemu-*/debian/patches; \
P=`ls -d /root/packages/qemu-$REL/qemu-*/debian/patches`; \
echo qemu-5.1-vitastor.patch >> $P/series; \
fi; \
QEMU_VER=$(ls -d qemu*/ | perl -pe 's!^.*(\d+\.\d+).*!$1!'); \
D=$(ls -d qemu*/); \
cp /root/vitastor/patches/qemu-$QEMU_VER-vitastor.patch ./qemu-*/debian/patches; \
echo qemu-$QEMU_VER-vitastor.patch >> $D/debian/patches/series; \
cd /root/packages/qemu-$REL/qemu-*/; \
quilt push -a; \
quilt add block/vitastor.c; \
cp /root/vitastor/patches/qemu_driver.c block/vitastor.c; \
cp /root/vitastor/src/qemu_driver.c block/vitastor.c; \
quilt refresh; \
V=$(head -n1 debian/changelog | perl -pe 's/^.*\((.*?)(~bpo[\d\+]*)?\).*$/$1/')+vitastor1; \
DEBEMAIL="Vitaliy Filippov <vitalif@yourcmc.ru>" dch -D $REL -v $V 'Plug Vitastor block driver'; \

View File

@@ -1,4 +1,4 @@
# Build Vitastor packages for Debian Buster or Bullseye/Sid inside a container
# Build Vitastor packages for Debian inside a container
# cd ..; podman build --build-arg REL=bullseye -v `pwd`/packages:/root/packages -f debian/vitastor.Dockerfile .
ARG REL=
@@ -15,11 +15,12 @@ RUN if [ "$REL" = "buster" -o "$REL" = "bullseye" ]; then \
echo 'Pin-Priority: 500' >> /etc/apt/preferences; \
fi; \
grep '^deb ' /etc/apt/sources.list | perl -pe 's/^deb/deb-src/' >> /etc/apt/sources.list; \
perl -i -pe 's/Types: deb$/Types: deb deb-src/' /etc/apt/sources.list.d/debian.sources || true; \
echo 'APT::Install-Recommends false;' >> /etc/apt/apt.conf; \
echo 'APT::Install-Suggests false;' >> /etc/apt/apt.conf
RUN apt-get update
RUN apt-get -y install fio liburing1 liburing-dev libgoogle-perftools-dev devscripts
RUN apt-get -y install fio liburing-dev libgoogle-perftools-dev devscripts
RUN apt-get -y build-dep fio
RUN apt-get --download-only source fio
RUN apt-get update && apt-get -y install libjerasure-dev cmake libibverbs-dev libisal-dev
@@ -34,8 +35,8 @@ RUN set -e -x; \
mkdir -p /root/packages/vitastor-$REL; \
rm -rf /root/packages/vitastor-$REL/*; \
cd /root/packages/vitastor-$REL; \
cp -r /root/vitastor vitastor-0.9.0; \
cd vitastor-0.9.0; \
cp -r /root/vitastor vitastor-0.9.2; \
cd vitastor-0.9.2; \
ln -s /root/fio-build/fio-*/ ./fio; \
FIO=$(head -n1 fio/debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
ls /usr/include/linux/raw.h || cp ./debian/raw.h /usr/include/linux/raw.h; \
@@ -48,8 +49,8 @@ RUN set -e -x; \
rm -rf a b; \
echo "dep:fio=$FIO" > debian/fio_version; \
cd /root/packages/vitastor-$REL; \
tar --sort=name --mtime='2020-01-01' --owner=0 --group=0 --exclude=debian -cJf vitastor_0.9.0.orig.tar.xz vitastor-0.9.0; \
cd vitastor-0.9.0; \
tar --sort=name --mtime='2020-01-01' --owner=0 --group=0 --exclude=debian -cJf vitastor_0.9.2.orig.tar.xz vitastor-0.9.2; \
cd vitastor-0.9.2; \
V=$(head -n1 debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
DEBFULLNAME="Vitaliy Filippov <vitalif@yourcmc.ru>" dch -D $REL -v "$V""$REL" "Rebuild for $REL"; \
DEB_BUILD_OPTIONS=nocheck dpkg-buildpackage --jobs=auto -sa; \

View File

@@ -430,7 +430,7 @@ Flusher - это микро-поток (корутина), которая коп
Находить и автоматически восстанавливать "лучшие версии" объектов с
несовпадающими копиями/частями. При использовании репликации "лучшая"
версия - версия, доступная в большем числе экземпляров, чем другие. При
использовании кодов коррекции ошибок "лучшая" весрия - это подмножество
использовании кодов коррекции ошибок "лучшая" версия - это подмножество
частей данных и чётности, полностью соответствующих друг другу.
Гипотетическая ситуация, в которой вы можете захотеть отключить этот

View File

@@ -474,7 +474,7 @@
Находить и автоматически восстанавливать "лучшие версии" объектов с
несовпадающими копиями/частями. При использовании репликации "лучшая"
версия - версия, доступная в большем числе экземпляров, чем другие. При
использовании кодов коррекции ошибок "лучшая" весрия - это подмножество
использовании кодов коррекции ошибок "лучшая" версия - это подмножество
частей данных и чётности, полностью соответствующих друг другу.
Гипотетическая ситуация, в которой вы можете захотеть отключить этот

View File

@@ -11,7 +11,8 @@
- Trust Vitastor package signing key:
`wget https://vitastor.io/debian/pubkey.gpg -O /etc/apt/trusted.gpg.d/vitastor.gpg`
- Add Vitastor package repository to your /etc/apt/sources.list:
- Debian 11 (Bullseye/Sid): `deb https://vitastor.io/debian bullseye main`
- Debian 12 (Bookworm/Sid): `deb https://vitastor.io/debian bookworm main`
- Debian 11 (Bullseye): `deb https://vitastor.io/debian bullseye main`
- Debian 10 (Buster): `deb https://vitastor.io/debian buster main`
- For Debian 10 (Buster) also enable backports repository:
`deb http://deb.debian.org/debian buster-backports main`

View File

@@ -11,7 +11,8 @@
- Добавьте ключ репозитория Vitastor:
`wget https://vitastor.io/debian/pubkey.gpg -O /etc/apt/trusted.gpg.d/vitastor.gpg`
- Добавьте репозиторий Vitastor в /etc/apt/sources.list:
- Debian 11 (Bullseye/Sid): `deb https://vitastor.io/debian bullseye main`
- Debian 12 (Bookworm/Sid): `deb https://vitastor.io/debian bookworm main`
- Debian 11 (Bullseye): `deb https://vitastor.io/debian bullseye main`
- Debian 10 (Buster): `deb https://vitastor.io/debian buster main`
- Для Debian 10 (Buster) также включите репозиторий backports:
`deb http://deb.debian.org/debian buster-backports main`

View File

@@ -391,6 +391,7 @@ class Mon
this.etcd_start_timeout = (config.etcd_start_timeout || 5) * 1000;
this.state = JSON.parse(JSON.stringify(this.constructor.etcd_tree));
this.signals_set = false;
this.stat_time = Date.now();
this.ws = null;
this.ws_alive = false;
this.ws_keepalive_timer = null;
@@ -1410,65 +1411,75 @@ class Mon
}
}
derive_osd_stats(st, prev)
{
const zero_stats = { op: { bps: 0n, iops: 0n, lat: 0n }, subop: { iops: 0n, lat: 0n }, recovery: { bps: 0n, iops: 0n } };
const diff = { op_stats: {}, subop_stats: {}, recovery_stats: {} };
if (!st || !st.time || prev && (prev.time || this.stat_time/1000) >= st.time)
{
return diff;
}
const timediff = BigInt(st.time*1000 - (prev && prev.time*1000 || this.stat_time));
for (const op in st.op_stats||{})
{
const pr = prev && prev.op_stats && prev.op_stats[op];
let c = st.op_stats[op];
c = { bytes: BigInt(c.bytes||0), usec: BigInt(c.usec||0), count: BigInt(c.count||0) };
const b = c.bytes - BigInt(pr && pr.bytes||0);
const us = c.usec - BigInt(pr && pr.usec||0);
const n = c.count - BigInt(pr && pr.count||0);
if (n > 0)
diff.op_stats[op] = { ...c, bps: b*1000n/timediff, iops: n*1000n/timediff, lat: us/n };
}
for (const op in st.subop_stats||{})
{
const pr = prev && prev.subop_stats && prev.subop_stats[op];
let c = st.subop_stats[op];
c = { usec: BigInt(c.usec||0), count: BigInt(c.count||0) };
const us = c.usec - BigInt(pr && pr.usec||0);
const n = c.count - BigInt(pr && pr.count||0);
if (n > 0)
diff.subop_stats[op] = { ...c, iops: n*1000n/timediff, lat: us/n };
}
for (const op in st.recovery_stats||{})
{
const pr = prev && prev.recovery_stats && prev.recovery_stats[op];
let c = st.recovery_stats[op];
c = { bytes: BigInt(c.bytes||0), count: BigInt(c.count||0) };
const b = c.bytes - BigInt(pr && pr.bytes||0);
const n = c.count - BigInt(pr && pr.count||0);
if (n > 0)
diff.recovery_stats[op] = { ...c, bps: b*1000n/timediff, iops: n*1000n/timediff };
}
return diff;
}
sum_op_stats(timestamp, prev_stats)
{
const op_stats = {}, subop_stats = {}, recovery_stats = {};
const sum_diff = { op_stats: {}, subop_stats: {}, recovery_stats: {} };
if (!prev_stats || prev_stats.timestamp >= timestamp)
{
return sum_diff;
}
const tm = BigInt(timestamp - (prev_stats.timestamp || 0));
// Sum derived values instead of deriving summed
for (const osd in this.state.osd.stats)
{
const st = this.state.osd.stats[osd]||{};
for (const op in st.op_stats||{})
const derived = this.derive_osd_stats(this.state.osd.stats[osd],
this.prev_stats && this.prev_stats.osd_stats && this.prev_stats.osd_stats[osd]);
for (const type in derived)
{
op_stats[op] = op_stats[op] || { count: 0n, usec: 0n, bytes: 0n };
op_stats[op].count += BigInt(st.op_stats[op].count||0);
op_stats[op].usec += BigInt(st.op_stats[op].usec||0);
op_stats[op].bytes += BigInt(st.op_stats[op].bytes||0);
}
for (const op in st.subop_stats||{})
{
subop_stats[op] = subop_stats[op] || { count: 0n, usec: 0n };
subop_stats[op].count += BigInt(st.subop_stats[op].count||0);
subop_stats[op].usec += BigInt(st.subop_stats[op].usec||0);
}
for (const op in st.recovery_stats||{})
{
recovery_stats[op] = recovery_stats[op] || { count: 0n, bytes: 0n };
recovery_stats[op].count += BigInt(st.recovery_stats[op].count||0);
recovery_stats[op].bytes += BigInt(st.recovery_stats[op].bytes||0);
for (const op in derived[type])
{
for (const k in derived[type][op])
{
sum_diff[type][op] = sum_diff[type][op] || {};
sum_diff[type][op][k] = (sum_diff[type][op][k] || 0n) + derived[type][op][k];
}
}
}
}
if (prev_stats && prev_stats.timestamp >= timestamp)
{
prev_stats = null;
}
const tm = prev_stats ? BigInt(timestamp - prev_stats.timestamp) : 0;
for (const op in op_stats)
{
if (prev_stats && prev_stats.op_stats && prev_stats.op_stats[op])
{
op_stats[op].bps = (op_stats[op].bytes - prev_stats.op_stats[op].bytes) * 1000n / tm;
op_stats[op].iops = (op_stats[op].count - prev_stats.op_stats[op].count) * 1000n / tm;
op_stats[op].lat = (op_stats[op].usec - prev_stats.op_stats[op].usec)
/ ((op_stats[op].count - prev_stats.op_stats[op].count) || 1n);
}
}
for (const op in subop_stats)
{
if (prev_stats && prev_stats.subop_stats && prev_stats.subop_stats[op])
{
subop_stats[op].iops = (subop_stats[op].count - prev_stats.subop_stats[op].count) * 1000n / tm;
subop_stats[op].lat = (subop_stats[op].usec - prev_stats.subop_stats[op].usec)
/ ((subop_stats[op].count - prev_stats.subop_stats[op].count) || 1n);
}
}
for (const op in recovery_stats)
{
if (prev_stats && prev_stats.recovery_stats && prev_stats.recovery_stats[op])
{
recovery_stats[op].bps = (recovery_stats[op].bytes - prev_stats.recovery_stats[op].bytes) * 1000n / tm;
recovery_stats[op].iops = (recovery_stats[op].count - prev_stats.recovery_stats[op].count) * 1000n / tm;
}
}
return { op_stats, subop_stats, recovery_stats };
return sum_diff;
}
sum_object_counts()
@@ -1627,7 +1638,8 @@ class Mon
this.prev_stats ? this.prev_stats.inode_stats : null,
timestamp, this.prev_stats ? this.prev_stats.timestamp : null
);
this.prev_stats = { timestamp, ...stats, inode_stats };
this.prev_stats = { timestamp, inode_stats, osd_stats: { ...this.state.osd.stats } };
this.stat_time = Date.now();
stats.object_counts = object_counts;
stats.object_bytes = object_bytes;
stats = this.serialize_bigints(stats);
@@ -1743,13 +1755,14 @@ class Mon
else if (key_parts[0] === 'osd' && key_parts[1] === 'stats')
{
// Recheck OSD tree on OSD addition/deletion
const osd_num = key_parts[2];
if ((!old) != (!kv.value) || old && kv.value && old.size != kv.value.size)
{
this.schedule_recheck();
}
// Recheck PGs <osd_out_time> after last OSD statistics report
this.schedule_next_recheck_at(
!this.state.osd.stats[key[2]] ? 0 : this.state.osd.stats[key[2]].time+this.config.osd_out_time
!this.state.osd.stats[osd_num] ? 0 : this.state.osd.stats[osd_num].time+this.config.osd_out_time
);
}
}

View File

@@ -388,8 +388,6 @@ sub unmap_volume
my ($class, $storeid, $scfg, $volname, $snapname) = @_;
my $prefix = defined $scfg->{vitastor_prefix} ? $scfg->{vitastor_prefix} : 'pve/';
return 1 if !$scfg->{vitastor_nbd};
my ($vtype, $name, $vmid) = $class->parse_volname($volname);
$name .= '@'.$snapname if $snapname;
@@ -413,7 +411,7 @@ sub activate_volume
sub deactivate_volume
{
my ($class, $storeid, $scfg, $volname, $snapname, $cache) = @_;
$class->unmap_volume($storeid, $scfg, $volname, $snapname);
$class->unmap_volume($storeid, $scfg, $volname, $snapname) if $scfg->{vitastor_nbd};
return 1;
}

View File

@@ -50,7 +50,7 @@ from cinder.volume import configuration
from cinder.volume import driver
from cinder.volume import volume_utils
VERSION = '0.9.0'
VERSION = '0.9.2'
LOG = logging.getLogger(__name__)

View File

@@ -0,0 +1,644 @@
commit e6f935157944279c2c0634915c3c00feeec748c9
Author: Vitaliy Filippov <vitalif@yourcmc.ru>
Date: Mon Jun 19 00:58:19 2023 +0300
Add Vitastor support
diff --git a/include/libvirt/libvirt-storage.h b/include/libvirt/libvirt-storage.h
index aaad4a3..5f5daa8 100644
--- a/include/libvirt/libvirt-storage.h
+++ b/include/libvirt/libvirt-storage.h
@@ -326,6 +326,7 @@ typedef enum {
VIR_CONNECT_LIST_STORAGE_POOLS_ZFS = 1 << 17, /* (Since: 1.2.8) */
VIR_CONNECT_LIST_STORAGE_POOLS_VSTORAGE = 1 << 18, /* (Since: 3.1.0) */
VIR_CONNECT_LIST_STORAGE_POOLS_ISCSI_DIRECT = 1 << 19, /* (Since: 5.6.0) */
+ VIR_CONNECT_LIST_STORAGE_POOLS_VITASTOR = 1 << 20, /* (Since: 5.0.0) */
} virConnectListAllStoragePoolsFlags;
int virConnectListAllStoragePools(virConnectPtr conn,
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
index 45965fa..b7c23d3 100644
--- a/src/conf/domain_conf.c
+++ b/src/conf/domain_conf.c
@@ -7103,7 +7103,8 @@ virDomainDiskSourceNetworkParse(xmlNodePtr node,
src->configFile = virXPathString("string(./config/@file)", ctxt);
if (src->protocol == VIR_STORAGE_NET_PROTOCOL_HTTP ||
- src->protocol == VIR_STORAGE_NET_PROTOCOL_HTTPS)
+ src->protocol == VIR_STORAGE_NET_PROTOCOL_HTTPS ||
+ src->protocol == VIR_STORAGE_NET_PROTOCOL_VITASTOR)
src->query = virXMLPropString(node, "query");
if (virDomainStorageNetworkParseHosts(node, ctxt, &src->hosts, &src->nhosts) < 0)
@@ -30121,6 +30122,7 @@ virDomainStorageSourceTranslateSourcePool(virStorageSource *src,
case VIR_STORAGE_POOL_MPATH:
case VIR_STORAGE_POOL_RBD:
+ case VIR_STORAGE_POOL_VITASTOR:
case VIR_STORAGE_POOL_SHEEPDOG:
case VIR_STORAGE_POOL_GLUSTER:
case VIR_STORAGE_POOL_LAST:
diff --git a/src/conf/domain_validate.c b/src/conf/domain_validate.c
index 5a9bf20..05058b8 100644
--- a/src/conf/domain_validate.c
+++ b/src/conf/domain_validate.c
@@ -494,6 +494,7 @@ virDomainDiskDefValidateSourceChainOne(const virStorageSource *src)
case VIR_STORAGE_NET_PROTOCOL_RBD:
break;
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
case VIR_STORAGE_NET_PROTOCOL_NBD:
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
case VIR_STORAGE_NET_PROTOCOL_GLUSTER:
@@ -541,7 +542,7 @@ virDomainDiskDefValidateSourceChainOne(const virStorageSource *src)
}
}
- /* internal snapshots and config files are currently supported only with rbd: */
+ /* internal snapshots are currently supported only with rbd: */
if (virStorageSourceGetActualType(src) != VIR_STORAGE_TYPE_NETWORK &&
src->protocol != VIR_STORAGE_NET_PROTOCOL_RBD) {
if (src->snapshot) {
@@ -550,11 +551,15 @@ virDomainDiskDefValidateSourceChainOne(const virStorageSource *src)
"only with 'rbd' disks"));
return -1;
}
-
+ }
+ /* config files are currently supported only with rbd and vitastor: */
+ if (virStorageSourceGetActualType(src) != VIR_STORAGE_TYPE_NETWORK &&
+ src->protocol != VIR_STORAGE_NET_PROTOCOL_RBD &&
+ src->protocol != VIR_STORAGE_NET_PROTOCOL_VITASTOR) {
if (src->configFile) {
virReportError(VIR_ERR_XML_ERROR, "%s",
_("<config> element is currently supported "
- "only with 'rbd' disks"));
+ "only with 'rbd' and 'vitastor' disks"));
return -1;
}
}
diff --git a/src/conf/schemas/domaincommon.rng b/src/conf/schemas/domaincommon.rng
index 6cb0a20..8bf7de9 100644
--- a/src/conf/schemas/domaincommon.rng
+++ b/src/conf/schemas/domaincommon.rng
@@ -1972,6 +1972,35 @@
</element>
</define>
+ <define name="diskSourceNetworkProtocolVitastor">
+ <element name="source">
+ <interleave>
+ <attribute name="protocol">
+ <value>vitastor</value>
+ </attribute>
+ <ref name="diskSourceCommon"/>
+ <optional>
+ <attribute name="name"/>
+ </optional>
+ <optional>
+ <attribute name="query"/>
+ </optional>
+ <zeroOrMore>
+ <ref name="diskSourceNetworkHost"/>
+ </zeroOrMore>
+ <optional>
+ <element name="config">
+ <attribute name="file">
+ <ref name="absFilePath"/>
+ </attribute>
+ <empty/>
+ </element>
+ </optional>
+ <empty/>
+ </interleave>
+ </element>
+ </define>
+
<define name="diskSourceNetworkProtocolISCSI">
<element name="source">
<attribute name="protocol">
@@ -2264,6 +2293,7 @@
<ref name="diskSourceNetworkProtocolSimple"/>
<ref name="diskSourceNetworkProtocolVxHS"/>
<ref name="diskSourceNetworkProtocolNFS"/>
+ <ref name="diskSourceNetworkProtocolVitastor"/>
</choice>
</define>
diff --git a/src/conf/storage_conf.c b/src/conf/storage_conf.c
index f5a9636..8339bc4 100644
--- a/src/conf/storage_conf.c
+++ b/src/conf/storage_conf.c
@@ -56,7 +56,7 @@ VIR_ENUM_IMPL(virStoragePool,
"logical", "disk", "iscsi",
"iscsi-direct", "scsi", "mpath",
"rbd", "sheepdog", "gluster",
- "zfs", "vstorage",
+ "zfs", "vstorage", "vitastor",
);
VIR_ENUM_IMPL(virStoragePoolFormatFileSystem,
@@ -242,6 +242,18 @@ static virStoragePoolTypeInfo poolTypeInfo[] = {
.formatToString = virStorageFileFormatTypeToString,
}
},
+ {.poolType = VIR_STORAGE_POOL_VITASTOR,
+ .poolOptions = {
+ .flags = (VIR_STORAGE_POOL_SOURCE_HOST |
+ VIR_STORAGE_POOL_SOURCE_NETWORK |
+ VIR_STORAGE_POOL_SOURCE_NAME),
+ },
+ .volOptions = {
+ .defaultFormat = VIR_STORAGE_FILE_RAW,
+ .formatFromString = virStorageVolumeFormatFromString,
+ .formatToString = virStorageFileFormatTypeToString,
+ }
+ },
{.poolType = VIR_STORAGE_POOL_SHEEPDOG,
.poolOptions = {
.flags = (VIR_STORAGE_POOL_SOURCE_HOST |
@@ -542,6 +554,11 @@ virStoragePoolDefParseSource(xmlXPathContextPtr ctxt,
_("element 'name' is mandatory for RBD pool"));
return -1;
}
+ if (pool_type == VIR_STORAGE_POOL_VITASTOR && source->name == NULL) {
+ virReportError(VIR_ERR_XML_ERROR, "%s",
+ _("element 'name' is mandatory for Vitastor pool"));
+ return -1;
+ }
if (options->formatFromString) {
g_autofree char *format = NULL;
@@ -1132,6 +1149,7 @@ virStoragePoolDefFormatBuf(virBuffer *buf,
/* RBD, Sheepdog, Gluster and Iscsi-direct devices are not local block devs nor
* files, so they don't have a target */
if (def->type != VIR_STORAGE_POOL_RBD &&
+ def->type != VIR_STORAGE_POOL_VITASTOR &&
def->type != VIR_STORAGE_POOL_SHEEPDOG &&
def->type != VIR_STORAGE_POOL_GLUSTER &&
def->type != VIR_STORAGE_POOL_ISCSI_DIRECT) {
diff --git a/src/conf/storage_conf.h b/src/conf/storage_conf.h
index fc67957..720c07e 100644
--- a/src/conf/storage_conf.h
+++ b/src/conf/storage_conf.h
@@ -103,6 +103,7 @@ typedef enum {
VIR_STORAGE_POOL_GLUSTER, /* Gluster device */
VIR_STORAGE_POOL_ZFS, /* ZFS */
VIR_STORAGE_POOL_VSTORAGE, /* Virtuozzo Storage */
+ VIR_STORAGE_POOL_VITASTOR, /* Vitastor */
VIR_STORAGE_POOL_LAST,
} virStoragePoolType;
@@ -454,6 +455,7 @@ VIR_ENUM_DECL(virStoragePartedFs);
VIR_CONNECT_LIST_STORAGE_POOLS_SCSI | \
VIR_CONNECT_LIST_STORAGE_POOLS_MPATH | \
VIR_CONNECT_LIST_STORAGE_POOLS_RBD | \
+ VIR_CONNECT_LIST_STORAGE_POOLS_VITASTOR | \
VIR_CONNECT_LIST_STORAGE_POOLS_SHEEPDOG | \
VIR_CONNECT_LIST_STORAGE_POOLS_GLUSTER | \
VIR_CONNECT_LIST_STORAGE_POOLS_ZFS | \
diff --git a/src/conf/storage_source_conf.c b/src/conf/storage_source_conf.c
index cecd7e8..d7b79a4 100644
--- a/src/conf/storage_source_conf.c
+++ b/src/conf/storage_source_conf.c
@@ -87,6 +87,7 @@ VIR_ENUM_IMPL(virStorageNetProtocol,
"ssh",
"vxhs",
"nfs",
+ "vitastor",
);
@@ -1286,6 +1287,7 @@ virStorageSourceNetworkDefaultPort(virStorageNetProtocol protocol)
case VIR_STORAGE_NET_PROTOCOL_GLUSTER:
return 24007;
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
case VIR_STORAGE_NET_PROTOCOL_RBD:
/* we don't provide a default for RBD */
return 0;
diff --git a/src/conf/storage_source_conf.h b/src/conf/storage_source_conf.h
index 14a6825..eb4acac 100644
--- a/src/conf/storage_source_conf.h
+++ b/src/conf/storage_source_conf.h
@@ -128,6 +128,7 @@ typedef enum {
VIR_STORAGE_NET_PROTOCOL_SSH,
VIR_STORAGE_NET_PROTOCOL_VXHS,
VIR_STORAGE_NET_PROTOCOL_NFS,
+ VIR_STORAGE_NET_PROTOCOL_VITASTOR,
VIR_STORAGE_NET_PROTOCOL_LAST
} virStorageNetProtocol;
diff --git a/src/conf/virstorageobj.c b/src/conf/virstorageobj.c
index e6c187e..035b423 100644
--- a/src/conf/virstorageobj.c
+++ b/src/conf/virstorageobj.c
@@ -1433,6 +1433,7 @@ virStoragePoolObjSourceFindDuplicateCb(const void *payload,
return 1;
break;
+ case VIR_STORAGE_POOL_VITASTOR:
case VIR_STORAGE_POOL_ISCSI_DIRECT:
case VIR_STORAGE_POOL_RBD:
case VIR_STORAGE_POOL_LAST:
@@ -1918,6 +1919,8 @@ virStoragePoolObjMatch(virStoragePoolObj *obj,
(obj->def->type == VIR_STORAGE_POOL_MPATH)) ||
(MATCH(VIR_CONNECT_LIST_STORAGE_POOLS_RBD) &&
(obj->def->type == VIR_STORAGE_POOL_RBD)) ||
+ (MATCH(VIR_CONNECT_LIST_STORAGE_POOLS_VITASTOR) &&
+ (obj->def->type == VIR_STORAGE_POOL_VITASTOR)) ||
(MATCH(VIR_CONNECT_LIST_STORAGE_POOLS_SHEEPDOG) &&
(obj->def->type == VIR_STORAGE_POOL_SHEEPDOG)) ||
(MATCH(VIR_CONNECT_LIST_STORAGE_POOLS_GLUSTER) &&
diff --git a/src/libvirt-storage.c b/src/libvirt-storage.c
index 8490034..ab2cdaa 100644
--- a/src/libvirt-storage.c
+++ b/src/libvirt-storage.c
@@ -94,6 +94,7 @@ virStoragePoolGetConnect(virStoragePoolPtr pool)
* VIR_CONNECT_LIST_STORAGE_POOLS_SCSI
* VIR_CONNECT_LIST_STORAGE_POOLS_MPATH
* VIR_CONNECT_LIST_STORAGE_POOLS_RBD
+ * VIR_CONNECT_LIST_STORAGE_POOLS_VITASTOR
* VIR_CONNECT_LIST_STORAGE_POOLS_SHEEPDOG
* VIR_CONNECT_LIST_STORAGE_POOLS_GLUSTER
* VIR_CONNECT_LIST_STORAGE_POOLS_ZFS
diff --git a/src/libxl/libxl_conf.c b/src/libxl/libxl_conf.c
index 17ac880..59711b5 100644
--- a/src/libxl/libxl_conf.c
+++ b/src/libxl/libxl_conf.c
@@ -970,6 +970,7 @@ libxlMakeNetworkDiskSrcStr(virStorageSource *src,
case VIR_STORAGE_NET_PROTOCOL_SSH:
case VIR_STORAGE_NET_PROTOCOL_VXHS:
case VIR_STORAGE_NET_PROTOCOL_NFS:
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
case VIR_STORAGE_NET_PROTOCOL_LAST:
case VIR_STORAGE_NET_PROTOCOL_NONE:
virReportError(VIR_ERR_NO_SUPPORT,
diff --git a/src/libxl/xen_xl.c b/src/libxl/xen_xl.c
index 6919325..55ffc32 100644
--- a/src/libxl/xen_xl.c
+++ b/src/libxl/xen_xl.c
@@ -1445,6 +1445,7 @@ xenFormatXLDiskSrcNet(virStorageSource *src)
case VIR_STORAGE_NET_PROTOCOL_SSH:
case VIR_STORAGE_NET_PROTOCOL_VXHS:
case VIR_STORAGE_NET_PROTOCOL_NFS:
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
case VIR_STORAGE_NET_PROTOCOL_LAST:
case VIR_STORAGE_NET_PROTOCOL_NONE:
virReportError(VIR_ERR_NO_SUPPORT,
diff --git a/src/qemu/qemu_block.c b/src/qemu/qemu_block.c
index e865aa1..40162af 100644
--- a/src/qemu/qemu_block.c
+++ b/src/qemu/qemu_block.c
@@ -604,6 +604,38 @@ qemuBlockStorageSourceGetRBDProps(virStorageSource *src,
}
+static virJSONValue *
+qemuBlockStorageSourceGetVitastorProps(virStorageSource *src)
+{
+ virJSONValue *ret = NULL;
+ virStorageNetHostDef *host;
+ size_t i;
+ g_auto(virBuffer) buf = VIR_BUFFER_INITIALIZER;
+ g_autofree char *etcd = NULL;
+
+ for (i = 0; i < src->nhosts; i++) {
+ host = src->hosts + i;
+ if ((virStorageNetHostTransport)host->transport != VIR_STORAGE_NET_HOST_TRANS_TCP) {
+ return NULL;
+ }
+ virBufferAsprintf(&buf, i > 0 ? ",%s:%u" : "%s:%u", host->name, host->port);
+ }
+ if (src->nhosts > 0) {
+ etcd = virBufferContentAndReset(&buf);
+ }
+
+ if (virJSONValueObjectAdd(&ret,
+ "S:etcd-host", etcd,
+ "S:etcd-prefix", src->query,
+ "S:config-path", src->configFile,
+ "s:image", src->path,
+ NULL) < 0)
+ return NULL;
+
+ return ret;
+}
+
+
static virJSONValue *
qemuBlockStorageSourceGetSheepdogProps(virStorageSource *src)
{
@@ -917,6 +949,12 @@ qemuBlockStorageSourceGetBackendProps(virStorageSource *src,
return NULL;
break;
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
+ driver = "vitastor";
+ if (!(fileprops = qemuBlockStorageSourceGetVitastorProps(src)))
+ return NULL;
+ break;
+
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
driver = "sheepdog";
if (!(fileprops = qemuBlockStorageSourceGetSheepdogProps(src)))
@@ -1860,6 +1898,7 @@ qemuBlockGetBackingStoreString(virStorageSource *src,
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
case VIR_STORAGE_NET_PROTOCOL_RBD:
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
case VIR_STORAGE_NET_PROTOCOL_VXHS:
case VIR_STORAGE_NET_PROTOCOL_NFS:
case VIR_STORAGE_NET_PROTOCOL_SSH:
@@ -2242,6 +2281,12 @@ qemuBlockStorageSourceCreateGetStorageProps(virStorageSource *src,
return -1;
break;
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
+ driver = "vitastor";
+ if (!(location = qemuBlockStorageSourceGetVitastorProps(src)))
+ return -1;
+ break;
+
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
driver = "sheepdog";
if (!(location = qemuBlockStorageSourceGetSheepdogProps(src)))
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
index 2eb5653..60ee82d 100644
--- a/src/qemu/qemu_domain.c
+++ b/src/qemu/qemu_domain.c
@@ -4958,7 +4958,8 @@ qemuDomainValidateStorageSource(virStorageSource *src,
if (src->query &&
(actualType != VIR_STORAGE_TYPE_NETWORK ||
(src->protocol != VIR_STORAGE_NET_PROTOCOL_HTTPS &&
- src->protocol != VIR_STORAGE_NET_PROTOCOL_HTTP))) {
+ src->protocol != VIR_STORAGE_NET_PROTOCOL_HTTP &&
+ src->protocol != VIR_STORAGE_NET_PROTOCOL_VITASTOR))) {
virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
_("query is supported only with HTTP(S) protocols"));
return -1;
@@ -10129,6 +10130,7 @@ qemuDomainPrepareStorageSourceTLS(virStorageSource *src,
break;
case VIR_STORAGE_NET_PROTOCOL_RBD:
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
case VIR_STORAGE_NET_PROTOCOL_GLUSTER:
case VIR_STORAGE_NET_PROTOCOL_ISCSI:
diff --git a/src/qemu/qemu_snapshot.c b/src/qemu/qemu_snapshot.c
index b841680..a6be771 100644
--- a/src/qemu/qemu_snapshot.c
+++ b/src/qemu/qemu_snapshot.c
@@ -373,6 +373,7 @@ qemuSnapshotPrepareDiskExternalInactive(virDomainSnapshotDiskDef *snapdisk,
case VIR_STORAGE_NET_PROTOCOL_NONE:
case VIR_STORAGE_NET_PROTOCOL_NBD:
case VIR_STORAGE_NET_PROTOCOL_RBD:
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
case VIR_STORAGE_NET_PROTOCOL_GLUSTER:
case VIR_STORAGE_NET_PROTOCOL_ISCSI:
@@ -578,6 +579,7 @@ qemuSnapshotPrepareDiskInternal(virDomainDiskDef *disk,
case VIR_STORAGE_NET_PROTOCOL_NONE:
case VIR_STORAGE_NET_PROTOCOL_NBD:
case VIR_STORAGE_NET_PROTOCOL_RBD:
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
case VIR_STORAGE_NET_PROTOCOL_GLUSTER:
case VIR_STORAGE_NET_PROTOCOL_ISCSI:
diff --git a/src/storage/storage_driver.c b/src/storage/storage_driver.c
index d90c1c9..e853457 100644
--- a/src/storage/storage_driver.c
+++ b/src/storage/storage_driver.c
@@ -1627,6 +1627,7 @@ storageVolLookupByPathCallback(virStoragePoolObj *obj,
case VIR_STORAGE_POOL_GLUSTER:
case VIR_STORAGE_POOL_RBD:
+ case VIR_STORAGE_POOL_VITASTOR:
case VIR_STORAGE_POOL_SHEEPDOG:
case VIR_STORAGE_POOL_ZFS:
case VIR_STORAGE_POOL_LAST:
diff --git a/src/storage_file/storage_source_backingstore.c b/src/storage_file/storage_source_backingstore.c
index e48ae72..2017ccc 100644
--- a/src/storage_file/storage_source_backingstore.c
+++ b/src/storage_file/storage_source_backingstore.c
@@ -284,6 +284,75 @@ virStorageSourceParseRBDColonString(const char *rbdstr,
}
+static int
+virStorageSourceParseVitastorColonString(const char *colonstr,
+ virStorageSource *src)
+{
+ char *p, *e, *next;
+ g_autofree char *options = NULL;
+
+ /* optionally skip the "vitastor:" prefix if provided */
+ if (STRPREFIX(colonstr, "vitastor:"))
+ colonstr += strlen("vitastor:");
+
+ options = g_strdup(colonstr);
+
+ p = options;
+ while (*p) {
+ /* find : delimiter or end of string */
+ for (e = p; *e && *e != ':'; ++e) {
+ if (*e == '\\') {
+ e++;
+ if (*e == '\0')
+ break;
+ }
+ }
+ if (*e == '\0') {
+ next = e; /* last kv pair */
+ } else {
+ next = e + 1;
+ *e = '\0';
+ }
+
+ if (STRPREFIX(p, "image=")) {
+ src->path = g_strdup(p + strlen("image="));
+ } else if (STRPREFIX(p, "etcd-prefix=")) {
+ src->query = g_strdup(p + strlen("etcd-prefix="));
+ } else if (STRPREFIX(p, "config-path=")) {
+ src->configFile = g_strdup(p + strlen("config-path="));
+ } else if (STRPREFIX(p, "etcd-host=")) {
+ char *h, *sep;
+
+ h = p + strlen("etcd-host=");
+ while (h < e) {
+ for (sep = h; sep < e; ++sep) {
+ if (*sep == '\\' && (sep[1] == ',' ||
+ sep[1] == ';' ||
+ sep[1] == ' ')) {
+ *sep = '\0';
+ sep += 2;
+ break;
+ }
+ }
+
+ if (virStorageSourceRBDAddHost(src, h) < 0)
+ return -1;
+
+ h = sep;
+ }
+ }
+
+ p = next;
+ }
+
+ if (!src->path) {
+ return -1;
+ }
+
+ return 0;
+}
+
+
static int
virStorageSourceParseNBDColonString(const char *nbdstr,
virStorageSource *src)
@@ -396,6 +465,11 @@ virStorageSourceParseBackingColon(virStorageSource *src,
return -1;
break;
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
+ if (virStorageSourceParseVitastorColonString(path, src) < 0)
+ return -1;
+ break;
+
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
case VIR_STORAGE_NET_PROTOCOL_LAST:
case VIR_STORAGE_NET_PROTOCOL_NONE:
@@ -984,6 +1058,54 @@ virStorageSourceParseBackingJSONRBD(virStorageSource *src,
return 0;
}
+static int
+virStorageSourceParseBackingJSONVitastor(virStorageSource *src,
+ virJSONValue *json,
+ const char *jsonstr G_GNUC_UNUSED,
+ int opaque G_GNUC_UNUSED)
+{
+ const char *filename;
+ const char *image = virJSONValueObjectGetString(json, "image");
+ const char *conf = virJSONValueObjectGetString(json, "config-path");
+ const char *etcd_prefix = virJSONValueObjectGetString(json, "etcd-prefix");
+ virJSONValue *servers = virJSONValueObjectGetArray(json, "server");
+ size_t nservers;
+ size_t i;
+
+ src->type = VIR_STORAGE_TYPE_NETWORK;
+ src->protocol = VIR_STORAGE_NET_PROTOCOL_VITASTOR;
+
+ /* legacy syntax passed via 'filename' option */
+ if ((filename = virJSONValueObjectGetString(json, "filename")))
+ return virStorageSourceParseVitastorColonString(filename, src);
+
+ if (!image) {
+ virReportError(VIR_ERR_INVALID_ARG, "%s",
+ _("missing image name in Vitastor backing volume "
+ "JSON specification"));
+ return -1;
+ }
+
+ src->path = g_strdup(image);
+ src->configFile = g_strdup(conf);
+ src->query = g_strdup(etcd_prefix);
+
+ if (servers) {
+ nservers = virJSONValueArraySize(servers);
+
+ src->hosts = g_new0(virStorageNetHostDef, nservers);
+ src->nhosts = nservers;
+
+ for (i = 0; i < nservers; i++) {
+ if (virStorageSourceParseBackingJSONInetSocketAddress(src->hosts + i,
+ virJSONValueArrayGet(servers, i)) < 0)
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
static int
virStorageSourceParseBackingJSONRaw(virStorageSource *src,
virJSONValue *json,
@@ -1162,6 +1284,7 @@ static const struct virStorageSourceJSONDriverParser jsonParsers[] = {
{"sheepdog", false, virStorageSourceParseBackingJSONSheepdog, 0},
{"ssh", false, virStorageSourceParseBackingJSONSSH, 0},
{"rbd", false, virStorageSourceParseBackingJSONRBD, 0},
+ {"vitastor", false, virStorageSourceParseBackingJSONVitastor, 0},
{"raw", true, virStorageSourceParseBackingJSONRaw, 0},
{"nfs", false, virStorageSourceParseBackingJSONNFS, 0},
{"vxhs", false, virStorageSourceParseBackingJSONVxHS, 0},
diff --git a/src/test/test_driver.c b/src/test/test_driver.c
index bd6f063..cce34e1 100644
--- a/src/test/test_driver.c
+++ b/src/test/test_driver.c
@@ -7338,6 +7338,7 @@ testStorageVolumeTypeForPool(int pooltype)
case VIR_STORAGE_POOL_ISCSI_DIRECT:
case VIR_STORAGE_POOL_GLUSTER:
case VIR_STORAGE_POOL_RBD:
+ case VIR_STORAGE_POOL_VITASTOR:
return VIR_STORAGE_VOL_NETWORK;
case VIR_STORAGE_POOL_LOGICAL:
case VIR_STORAGE_POOL_DISK:
diff --git a/tests/storagepoolcapsschemadata/poolcaps-fs.xml b/tests/storagepoolcapsschemadata/poolcaps-fs.xml
index eee75af..8bd0a57 100644
--- a/tests/storagepoolcapsschemadata/poolcaps-fs.xml
+++ b/tests/storagepoolcapsschemadata/poolcaps-fs.xml
@@ -204,4 +204,11 @@
</enum>
</volOptions>
</pool>
+ <pool type='vitastor' supported='no'>
+ <volOptions>
+ <defaultFormat type='raw'/>
+ <enum name='targetFormatType'>
+ </enum>
+ </volOptions>
+ </pool>
</storagepoolCapabilities>
diff --git a/tests/storagepoolcapsschemadata/poolcaps-full.xml b/tests/storagepoolcapsschemadata/poolcaps-full.xml
index 805950a..852df0d 100644
--- a/tests/storagepoolcapsschemadata/poolcaps-full.xml
+++ b/tests/storagepoolcapsschemadata/poolcaps-full.xml
@@ -204,4 +204,11 @@
</enum>
</volOptions>
</pool>
+ <pool type='vitastor' supported='yes'>
+ <volOptions>
+ <defaultFormat type='raw'/>
+ <enum name='targetFormatType'>
+ </enum>
+ </volOptions>
+ </pool>
</storagepoolCapabilities>
diff --git a/tests/storagepoolxml2argvtest.c b/tests/storagepoolxml2argvtest.c
index e8e40d6..db55fe5 100644
--- a/tests/storagepoolxml2argvtest.c
+++ b/tests/storagepoolxml2argvtest.c
@@ -65,6 +65,7 @@ testCompareXMLToArgvFiles(bool shouldFail,
case VIR_STORAGE_POOL_GLUSTER:
case VIR_STORAGE_POOL_ZFS:
case VIR_STORAGE_POOL_VSTORAGE:
+ case VIR_STORAGE_POOL_VITASTOR:
case VIR_STORAGE_POOL_LAST:
default:
VIR_TEST_DEBUG("pool type '%s' has no xml2argv test", defTypeStr);
diff --git a/tools/virsh-pool.c b/tools/virsh-pool.c
index 8a98c6a..4b1bbd4 100644
--- a/tools/virsh-pool.c
+++ b/tools/virsh-pool.c
@@ -1221,6 +1221,9 @@ cmdPoolList(vshControl *ctl, const vshCmd *cmd G_GNUC_UNUSED)
case VIR_STORAGE_POOL_VSTORAGE:
flags |= VIR_CONNECT_LIST_STORAGE_POOLS_VSTORAGE;
break;
+ case VIR_STORAGE_POOL_VITASTOR:
+ flags |= VIR_CONNECT_LIST_STORAGE_POOLS_VITASTOR;
+ break;
case VIR_STORAGE_POOL_LAST:
break;
}

View File

@@ -24,4 +24,4 @@ rm fio
mv fio-copy fio
FIO=`rpm -qi fio | perl -e 'while(<>) { /^Epoch[\s:]+(\S+)/ && print "$1:"; /^Version[\s:]+(\S+)/ && print $1; /^Release[\s:]+(\S+)/ && print "-$1"; }'`
perl -i -pe 's/(Requires:\s*fio)([^\n]+)?/$1 = '$FIO'/' $VITASTOR/rpm/vitastor-el$EL.spec
tar --transform 's#^#vitastor-0.9.0/#' --exclude 'rpm/*.rpm' -czf $VITASTOR/../vitastor-0.9.0$(rpm --eval '%dist').tar.gz *
tar --transform 's#^#vitastor-0.9.2/#' --exclude 'rpm/*.rpm' -czf $VITASTOR/../vitastor-0.9.2$(rpm --eval '%dist').tar.gz *

View File

@@ -35,7 +35,7 @@ ADD . /root/vitastor
RUN set -e; \
cd /root/vitastor/rpm; \
sh build-tarball.sh; \
cp /root/vitastor-0.9.0.el7.tar.gz ~/rpmbuild/SOURCES; \
cp /root/vitastor-0.9.2.el7.tar.gz ~/rpmbuild/SOURCES; \
cp vitastor-el7.spec ~/rpmbuild/SPECS/vitastor.spec; \
cd ~/rpmbuild/SPECS/; \
rpmbuild -ba vitastor.spec; \

View File

@@ -1,11 +1,11 @@
Name: vitastor
Version: 0.9.0
Version: 0.9.2
Release: 1%{?dist}
Summary: Vitastor, a fast software-defined clustered block storage
License: Vitastor Network Public License 1.1
URL: https://vitastor.io/
Source0: vitastor-0.9.0.el7.tar.gz
Source0: vitastor-0.9.2.el7.tar.gz
BuildRequires: liburing-devel >= 0.6
BuildRequires: gperftools-devel

View File

@@ -35,7 +35,7 @@ ADD . /root/vitastor
RUN set -e; \
cd /root/vitastor/rpm; \
sh build-tarball.sh; \
cp /root/vitastor-0.9.0.el8.tar.gz ~/rpmbuild/SOURCES; \
cp /root/vitastor-0.9.2.el8.tar.gz ~/rpmbuild/SOURCES; \
cp vitastor-el8.spec ~/rpmbuild/SPECS/vitastor.spec; \
cd ~/rpmbuild/SPECS/; \
rpmbuild -ba vitastor.spec; \

View File

@@ -1,11 +1,11 @@
Name: vitastor
Version: 0.9.0
Version: 0.9.2
Release: 1%{?dist}
Summary: Vitastor, a fast software-defined clustered block storage
License: Vitastor Network Public License 1.1
URL: https://vitastor.io/
Source0: vitastor-0.9.0.el8.tar.gz
Source0: vitastor-0.9.2.el8.tar.gz
BuildRequires: liburing-devel >= 0.6
BuildRequires: gperftools-devel

View File

@@ -18,7 +18,7 @@ ADD . /root/vitastor
RUN set -e; \
cd /root/vitastor/rpm; \
sh build-tarball.sh; \
cp /root/vitastor-0.9.0.el9.tar.gz ~/rpmbuild/SOURCES; \
cp /root/vitastor-0.9.2.el9.tar.gz ~/rpmbuild/SOURCES; \
cp vitastor-el9.spec ~/rpmbuild/SPECS/vitastor.spec; \
cd ~/rpmbuild/SPECS/; \
rpmbuild -ba vitastor.spec; \

View File

@@ -1,11 +1,11 @@
Name: vitastor
Version: 0.9.0
Version: 0.9.2
Release: 1%{?dist}
Summary: Vitastor, a fast software-defined clustered block storage
License: Vitastor Network Public License 1.1
URL: https://vitastor.io/
Source0: vitastor-0.9.0.el9.tar.gz
Source0: vitastor-0.9.2.el9.tar.gz
BuildRequires: liburing-devel >= 0.6
BuildRequires: gperftools-devel

View File

@@ -16,7 +16,7 @@ if("${CMAKE_INSTALL_PREFIX}" MATCHES "^/usr/local/?$")
set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}")
endif()
add_definitions(-DVERSION="0.9.0")
add_definitions(-DVERSION="0.9.2")
add_definitions(-Wall -Wno-sign-compare -Wno-comment -Wno-parentheses -Wno-pointer-arith -fdiagnostics-color=always -I ${CMAKE_SOURCE_DIR}/src)
if (${WITH_ASAN})
add_definitions(-fsanitize=address -fno-omit-frame-pointer)
@@ -56,6 +56,11 @@ if (ISAL_LIBRARIES)
add_definitions(-DWITH_ISAL)
endif (ISAL_LIBRARIES)
find_package(OpenSSL)
if (OPENSSL_FOUND)
add_definitions(-DWITH_OPENSSL)
endif (OPENSSL_FOUND)
add_custom_target(build_tests)
add_custom_target(test
COMMAND

View File

@@ -536,14 +536,27 @@ resume_1:
return false;
}
// zero out old metadata entry
{
clean_disk_entry *old_entry = (clean_disk_entry*)((uint8_t*)meta_old.buf + meta_old.pos*bs->dsk.clean_entry_size);
if (old_entry->oid.inode != 0 && old_entry->oid != cur.oid)
{
printf("Fatal error (metadata corruption or bug): tried to wipe metadata entry %lu (%lx:%lx v%lu) as old location of %lx:%lx\n",
old_clean_loc >> bs->dsk.block_order, old_entry->oid.inode, old_entry->oid.stripe,
old_entry->version, cur.oid.inode, cur.oid.stripe);
exit(1);
}
}
memset((uint8_t*)meta_old.buf + meta_old.pos*bs->dsk.clean_entry_size, 0, bs->dsk.clean_entry_size);
await_sqe(15);
data->iov = (struct iovec){ meta_old.buf, bs->dsk.meta_block_size };
data->callback = simple_callback_w;
my_uring_prep_writev(
sqe, bs->dsk.meta_fd, &data->iov, 1, bs->dsk.meta_offset + bs->dsk.meta_block_size + meta_old.sector
);
wait_count++;
if (meta_old.sector != meta_new.sector)
{
await_sqe(15);
data->iov = (struct iovec){ meta_old.buf, bs->dsk.meta_block_size };
data->callback = simple_callback_w;
my_uring_prep_writev(
sqe, bs->dsk.meta_fd, &data->iov, 1, bs->dsk.meta_offset + bs->dsk.meta_block_size + meta_old.sector
);
wait_count++;
}
}
if (has_delete)
{
@@ -662,53 +675,58 @@ resume_1:
return false;
}
flusher->trimming = true;
// First update journal "superblock" and only then update <used_start> in memory
await_sqe(12);
*((journal_entry_start*)flusher->journal_superblock) = {
.crc32 = 0,
.magic = JOURNAL_MAGIC,
.type = JE_START,
.size = sizeof(journal_entry_start),
.reserved = 0,
.journal_start = new_trim_pos,
.version = JOURNAL_VERSION,
};
((journal_entry_start*)flusher->journal_superblock)->crc32 = je_crc32((journal_entry*)flusher->journal_superblock);
data->iov = (struct iovec){ flusher->journal_superblock, bs->dsk.journal_block_size };
data->callback = simple_callback_w;
my_uring_prep_writev(sqe, bs->dsk.journal_fd, &data->iov, 1, bs->journal.offset);
wait_count++;
resume_13:
if (wait_count > 0)
// Recheck the position with the "lock" taken
new_trim_pos = bs->journal.get_trim_pos();
if (new_trim_pos != bs->journal.used_start)
{
wait_state = 13;
return false;
}
if (!bs->disable_journal_fsync)
{
await_sqe(20);
my_uring_prep_fsync(sqe, bs->dsk.journal_fd, IORING_FSYNC_DATASYNC);
data->iov = { 0 };
// First update journal "superblock" and only then update <used_start> in memory
await_sqe(12);
*((journal_entry_start*)flusher->journal_superblock) = {
.crc32 = 0,
.magic = JOURNAL_MAGIC,
.type = JE_START,
.size = sizeof(journal_entry_start),
.reserved = 0,
.journal_start = new_trim_pos,
.version = JOURNAL_VERSION,
};
((journal_entry_start*)flusher->journal_superblock)->crc32 = je_crc32((journal_entry*)flusher->journal_superblock);
data->iov = (struct iovec){ flusher->journal_superblock, bs->dsk.journal_block_size };
data->callback = simple_callback_w;
resume_21:
my_uring_prep_writev(sqe, bs->dsk.journal_fd, &data->iov, 1, bs->journal.offset);
wait_count++;
resume_13:
if (wait_count > 0)
{
wait_state = 21;
wait_state = 13;
return false;
}
}
bs->journal.used_start = new_trim_pos;
if (!bs->disable_journal_fsync)
{
await_sqe(20);
my_uring_prep_fsync(sqe, bs->dsk.journal_fd, IORING_FSYNC_DATASYNC);
data->iov = { 0 };
data->callback = simple_callback_w;
resume_21:
if (wait_count > 0)
{
wait_state = 21;
return false;
}
}
bs->journal.used_start = new_trim_pos;
#ifdef BLOCKSTORE_DEBUG
printf("Journal trimmed to %08lx (next_free=%08lx)\n", bs->journal.used_start, bs->journal.next_free);
printf("Journal trimmed to %08lx (next_free=%08lx)\n", bs->journal.used_start, bs->journal.next_free);
#endif
if (bs->journal.flush_journal && !flusher->flush_queue.size())
{
assert(bs->journal.used_start == bs->journal.next_free);
printf("Journal flushed\n");
exit(0);
}
}
flusher->trimming = false;
}
if (bs->journal.flush_journal && !flusher->flush_queue.size())
{
assert(bs->journal.used_start == bs->journal.next_free);
printf("Journal flushed\n");
exit(0);
}
}
// All done
flusher->active_flushers--;

View File

@@ -236,14 +236,6 @@ journal_t::~journal_t()
uint64_t journal_t::get_trim_pos()
{
auto journal_used_it = used_sectors.lower_bound(used_start);
#ifdef BLOCKSTORE_DEBUG
printf(
"Trimming journal (used_start=%08lx, next_free=%08lx, dirty_start=%08lx, new_start=%08lx, new_refcount=%ld)\n",
used_start, next_free, dirty_start,
journal_used_it == used_sectors.end() ? 0 : journal_used_it->first,
journal_used_it == used_sectors.end() ? 0 : journal_used_it->second
);
#endif
if (journal_used_it == used_sectors.end())
{
// Journal is cleared to its end, restart from the beginning
@@ -256,12 +248,26 @@ uint64_t journal_t::get_trim_pos()
else
{
// next_free does not need updating during trim
#ifdef BLOCKSTORE_DEBUG
printf(
"Trimming journal (used_start=%08lx, next_free=%08lx, dirty_start=%08lx, new_start=%08lx, new_refcount=%ld)\n",
used_start, next_free, dirty_start,
journal_used_it->first, journal_used_it->second
);
#endif
return journal_used_it->first;
}
}
else if (journal_used_it->first > used_start)
{
// Journal is cleared up to <journal_used_it>
#ifdef BLOCKSTORE_DEBUG
printf(
"Trimming journal (used_start=%08lx, next_free=%08lx, dirty_start=%08lx, new_start=%08lx, new_refcount=%ld)\n",
used_start, next_free, dirty_start,
journal_used_it->first, journal_used_it->second
);
#endif
return journal_used_it->first;
}
// Can't trim journal

View File

@@ -138,10 +138,6 @@ int blockstore_impl_t::dequeue_read(blockstore_op_t *read_op)
{
dirty_entry& dirty = dirty_it->second;
bool version_ok = !IS_IN_FLIGHT(dirty.state) && read_op->version >= dirty_it->first.version;
if (IS_SYNCED(dirty.state))
{
version_ok = true;
}
if (version_ok)
{
if (IS_DELETE(dirty.state))

View File

@@ -179,7 +179,7 @@ void blockstore_impl_t::erase_dirty(blockstore_dirty_db_t::iterator dirty_start,
{
object_id oid = dirty_it->first.oid;
#ifdef BLOCKSTORE_DEBUG
printf("Unblock writes-after-delete %lx:%lx v%lx\n", oid.inode, oid.stripe, dirty_it->first.version);
printf("Unblock writes-after-delete %lx:%lx v%lu\n", oid.inode, oid.stripe, dirty_it->first.version);
#endif
dirty_it = dirty_end;
// Unblock operations blocked by delete flushing

View File

@@ -458,6 +458,16 @@ void blockstore_impl_t::mark_stable(const obj_ver_id & v, bool forget_dirty)
big_to_flush++;
}
}
else if (IS_IN_FLIGHT(dirty_it->second.state))
{
// mark_stable should never be called for in-flight or submitted writes
printf(
"BUG: Attempt to mark_stable object %lx:%lx v%lu state of which is %x\n",
dirty_it->first.oid.inode, dirty_it->first.oid.stripe, dirty_it->first.version,
dirty_it->second.state
);
exit(1);
}
if (forget_dirty && (IS_BIG_WRITE(dirty_it->second.state) ||
IS_DELETE(dirty_it->second.state)))
{

View File

@@ -409,7 +409,23 @@ int blockstore_impl_t::dequeue_write(blockstore_op_t *op)
);
#endif
// Figure out where data will be
journal.next_free = (journal.next_free + op->len) <= journal.len ? journal.next_free : dsk.journal_block_size;
auto next_next_free = (journal.next_free + op->len) <= journal.len ? journal.next_free : dsk.journal_block_size;
if (op->len > 0)
{
auto journal_used_it = journal.used_sectors.lower_bound(next_next_free);
if (journal_used_it != journal.used_sectors.end() &&
journal_used_it->first < next_next_free + op->len)
{
printf(
"BUG: Attempt to overwrite used offset (%lx, %lu refs) of the journal with the object %lx:%lx v%lu: data at %lx, len %x!"
" Journal used_start=%08lx (%lu refs), next_free=%08lx, dirty_start=%08lx\n",
journal_used_it->first, journal_used_it->second, op->oid.inode, op->oid.stripe, op->version, next_next_free, op->len,
journal.used_start, journal.used_sectors[journal.used_start], journal.next_free, journal.dirty_start
);
exit(1);
}
}
journal.next_free = next_next_free;
je->oid = op->oid;
je->version = op->version;
je->offset = op->offset;

View File

@@ -12,7 +12,7 @@ static const char *obj_states[] = { "clean", "misplaced", "degraded", "incomplet
// Print cluster status:
// etcd, mon, osd states
// raw/used space, object states, pool states, pg states
// client io, recovery io, rebalance io
// client io, recovery io, rebalance io, scrub io
struct status_printer_t
{
cli_tool_t *parent;
@@ -252,18 +252,25 @@ resume_2:
}
more_states.resize(more_states.size()-2);
std::string recovery_io;
int io_indent = 0;
{
uint64_t deg_bps = agg_stats["recovery_stats"]["degraded"]["bps"].uint64_value();
uint64_t deg_iops = agg_stats["recovery_stats"]["degraded"]["iops"].uint64_value();
uint64_t misp_bps = agg_stats["recovery_stats"]["misplaced"]["bps"].uint64_value();
uint64_t misp_iops = agg_stats["recovery_stats"]["misplaced"]["iops"].uint64_value();
uint64_t scrub_bps = agg_stats["op_stats"]["scrub"]["bps"].uint64_value();
uint64_t scrub_iops = agg_stats["op_stats"]["scrub"]["iops"].uint64_value();
if (misp_iops > 0 || misp_bps > 0 || no_rebalance)
io_indent = 3;
else if (deg_iops > 0 || deg_bps > 0 || no_recovery)
io_indent = 2;
if (deg_iops > 0 || deg_bps > 0)
{
recovery_io += " recovery: "+std::string(no_recovery ? "disabled, " : "")+
recovery_io += " recovery: "+str_repeat(" ", io_indent-2)+std::string(no_recovery ? "disabled, " : "")+
format_size(deg_bps)+"/s, "+format_size(deg_iops, true)+" op/s\n";
}
else if (no_recovery)
recovery_io += " recovery: disabled\n";
recovery_io += " recovery: disabled\n";
if (misp_iops > 0 || misp_bps > 0)
{
recovery_io += " rebalance: "+std::string(no_rebalance ? "disabled, " : "")+
@@ -271,6 +278,13 @@ resume_2:
}
else if (no_rebalance)
recovery_io += " rebalance: disabled\n";
if (scrub_iops > 0 || scrub_bps > 0)
{
recovery_io += " scrub: "+str_repeat(" ", io_indent+1)+std::string(no_scrub ? "disabled, " : "")+
format_size(scrub_bps)+"/s, "+format_size(scrub_iops, true)+" op/s\n";
}
else if (no_scrub)
recovery_io += " scrub: "+str_repeat(" ", io_indent+1)+"disabled\n";
}
printf(
" cluster:\n"
@@ -298,7 +312,7 @@ resume_2:
pools_active, pool_count,
pgs_by_state_str.c_str(),
readonly ? " (read-only mode)" : "",
recovery_io.size() > 0 ? " " : "",
str_repeat(" ", io_indent).c_str(),
format_size(agg_stats["op_stats"]["primary_read"]["bps"].uint64_value()).c_str(),
format_size(agg_stats["op_stats"]["primary_read"]["iops"].uint64_value(), true).c_str(),
format_size(agg_stats["op_stats"]["primary_write"]["bps"].uint64_value()).c_str(),

View File

@@ -27,10 +27,19 @@ static void parse_http_headers(std::string & res, http_response_t *parsed);
struct http_co_t
{
#ifdef WITH_OPENSSL
static SSL_CTX *ssl_ctx = NULL;
SSL *ssl_cli = NULL;
BIO *ssl_rbio = NULL;
BIO *ssl_wbio = NULL;
std::vector<uint8_t> encrypted_out;
#endif
timerfd_manager_t *tfd;
std::function<void(const http_response_t*)> response_callback;
int request_timeout = 0;
bool ssl = false;
std::string host;
std::string request;
std::string ws_outbox;
@@ -46,7 +55,7 @@ struct http_co_t
int timeout_id = -1;
int epoll_events = 0;
int sent = 0;
std::vector<char> rbuf;
std::vector<uint8_t> rbuf;
iovec read_iov, send_iov;
msghdr read_msg = { 0 }, send_msg = { 0 };
http_response_t parsed;
@@ -259,6 +268,12 @@ void http_response_t::parse_json_response(std::string & error, json11::Json & r)
http_co_t::~http_co_t()
{
#ifdef WITH_OPENSSL
if (ssl_cli)
{
SSL_free(ssl_cli);
}
#endif
close_connection();
}
@@ -275,6 +290,16 @@ void http_co_t::close_connection()
close(peer_fd);
peer_fd = -1;
}
#ifdef WITH_OPENSSL
if (ssl_ctx)
{
// Frees context, client and bios at once
SSL_free(ssl_ctx);
ssl_rbio = NULL;
ssl_wbio = NULL;
ssl_cli = NULL;
}
#endif
state = HTTP_CO_CLOSED;
connected_host = "";
response = "";
@@ -304,6 +329,27 @@ void http_co_t::start_connection()
}
fcntl(peer_fd, F_SETFL, fcntl(peer_fd, F_GETFL, 0) | O_NONBLOCK);
epoll_events = 0;
#ifdef WITH_OPENSSL
// https://wiki.openssl.org/index.php/Hostname_validation
if (ssl)
{
if (!ssl_ctx)
ssl_ctx = SSL_CTX_new(TLS_method());
ssl_rbio = BIO_new(BIO_s_mem());
ssl_wbio = BIO_new(BIO_s_mem());
ssl_cli = SSL_new(ssl_ctx);
if (!ssl_ctx || !ssl_cli || !ssl_rbio || !ssl_wbio)
{
parsed = { .error = std::string("openssl initialization failed: ")+ERR_get_error(NULL) };
response_callback(&parsed);
response_callback = NULL;
stackout();
return;
}
SSL_set_connect_state(ssl_cli);
SSL_set_bio(ssl_cli, ssl_rbio, ssl_wbio);
}
#endif
// Finally call connect
int r = ::connect(peer_fd, (sockaddr*)&addr, sizeof(addr));
if (r < 0 && errno != EINPROGRESS)
@@ -432,11 +478,11 @@ void http_co_t::submit_read(bool check_timeout)
stackin();
int res;
again:
if (rbuf.size() != READ_BUFFER_SIZE)
if (rbuf.capacity()-rbuf.size() < READ_BUFFER_SIZE)
{
rbuf.resize(READ_BUFFER_SIZE);
rbuf.reserve(rbuf.size() + READ_BUFFER_SIZE);
}
read_iov = { .iov_base = rbuf.data(), .iov_len = READ_BUFFER_SIZE };
read_iov = { .iov_base = rbuf.data()+rbuf.size(), .iov_len = READ_BUFFER_SIZE };
read_msg.msg_iov = &read_iov;
read_msg.msg_iovlen = 1;
res = recvmsg(peer_fd, &read_msg, 0);
@@ -466,22 +512,177 @@ again:
else if (res <= 0)
{
// < 0 means error, 0 means EOF
epoll_events = epoll_events & ~EPOLLIN;
if (state == HTTP_CO_HEADERS_RECEIVED)
std::swap(parsed.body, response);
close_connection();
if (res < 0)
parsed = { .error = std::string("recvmsg: ")+strerror(-res) };
run_cb_and_clear();
on_read_error(res);
}
else
{
response += std::string(rbuf.data(), res);
if (ssl)
handle_ssl_read(rbuf);
else
response += std::string((char*)rbuf.data(), res);
rbuf.resize(0);
handle_read();
}
stackout();
}
void http_co_t::on_read_error(int res)
{
epoll_events = epoll_events & ~EPOLLIN;
if (state == HTTP_CO_HEADERS_RECEIVED)
std::swap(parsed.body, response);
close_connection();
if (res < 0)
parsed = { .error = std::string("recvmsg: ")+strerror(-res) };
run_cb_and_clear();
}
int http_co_t::do_ssl_handshake()
{
stackin();
int r;
while (1)
{
r = SSL_do_handshake(ssl_cli);
if (r == SSL_ERROR_WANT_WRITE)
{
r = ssl_encrypt();
if (r >= 0)
submit_send();
else
{
r = -r;
break;
}
}
else
{
if (r == SSL_ERROR_WANT_READ || r == SSL_ERROR_NONE)
{
// OK or wait until we have more incoming data
r = 0;
}
break;
}
}
stackout();
return r;
}
// Enqueue outbound encrypted TLS data
int http_co_t::ssl_encrypt()
{
stackin();
int queued = 0;
while (true)
{
if (encrypted_out.size() >= encrypted_out.capacity()/2)
encrypted_out.reserve(encrypted_out.size() < READ_BUFFER_SIZE ? encrypted_out.size() + READ_BUFFER_SIZE : 2*encrypted_out.size());
int r = BIO_read(ssl_wbio, encrypted_out.data()+encrypted_out.size(), encrypted_out.capacity()-encrypted_out.size());
if (r > 0)
{
queued += r;
encrypted_out.resize(encrypted_out.size()+r);
}
else
{
if (!BIO_should_retry(ssl_wbio))
queued = r;
break;
}
}
stackout();
return queued;
}
void http_co_t::handle_ssl_write()
{
stackin();
int r = 0;
while (sent < request.size())
{
if (!SSL_is_init_finished(ssl_cli))
{
if (do_ssl_handshake() != 0)
{
on_read_error(-EIO);
break;
}
if (!SSL_is_init_finished(ssl_cli))
break;
}
else
{
int n = SSL_write(ssl_cli, request.data()+sent, request.size()-sent);
if (n > 0)
sent += n;
else if (get_sslstatus(ssl_cli, n) == SSLSTATUS_FAIL)
{
on_read_error(-EIO);
break;
}
else
break;
}
r = ssl_encrypt();
if (r >= 0)
submit_send();
else
{
on_read_error(-EIO);
break;
}
}
stackout();
}
// Process incoming encrypted TLS data
void http_co_t::handle_ssl_read()
{
stackin();
int size = rbuf.size();
int done = 0;
while (done < size)
{
int n = BIO_write(ssl_rbio, rbuf.data()+done, size-done);
if (n > 0)
{
done += n;
}
if (n <= 0)
{
on_read_error(-EIO);
break;
}
if (!SSL_is_init_finished(ssl_cli))
{
if (do_ssl_handshake() != 0)
{
on_read_error(-EIO);
break;
}
if (!SSL_is_init_finished(ssl_cli))
break;
}
do
{
if (response.capacity() - response.size() < READ_BUFFER_SIZE)
response.reserve(2*response.size() < response.size() + READ_BUFFER_SIZE ? response.size() + READ_BUFFER_SIZE : 2*response.size());
n = SSL_read(ssl_cli, response.data() + response.size(), READ_BUFFER_SIZE);
if (n <= 0)
{
n = SSL_get_error(ssl_cli, n);
if (n == SSL_ERROR_WANT_READ)
break;
}
} while (n > 0);
}
if (done < size)
memmove(rbuf.data(), rbuf.data()+done, size-done);
rbuf.resize(size-done);
stackout();
}
bool http_co_t::handle_read()
{
stackin();

View File

@@ -251,6 +251,10 @@ void osd_messenger_t::try_connect_peer_addr(osd_num_t peer_osd, const char *peer
return;
}
clients[peer_fd] = new osd_client_t();
if (log_level > 0)
{
fprintf(stderr, "Connecting to OSD %lu at %s:%d (client %d)\n", peer_osd, peer_host, peer_port, peer_fd);
}
clients[peer_fd]->peer_addr = addr;
clients[peer_fd]->peer_port = peer_port;
clients[peer_fd]->peer_fd = peer_fd;
@@ -313,7 +317,10 @@ void osd_messenger_t::handle_peer_epoll(int peer_fd, int epoll_events)
if (epoll_events & EPOLLRDHUP)
{
// Stop client
fprintf(stderr, "[OSD %lu] client %d disconnected\n", this->osd_num, peer_fd);
if (log_level > 0)
{
fprintf(stderr, "[OSD %lu] client %d disconnected\n", this->osd_num, peer_fd);
}
stop_client(peer_fd, true);
}
else if (epoll_events & EPOLLIN)

View File

@@ -50,7 +50,7 @@ struct osd_client_t
sockaddr_storage peer_addr;
int peer_port;
int peer_fd;
int peer_fd = -1;
int peer_state;
int connect_timeout_id = -1;
int ping_time_remaining = 0;
@@ -87,11 +87,7 @@ struct osd_client_t
std::vector<iovec> send_list, next_send_list;
std::vector<msgr_sendp_t> outbox, next_outbox;
~osd_client_t()
{
free(in_buf);
in_buf = NULL;
}
~osd_client_t();
};
struct osd_wanted_peer_t
@@ -177,6 +173,8 @@ public:
bool connect_rdma(int peer_fd, std::string rdma_address, uint64_t client_max_msg);
#endif
void measure_exec(osd_op_t *cur_op);
protected:
void try_connect_peer(uint64_t osd_num);
void try_connect_peer_addr(osd_num_t peer_osd, const char *peer_host, int peer_port);
@@ -188,7 +186,6 @@ protected:
void cancel_op(osd_op_t *op);
bool try_send(osd_client_t *cl);
void measure_exec(osd_op_t *cur_op);
void handle_send(int result, osd_client_t *cl);
bool handle_read(int result, osd_client_t *cl);

View File

@@ -150,8 +150,10 @@ void osd_messenger_t::measure_exec(osd_op_t *cur_op)
(cur_op->tv_end.tv_nsec - cur_op->tv_begin.tv_nsec)/1000
);
if (cur_op->req.hdr.opcode == OSD_OP_READ ||
cur_op->req.hdr.opcode == OSD_OP_WRITE)
cur_op->req.hdr.opcode == OSD_OP_WRITE ||
cur_op->req.hdr.opcode == OSD_OP_SCRUB)
{
// req.rw.len is internally set to the full object size for scrubs
stats.op_stat_bytes[cur_op->req.hdr.opcode] += cur_op->req.rw.len;
}
else if (cur_op->req.hdr.opcode == OSD_OP_SEC_READ ||

View File

@@ -122,17 +122,6 @@ void osd_messenger_t::stop_client(int peer_fd, bool force, bool force_delete)
// Cancel outbound operations
cancel_osd_ops(cl);
}
#ifndef __MOCK__
// And close the FD only when everything is done
// ...because peer_fd number can get reused after close()
close(peer_fd);
#ifdef WITH_RDMA
if (cl->rdma_conn)
{
delete cl->rdma_conn;
}
#endif
#endif
// Find the item again because it can be invalidated at this point
it = clients.find(peer_fd);
if (it != clients.end())
@@ -145,3 +134,25 @@ void osd_messenger_t::stop_client(int peer_fd, bool force, bool force_delete)
delete cl;
}
}
osd_client_t::~osd_client_t()
{
free(in_buf);
in_buf = NULL;
if (peer_fd >= 0)
{
// Close the FD only when the client is actually destroyed
// Which only happens when all references are cleared
close(peer_fd);
peer_fd = -1;
}
#ifndef __MOCK__
#ifdef WITH_RDMA
if (rdma_conn)
{
delete rdma_conn;
rdma_conn = NULL;
}
#endif
#endif
}

View File

@@ -305,7 +305,7 @@ void osd_t::submit_recovery_op(osd_recovery_op_t *op)
};
if (log_level > 2)
{
printf("Submitting recovery operation for %lx:%lx\n", op->oid.inode, op->oid.stripe);
printf("Submitting recovery operation for %lx:%lx (%s)\n", op->oid.inode, op->oid.stripe, op->degraded ? "degraded" : "misplaced");
}
op->osd_op->peer_fd = -1;
op->osd_op->callback = [this, op](osd_op_t *osd_op)

View File

@@ -240,21 +240,20 @@ void osd_t::start_pg_peering(pg_t & pg)
// (PG history is kept up to the latest active+clean state)
for (auto & history_set: pg.target_history)
{
bool found = true;
int nonzero = 0, found = 0;
for (auto history_osd: history_set)
{
if (history_osd != 0)
{
found = false;
nonzero++;
if (history_osd == this->osd_num ||
msgr.osd_peer_fds.find(history_osd) != msgr.osd_peer_fds.end())
{
found = true;
break;
found++;
}
}
}
if (!found)
if (found < (nonzero >= pg.pg_data_size ? pg.pg_data_size : 1))
{
pg.state = PG_INCOMPLETE;
report_pg_state(pg);
@@ -486,6 +485,10 @@ void osd_t::report_pg_state(pg_t & pg)
{
pg.print_state();
this->pg_state_dirty.insert({ .pool_id = pg.pool_id, .pg_num = pg.pg_num });
if (pg.state & PG_ACTIVE)
{
plan_scrub(pg, false);
}
if (pg.state == PG_ACTIVE && (pg.target_history.size() > 0 || pg.all_peers.size() > pg.target_set.size()))
{
// Clear history of active+clean PGs
@@ -494,7 +497,6 @@ void osd_t::report_pg_state(pg_t & pg)
pg.all_peers = pg.target_set;
std::sort(pg.all_peers.begin(), pg.all_peers.end());
pg.cur_peers = pg.target_set;
plan_scrub(pg, false);
// Change pg_config at the same time, otherwise our PG reconciling loop may try to apply the old metadata
auto & pg_cfg = st_cli.pool_config[pg.pool_id].pg_config[pg.pg_num];
pg_cfg.target_history = pg.target_history;
@@ -538,7 +540,6 @@ void osd_t::report_pg_state(pg_t & pg)
pg.cur_peers.push_back(pg_osd);
}
}
plan_scrub(pg, false);
auto & pg_cfg = st_cli.pool_config[pg.pool_id].pg_config[pg.pg_num];
pg_cfg.target_history = pg.target_history;
pg_cfg.all_peers = pg.all_peers;

View File

@@ -255,7 +255,7 @@ void pg_obj_state_check_t::finish_object()
}
else if (n_mismatched > 0)
{
if (log_level > 2 && (replicated || n_roles >= pg->pg_cursize))
if (log_level > 2)
{
printf("Object is misplaced: %lx:%lx version=%lu/%lu\n", oid.inode, oid.stripe, target_ver, max_ver);
}

View File

@@ -87,6 +87,7 @@ void osd_t::finish_op(osd_op_t *cur_op, int retval)
cur_op->reply.hdr.retval = retval;
if (cur_op->peer_fd == -1)
{
msgr.measure_exec(cur_op);
// Copy lambda to be unaffected by `delete op`
std::function<void(osd_op_t*)>(cur_op->callback)(cur_op);
}
@@ -357,7 +358,7 @@ void osd_t::handle_primary_subop(osd_op_t *subop, osd_op_t *cur_op)
#ifdef OSD_DEBUG
uint64_t peer_osd = msgr.clients.find(subop->peer_fd) != msgr.clients.end()
? msgr.clients[subop->peer_fd]->osd_num : osd_num;
printf("subop %lu from osd %lu: version = %lu\n", opcode, peer_osd, version);
printf("subop %s %lx:%lx from osd %lu: version = %lu\n", osd_op_names[opcode], subop->req.sec_rw.oid.inode, subop->req.sec_rw.oid.stripe, peer_osd, version);
#endif
if (op_data->fact_ver != UINT64_MAX)
{

View File

@@ -115,6 +115,10 @@ int osd_t::pick_next_scrub(object_id & next_oid)
}
return 0;
}
if (scrub_list_op)
{
return 1;
}
timespec tv_now;
clock_gettime(CLOCK_REALTIME, &tv_now);
bool rescan = scrub_last_pg.pool_id != 0 || scrub_last_pg.pg_num != 0;
@@ -127,7 +131,7 @@ int osd_t::pick_next_scrub(object_id & next_oid)
}
while (pg_it != pgs.end())
{
if ((pg_it->second.state & PG_ACTIVE) && pg_it->second.next_scrub && pg_it->second.next_scrub < tv_now.tv_sec)
if ((pg_it->second.state & PG_ACTIVE) && pg_it->second.next_scrub && pg_it->second.next_scrub <= tv_now.tv_sec)
{
// Continue scrubbing from the next object
if (scrub_last_pg == pg_it->first)
@@ -346,7 +350,7 @@ void osd_t::schedule_scrub(pg_t & pg)
tfd->clear_timer(scrub_timer_id);
scrub_timer_id = -1;
}
if (tv_now.tv_sec > scrub_nearest_ts)
if (tv_now.tv_sec >= scrub_nearest_ts)
{
scrub_nearest_ts = 0;
peering_state = peering_state | OSD_SCRUBBING;
@@ -376,6 +380,7 @@ void osd_t::continue_primary_scrub(osd_op_t *cur_op)
goto resume_2;
{
auto & pg = pgs.at({ .pool_id = INODE_POOL(op_data->oid.inode), .pg_num = op_data->pg_num });
cur_op->req.rw.len = bs_block_size * pg.pg_data_size;
// Determine version
auto vo_it = pg.ver_override.find(op_data->oid);
op_data->target_ver = vo_it != pg.ver_override.end() ? vo_it->second : UINT64_MAX;

View File

@@ -300,3 +300,11 @@ std::string read_all_fd(int fd)
res.resize(res_size);
return res;
}
std::string str_repeat(const std::string & str, int times)
{
std::string r;
for (int i = 0; i < times; i++)
r += str;
return r;
}

View File

@@ -17,3 +17,4 @@ std::string format_size(uint64_t size, bool nobytes = false);
void print_help(const char *help_text, std::string exe_name, std::string cmd, bool all);
uint64_t parse_time(std::string time_str, bool *ok = NULL);
std::string read_all_fd(int fd);
std::string str_repeat(const std::string & str, int times);

View File

@@ -6,7 +6,7 @@ includedir=${prefix}/@CMAKE_INSTALL_INCLUDEDIR@
Name: Vitastor
Description: Vitastor client library
Version: 0.9.0
Version: 0.9.2
Libs: -L${libdir} -lvitastor_client
Cflags: -I${includedir}

View File

@@ -85,7 +85,7 @@ try_change 16
# Monitor should report non-zero overall statistics at least once
if ! (grep /vitastor/stats ./testdata/mon.log | jq -s -e '[ .[] | select((.kv.value.op_stats.primary_write.count | tonumber) > 0) ] | length > 0'); then
if ! (grep /vitastor/stats ./testdata/mon.log | jq -s -e '[ .[] | select((.kv.value.op_stats.primary_write.count // 0 | tonumber) > 0) ] | length > 0'); then
format_error "FAILED: monitor doesn't aggregate stats"
fi

View File

@@ -46,8 +46,8 @@ kill_osds()
kill_osds &
LD_PRELOAD="build/src/libfio_vitastor.so" \
fio -thread -name=test -ioengine=build/src/libfio_vitastor.so -bs=4k -direct=1 -iodepth=16 -fsync=256 -rw=randwrite \
-mirror_file=./testdata/mirror.bin -etcd=$ETCD_URL -image=testimg -loops=10 -runtime=120
fio -thread -name=test -ioengine=build/src/libfio_vitastor.so -bsrange=4k-128k -direct=1 -iodepth=32 -fsync=256 -rw=randrw \
-randrepeat=0 -refill_buffers=1 -mirror_file=./testdata/mirror.bin -etcd=$ETCD_URL -image=testimg -loops=10 -runtime=120
qemu-img convert -S 4096 -p \
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testimg" \