Compare commits

...

8 Commits

Author SHA1 Message Date
ab90ed747f Release 0.5.6
- Fix operation statistics
- Fix a rebalance hang introduced in 0.5.5
- Test PG count changes with actual data moving
- Fix a possible 'unexpected pg state: 0' error during PG count change
2021-03-01 16:26:04 +03:00
29d8ac8b1b Do not report statistics for the empty operation 2021-03-01 16:20:57 +03:00
97795ea1b1 Use pg_minsize=2 in the pg_count change test
Also don't check for has_degraded because it's not a bug that objects
are _temporarily_ listed as degraded during PG peering as it's not
required for the new primary to connect to _all_ older peers to start
peering. The test may be improved in the future by temporarily disabling
degraded recovery during it and returning the has_degraded check back.
2021-03-01 16:18:08 +03:00
24e7075f08 Fix monitor's statistics aggregation 2021-02-28 19:51:16 +03:00
6155b23a7e Replace pgs[id] with pgs.at(id) to prevent accidental auto-vivification 2021-02-28 19:36:59 +03:00
7d49706c07 Improve the pg_count change test: add more OSDs and actually move data between them 2021-02-28 19:36:59 +03:00
46e79f3306 Wait for PGs to become clean before stopping them 2021-02-28 19:36:59 +03:00
41fd14e024 Fix deletes not increasing write_iodepth 2021-02-28 19:36:59 +03:00
15 changed files with 74 additions and 40 deletions

2
debian/changelog vendored
View File

@@ -1,4 +1,4 @@
vitastor (0.5.5-1) unstable; urgency=medium
vitastor (0.5.6-1) unstable; urgency=medium
* Bugfixes

View File

@@ -40,10 +40,10 @@ RUN set -e -x; \
mkdir -p /root/packages/vitastor-$REL; \
rm -rf /root/packages/vitastor-$REL/*; \
cd /root/packages/vitastor-$REL; \
cp -r /root/vitastor vitastor-0.5.5; \
ln -s /root/packages/qemu-$REL/qemu-*/ vitastor-0.5.5/qemu; \
ln -s /root/fio-build/fio-*/ vitastor-0.5.5/fio; \
cd vitastor-0.5.5; \
cp -r /root/vitastor vitastor-0.5.6; \
ln -s /root/packages/qemu-$REL/qemu-*/ vitastor-0.5.6/qemu; \
ln -s /root/fio-build/fio-*/ vitastor-0.5.6/fio; \
cd vitastor-0.5.6; \
FIO=$(head -n1 fio/debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
QEMU=$(head -n1 qemu/debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
sh copy-qemu-includes.sh; \
@@ -59,8 +59,8 @@ RUN set -e -x; \
echo "dep:fio=$FIO" > debian/substvars; \
echo "dep:qemu=$QEMU" >> debian/substvars; \
cd /root/packages/vitastor-$REL; \
tar --sort=name --mtime='2020-01-01' --owner=0 --group=0 --exclude=debian -cJf vitastor_0.5.5.orig.tar.xz vitastor-0.5.5; \
cd vitastor-0.5.5; \
tar --sort=name --mtime='2020-01-01' --owner=0 --group=0 --exclude=debian -cJf vitastor_0.5.6.orig.tar.xz vitastor-0.5.6; \
cd vitastor-0.5.6; \
V=$(head -n1 debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
DEBFULLNAME="Vitaliy Filippov <vitalif@yourcmc.ru>" dch -D $REL -v "$V""$REL" "Rebuild for $REL"; \
DEB_BUILD_OPTIONS=nocheck dpkg-buildpackage --jobs=auto -sa; \

View File

@@ -379,7 +379,7 @@ class Mon
}
if (this.verbose)
{
console.log(e);
console.log(JSON.stringify(e));
}
}
if (stats_changed)
@@ -974,21 +974,21 @@ class Mon
for (const op in st.op_stats||{})
{
op_stats[op] = op_stats[op] || { count: 0n, usec: 0n, bytes: 0n };
op_stats[op].count += BigInt(st.op_stats.count||0);
op_stats[op].usec += BigInt(st.op_stats.usec||0);
op_stats[op].bytes += BigInt(st.op_stats.bytes||0);
op_stats[op].count += BigInt(st.op_stats[op].count||0);
op_stats[op].usec += BigInt(st.op_stats[op].usec||0);
op_stats[op].bytes += BigInt(st.op_stats[op].bytes||0);
}
for (const op in st.subop_stats||{})
{
subop_stats[op] = subop_stats[op] || { count: 0n, usec: 0n };
subop_stats[op].count += BigInt(st.subop_stats.count||0);
subop_stats[op].usec += BigInt(st.subop_stats.usec||0);
subop_stats[op].count += BigInt(st.subop_stats[op].count||0);
subop_stats[op].usec += BigInt(st.subop_stats[op].usec||0);
}
for (const op in st.recovery_stats||{})
{
recovery_stats[op] = recovery_stats[op] || { count: 0n, bytes: 0n };
recovery_stats[op].count += BigInt(st.recovery_stats.count||0);
recovery_stats[op].bytes += BigInt(st.recovery_stats.bytes||0);
recovery_stats[op].count += BigInt(st.recovery_stats[op].count||0);
recovery_stats[op].bytes += BigInt(st.recovery_stats[op].bytes||0);
}
}
for (const op in op_stats)

View File

@@ -48,4 +48,4 @@ FIO=`rpm -qi fio | perl -e 'while(<>) { /^Epoch[\s:]+(\S+)/ && print "$1:"; /^Ve
QEMU=`rpm -qi qemu qemu-kvm | perl -e 'while(<>) { /^Epoch[\s:]+(\S+)/ && print "$1:"; /^Version[\s:]+(\S+)/ && print $1; /^Release[\s:]+(\S+)/ && print "-$1"; }'`
perl -i -pe 's/(Requires:\s*fio)([^\n]+)?/$1 = '$FIO'/' $VITASTOR/rpm/vitastor-el$EL.spec
perl -i -pe 's/(Requires:\s*qemu(?:-kvm)?)([^\n]+)?/$1 = '$QEMU'/' $VITASTOR/rpm/vitastor-el$EL.spec
tar --transform 's#^#vitastor-0.5.5/#' --exclude 'rpm/*.rpm' -czf $VITASTOR/../vitastor-0.5.5$(rpm --eval '%dist').tar.gz *
tar --transform 's#^#vitastor-0.5.6/#' --exclude 'rpm/*.rpm' -czf $VITASTOR/../vitastor-0.5.6$(rpm --eval '%dist').tar.gz *

View File

@@ -37,7 +37,7 @@ ADD . /root/vitastor
RUN set -e; \
cd /root/vitastor/rpm; \
sh build-tarball.sh; \
cp /root/vitastor-0.5.5.el7.tar.gz ~/rpmbuild/SOURCES; \
cp /root/vitastor-0.5.6.el7.tar.gz ~/rpmbuild/SOURCES; \
cp vitastor-el7.spec ~/rpmbuild/SPECS/vitastor.spec; \
cd ~/rpmbuild/SPECS/; \
rpmbuild -ba vitastor.spec; \

View File

@@ -1,11 +1,11 @@
Name: vitastor
Version: 0.5.5
Version: 0.5.6
Release: 1%{?dist}
Summary: Vitastor, a fast software-defined clustered block storage
License: Vitastor Network Public License 1.1
URL: https://vitastor.io/
Source0: vitastor-0.5.5.el7.tar.gz
Source0: vitastor-0.5.6.el7.tar.gz
BuildRequires: liburing-devel >= 0.6
BuildRequires: gperftools-devel

View File

@@ -35,7 +35,7 @@ ADD . /root/vitastor
RUN set -e; \
cd /root/vitastor/rpm; \
sh build-tarball.sh; \
cp /root/vitastor-0.5.5.el8.tar.gz ~/rpmbuild/SOURCES; \
cp /root/vitastor-0.5.6.el8.tar.gz ~/rpmbuild/SOURCES; \
cp vitastor-el8.spec ~/rpmbuild/SPECS/vitastor.spec; \
cd ~/rpmbuild/SPECS/; \
rpmbuild -ba vitastor.spec; \

View File

@@ -1,11 +1,11 @@
Name: vitastor
Version: 0.5.5
Version: 0.5.6
Release: 1%{?dist}
Summary: Vitastor, a fast software-defined clustered block storage
License: Vitastor Network Public License 1.1
URL: https://vitastor.io/
Source0: vitastor-0.5.5.el8.tar.gz
Source0: vitastor-0.5.6.el8.tar.gz
BuildRequires: liburing-devel >= 0.6
BuildRequires: gperftools-devel

View File

@@ -510,6 +510,7 @@ int blockstore_impl_t::dequeue_del(blockstore_op_t *op)
{
return 0;
}
write_iodepth++;
io_uring_sqe *sqe = NULL;
if (immediate_commit != IMMEDIATE_NONE ||
(journal_block_size - journal.in_sector_pos) < sizeof(journal_entry_del) &&

View File

@@ -142,7 +142,7 @@ json11::Json osd_t::get_statistics()
}
st["host"] = self_state["host"];
json11::Json::object op_stats, subop_stats;
for (int i = 0; i <= OSD_OP_MAX; i++)
for (int i = OSD_OP_MIN; i <= OSD_OP_MAX; i++)
{
op_stats[osd_op_names[i]] = json11::Json::object {
{ "count", c_cli.stats.op_stat_count[i] },
@@ -150,7 +150,7 @@ json11::Json osd_t::get_statistics()
{ "bytes", c_cli.stats.op_stat_bytes[i] },
};
}
for (int i = 0; i <= OSD_OP_MAX; i++)
for (int i = OSD_OP_MIN; i <= OSD_OP_MAX; i++)
{
subop_stats[osd_op_names[i]] = json11::Json::object {
{ "count", c_cli.stats.subop_stat_count[i] },
@@ -593,7 +593,10 @@ void osd_t::apply_pg_config()
}
else
{
throw std::runtime_error("Unexpected PG "+std::to_string(pg_num)+" state: "+std::to_string(pg_it->second.state));
throw std::runtime_error(
"Unexpected PG "+std::to_string(pool_id)+"/"+std::to_string(pg_num)+
" state: "+std::to_string(pg_it->second.state)
);
}
}
auto & pg = this->pgs[{ .pool_id = pool_id, .pg_num = pg_num }];

View File

@@ -95,7 +95,7 @@ void osd_t::handle_flush_op(bool rollback, pool_id_t pool_id, pg_num_t pg_num, p
{
// This flush batch is done
std::vector<osd_op_t*> continue_ops;
auto & pg = pgs[pg_id];
auto & pg = pgs.at(pg_id);
auto it = pg.flush_actions.begin(), prev_it = it;
auto erase_start = it;
while (1)

View File

@@ -473,7 +473,9 @@ bool osd_t::stop_pg(pg_t & pg)
return false;
}
pg.state = pg.state & ~PG_ACTIVE | PG_STOPPING;
if (pg.inflight == 0 && !pg.flush_batch)
if (pg.inflight == 0 && !pg.flush_batch &&
// We must either forget all PG's unstable writes or wait for it to become clean
dirty_pgs.find({ .pool_id = pg.pool_id, .pg_num = pg.pg_num }) == dirty_pgs.end())
{
finish_stop_pg(pg);
}

View File

@@ -103,7 +103,7 @@ void osd_t::continue_primary_read(osd_op_t *cur_op)
if (op_data->st == 1) goto resume_1;
else if (op_data->st == 2) goto resume_2;
{
auto & pg = pgs[{ .pool_id = INODE_POOL(op_data->oid.inode), .pg_num = op_data->pg_num }];
auto & pg = pgs.at({ .pool_id = INODE_POOL(op_data->oid.inode), .pg_num = op_data->pg_num });
for (int role = 0; role < op_data->pg_data_size; role++)
{
op_data->stripes[role].read_start = op_data->stripes[role].req_start;
@@ -211,7 +211,7 @@ void osd_t::continue_primary_write(osd_op_t *cur_op)
return;
}
osd_primary_op_data_t *op_data = cur_op->op_data;
auto & pg = pgs[{ .pool_id = INODE_POOL(op_data->oid.inode), .pg_num = op_data->pg_num }];
auto & pg = pgs.at({ .pool_id = INODE_POOL(op_data->oid.inode), .pg_num = op_data->pg_num });
if (op_data->st == 1) goto resume_1;
else if (op_data->st == 2) goto resume_2;
else if (op_data->st == 3) goto resume_3;
@@ -582,7 +582,7 @@ resume_2:
int dpg = 0;
for (auto dirty_pg_num: dirty_pgs)
{
pgs[dirty_pg_num].inflight++;
pgs.at(dirty_pg_num).inflight++;
op_data->dirty_pgs[dpg++] = dirty_pg_num;
}
dirty_pgs.clear();
@@ -639,7 +639,7 @@ resume_6:
.pool_id = INODE_POOL(w.oid.inode),
.pg_num = map_to_pg(w.oid, st_cli.pool_config.at(INODE_POOL(w.oid.inode)).pg_stripe_size),
};
if (pgs[wpg].state & PG_ACTIVE)
if (pgs.at(wpg).state & PG_ACTIVE)
{
uint64_t & dest = this->unstable_writes[(osd_object_id_t){
.osd_num = unstable_osd.osd_num,
@@ -750,7 +750,7 @@ void osd_t::continue_primary_del(osd_op_t *cur_op)
return;
}
osd_primary_op_data_t *op_data = cur_op->op_data;
auto & pg = pgs[{ .pool_id = INODE_POOL(op_data->oid.inode), .pg_num = op_data->pg_num }];
auto & pg = pgs.at({ .pool_id = INODE_POOL(op_data->oid.inode), .pg_num = op_data->pg_num });
if (op_data->st == 1) goto resume_1;
else if (op_data->st == 2) goto resume_2;
else if (op_data->st == 3) goto resume_3;

View File

@@ -40,7 +40,7 @@ void osd_t::finish_op(osd_op_t *cur_op, int retval)
{
if (cur_op->op_data->pg_num > 0)
{
auto & pg = pgs[{ .pool_id = INODE_POOL(cur_op->op_data->oid.inode), .pg_num = cur_op->op_data->pg_num }];
auto & pg = pgs.at({ .pool_id = INODE_POOL(cur_op->op_data->oid.inode), .pg_num = cur_op->op_data->pg_num });
pg.inflight--;
assert(pg.inflight >= 0);
if ((pg.state & PG_STOPPING) && pg.inflight == 0 && !pg.flush_batch)

View File

@@ -5,6 +5,9 @@
dd if=/dev/zero of=./testdata/test_osd1.bin bs=1024 count=1 seek=$((1024*1024-1))
dd if=/dev/zero of=./testdata/test_osd2.bin bs=1024 count=1 seek=$((1024*1024-1))
dd if=/dev/zero of=./testdata/test_osd3.bin bs=1024 count=1 seek=$((1024*1024-1))
dd if=/dev/zero of=./testdata/test_osd4.bin bs=1024 count=1 seek=$((1024*1024-1))
dd if=/dev/zero of=./testdata/test_osd5.bin bs=1024 count=1 seek=$((1024*1024-1))
dd if=/dev/zero of=./testdata/test_osd6.bin bs=1024 count=1 seek=$((1024*1024-1))
build/src/vitastor-osd --osd_num 1 --bind_address 127.0.0.1 --etcd_address $ETCD_URL $(node mon/simple-offsets.js --format options --device ./testdata/test_osd1.bin 2>/dev/null) &>./testdata/osd1.log &
OSD1_PID=$!
@@ -12,18 +15,26 @@ build/src/vitastor-osd --osd_num 2 --bind_address 127.0.0.1 --etcd_address $ETCD
OSD2_PID=$!
build/src/vitastor-osd --osd_num 3 --bind_address 127.0.0.1 --etcd_address $ETCD_URL $(node mon/simple-offsets.js --format options --device ./testdata/test_osd3.bin 2>/dev/null) &>./testdata/osd3.log &
OSD3_PID=$!
build/src/vitastor-osd --osd_num 4 --bind_address 127.0.0.1 --etcd_address $ETCD_URL $(node mon/simple-offsets.js --format options --device ./testdata/test_osd4.bin 2>/dev/null) &>./testdata/osd4.log &
OSD4_PID=$!
build/src/vitastor-osd --osd_num 5 --bind_address 127.0.0.1 --etcd_address $ETCD_URL $(node mon/simple-offsets.js --format options --device ./testdata/test_osd5.bin 2>/dev/null) &>./testdata/osd5.log &
OSD5_PID=$!
build/src/vitastor-osd --osd_num 6 --bind_address 127.0.0.1 --etcd_address $ETCD_URL $(node mon/simple-offsets.js --format options --device ./testdata/test_osd6.bin 2>/dev/null) &>./testdata/osd6.log &
OSD6_PID=$!
cd mon
npm install
cd ..
node mon/mon-main.js --etcd_url http://$ETCD_URL --etcd_prefix "/vitastor" &>./testdata/mon.log &
node mon/mon-main.js --etcd_url http://$ETCD_URL --etcd_prefix "/vitastor" --verbose 1 &>./testdata/mon.log &
MON_PID=$!
$ETCDCTL put /vitastor/config/pools '{"1":{"name":"testpool","scheme":"replicated","pg_size":3,"pg_minsize":2,"pg_count":16,"failure_domain":"osd"}}'
$ETCDCTL put /vitastor/config/global '{"immediate_commit":"all"}'
$ETCDCTL put /vitastor/config/pools '{"1":{"name":"testpool","scheme":"replicated","pg_size":2,"pg_minsize":2,"pg_count":16,"failure_domain":"osd"}}'
sleep 2
if ! ($ETCDCTL get /vitastor/config/pgs --print-value-only | jq -s -e '(.[0].items["1"] | map((.osd_set | sort) == ["1","2","3"]) | length) == 16'); then
if ! ($ETCDCTL get /vitastor/config/pgs --print-value-only | jq -s -e '(.[0].items["1"] | map((.osd_set | select(. > 0)) | length == 2) | length) == 16'); then
format_error "FAILED: 16 PGS NOT CONFIGURED"
fi
@@ -31,20 +42,31 @@ if ! ($ETCDCTL get --prefix /vitastor/pg/state/ --print-value-only | jq -s -e '(
format_error "FAILED: 16 PGS NOT UP"
fi
LD_PRELOAD=libasan.so.5 \
fio -thread -name=test -ioengine=build/src/libfio_vitastor.so -bs=4M -direct=1 -iodepth=1 -fsync=1 -rw=write \
-etcd=$ETCD_URL -pool=1 -inode=2 -size=128M -cluster_log_level=10
try_change()
{
n=$1
$ETCDCTL put /vitastor/config/pools '{"1":{"name":"testpool","scheme":"replicated","pg_size":3,"pg_minsize":2,"pg_count":'$n',"failure_domain":"osd"}}'
$ETCDCTL put /vitastor/config/pools '{"1":{"name":"testpool","scheme":"replicated","pg_size":2,"pg_minsize":2,"pg_count":'$n',"failure_domain":"osd"}}'
for i in {1..10}; do
($ETCDCTL get /vitastor/config/pgs --print-value-only | jq -s -e '(.[0].items["1"] | map((.osd_set | sort) == ["1","2","3"]) | length) == '$n) && \
($ETCDCTL get --prefix /vitastor/pg/state/ --print-value-only | jq -s -e '([ .[] | select(.state == ["active"]) ] | length) == '$n'') && \
($ETCDCTL get /vitastor/config/pgs --print-value-only | jq -s -e '(.[0].items["1"] | map((.osd_set | select(. > 0)) | length == 2) | length) == '$n) && \
($ETCDCTL get --prefix /vitastor/pg/state/ --print-value-only | jq -s -e '([ .[] | select(.state == ["active"] or .state == ["active", "has_misplaced"]) ] | length) == '$n'') && \
break
sleep 1
done
if ! ($ETCDCTL get /vitastor/config/pgs --print-value-only | jq -s -e '(.[0].items["1"] | map((.osd_set | sort) == ["1","2","3"]) | length) == '$n); then
# Wait for the rebalance to finish
for i in {1..60}; do
($ETCDCTL get --prefix /vitastor/pg/state/ --print-value-only | jq -s -e '([ .[] | select(.state == ["active"]) ] | length) == '$n'') && \
break
sleep 1
done
if ! ($ETCDCTL get /vitastor/config/pgs --print-value-only | jq -s -e '(.[0].items["1"] | map((.osd_set | select(. > 0)) | length == 2) | length) == '$n); then
$ETCDCTL get /vitastor/config/pgs
$ETCDCTL get --prefix /vitastor/pg/state/
format_error "FAILED: $n PGS NOT CONFIGURED"
@@ -77,4 +99,10 @@ try_change 17
try_change 16
# Monitor should report non-zero overall statistics at least once
if ! (grep /vitastor/stats ./testdata/mon.log | jq -s -e '[ .[] | select((.kv.value.op_stats.primary_write.count | tonumber) > 0) ] | length > 0'); then
format_error "FAILED: monitor doesn't aggregate stats"
fi
format_green OK