Compare commits
10 Commits
Author | SHA1 | Date | |
---|---|---|---|
c777a0041a | |||
2947ea93e8 | |||
978bdc128a | |||
bb2f395f1e | |||
b127da40f7 | |||
ca34a6047a | |||
38ba76e893 | |||
1e3c4edea0 | |||
e7ac855b07 | |||
c53357ac45 |
@@ -395,7 +395,7 @@ jobs:
|
||||
steps:
|
||||
- name: Run test
|
||||
id: test
|
||||
timeout-minutes: 3
|
||||
timeout-minutes: 6
|
||||
run: SCHEME=ec /root/vitastor/tests/test_snapshot_chain.sh
|
||||
- name: Print logs
|
||||
if: always() && steps.test.outcome == 'failure'
|
||||
|
@@ -39,6 +39,10 @@ for my $line (<>)
|
||||
$test_name .= '_'.lc($1).'_'.$2;
|
||||
}
|
||||
}
|
||||
if ($test_name eq 'test_snapshot_chain_ec')
|
||||
{
|
||||
$timeout = 6;
|
||||
}
|
||||
$line =~ s!\./test_!/root/vitastor/tests/test_!;
|
||||
# Gitea CI doesn't support artifacts yet, lol
|
||||
#- name: Upload results
|
||||
|
@@ -2,6 +2,6 @@ cmake_minimum_required(VERSION 2.8.12)
|
||||
|
||||
project(vitastor)
|
||||
|
||||
set(VERSION "1.4.3")
|
||||
set(VERSION "1.4.4")
|
||||
|
||||
add_subdirectory(src)
|
||||
|
@@ -1,4 +1,4 @@
|
||||
VERSION ?= v1.4.3
|
||||
VERSION ?= v1.4.4
|
||||
|
||||
all: build push
|
||||
|
||||
|
@@ -49,7 +49,7 @@ spec:
|
||||
capabilities:
|
||||
add: ["SYS_ADMIN"]
|
||||
allowPrivilegeEscalation: true
|
||||
image: vitalif/vitastor-csi:v1.4.3
|
||||
image: vitalif/vitastor-csi:v1.4.4
|
||||
args:
|
||||
- "--node=$(NODE_ID)"
|
||||
- "--endpoint=$(CSI_ENDPOINT)"
|
||||
|
@@ -121,7 +121,7 @@ spec:
|
||||
privileged: true
|
||||
capabilities:
|
||||
add: ["SYS_ADMIN"]
|
||||
image: vitalif/vitastor-csi:v1.4.3
|
||||
image: vitalif/vitastor-csi:v1.4.4
|
||||
args:
|
||||
- "--node=$(NODE_ID)"
|
||||
- "--endpoint=$(CSI_ENDPOINT)"
|
||||
|
@@ -5,7 +5,7 @@ package vitastor
|
||||
|
||||
const (
|
||||
vitastorCSIDriverName = "csi.vitastor.io"
|
||||
vitastorCSIDriverVersion = "1.4.3"
|
||||
vitastorCSIDriverVersion = "1.4.4"
|
||||
)
|
||||
|
||||
// Config struct fills the parameters of request or user input
|
||||
|
2
debian/changelog
vendored
2
debian/changelog
vendored
@@ -1,4 +1,4 @@
|
||||
vitastor (1.4.3-1) unstable; urgency=medium
|
||||
vitastor (1.4.4-1) unstable; urgency=medium
|
||||
|
||||
* Bugfixes
|
||||
|
||||
|
8
debian/vitastor.Dockerfile
vendored
8
debian/vitastor.Dockerfile
vendored
@@ -35,8 +35,8 @@ RUN set -e -x; \
|
||||
mkdir -p /root/packages/vitastor-$REL; \
|
||||
rm -rf /root/packages/vitastor-$REL/*; \
|
||||
cd /root/packages/vitastor-$REL; \
|
||||
cp -r /root/vitastor vitastor-1.4.3; \
|
||||
cd vitastor-1.4.3; \
|
||||
cp -r /root/vitastor vitastor-1.4.4; \
|
||||
cd vitastor-1.4.4; \
|
||||
ln -s /root/fio-build/fio-*/ ./fio; \
|
||||
FIO=$(head -n1 fio/debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
|
||||
ls /usr/include/linux/raw.h || cp ./debian/raw.h /usr/include/linux/raw.h; \
|
||||
@@ -49,8 +49,8 @@ RUN set -e -x; \
|
||||
rm -rf a b; \
|
||||
echo "dep:fio=$FIO" > debian/fio_version; \
|
||||
cd /root/packages/vitastor-$REL; \
|
||||
tar --sort=name --mtime='2020-01-01' --owner=0 --group=0 --exclude=debian -cJf vitastor_1.4.3.orig.tar.xz vitastor-1.4.3; \
|
||||
cd vitastor-1.4.3; \
|
||||
tar --sort=name --mtime='2020-01-01' --owner=0 --group=0 --exclude=debian -cJf vitastor_1.4.4.orig.tar.xz vitastor-1.4.4; \
|
||||
cd vitastor-1.4.4; \
|
||||
V=$(head -n1 debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
|
||||
DEBFULLNAME="Vitaliy Filippov <vitalif@yourcmc.ru>" dch -D $REL -v "$V""$REL" "Rebuild for $REL"; \
|
||||
DEB_BUILD_OPTIONS=nocheck dpkg-buildpackage --jobs=auto -sa; \
|
||||
|
@@ -19,8 +19,8 @@ These parameters only apply to Monitors.
|
||||
## etcd_mon_ttl
|
||||
|
||||
- Type: seconds
|
||||
- Default: 30
|
||||
- Minimum: 10
|
||||
- Default: 1
|
||||
- Minimum: 5
|
||||
|
||||
Monitor etcd lease refresh interval in seconds
|
||||
|
||||
|
@@ -19,8 +19,8 @@
|
||||
## etcd_mon_ttl
|
||||
|
||||
- Тип: секунды
|
||||
- Значение по умолчанию: 30
|
||||
- Минимальное значение: 10
|
||||
- Значение по умолчанию: 1
|
||||
- Минимальное значение: 5
|
||||
|
||||
Интервал обновления etcd резервации (lease) монитором
|
||||
|
||||
|
@@ -215,8 +215,8 @@ is scheduled.
|
||||
## up_wait_retry_interval
|
||||
|
||||
- Type: milliseconds
|
||||
- Default: 500
|
||||
- Minimum: 50
|
||||
- Default: 50
|
||||
- Minimum: 10
|
||||
- Can be changed online: yes
|
||||
|
||||
OSDs respond to clients with a special error code when they receive I/O
|
||||
|
@@ -224,8 +224,8 @@ OSD в любом случае согласовывают реальное зн
|
||||
## up_wait_retry_interval
|
||||
|
||||
- Тип: миллисекунды
|
||||
- Значение по умолчанию: 500
|
||||
- Минимальное значение: 50
|
||||
- Значение по умолчанию: 50
|
||||
- Минимальное значение: 10
|
||||
- Можно менять на лету: да
|
||||
|
||||
Когда OSD получают от клиентов запросы ввода-вывода, относящиеся к не
|
||||
|
@@ -59,6 +59,7 @@ them, even without restarting by updating configuration in etcd.
|
||||
- [recovery_tune_client_util_high](#recovery_tune_client_util_high)
|
||||
- [recovery_tune_agg_interval](#recovery_tune_agg_interval)
|
||||
- [recovery_tune_sleep_min_us](#recovery_tune_sleep_min_us)
|
||||
- [recovery_tune_sleep_cutoff_us](#recovery_tune_sleep_cutoff_us)
|
||||
|
||||
## etcd_report_interval
|
||||
|
||||
@@ -604,5 +605,14 @@ is usually fine.
|
||||
- Default: 10
|
||||
- Can be changed online: yes
|
||||
|
||||
Minimum possible value for auto-tuned recovery_sleep_us. Values lower
|
||||
than this value are changed to 0.
|
||||
Minimum possible value for auto-tuned recovery_sleep_us. Lower values
|
||||
are changed to 0.
|
||||
|
||||
## recovery_tune_sleep_cutoff_us
|
||||
|
||||
- Type: microseconds
|
||||
- Default: 10000000
|
||||
- Can be changed online: yes
|
||||
|
||||
Maximum possible value for auto-tuned recovery_sleep_us. Higher values
|
||||
are treated as outliers and ignored in aggregation.
|
||||
|
@@ -60,6 +60,7 @@
|
||||
- [recovery_tune_client_util_high](#recovery_tune_client_util_high)
|
||||
- [recovery_tune_agg_interval](#recovery_tune_agg_interval)
|
||||
- [recovery_tune_sleep_min_us](#recovery_tune_sleep_min_us)
|
||||
- [recovery_tune_sleep_cutoff_us](#recovery_tune_sleep_cutoff_us)
|
||||
|
||||
## etcd_report_interval
|
||||
|
||||
@@ -634,4 +635,14 @@ EC (кодов коррекции ошибок) с более, чем 1 диск
|
||||
- Можно менять на лету: да
|
||||
|
||||
Минимальное возможное значение авто-подстроенного recovery_sleep_us.
|
||||
Значения ниже данного заменяются на 0.
|
||||
Меньшие значения заменяются на 0.
|
||||
|
||||
## recovery_tune_sleep_cutoff_us
|
||||
|
||||
- Тип: микросекунды
|
||||
- Значение по умолчанию: 10000000
|
||||
- Можно менять на лету: да
|
||||
|
||||
Максимальное возможное значение авто-подстроенного recovery_sleep_us.
|
||||
Большие значения считаются случайными выбросами и игнорируются в
|
||||
усреднении.
|
||||
|
@@ -731,8 +731,19 @@
|
||||
default: 10
|
||||
online: true
|
||||
info: |
|
||||
Minimum possible value for auto-tuned recovery_sleep_us. Values lower
|
||||
than this value are changed to 0.
|
||||
Minimum possible value for auto-tuned recovery_sleep_us. Lower values
|
||||
are changed to 0.
|
||||
info_ru: |
|
||||
Минимальное возможное значение авто-подстроенного recovery_sleep_us.
|
||||
Значения ниже данного заменяются на 0.
|
||||
Меньшие значения заменяются на 0.
|
||||
- name: recovery_tune_sleep_cutoff_us
|
||||
type: us
|
||||
default: 10000000
|
||||
online: true
|
||||
info: |
|
||||
Maximum possible value for auto-tuned recovery_sleep_us. Higher values
|
||||
are treated as outliers and ignored in aggregation.
|
||||
info_ru: |
|
||||
Максимальное возможное значение авто-подстроенного recovery_sleep_us.
|
||||
Большие значения считаются случайными выбросами и игнорируются в
|
||||
усреднении.
|
||||
|
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "vitastor-mon",
|
||||
"version": "1.4.3",
|
||||
"version": "1.4.4",
|
||||
"description": "Vitastor SDS monitor service",
|
||||
"main": "mon-main.js",
|
||||
"scripts": {
|
||||
|
@@ -50,7 +50,7 @@ from cinder.volume import configuration
|
||||
from cinder.volume import driver
|
||||
from cinder.volume import volume_utils
|
||||
|
||||
VERSION = '1.4.3'
|
||||
VERSION = '1.4.4'
|
||||
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
|
@@ -24,4 +24,4 @@ rm fio
|
||||
mv fio-copy fio
|
||||
FIO=`rpm -qi fio | perl -e 'while(<>) { /^Epoch[\s:]+(\S+)/ && print "$1:"; /^Version[\s:]+(\S+)/ && print $1; /^Release[\s:]+(\S+)/ && print "-$1"; }'`
|
||||
perl -i -pe 's/(Requires:\s*fio)([^\n]+)?/$1 = '$FIO'/' $VITASTOR/rpm/vitastor-el$EL.spec
|
||||
tar --transform 's#^#vitastor-1.4.3/#' --exclude 'rpm/*.rpm' -czf $VITASTOR/../vitastor-1.4.3$(rpm --eval '%dist').tar.gz *
|
||||
tar --transform 's#^#vitastor-1.4.4/#' --exclude 'rpm/*.rpm' -czf $VITASTOR/../vitastor-1.4.4$(rpm --eval '%dist').tar.gz *
|
||||
|
@@ -36,7 +36,7 @@ ADD . /root/vitastor
|
||||
RUN set -e; \
|
||||
cd /root/vitastor/rpm; \
|
||||
sh build-tarball.sh; \
|
||||
cp /root/vitastor-1.4.3.el7.tar.gz ~/rpmbuild/SOURCES; \
|
||||
cp /root/vitastor-1.4.4.el7.tar.gz ~/rpmbuild/SOURCES; \
|
||||
cp vitastor-el7.spec ~/rpmbuild/SPECS/vitastor.spec; \
|
||||
cd ~/rpmbuild/SPECS/; \
|
||||
rpmbuild -ba vitastor.spec; \
|
||||
|
@@ -1,11 +1,11 @@
|
||||
Name: vitastor
|
||||
Version: 1.4.3
|
||||
Version: 1.4.4
|
||||
Release: 1%{?dist}
|
||||
Summary: Vitastor, a fast software-defined clustered block storage
|
||||
|
||||
License: Vitastor Network Public License 1.1
|
||||
URL: https://vitastor.io/
|
||||
Source0: vitastor-1.4.3.el7.tar.gz
|
||||
Source0: vitastor-1.4.4.el7.tar.gz
|
||||
|
||||
BuildRequires: liburing-devel >= 0.6
|
||||
BuildRequires: gperftools-devel
|
||||
|
@@ -35,7 +35,7 @@ ADD . /root/vitastor
|
||||
RUN set -e; \
|
||||
cd /root/vitastor/rpm; \
|
||||
sh build-tarball.sh; \
|
||||
cp /root/vitastor-1.4.3.el8.tar.gz ~/rpmbuild/SOURCES; \
|
||||
cp /root/vitastor-1.4.4.el8.tar.gz ~/rpmbuild/SOURCES; \
|
||||
cp vitastor-el8.spec ~/rpmbuild/SPECS/vitastor.spec; \
|
||||
cd ~/rpmbuild/SPECS/; \
|
||||
rpmbuild -ba vitastor.spec; \
|
||||
|
@@ -1,11 +1,11 @@
|
||||
Name: vitastor
|
||||
Version: 1.4.3
|
||||
Version: 1.4.4
|
||||
Release: 1%{?dist}
|
||||
Summary: Vitastor, a fast software-defined clustered block storage
|
||||
|
||||
License: Vitastor Network Public License 1.1
|
||||
URL: https://vitastor.io/
|
||||
Source0: vitastor-1.4.3.el8.tar.gz
|
||||
Source0: vitastor-1.4.4.el8.tar.gz
|
||||
|
||||
BuildRequires: liburing-devel >= 0.6
|
||||
BuildRequires: gperftools-devel
|
||||
|
@@ -18,7 +18,7 @@ ADD . /root/vitastor
|
||||
RUN set -e; \
|
||||
cd /root/vitastor/rpm; \
|
||||
sh build-tarball.sh; \
|
||||
cp /root/vitastor-1.4.3.el9.tar.gz ~/rpmbuild/SOURCES; \
|
||||
cp /root/vitastor-1.4.4.el9.tar.gz ~/rpmbuild/SOURCES; \
|
||||
cp vitastor-el9.spec ~/rpmbuild/SPECS/vitastor.spec; \
|
||||
cd ~/rpmbuild/SPECS/; \
|
||||
rpmbuild -ba vitastor.spec; \
|
||||
|
@@ -1,11 +1,11 @@
|
||||
Name: vitastor
|
||||
Version: 1.4.3
|
||||
Version: 1.4.4
|
||||
Release: 1%{?dist}
|
||||
Summary: Vitastor, a fast software-defined clustered block storage
|
||||
|
||||
License: Vitastor Network Public License 1.1
|
||||
URL: https://vitastor.io/
|
||||
Source0: vitastor-1.4.3.el9.tar.gz
|
||||
Source0: vitastor-1.4.4.el9.tar.gz
|
||||
|
||||
BuildRequires: liburing-devel >= 0.6
|
||||
BuildRequires: gperftools-devel
|
||||
|
@@ -16,7 +16,7 @@ if("${CMAKE_INSTALL_PREFIX}" MATCHES "^/usr/local/?$")
|
||||
set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}")
|
||||
endif()
|
||||
|
||||
add_definitions(-DVERSION="1.4.3")
|
||||
add_definitions(-DVERSION="1.4.4")
|
||||
add_definitions(-Wall -Wno-sign-compare -Wno-comment -Wno-parentheses -Wno-pointer-arith -fdiagnostics-color=always -fno-omit-frame-pointer -I ${CMAKE_SOURCE_DIR}/src)
|
||||
add_link_options(-fno-omit-frame-pointer)
|
||||
if (${WITH_ASAN})
|
||||
|
@@ -184,8 +184,7 @@ void journal_flusher_t::mark_trim_possible()
|
||||
if (trim_wanted > 0)
|
||||
{
|
||||
dequeuing = true;
|
||||
if (!journal_trim_counter)
|
||||
journal_trim_counter = journal_trim_interval;
|
||||
journal_trim_counter = 0;
|
||||
bs->ringloop->wakeup();
|
||||
}
|
||||
}
|
||||
@@ -366,7 +365,7 @@ resume_0:
|
||||
!flusher->flush_queue.size() || !flusher->dequeuing)
|
||||
{
|
||||
stop_flusher:
|
||||
if (flusher->trim_wanted > 0 && flusher->journal_trim_counter > 0)
|
||||
if (flusher->trim_wanted > 0 && !flusher->journal_trim_counter)
|
||||
{
|
||||
// Attempt forced trim
|
||||
flusher->active_flushers++;
|
||||
@@ -1346,7 +1345,6 @@ bool journal_flusher_co::trim_journal(int wait_base)
|
||||
else if (wait_state == wait_base+2) goto resume_2;
|
||||
else if (wait_state == wait_base+3) goto resume_3;
|
||||
else if (wait_state == wait_base+4) goto resume_4;
|
||||
flusher->journal_trim_counter = 0;
|
||||
new_trim_pos = bs->journal.get_trim_pos();
|
||||
if (new_trim_pos != bs->journal.used_start)
|
||||
{
|
||||
@@ -1419,6 +1417,7 @@ bool journal_flusher_co::trim_journal(int wait_base)
|
||||
exit(0);
|
||||
}
|
||||
}
|
||||
flusher->journal_trim_counter = 0;
|
||||
flusher->trimming = false;
|
||||
}
|
||||
return true;
|
||||
|
@@ -76,6 +76,7 @@ int blockstore_impl_t::continue_sync(blockstore_op_t *op)
|
||||
// 2nd step: Data device is synced, prepare & write journal entries
|
||||
// Check space in the journal and journal memory buffers
|
||||
blockstore_journal_check_t space_check(this);
|
||||
auto reservation = (unstable_writes.size()+unstable_unsynced+PRIV(op)->sync_big_writes.size())*journal.block_size;
|
||||
if (dsk.csum_block_size)
|
||||
{
|
||||
// More complex check because all journal entries have different lengths
|
||||
@@ -85,15 +86,14 @@ int blockstore_impl_t::continue_sync(blockstore_op_t *op)
|
||||
left--;
|
||||
auto & dirty_entry = dirty_db.at(sbw);
|
||||
uint64_t dyn_size = dsk.dirty_dyn_size(dirty_entry.offset, dirty_entry.len);
|
||||
if (!space_check.check_available(op, 1, sizeof(journal_entry_big_write) + dyn_size, 0))
|
||||
if (!space_check.check_available(op, 1, sizeof(journal_entry_big_write) + dyn_size, left ? 0 : reservation))
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (!space_check.check_available(op, PRIV(op)->sync_big_writes.size(),
|
||||
sizeof(journal_entry_big_write) + dsk.clean_entry_bitmap_size,
|
||||
(unstable_writes.size()+unstable_unsynced)*journal.block_size))
|
||||
sizeof(journal_entry_big_write) + dsk.clean_entry_bitmap_size, reservation))
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
@@ -320,7 +320,7 @@ int blockstore_impl_t::dequeue_write(blockstore_op_t *op)
|
||||
blockstore_journal_check_t space_check(this);
|
||||
if (!space_check.check_available(op, unsynced_big_write_count + 1,
|
||||
sizeof(journal_entry_big_write) + dsk.clean_dyn_size,
|
||||
(unstable_writes.size()+unstable_unsynced)*journal.block_size))
|
||||
(unstable_writes.size()+unstable_unsynced+((dirty_it->second.state & BS_ST_INSTANT) ? 0 : 1))*journal.block_size))
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
@@ -412,7 +412,7 @@ int blockstore_impl_t::dequeue_write(blockstore_op_t *op)
|
||||
sizeof(journal_entry_big_write) + dsk.clean_dyn_size, 0)
|
||||
|| !space_check.check_available(op, 1,
|
||||
sizeof(journal_entry_small_write) + dyn_size,
|
||||
op->len + (unstable_writes.size()+unstable_unsynced)*journal.block_size))
|
||||
op->len + (unstable_writes.size()+unstable_unsynced+((dirty_it->second.state & BS_ST_INSTANT) ? 0 : 1))*journal.block_size))
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
@@ -549,7 +549,7 @@ resume_2:
|
||||
uint64_t dyn_size = dsk.dirty_dyn_size(op->offset, op->len);
|
||||
blockstore_journal_check_t space_check(this);
|
||||
if (!space_check.check_available(op, 1, sizeof(journal_entry_big_write) + dyn_size,
|
||||
(unstable_writes.size()+unstable_unsynced)*journal.block_size))
|
||||
(unstable_writes.size()+unstable_unsynced+((dirty_it->second.state & BS_ST_INSTANT) ? 0 : 1))*journal.block_size))
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
@@ -106,7 +106,7 @@ resume_2:
|
||||
if (etcd_states[i]["error"].is_null())
|
||||
{
|
||||
etcd_alive++;
|
||||
etcd_db_size = etcd_states[i]["dbSizeInUse"].uint64_value();
|
||||
etcd_db_size = etcd_states[i]["dbSize"].uint64_value();
|
||||
}
|
||||
}
|
||||
int mon_count = 0;
|
||||
|
@@ -233,6 +233,8 @@ void osd_t::parse_config(bool init)
|
||||
? 10 : config["recovery_tune_agg_interval"].uint64_value();
|
||||
recovery_tune_sleep_min_us = config["recovery_tune_sleep_min_us"].is_null()
|
||||
? 10 : config["recovery_tune_sleep_min_us"].uint64_value();
|
||||
recovery_tune_sleep_cutoff_us = config["recovery_tune_sleep_cutoff_us"].is_null()
|
||||
? 10000000 : config["recovery_tune_sleep_cutoff_us"].uint64_value();
|
||||
recovery_pg_switch = config["recovery_pg_switch"].uint64_value();
|
||||
if (recovery_pg_switch < 1)
|
||||
recovery_pg_switch = DEFAULT_RECOVERY_PG_SWITCH;
|
||||
|
@@ -125,6 +125,7 @@ class osd_t
|
||||
int recovery_tune_interval = 1;
|
||||
int recovery_tune_agg_interval = 10;
|
||||
int recovery_tune_sleep_min_us = 10;
|
||||
int recovery_tune_sleep_cutoff_us = 10000000;
|
||||
int recovery_pg_switch = DEFAULT_RECOVERY_PG_SWITCH;
|
||||
int recovery_sync_batch = DEFAULT_RECOVERY_BATCH;
|
||||
int inode_vanish_time = 60;
|
||||
@@ -282,6 +283,7 @@ class osd_t
|
||||
void exec_sync_stab_all(osd_op_t *cur_op);
|
||||
void exec_show_config(osd_op_t *cur_op);
|
||||
void exec_secondary(osd_op_t *cur_op);
|
||||
void exec_secondary_real(osd_op_t *cur_op);
|
||||
void secondary_op_callback(osd_op_t *cur_op);
|
||||
|
||||
// primary ops
|
||||
|
@@ -422,6 +422,10 @@ void osd_t::tune_recovery()
|
||||
rtune_avg_lat = total_recovery_usec/recovery_count;
|
||||
uint64_t target_lat = rtune_avg_lat * rtune_avg_lat/1000000.0 * recovery_count/recovery_tune_interval / rtune_target_util;
|
||||
auto sleep_us = target_lat > rtune_avg_lat+recovery_tune_sleep_min_us ? target_lat-rtune_avg_lat : 0;
|
||||
if (sleep_us > recovery_tune_sleep_cutoff_us)
|
||||
{
|
||||
return;
|
||||
}
|
||||
if (recovery_target_sleep_items.size() != recovery_tune_agg_interval)
|
||||
{
|
||||
recovery_target_sleep_items.resize(recovery_tune_agg_interval);
|
||||
@@ -438,7 +442,7 @@ void osd_t::tune_recovery()
|
||||
if (recovery_target_sleep_count < recovery_tune_agg_interval)
|
||||
recovery_target_sleep_count++;
|
||||
recovery_target_sleep_us = recovery_target_sleep_total / recovery_target_sleep_count;
|
||||
if (log_level > 4)
|
||||
if (log_level > 1)
|
||||
{
|
||||
printf(
|
||||
"[OSD %lu] auto-tune: client util: %.2f, recovery util: %.2f, lat: %lu us -> target util %.2f, delay %lu us\n",
|
||||
|
@@ -222,6 +222,9 @@ void osd_t::start_pg_peering(pg_t & pg)
|
||||
}
|
||||
if (pg.pg_cursize < pg.pg_minsize)
|
||||
{
|
||||
// FIXME: Incomplete EC PGs may currently easily lead to write hangs ("slow ops" in OSD logs)
|
||||
// because such PGs don't flush unstable entries on secondary OSDs so they can't remove these
|
||||
// entries from their journals...
|
||||
pg.state = PG_INCOMPLETE;
|
||||
report_pg_state(pg);
|
||||
return;
|
||||
|
@@ -861,15 +861,15 @@ static void calc_rmw_parity_copy_mod(osd_rmw_stripe_t *stripes, int pg_size, int
|
||||
static void calc_rmw_parity_copy_parity(osd_rmw_stripe_t *stripes, int pg_size, int pg_minsize,
|
||||
uint64_t *read_osd_set, uint64_t *write_osd_set, uint32_t chunk_size, uint32_t start, uint32_t end)
|
||||
{
|
||||
if (write_osd_set != read_osd_set)
|
||||
if (write_osd_set != read_osd_set && end != 0)
|
||||
{
|
||||
for (int role = pg_minsize; role < pg_size; role++)
|
||||
{
|
||||
if (write_osd_set[role] != read_osd_set[role] && (start != 0 || end != chunk_size))
|
||||
if (write_osd_set[role] != read_osd_set[role] && write_osd_set[role] != 0 && (start != 0 || end != chunk_size))
|
||||
{
|
||||
// Copy new parity into the read buffer to write it back
|
||||
memcpy(
|
||||
(uint8_t*)stripes[role].read_buf + start,
|
||||
(uint8_t*)stripes[role].read_buf + start - stripes[role].read_start,
|
||||
stripes[role].write_buf,
|
||||
end - start
|
||||
);
|
||||
|
@@ -30,6 +30,7 @@ void test16();
|
||||
void test_recover_22_d2();
|
||||
void test_ec43_error_bruteforce();
|
||||
void test_recover_53_d5();
|
||||
void test_recover_22();
|
||||
|
||||
int main(int narg, char *args[])
|
||||
{
|
||||
@@ -70,6 +71,8 @@ int main(int narg, char *args[])
|
||||
test_ec43_error_bruteforce();
|
||||
// Test 19
|
||||
test_recover_53_d5();
|
||||
// Test 20
|
||||
test_recover_22();
|
||||
// End
|
||||
printf("all ok\n");
|
||||
return 0;
|
||||
@@ -1244,3 +1247,99 @@ void test_recover_53_d5()
|
||||
// Done
|
||||
use_ec(8, 5, false);
|
||||
}
|
||||
|
||||
void test_recover_22()
|
||||
{
|
||||
const int bmp = 128*1024 / 4096 / 8;
|
||||
use_ec(4, 2, true);
|
||||
osd_num_t osd_set[4] = { 1, 2, 3, 4 };
|
||||
osd_num_t write_osd_set[4] = { 5, 0, 3, 0 };
|
||||
osd_rmw_stripe_t stripes[4] = {};
|
||||
unsigned bitmaps[4] = { 0 };
|
||||
// split
|
||||
void *write_buf = (uint8_t*)malloc_or_die(4096);
|
||||
set_pattern(write_buf, 4096, PATTERN0);
|
||||
split_stripes(2, 128*1024, 120*1024, 4096, stripes);
|
||||
assert(stripes[0].req_start == 120*1024 && stripes[0].req_end == 124*1024);
|
||||
assert(stripes[1].req_start == 0 && stripes[1].req_end == 0);
|
||||
assert(stripes[2].req_start == 0 && stripes[2].req_end == 0);
|
||||
assert(stripes[3].req_start == 0 && stripes[3].req_end == 0);
|
||||
// calc_rmw
|
||||
void *rmw_buf = calc_rmw(write_buf, stripes, osd_set, 4, 2, 2, write_osd_set, 128*1024, bmp);
|
||||
for (int i = 0; i < 4; i++)
|
||||
stripes[i].bmp_buf = bitmaps+i;
|
||||
assert(rmw_buf);
|
||||
assert(stripes[0].read_start == 0 && stripes[0].read_end == 128*1024);
|
||||
assert(stripes[1].read_start == 120*1024 && stripes[1].read_end == 124*1024);
|
||||
assert(stripes[2].read_start == 0 && stripes[2].read_end == 0);
|
||||
assert(stripes[3].read_start == 0 && stripes[3].read_end == 0);
|
||||
assert(stripes[0].write_start == 120*1024 && stripes[0].write_end == 124*1024);
|
||||
assert(stripes[1].write_start == 0 && stripes[1].write_end == 0);
|
||||
assert(stripes[2].write_start == 120*1024 && stripes[2].write_end == 124*1024);
|
||||
assert(stripes[3].write_start == 0 && stripes[3].write_end == 0);
|
||||
assert(stripes[0].read_buf == (uint8_t*)rmw_buf+4*1024);
|
||||
assert(stripes[1].read_buf == (uint8_t*)rmw_buf+132*1024);
|
||||
assert(stripes[2].read_buf == NULL);
|
||||
assert(stripes[3].read_buf == NULL);
|
||||
assert(stripes[0].write_buf == write_buf);
|
||||
assert(stripes[1].write_buf == NULL);
|
||||
assert(stripes[2].write_buf == (uint8_t*)rmw_buf);
|
||||
assert(stripes[3].write_buf == NULL);
|
||||
// encode
|
||||
set_pattern(stripes[0].read_buf, 128*1024, PATTERN1);
|
||||
set_pattern(stripes[1].read_buf, 4*1024, PATTERN2);
|
||||
memset(stripes[0].bmp_buf, 0xff, bmp);
|
||||
memset(stripes[1].bmp_buf, 0xff, bmp);
|
||||
calc_rmw_parity_ec(stripes, 4, 2, osd_set, write_osd_set, 128*1024, bmp);
|
||||
assert(*(uint32_t*)stripes[2].bmp_buf == 0);
|
||||
assert(stripes[0].write_start == 0 && stripes[0].write_end == 128*1024);
|
||||
assert(stripes[1].write_start == 0 && stripes[1].write_end == 0);
|
||||
assert(stripes[2].write_start == 120*1024 && stripes[2].write_end == 124*1024);
|
||||
assert(stripes[3].write_start == 0 && stripes[3].write_end == 0);
|
||||
assert(stripes[0].write_buf == stripes[0].read_buf);
|
||||
assert(stripes[1].write_buf == NULL);
|
||||
assert(stripes[2].write_buf == (uint8_t*)rmw_buf);
|
||||
assert(stripes[3].write_buf == NULL);
|
||||
check_pattern(stripes[2].write_buf, 4*1024, PATTERN0^PATTERN2);
|
||||
// decode and verify
|
||||
memset(stripes, 0, sizeof(stripes));
|
||||
split_stripes(2, 128*1024, 0, 256*1024, stripes);
|
||||
assert(stripes[0].req_start == 0 && stripes[0].req_end == 128*1024);
|
||||
assert(stripes[1].req_start == 0 && stripes[1].req_end == 128*1024);
|
||||
assert(stripes[2].req_start == 0 && stripes[2].req_end == 0);
|
||||
assert(stripes[3].req_start == 0 && stripes[3].req_end == 0);
|
||||
for (int role = 0; role < 4; role++)
|
||||
{
|
||||
stripes[role].read_start = stripes[role].req_start;
|
||||
stripes[role].read_end = stripes[role].req_end;
|
||||
}
|
||||
assert(extend_missing_stripes(stripes, write_osd_set, 2, 4) == 0);
|
||||
assert(stripes[0].read_start == 0 && stripes[0].read_end == 128*1024);
|
||||
assert(stripes[1].read_start == 0 && stripes[1].read_end == 128*1024);
|
||||
assert(stripes[2].read_start == 0 && stripes[2].read_end == 128*1024);
|
||||
assert(stripes[3].read_start == 0 && stripes[3].read_end == 0);
|
||||
void *read_buf = alloc_read_buffer(stripes, 4, 0);
|
||||
for (int i = 0; i < 4; i++)
|
||||
stripes[i].bmp_buf = bitmaps+i;
|
||||
assert(read_buf);
|
||||
assert(stripes[0].read_buf == read_buf);
|
||||
assert(stripes[1].read_buf == (uint8_t*)read_buf+128*1024);
|
||||
assert(stripes[2].read_buf == (uint8_t*)read_buf+2*128*1024);
|
||||
set_pattern(stripes[0].read_buf, 128*1024, PATTERN1);
|
||||
set_pattern(stripes[0].read_buf+120*1024, 4*1024, PATTERN0);
|
||||
set_pattern(stripes[2].read_buf, 128*1024, PATTERN1^PATTERN2);
|
||||
set_pattern(stripes[2].read_buf+120*1024, 4*1024, PATTERN0^PATTERN2);
|
||||
memset(stripes[0].bmp_buf, 0xff, bmp);
|
||||
memset(stripes[2].bmp_buf, 0, bmp);
|
||||
bitmaps[1] = 0;
|
||||
bitmaps[3] = 0;
|
||||
reconstruct_stripes_ec(stripes, 4, 2, bmp);
|
||||
assert(bitmaps[0] == 0xFFFFFFFF);
|
||||
assert(*(uint32_t*)stripes[1].bmp_buf == 0xFFFFFFFF);
|
||||
check_pattern(stripes[1].read_buf, 128*1024, PATTERN2);
|
||||
free(read_buf);
|
||||
// Done
|
||||
free(rmw_buf);
|
||||
free(write_buf);
|
||||
use_ec(4, 2, false);
|
||||
}
|
||||
|
@@ -42,8 +42,10 @@ void osd_t::secondary_op_callback(osd_op_t *op)
|
||||
int retval = op->bs_op->retval;
|
||||
delete op->bs_op;
|
||||
op->bs_op = NULL;
|
||||
if (op->is_recovery_related() && recovery_target_sleep_us)
|
||||
if (op->is_recovery_related() && recovery_target_sleep_us &&
|
||||
op->req.hdr.opcode == OSD_OP_SEC_STABILIZE)
|
||||
{
|
||||
// Apply pause AFTER commit. Do not apply pause to SYNC at all
|
||||
if (!op->tv_end.tv_sec)
|
||||
{
|
||||
clock_gettime(CLOCK_REALTIME, &op->tv_end);
|
||||
@@ -59,7 +61,25 @@ void osd_t::secondary_op_callback(osd_op_t *op)
|
||||
}
|
||||
}
|
||||
|
||||
void osd_t::exec_secondary(osd_op_t *cur_op)
|
||||
void osd_t::exec_secondary(osd_op_t *op)
|
||||
{
|
||||
if (op->is_recovery_related() && recovery_target_sleep_us &&
|
||||
op->req.hdr.opcode != OSD_OP_SEC_STABILIZE && op->req.hdr.opcode != OSD_OP_SEC_SYNC)
|
||||
{
|
||||
// Apply pause BEFORE write/delete
|
||||
tfd->set_timer_us(recovery_target_sleep_us, false, [this, op](int timer_id)
|
||||
{
|
||||
clock_gettime(CLOCK_REALTIME, &op->tv_begin);
|
||||
exec_secondary_real(op);
|
||||
});
|
||||
}
|
||||
else
|
||||
{
|
||||
exec_secondary_real(op);
|
||||
}
|
||||
}
|
||||
|
||||
void osd_t::exec_secondary_real(osd_op_t *cur_op)
|
||||
{
|
||||
if (cur_op->req.hdr.opcode == OSD_OP_SEC_READ_BMP)
|
||||
{
|
||||
|
@@ -6,7 +6,7 @@ includedir=${prefix}/@CMAKE_INSTALL_INCLUDEDIR@
|
||||
|
||||
Name: Vitastor
|
||||
Description: Vitastor client library
|
||||
Version: 1.4.3
|
||||
Version: 1.4.4
|
||||
Libs: -L${libdir} -lvitastor_client
|
||||
Cflags: -I${includedir}
|
||||
|
||||
|
Reference in New Issue
Block a user