Compare commits
No commits in common. "62a4f45160e35aaab94414dd8ac42197fd155645" and "f882c7dd87af865e4ea9dc5b925b4f2c551c55ab" have entirely different histories.
62a4f45160
...
f882c7dd87
|
@ -415,7 +415,6 @@ stop_flusher:
|
||||||
flusher->sync_to_repeat.erase(cur.oid);
|
flusher->sync_to_repeat.erase(cur.oid);
|
||||||
if (!flusher->try_find_other(dirty_end, cur))
|
if (!flusher->try_find_other(dirty_end, cur))
|
||||||
{
|
{
|
||||||
cur.oid = {};
|
|
||||||
goto stop_flusher;
|
goto stop_flusher;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -505,7 +505,7 @@ int blockstore_impl_t::dequeue_read(blockstore_op_t *read_op)
|
||||||
for (auto & rv: PRIV(read_op)->read_vec)
|
for (auto & rv: PRIV(read_op)->read_vec)
|
||||||
{
|
{
|
||||||
if (rv.journal_sector)
|
if (rv.journal_sector)
|
||||||
journal.used_sectors.at(rv.journal_sector-1)++;
|
journal.used_sectors[rv.journal_sector-1]++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
read_op->retval = 0;
|
read_op->retval = 0;
|
||||||
|
@ -966,7 +966,7 @@ void blockstore_impl_t::handle_read_event(ring_data_t *data, blockstore_op_t *op
|
||||||
{
|
{
|
||||||
if (rv.journal_sector)
|
if (rv.journal_sector)
|
||||||
{
|
{
|
||||||
auto used = --journal.used_sectors.at(rv.journal_sector-1);
|
auto used = --journal.used_sectors[rv.journal_sector-1];
|
||||||
if (used == 0)
|
if (used == 0)
|
||||||
{
|
{
|
||||||
journal.used_sectors.erase(rv.journal_sector-1);
|
journal.used_sectors.erase(rv.journal_sector-1);
|
||||||
|
|
|
@ -215,7 +215,7 @@ void blockstore_impl_t::erase_dirty(blockstore_dirty_db_t::iterator dirty_start,
|
||||||
#endif
|
#endif
|
||||||
data_alloc->set(dirty_it->second.location >> dsk.block_order, false);
|
data_alloc->set(dirty_it->second.location >> dsk.block_order, false);
|
||||||
}
|
}
|
||||||
auto used = --journal.used_sectors.at(dirty_it->second.journal_sector);
|
auto used = --journal.used_sectors[dirty_it->second.journal_sector];
|
||||||
#ifdef BLOCKSTORE_DEBUG
|
#ifdef BLOCKSTORE_DEBUG
|
||||||
printf(
|
printf(
|
||||||
"remove usage of journal offset %08lx by %lx:%lx v%lu (%lu refs)\n", dirty_it->second.journal_sector,
|
"remove usage of journal offset %08lx by %lx:%lx v%lu (%lu refs)\n", dirty_it->second.journal_sector,
|
||||||
|
@ -225,11 +225,6 @@ void blockstore_impl_t::erase_dirty(blockstore_dirty_db_t::iterator dirty_start,
|
||||||
if (used == 0)
|
if (used == 0)
|
||||||
{
|
{
|
||||||
journal.used_sectors.erase(dirty_it->second.journal_sector);
|
journal.used_sectors.erase(dirty_it->second.journal_sector);
|
||||||
if (dirty_it->second.journal_sector == journal.sector_info[journal.cur_sector].offset)
|
|
||||||
{
|
|
||||||
// Mark current sector as "full" to select the new one
|
|
||||||
journal.in_sector_pos = dsk.journal_block_size;
|
|
||||||
}
|
|
||||||
flusher->mark_trim_possible();
|
flusher->mark_trim_possible();
|
||||||
}
|
}
|
||||||
free_dirty_dyn_data(dirty_it->second);
|
free_dirty_dyn_data(dirty_it->second);
|
||||||
|
|
|
@ -116,10 +116,7 @@ int blockstore_impl_t::continue_sync(blockstore_op_t *op)
|
||||||
journal, (dirty_entry.state & BS_ST_INSTANT) ? JE_BIG_WRITE_INSTANT : JE_BIG_WRITE,
|
journal, (dirty_entry.state & BS_ST_INSTANT) ? JE_BIG_WRITE_INSTANT : JE_BIG_WRITE,
|
||||||
sizeof(journal_entry_big_write) + dyn_size
|
sizeof(journal_entry_big_write) + dyn_size
|
||||||
);
|
);
|
||||||
auto jsec = dirty_entry.journal_sector = journal.sector_info[journal.cur_sector].offset;
|
dirty_entry.journal_sector = journal.sector_info[journal.cur_sector].offset;
|
||||||
assert(journal.next_free >= journal.used_start
|
|
||||||
? (jsec >= journal.used_start && jsec < journal.next_free)
|
|
||||||
: (jsec >= journal.used_start || jsec < journal.next_free));
|
|
||||||
journal.used_sectors[journal.sector_info[journal.cur_sector].offset]++;
|
journal.used_sectors[journal.sector_info[journal.cur_sector].offset]++;
|
||||||
#ifdef BLOCKSTORE_DEBUG
|
#ifdef BLOCKSTORE_DEBUG
|
||||||
printf(
|
printf(
|
||||||
|
|
|
@ -436,19 +436,7 @@ int blockstore_impl_t::dequeue_write(blockstore_op_t *op)
|
||||||
journal, op->opcode == BS_OP_WRITE_STABLE ? JE_SMALL_WRITE_INSTANT : JE_SMALL_WRITE,
|
journal, op->opcode == BS_OP_WRITE_STABLE ? JE_SMALL_WRITE_INSTANT : JE_SMALL_WRITE,
|
||||||
sizeof(journal_entry_small_write) + dyn_size
|
sizeof(journal_entry_small_write) + dyn_size
|
||||||
);
|
);
|
||||||
auto jsec = dirty_it->second.journal_sector = journal.sector_info[journal.cur_sector].offset;
|
dirty_it->second.journal_sector = journal.sector_info[journal.cur_sector].offset;
|
||||||
if (!(journal.next_free >= journal.used_start
|
|
||||||
? (jsec >= journal.used_start && jsec < journal.next_free)
|
|
||||||
: (jsec >= journal.used_start || jsec < journal.next_free)))
|
|
||||||
{
|
|
||||||
printf(
|
|
||||||
"BUG: journal offset %08lx is used by %lx:%lx v%lu (%lu refs) BUT used_start=%lx next_free=%lx\n",
|
|
||||||
dirty_it->second.journal_sector, dirty_it->first.oid.inode, dirty_it->first.oid.stripe, dirty_it->first.version,
|
|
||||||
journal.used_sectors[journal.sector_info[journal.cur_sector].offset],
|
|
||||||
journal.used_start, journal.next_free
|
|
||||||
);
|
|
||||||
abort();
|
|
||||||
}
|
|
||||||
journal.used_sectors[journal.sector_info[journal.cur_sector].offset]++;
|
journal.used_sectors[journal.sector_info[journal.cur_sector].offset]++;
|
||||||
#ifdef BLOCKSTORE_DEBUG
|
#ifdef BLOCKSTORE_DEBUG
|
||||||
printf(
|
printf(
|
||||||
|
@ -570,19 +558,7 @@ resume_2:
|
||||||
journal, op->opcode == BS_OP_WRITE_STABLE ? JE_BIG_WRITE_INSTANT : JE_BIG_WRITE,
|
journal, op->opcode == BS_OP_WRITE_STABLE ? JE_BIG_WRITE_INSTANT : JE_BIG_WRITE,
|
||||||
sizeof(journal_entry_big_write) + dyn_size
|
sizeof(journal_entry_big_write) + dyn_size
|
||||||
);
|
);
|
||||||
auto jsec = dirty_it->second.journal_sector = journal.sector_info[journal.cur_sector].offset;
|
dirty_it->second.journal_sector = journal.sector_info[journal.cur_sector].offset;
|
||||||
if (!(journal.next_free >= journal.used_start
|
|
||||||
? (jsec >= journal.used_start && jsec < journal.next_free)
|
|
||||||
: (jsec >= journal.used_start || jsec < journal.next_free)))
|
|
||||||
{
|
|
||||||
printf(
|
|
||||||
"BUG: journal offset %08lx is used by %lx:%lx v%lu (%lu refs) BUT used_start=%lx next_free=%lx\n",
|
|
||||||
dirty_it->second.journal_sector, dirty_it->first.oid.inode, dirty_it->first.oid.stripe, dirty_it->first.version,
|
|
||||||
journal.used_sectors[journal.sector_info[journal.cur_sector].offset],
|
|
||||||
journal.used_start, journal.next_free
|
|
||||||
);
|
|
||||||
abort();
|
|
||||||
}
|
|
||||||
journal.used_sectors[journal.sector_info[journal.cur_sector].offset]++;
|
journal.used_sectors[journal.sector_info[journal.cur_sector].offset]++;
|
||||||
#ifdef BLOCKSTORE_DEBUG
|
#ifdef BLOCKSTORE_DEBUG
|
||||||
printf(
|
printf(
|
||||||
|
|
|
@ -22,7 +22,7 @@ if [ "$IMMEDIATE_COMMIT" != "" ]; then
|
||||||
NO_SAME="--journal_no_same_sector_overwrites true --journal_sector_buffer_count 1024 --disable_data_fsync 1 --immediate_commit all --log_level 10 --etcd_stats_interval 5"
|
NO_SAME="--journal_no_same_sector_overwrites true --journal_sector_buffer_count 1024 --disable_data_fsync 1 --immediate_commit all --log_level 10 --etcd_stats_interval 5"
|
||||||
$ETCDCTL put /vitastor/config/global '{"recovery_queue_depth":1,"recovery_tune_util_low":1,"immediate_commit":"all","client_enable_writeback":true,"client_max_writeback_iodepth":32'$GLOBAL_CONFIG'}'
|
$ETCDCTL put /vitastor/config/global '{"recovery_queue_depth":1,"recovery_tune_util_low":1,"immediate_commit":"all","client_enable_writeback":true,"client_max_writeback_iodepth":32'$GLOBAL_CONFIG'}'
|
||||||
else
|
else
|
||||||
NO_SAME="--journal_sector_buffer_count 1024 --log_level 10 --etcd_stats_interval 5 --min_flusher_count 16"
|
NO_SAME="--journal_sector_buffer_count 1024 --log_level 10 --etcd_stats_interval 5"
|
||||||
$ETCDCTL put /vitastor/config/global '{"recovery_queue_depth":1,"recovery_tune_util_low":1,"client_enable_writeback":true,"client_max_writeback_iodepth":32'$GLOBAL_CONFIG'}'
|
$ETCDCTL put /vitastor/config/global '{"recovery_queue_depth":1,"recovery_tune_util_low":1,"client_enable_writeback":true,"client_max_writeback_iodepth":32'$GLOBAL_CONFIG'}'
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
|
@ -30,16 +30,14 @@ kill_osds()
|
||||||
kill -9 $OSD1_PID
|
kill -9 $OSD1_PID
|
||||||
$ETCDCTL del /vitastor/osd/state/1
|
$ETCDCTL del /vitastor/osd/state/1
|
||||||
|
|
||||||
for kill_osd in $(seq 2 $OSD_COUNT); do
|
for i in $(seq 2 $OSD_COUNT); do
|
||||||
sleep 15
|
sleep 15
|
||||||
# Wait for all PGs to clear has_degraded - all data will be at least in 2 copies
|
echo Killing OSD $i and starting OSD $((i-1))
|
||||||
wait_condition 60 "$ETCDCTL get /vitastor/pg/state/1/ --prefix --print-value-only |\
|
p=OSD${i}_PID
|
||||||
jq -s -e '[ .[] | select(.state | contains(["'"'"active"'"'"])) | select(.state | contains(["'"'"has_degraded"'"'"]) | not) ] | length == '$PG_COUNT"
|
|
||||||
echo Killing OSD $kill_osd and starting OSD $((kill_osd-1))
|
|
||||||
p=OSD${kill_osd}_PID
|
|
||||||
kill -9 ${!p}
|
kill -9 ${!p}
|
||||||
$ETCDCTL del /vitastor/osd/state/$kill_osd
|
$ETCDCTL del /vitastor/osd/state/$i
|
||||||
start_osd $((kill_osd-1))
|
start_osd $((i-1))
|
||||||
|
sleep 15
|
||||||
done
|
done
|
||||||
|
|
||||||
sleep 5
|
sleep 5
|
||||||
|
|
|
@ -44,7 +44,7 @@ wait_condition 10 "$ETCDCTL"$' get --print-value-only /vitastor/config/pgs | jq
|
||||||
$ETCDCTL put /vitastor/pg/history/1/1 `$ETCDCTL get --print-value-only /vitastor/pg/history/1/1 | jq -s -c '(.[0] // {}) + {"next_scrub":1}'`
|
$ETCDCTL put /vitastor/pg/history/1/1 `$ETCDCTL get --print-value-only /vitastor/pg/history/1/1 | jq -s -c '(.[0] // {}) + {"next_scrub":1}'`
|
||||||
|
|
||||||
# Wait for scrub to finish
|
# Wait for scrub to finish
|
||||||
wait_condition 300 "$ETCDCTL get --prefix /vitastor/pg/history/ --print-value-only | jq -s -e '([ .[] | select(.next_scrub == 0 or .next_scrub == null) ] | length) == $PG_COUNT'" Scrubbing
|
wait_condition 60 "$ETCDCTL get --prefix /vitastor/pg/history/ --print-value-only | jq -s -e '([ .[] | select(.next_scrub == 0 or .next_scrub == null) ] | length) == $PG_COUNT'" Scrubbing
|
||||||
|
|
||||||
if [[ ($SCHEME = replicated && $PG_SIZE < 3) || ($SCHEME != replicated && $((PG_SIZE-PG_DATA_SIZE)) < 2) ]]; then
|
if [[ ($SCHEME = replicated && $PG_SIZE < 3) || ($SCHEME != replicated && $((PG_SIZE-PG_DATA_SIZE)) < 2) ]]; then
|
||||||
# Check that objects are marked as inconsistent if 2 replicas or EC/XOR 2+1
|
# Check that objects are marked as inconsistent if 2 replicas or EC/XOR 2+1
|
||||||
|
@ -56,7 +56,7 @@ if [[ ($SCHEME = replicated && $PG_SIZE < 3) || ($SCHEME != replicated && $((PG_
|
||||||
build/src/vitastor-cli fix --etcd_address $ETCD_URL --bad_osds $ZERO_OSD
|
build/src/vitastor-cli fix --etcd_address $ETCD_URL --bad_osds $ZERO_OSD
|
||||||
elif [[ ($SCHEME = replicated && $PG_SIZE > 2) || ($SCHEME != replicated && $((PG_SIZE-PG_DATA_SIZE)) > 1) ]]; then
|
elif [[ ($SCHEME = replicated && $PG_SIZE > 2) || ($SCHEME != replicated && $((PG_SIZE-PG_DATA_SIZE)) > 1) ]]; then
|
||||||
# Check that everything heals
|
# Check that everything heals
|
||||||
wait_finish_rebalance 300
|
wait_finish_rebalance 60
|
||||||
|
|
||||||
build/src/vitastor-cli describe --etcd_address $ETCD_URL --json | jq -e '. | length == 0'
|
build/src/vitastor-cli describe --etcd_address $ETCD_URL --json | jq -e '. | length == 0'
|
||||||
fi
|
fi
|
||||||
|
|
Loading…
Reference in New Issue