Compare commits

..

No commits in common. "62a4f45160e35aaab94414dd8ac42197fd155645" and "f882c7dd87af865e4ea9dc5b925b4f2c551c55ab" have entirely different histories.

8 changed files with 15 additions and 50 deletions

View File

@ -415,7 +415,6 @@ stop_flusher:
flusher->sync_to_repeat.erase(cur.oid); flusher->sync_to_repeat.erase(cur.oid);
if (!flusher->try_find_other(dirty_end, cur)) if (!flusher->try_find_other(dirty_end, cur))
{ {
cur.oid = {};
goto stop_flusher; goto stop_flusher;
} }
} }

View File

@ -505,7 +505,7 @@ int blockstore_impl_t::dequeue_read(blockstore_op_t *read_op)
for (auto & rv: PRIV(read_op)->read_vec) for (auto & rv: PRIV(read_op)->read_vec)
{ {
if (rv.journal_sector) if (rv.journal_sector)
journal.used_sectors.at(rv.journal_sector-1)++; journal.used_sectors[rv.journal_sector-1]++;
} }
} }
read_op->retval = 0; read_op->retval = 0;
@ -966,7 +966,7 @@ void blockstore_impl_t::handle_read_event(ring_data_t *data, blockstore_op_t *op
{ {
if (rv.journal_sector) if (rv.journal_sector)
{ {
auto used = --journal.used_sectors.at(rv.journal_sector-1); auto used = --journal.used_sectors[rv.journal_sector-1];
if (used == 0) if (used == 0)
{ {
journal.used_sectors.erase(rv.journal_sector-1); journal.used_sectors.erase(rv.journal_sector-1);

View File

@ -215,7 +215,7 @@ void blockstore_impl_t::erase_dirty(blockstore_dirty_db_t::iterator dirty_start,
#endif #endif
data_alloc->set(dirty_it->second.location >> dsk.block_order, false); data_alloc->set(dirty_it->second.location >> dsk.block_order, false);
} }
auto used = --journal.used_sectors.at(dirty_it->second.journal_sector); auto used = --journal.used_sectors[dirty_it->second.journal_sector];
#ifdef BLOCKSTORE_DEBUG #ifdef BLOCKSTORE_DEBUG
printf( printf(
"remove usage of journal offset %08lx by %lx:%lx v%lu (%lu refs)\n", dirty_it->second.journal_sector, "remove usage of journal offset %08lx by %lx:%lx v%lu (%lu refs)\n", dirty_it->second.journal_sector,
@ -225,11 +225,6 @@ void blockstore_impl_t::erase_dirty(blockstore_dirty_db_t::iterator dirty_start,
if (used == 0) if (used == 0)
{ {
journal.used_sectors.erase(dirty_it->second.journal_sector); journal.used_sectors.erase(dirty_it->second.journal_sector);
if (dirty_it->second.journal_sector == journal.sector_info[journal.cur_sector].offset)
{
// Mark current sector as "full" to select the new one
journal.in_sector_pos = dsk.journal_block_size;
}
flusher->mark_trim_possible(); flusher->mark_trim_possible();
} }
free_dirty_dyn_data(dirty_it->second); free_dirty_dyn_data(dirty_it->second);

View File

@ -116,10 +116,7 @@ int blockstore_impl_t::continue_sync(blockstore_op_t *op)
journal, (dirty_entry.state & BS_ST_INSTANT) ? JE_BIG_WRITE_INSTANT : JE_BIG_WRITE, journal, (dirty_entry.state & BS_ST_INSTANT) ? JE_BIG_WRITE_INSTANT : JE_BIG_WRITE,
sizeof(journal_entry_big_write) + dyn_size sizeof(journal_entry_big_write) + dyn_size
); );
auto jsec = dirty_entry.journal_sector = journal.sector_info[journal.cur_sector].offset; dirty_entry.journal_sector = journal.sector_info[journal.cur_sector].offset;
assert(journal.next_free >= journal.used_start
? (jsec >= journal.used_start && jsec < journal.next_free)
: (jsec >= journal.used_start || jsec < journal.next_free));
journal.used_sectors[journal.sector_info[journal.cur_sector].offset]++; journal.used_sectors[journal.sector_info[journal.cur_sector].offset]++;
#ifdef BLOCKSTORE_DEBUG #ifdef BLOCKSTORE_DEBUG
printf( printf(

View File

@ -436,19 +436,7 @@ int blockstore_impl_t::dequeue_write(blockstore_op_t *op)
journal, op->opcode == BS_OP_WRITE_STABLE ? JE_SMALL_WRITE_INSTANT : JE_SMALL_WRITE, journal, op->opcode == BS_OP_WRITE_STABLE ? JE_SMALL_WRITE_INSTANT : JE_SMALL_WRITE,
sizeof(journal_entry_small_write) + dyn_size sizeof(journal_entry_small_write) + dyn_size
); );
auto jsec = dirty_it->second.journal_sector = journal.sector_info[journal.cur_sector].offset; dirty_it->second.journal_sector = journal.sector_info[journal.cur_sector].offset;
if (!(journal.next_free >= journal.used_start
? (jsec >= journal.used_start && jsec < journal.next_free)
: (jsec >= journal.used_start || jsec < journal.next_free)))
{
printf(
"BUG: journal offset %08lx is used by %lx:%lx v%lu (%lu refs) BUT used_start=%lx next_free=%lx\n",
dirty_it->second.journal_sector, dirty_it->first.oid.inode, dirty_it->first.oid.stripe, dirty_it->first.version,
journal.used_sectors[journal.sector_info[journal.cur_sector].offset],
journal.used_start, journal.next_free
);
abort();
}
journal.used_sectors[journal.sector_info[journal.cur_sector].offset]++; journal.used_sectors[journal.sector_info[journal.cur_sector].offset]++;
#ifdef BLOCKSTORE_DEBUG #ifdef BLOCKSTORE_DEBUG
printf( printf(
@ -570,19 +558,7 @@ resume_2:
journal, op->opcode == BS_OP_WRITE_STABLE ? JE_BIG_WRITE_INSTANT : JE_BIG_WRITE, journal, op->opcode == BS_OP_WRITE_STABLE ? JE_BIG_WRITE_INSTANT : JE_BIG_WRITE,
sizeof(journal_entry_big_write) + dyn_size sizeof(journal_entry_big_write) + dyn_size
); );
auto jsec = dirty_it->second.journal_sector = journal.sector_info[journal.cur_sector].offset; dirty_it->second.journal_sector = journal.sector_info[journal.cur_sector].offset;
if (!(journal.next_free >= journal.used_start
? (jsec >= journal.used_start && jsec < journal.next_free)
: (jsec >= journal.used_start || jsec < journal.next_free)))
{
printf(
"BUG: journal offset %08lx is used by %lx:%lx v%lu (%lu refs) BUT used_start=%lx next_free=%lx\n",
dirty_it->second.journal_sector, dirty_it->first.oid.inode, dirty_it->first.oid.stripe, dirty_it->first.version,
journal.used_sectors[journal.sector_info[journal.cur_sector].offset],
journal.used_start, journal.next_free
);
abort();
}
journal.used_sectors[journal.sector_info[journal.cur_sector].offset]++; journal.used_sectors[journal.sector_info[journal.cur_sector].offset]++;
#ifdef BLOCKSTORE_DEBUG #ifdef BLOCKSTORE_DEBUG
printf( printf(

View File

@ -22,7 +22,7 @@ if [ "$IMMEDIATE_COMMIT" != "" ]; then
NO_SAME="--journal_no_same_sector_overwrites true --journal_sector_buffer_count 1024 --disable_data_fsync 1 --immediate_commit all --log_level 10 --etcd_stats_interval 5" NO_SAME="--journal_no_same_sector_overwrites true --journal_sector_buffer_count 1024 --disable_data_fsync 1 --immediate_commit all --log_level 10 --etcd_stats_interval 5"
$ETCDCTL put /vitastor/config/global '{"recovery_queue_depth":1,"recovery_tune_util_low":1,"immediate_commit":"all","client_enable_writeback":true,"client_max_writeback_iodepth":32'$GLOBAL_CONFIG'}' $ETCDCTL put /vitastor/config/global '{"recovery_queue_depth":1,"recovery_tune_util_low":1,"immediate_commit":"all","client_enable_writeback":true,"client_max_writeback_iodepth":32'$GLOBAL_CONFIG'}'
else else
NO_SAME="--journal_sector_buffer_count 1024 --log_level 10 --etcd_stats_interval 5 --min_flusher_count 16" NO_SAME="--journal_sector_buffer_count 1024 --log_level 10 --etcd_stats_interval 5"
$ETCDCTL put /vitastor/config/global '{"recovery_queue_depth":1,"recovery_tune_util_low":1,"client_enable_writeback":true,"client_max_writeback_iodepth":32'$GLOBAL_CONFIG'}' $ETCDCTL put /vitastor/config/global '{"recovery_queue_depth":1,"recovery_tune_util_low":1,"client_enable_writeback":true,"client_max_writeback_iodepth":32'$GLOBAL_CONFIG'}'
fi fi

View File

@ -30,16 +30,14 @@ kill_osds()
kill -9 $OSD1_PID kill -9 $OSD1_PID
$ETCDCTL del /vitastor/osd/state/1 $ETCDCTL del /vitastor/osd/state/1
for kill_osd in $(seq 2 $OSD_COUNT); do for i in $(seq 2 $OSD_COUNT); do
sleep 15 sleep 15
# Wait for all PGs to clear has_degraded - all data will be at least in 2 copies echo Killing OSD $i and starting OSD $((i-1))
wait_condition 60 "$ETCDCTL get /vitastor/pg/state/1/ --prefix --print-value-only |\ p=OSD${i}_PID
jq -s -e '[ .[] | select(.state | contains(["'"'"active"'"'"])) | select(.state | contains(["'"'"has_degraded"'"'"]) | not) ] | length == '$PG_COUNT"
echo Killing OSD $kill_osd and starting OSD $((kill_osd-1))
p=OSD${kill_osd}_PID
kill -9 ${!p} kill -9 ${!p}
$ETCDCTL del /vitastor/osd/state/$kill_osd $ETCDCTL del /vitastor/osd/state/$i
start_osd $((kill_osd-1)) start_osd $((i-1))
sleep 15
done done
sleep 5 sleep 5

View File

@ -44,7 +44,7 @@ wait_condition 10 "$ETCDCTL"$' get --print-value-only /vitastor/config/pgs | jq
$ETCDCTL put /vitastor/pg/history/1/1 `$ETCDCTL get --print-value-only /vitastor/pg/history/1/1 | jq -s -c '(.[0] // {}) + {"next_scrub":1}'` $ETCDCTL put /vitastor/pg/history/1/1 `$ETCDCTL get --print-value-only /vitastor/pg/history/1/1 | jq -s -c '(.[0] // {}) + {"next_scrub":1}'`
# Wait for scrub to finish # Wait for scrub to finish
wait_condition 300 "$ETCDCTL get --prefix /vitastor/pg/history/ --print-value-only | jq -s -e '([ .[] | select(.next_scrub == 0 or .next_scrub == null) ] | length) == $PG_COUNT'" Scrubbing wait_condition 60 "$ETCDCTL get --prefix /vitastor/pg/history/ --print-value-only | jq -s -e '([ .[] | select(.next_scrub == 0 or .next_scrub == null) ] | length) == $PG_COUNT'" Scrubbing
if [[ ($SCHEME = replicated && $PG_SIZE < 3) || ($SCHEME != replicated && $((PG_SIZE-PG_DATA_SIZE)) < 2) ]]; then if [[ ($SCHEME = replicated && $PG_SIZE < 3) || ($SCHEME != replicated && $((PG_SIZE-PG_DATA_SIZE)) < 2) ]]; then
# Check that objects are marked as inconsistent if 2 replicas or EC/XOR 2+1 # Check that objects are marked as inconsistent if 2 replicas or EC/XOR 2+1
@ -56,7 +56,7 @@ if [[ ($SCHEME = replicated && $PG_SIZE < 3) || ($SCHEME != replicated && $((PG_
build/src/vitastor-cli fix --etcd_address $ETCD_URL --bad_osds $ZERO_OSD build/src/vitastor-cli fix --etcd_address $ETCD_URL --bad_osds $ZERO_OSD
elif [[ ($SCHEME = replicated && $PG_SIZE > 2) || ($SCHEME != replicated && $((PG_SIZE-PG_DATA_SIZE)) > 1) ]]; then elif [[ ($SCHEME = replicated && $PG_SIZE > 2) || ($SCHEME != replicated && $((PG_SIZE-PG_DATA_SIZE)) > 1) ]]; then
# Check that everything heals # Check that everything heals
wait_finish_rebalance 300 wait_finish_rebalance 60
build/src/vitastor-cli describe --etcd_address $ETCD_URL --json | jq -e '. | length == 0' build/src/vitastor-cli describe --etcd_address $ETCD_URL --json | jq -e '. | length == 0'
fi fi