forked from vitalif/vitastor
Compare commits
4 Commits
v0.9.0
...
test-doubl
Author | SHA1 | Date | |
---|---|---|---|
ee1dcfea25 | |||
150968070f | |||
cdfc74665b | |||
3f60fecd7c |
@@ -550,6 +550,24 @@ jobs:
|
||||
echo ""
|
||||
done
|
||||
|
||||
test_heal_imm_ec:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
container: ${{env.TEST_IMAGE}}:${{github.sha}}
|
||||
steps:
|
||||
- name: Run test
|
||||
id: test
|
||||
timeout-minutes: 10
|
||||
run: IMMEDIATE_COMMIT=1 SCHEME=ec /root/vitastor/tests/test_heal.sh
|
||||
- name: Print logs
|
||||
if: always() && steps.test.outcome == 'failure'
|
||||
run: |
|
||||
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
|
||||
echo "-------- $i --------"
|
||||
cat $i
|
||||
echo ""
|
||||
done
|
||||
|
||||
test_scrub:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
|
@@ -430,7 +430,7 @@ Flusher - это микро-поток (корутина), которая коп
|
||||
Находить и автоматически восстанавливать "лучшие версии" объектов с
|
||||
несовпадающими копиями/частями. При использовании репликации "лучшая"
|
||||
версия - версия, доступная в большем числе экземпляров, чем другие. При
|
||||
использовании кодов коррекции ошибок "лучшая" весрия - это подмножество
|
||||
использовании кодов коррекции ошибок "лучшая" версия - это подмножество
|
||||
частей данных и чётности, полностью соответствующих друг другу.
|
||||
|
||||
Гипотетическая ситуация, в которой вы можете захотеть отключить этот
|
||||
|
@@ -474,7 +474,7 @@
|
||||
Находить и автоматически восстанавливать "лучшие версии" объектов с
|
||||
несовпадающими копиями/частями. При использовании репликации "лучшая"
|
||||
версия - версия, доступная в большем числе экземпляров, чем другие. При
|
||||
использовании кодов коррекции ошибок "лучшая" весрия - это подмножество
|
||||
использовании кодов коррекции ошибок "лучшая" версия - это подмножество
|
||||
частей данных и чётности, полностью соответствующих друг другу.
|
||||
|
||||
Гипотетическая ситуация, в которой вы можете захотеть отключить этот
|
||||
|
@@ -536,7 +536,19 @@ resume_1:
|
||||
return false;
|
||||
}
|
||||
// zero out old metadata entry
|
||||
{
|
||||
clean_disk_entry *old_entry = (clean_disk_entry*)((uint8_t*)meta_old.buf + meta_old.pos*bs->dsk.clean_entry_size);
|
||||
if (old_entry->oid.inode != 0 && old_entry->oid != cur.oid)
|
||||
{
|
||||
printf("Fatal error (metadata corruption or bug): tried to wipe metadata entry %lu (%lx:%lx v%lu) as old location of %lx:%lx\n",
|
||||
old_clean_loc >> bs->dsk.block_order, old_entry->oid.inode, old_entry->oid.stripe,
|
||||
old_entry->version, cur.oid.inode, cur.oid.stripe);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
memset((uint8_t*)meta_old.buf + meta_old.pos*bs->dsk.clean_entry_size, 0, bs->dsk.clean_entry_size);
|
||||
if (meta_old.sector != meta_new.sector)
|
||||
{
|
||||
await_sqe(15);
|
||||
data->iov = (struct iovec){ meta_old.buf, bs->dsk.meta_block_size };
|
||||
data->callback = simple_callback_w;
|
||||
@@ -545,6 +557,7 @@ resume_1:
|
||||
);
|
||||
wait_count++;
|
||||
}
|
||||
}
|
||||
if (has_delete)
|
||||
{
|
||||
clean_disk_entry *new_entry = (clean_disk_entry*)((uint8_t*)meta_new.buf + meta_new.pos*bs->dsk.clean_entry_size);
|
||||
|
@@ -790,7 +790,7 @@ int blockstore_init_journal::handle_journal_part(void *buf, uint64_t done_pos, u
|
||||
unstab = unstab < ov.version ? ov.version : unstab;
|
||||
if (je->type == JE_SMALL_WRITE_INSTANT)
|
||||
{
|
||||
bs->mark_stable(ov, true);
|
||||
bs->mark_stable(ov);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -890,7 +890,7 @@ int blockstore_init_journal::handle_journal_part(void *buf, uint64_t done_pos, u
|
||||
unstab = unstab < ov.version ? ov.version : unstab;
|
||||
if (je->type == JE_BIG_WRITE_INSTANT)
|
||||
{
|
||||
bs->mark_stable(ov, true);
|
||||
bs->mark_stable(ov);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -904,7 +904,7 @@ int blockstore_init_journal::handle_journal_part(void *buf, uint64_t done_pos, u
|
||||
.oid = je->stable.oid,
|
||||
.version = je->stable.version,
|
||||
};
|
||||
bs->mark_stable(ov, true);
|
||||
bs->mark_stable(ov);
|
||||
}
|
||||
else if (je->type == JE_ROLLBACK)
|
||||
{
|
||||
@@ -961,7 +961,7 @@ int blockstore_init_journal::handle_journal_part(void *buf, uint64_t done_pos, u
|
||||
bs->journal.used_sectors[proc_pos]++;
|
||||
// Deletions are treated as immediately stable, because
|
||||
// "2-phase commit" (write->stabilize) isn't sufficient for them anyway
|
||||
bs->mark_stable(ov, true);
|
||||
bs->mark_stable(ov);
|
||||
}
|
||||
// Ignore delete if neither preceding dirty entries nor the clean one are present
|
||||
}
|
||||
|
@@ -179,7 +179,7 @@ void blockstore_impl_t::erase_dirty(blockstore_dirty_db_t::iterator dirty_start,
|
||||
{
|
||||
object_id oid = dirty_it->first.oid;
|
||||
#ifdef BLOCKSTORE_DEBUG
|
||||
printf("Unblock writes-after-delete %lx:%lx v%lx\n", oid.inode, oid.stripe, dirty_it->first.version);
|
||||
printf("Unblock writes-after-delete %lx:%lx v%lu\n", oid.inode, oid.stripe, dirty_it->first.version);
|
||||
#endif
|
||||
dirty_it = dirty_end;
|
||||
// Unblock operations blocked by delete flushing
|
||||
|
@@ -251,6 +251,10 @@ void osd_messenger_t::try_connect_peer_addr(osd_num_t peer_osd, const char *peer
|
||||
return;
|
||||
}
|
||||
clients[peer_fd] = new osd_client_t();
|
||||
if (log_level > 0)
|
||||
{
|
||||
fprintf(stderr, "Connecting to OSD %lu at %s:%d (client %d)\n", peer_osd, peer_host, peer_port, peer_fd);
|
||||
}
|
||||
clients[peer_fd]->peer_addr = addr;
|
||||
clients[peer_fd]->peer_port = peer_port;
|
||||
clients[peer_fd]->peer_fd = peer_fd;
|
||||
@@ -313,7 +317,10 @@ void osd_messenger_t::handle_peer_epoll(int peer_fd, int epoll_events)
|
||||
if (epoll_events & EPOLLRDHUP)
|
||||
{
|
||||
// Stop client
|
||||
if (log_level > 0)
|
||||
{
|
||||
fprintf(stderr, "[OSD %lu] client %d disconnected\n", this->osd_num, peer_fd);
|
||||
}
|
||||
stop_client(peer_fd, true);
|
||||
}
|
||||
else if (epoll_events & EPOLLIN)
|
||||
|
@@ -50,7 +50,7 @@ struct osd_client_t
|
||||
|
||||
sockaddr_storage peer_addr;
|
||||
int peer_port;
|
||||
int peer_fd;
|
||||
int peer_fd = -1;
|
||||
int peer_state;
|
||||
int connect_timeout_id = -1;
|
||||
int ping_time_remaining = 0;
|
||||
@@ -87,11 +87,7 @@ struct osd_client_t
|
||||
std::vector<iovec> send_list, next_send_list;
|
||||
std::vector<msgr_sendp_t> outbox, next_outbox;
|
||||
|
||||
~osd_client_t()
|
||||
{
|
||||
free(in_buf);
|
||||
in_buf = NULL;
|
||||
}
|
||||
~osd_client_t();
|
||||
};
|
||||
|
||||
struct osd_wanted_peer_t
|
||||
|
@@ -122,17 +122,6 @@ void osd_messenger_t::stop_client(int peer_fd, bool force, bool force_delete)
|
||||
// Cancel outbound operations
|
||||
cancel_osd_ops(cl);
|
||||
}
|
||||
#ifndef __MOCK__
|
||||
// And close the FD only when everything is done
|
||||
// ...because peer_fd number can get reused after close()
|
||||
close(peer_fd);
|
||||
#ifdef WITH_RDMA
|
||||
if (cl->rdma_conn)
|
||||
{
|
||||
delete cl->rdma_conn;
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
// Find the item again because it can be invalidated at this point
|
||||
it = clients.find(peer_fd);
|
||||
if (it != clients.end())
|
||||
@@ -145,3 +134,25 @@ void osd_messenger_t::stop_client(int peer_fd, bool force, bool force_delete)
|
||||
delete cl;
|
||||
}
|
||||
}
|
||||
|
||||
// Destructor: frees in_buf, closes peer_fd when it holds a valid descriptor
// (>= 0), and - in builds without __MOCK__ and with WITH_RDMA - deletes
// rdma_conn. Each released member is reset afterwards (NULL / -1).
// NOTE(review): the close(peer_fd) call removed from stop_client() was inside
// #ifndef __MOCK__, while the close() below is outside that guard - confirm
// that closing the descriptor in mock builds is intended.
osd_client_t::~osd_client_t()
|
||||
{
|
||||
free(in_buf);
|
||||
in_buf = NULL;
|
||||
// A negative peer_fd means there is no descriptor to close
if (peer_fd >= 0)
|
||||
{
|
||||
// Close the FD only when the client is actually destroyed
|
||||
// Which only happens when all references are cleared
|
||||
close(peer_fd);
|
||||
peer_fd = -1;
|
||||
}
|
||||
// RDMA cleanup is compiled only when __MOCK__ is undefined and WITH_RDMA is defined
#ifndef __MOCK__
|
||||
#ifdef WITH_RDMA
|
||||
if (rdma_conn)
|
||||
{
|
||||
delete rdma_conn;
|
||||
rdma_conn = NULL;
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
@@ -305,7 +305,7 @@ void osd_t::submit_recovery_op(osd_recovery_op_t *op)
|
||||
};
|
||||
if (log_level > 2)
|
||||
{
|
||||
printf("Submitting recovery operation for %lx:%lx\n", op->oid.inode, op->oid.stripe);
|
||||
printf("Submitting recovery operation for %lx:%lx (%s)\n", op->oid.inode, op->oid.stripe, op->degraded ? "degraded" : "misplaced");
|
||||
}
|
||||
op->osd_op->peer_fd = -1;
|
||||
op->osd_op->callback = [this, op](osd_op_t *osd_op)
|
||||
|
@@ -255,7 +255,7 @@ void pg_obj_state_check_t::finish_object()
|
||||
}
|
||||
else if (n_mismatched > 0)
|
||||
{
|
||||
if (log_level > 2 && (replicated || n_roles >= pg->pg_cursize))
|
||||
if (log_level > 2)
|
||||
{
|
||||
printf("Object is misplaced: %lx:%lx version=%lu/%lu\n", oid.inode, oid.stripe, target_ver, max_ver);
|
||||
}
|
||||
|
@@ -357,7 +357,7 @@ void osd_t::handle_primary_subop(osd_op_t *subop, osd_op_t *cur_op)
|
||||
#ifdef OSD_DEBUG
|
||||
uint64_t peer_osd = msgr.clients.find(subop->peer_fd) != msgr.clients.end()
|
||||
? msgr.clients[subop->peer_fd]->osd_num : osd_num;
|
||||
printf("subop %lu from osd %lu: version = %lu\n", opcode, peer_osd, version);
|
||||
printf("subop %s %lx:%lx from osd %lu: version = %lu\n", osd_op_names[opcode], subop->req.sec_rw.oid.inode, subop->req.sec_rw.oid.stripe, peer_osd, version);
|
||||
#endif
|
||||
if (op_data->fact_ver != UINT64_MAX)
|
||||
{
|
||||
|
@@ -46,6 +46,7 @@ SCHEME=xor ./test_write.sh
|
||||
|
||||
PG_SIZE=2 ./test_heal.sh
|
||||
SCHEME=ec ./test_heal.sh
|
||||
IMMEDIATE_COMMIT=1 SCHEME=ec ./test_heal.sh
|
||||
|
||||
./test_scrub.sh
|
||||
ZERO_OSD=2 ./test_scrub.sh
|
||||
|
@@ -12,7 +12,7 @@ PG_COUNT=32
|
||||
. `dirname $0`/run_3osds.sh
|
||||
check_qemu
|
||||
|
||||
IMG_SIZE=960
|
||||
IMG_SIZE=128
|
||||
|
||||
$ETCDCTL put /vitastor/config/inode/1/1 '{"name":"testimg","size":'$((IMG_SIZE*1024*1024))'}'
|
||||
|
||||
@@ -22,32 +22,28 @@ LD_PRELOAD="build/src/libfio_vitastor.so" \
|
||||
|
||||
kill_osds()
|
||||
{
|
||||
for i in {1..60}; do
|
||||
sleep 5
|
||||
|
||||
kill -9 $OSD1_PID
|
||||
$ETCDCTL del /vitastor/osd/state/1
|
||||
|
||||
for i in 2 3 4 5 6 7; do
|
||||
sleep 15
|
||||
echo Killing OSD $i and starting OSD $((i-1))
|
||||
p=OSD${i}_PID
|
||||
kill -9 ${!p}
|
||||
$ETCDCTL del /vitastor/osd/state/$i
|
||||
start_osd $((i-1))
|
||||
sleep 15
|
||||
K=$((1 + RANDOM % OSD_COUNT))
|
||||
eval "kill -9 \$OSD${K}_PID"
|
||||
$ETCDCTL del /vitastor/osd/state/$K
|
||||
start_osd $K
|
||||
done
|
||||
|
||||
sleep 5
|
||||
start_osd 7
|
||||
|
||||
sleep 5
|
||||
}
|
||||
|
||||
kill_osds &
|
||||
|
||||
LD_PRELOAD="build/src/libfio_vitastor.so" \
|
||||
fio -thread -name=test -ioengine=build/src/libfio_vitastor.so -bs=4k -direct=1 -iodepth=16 -fsync=256 -rw=randwrite \
|
||||
-mirror_file=./testdata/mirror.bin -etcd=$ETCD_URL -image=testimg -loops=10 -runtime=120
|
||||
fio -thread -name=test -ioengine=build/src/libfio_vitastor.so -bs=512k -direct=1 -iodepth=8 -fsync=256 -rw=write \
|
||||
-refill_buffers=1 -mirror_file=./testdata/mirror.bin -etcd=$ETCD_URL -image=testimg -loops=1000 -runtime=120 &>/dev/null &
|
||||
|
||||
LD_PRELOAD="build/src/libfio_vitastor.so" \
|
||||
fio -thread -name=test -ioengine=build/src/libfio_vitastor.so -bs=512k -direct=1 -iodepth=8 -fsync=256 -rw=write \
|
||||
-refill_buffers=1 -mirror_file=./testdata/mirror.bin -etcd=$ETCD_URL -image=testimg -loops=1000 -runtime=120 &>/dev/null &
|
||||
|
||||
LD_PRELOAD="build/src/libfio_vitastor.so" \
|
||||
fio -thread -name=test -ioengine=build/src/libfio_vitastor.so -bs=512k -direct=1 -iodepth=8 -fsync=256 -rw=write \
|
||||
-refill_buffers=1 -mirror_file=./testdata/mirror.bin -etcd=$ETCD_URL -image=testimg -loops=1000 -runtime=120
|
||||
|
||||
qemu-img convert -S 4096 -p \
|
||||
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testimg" \
|
||||
|
Reference in New Issue
Block a user