Test and fix degraded-read

blocking-uring-test
Vitaliy Filippov 2020-02-09 18:57:45 +03:00
parent 97d3fc593c
commit 1513d0490a
3 changed files with 21 additions and 14 deletions

View File

@ -8,6 +8,7 @@
void osd_t::init_primary()
{
// Initial test version of clustering code requires exactly 2 peers
// FIXME: the peer setup (peer1/peer2) is hardcoded
if (config["peer1"] == "" || config["peer2"] == "")
throw std::runtime_error("run_primary requires two peers");
peers.push_back(parse_peer(config["peer1"]));
@ -16,8 +17,9 @@ void osd_t::init_primary()
throw std::runtime_error("peer1 and peer2 osd numbers are the same");
pgs.push_back((pg_t){
.state = PG_OFFLINE,
.pg_cursize = 2, // or 3
.pg_num = 1,
.target_set = { 1, 2, 3 },
.target_set = { 1, 0, 3 }, // or { 1, 2, 3 }
});
pg_count = 1;
peering_state = 1;

View File

@ -3,10 +3,10 @@
// read: read directly or read paired stripe(s), reconstruct, return
// write: read paired stripe(s), modify, write
//
// nuance: take care to read the same version from paired stripes!
// if there are no write requests in progress we're good (stripes must be in sync)
// and... remember the last readable version during a write request
// and... postpone other write requests to the same stripe until the completion of previous ones
// to do so, we remember the "last readable" version until a write request completes,
// and we postpone other write requests to the same stripe until the previous ones complete
//
// sync: sync peers, get unstable versions from somewhere, stabilize them
@ -79,7 +79,7 @@ void osd_t::exec_primary_read(osd_op_t *cur_op)
auto vo_it = pgs[pg_num].ver_override.find(oid);
op_data->target_ver = vo_it != pgs[pg_num].ver_override.end() ? vo_it->second : UINT64_MAX;
}
if (pgs[pg_num].pg_cursize == 3)
if (pgs[pg_num].pg_cursize == pgs[pg_num].pg_size)
{
// Fast happy-path
submit_read_subops(pgs[pg_num].pg_minsize, pgs[pg_num].target_set.data(), cur_op);
@ -162,8 +162,9 @@ int osd_t::extend_missing_stripes(osd_read_stripe_t *stripes, osd_num_t *target_
{
for (int role = 0; role < minsize; role++)
{
if (stripes[role*2+1].end != 0 && target_set[role] == 0)
if (stripes[role].end != 0 && target_set[role] == 0)
{
stripes[role].real_start = stripes[role].real_end = 0;
// Stripe is missing. Extend read to other stripes.
// We need at least pg_minsize stripes to recover the lost part.
int exist = 0;
@ -212,6 +213,11 @@ void osd_t::submit_read_subops(int read_pg_size, const uint64_t* target_set, osd
stripes[role].pos = buf_size;
buf_size += stripes[role].real_end - stripes[role].real_start;
}
else if (stripes[role].end != 0)
{
stripes[role].pos = buf_size;
buf_size += stripes[role].end - stripes[role].start;
}
}
osd_op_t *subops = new osd_op_t[n_subops];
cur_op->buf = memalign(MEM_ALIGNMENT, buf_size);
@ -227,7 +233,6 @@ void osd_t::submit_read_subops(int read_pg_size, const uint64_t* target_set, osd
auto role_osd_num = target_set[role];
if (role_osd_num != 0)
{
printf("Read subop from %lu: %lu / %lu\n", role_osd_num, op_data->oid.inode, op_data->oid.stripe | role);
if (role_osd_num == this->osd_num)
{
subops[subop].bs_op = {

View File

@ -22,7 +22,7 @@ uint64_t test_write(int connect_fd, uint64_t inode, uint64_t stripe, uint64_t ve
void* test_primary_read(int connect_fd, uint64_t inode, uint64_t offset, uint64_t len);
bool check_pattern(void *buf, uint64_t len, uint64_t pattern);
bool check_pattern(void *buf, uint64_t offset, uint64_t len, uint64_t pattern);
#define PATTERN0 0x8c4641acc762840e
#define PATTERN1 0x70a549add9a2280a
@ -51,13 +51,13 @@ int main(int narg, char *args[])
// Cluster read
connect_fd = connect_osd("127.0.0.1", 11203);
data = test_primary_read(connect_fd, 2, 0, 128*1024);
if (data && check_pattern(data, 128*1024, PATTERN0))
if (data && check_pattern(data, 0, 128*1024, PATTERN0))
printf("inode=2 0-128K OK\n");
if (data)
free(data);
data = test_primary_read(connect_fd, 2, 0, 256*1024);
if (data && check_pattern(data, 128*1024, PATTERN0) &&
check_pattern(data+128*1024, 128*1024, PATTERN1))
if (data && check_pattern(data, 0, 128*1024, PATTERN0) &&
check_pattern(data, 128*1024, 128*1024, PATTERN1))
printf("inode=2 0-256K OK\n");
if (data)
free(data);
@ -186,13 +186,13 @@ void* test_primary_read(int connect_fd, uint64_t inode, uint64_t offset, uint64_
return data;
}
bool check_pattern(void *buf, uint64_t len, uint64_t pattern)
bool check_pattern(void *buf, uint64_t offset, uint64_t len, uint64_t pattern)
{
for (int i = 0; i < len/sizeof(uint64_t); i++)
{
if (((uint64_t*)buf)[i] != pattern)
if (((uint64_t*)(buf+offset))[i] != pattern)
{
printf("(result[%d] = %lu) != %lu\n", i, ((uint64_t*)buf)[i], pattern);
printf("(result + %lu bytes = %lx) != %lx\n", i*sizeof(uint64_t)+offset, ((uint64_t*)buf+offset)[i], pattern);
return false;
}
}