From 1513d0490a9e4729e99121911ffbf346e373a7bf Mon Sep 17 00:00:00 2001 From: Vitaliy Filippov Date: Sun, 9 Feb 2020 18:57:45 +0300 Subject: [PATCH] Test and fix degraded-read --- osd_peering.cpp | 4 +++- osd_primary.cpp | 17 +++++++++++------ test_osd.cpp | 14 +++++++------- 3 files changed, 21 insertions(+), 14 deletions(-) diff --git a/osd_peering.cpp b/osd_peering.cpp index c8da16e64..104112309 100644 --- a/osd_peering.cpp +++ b/osd_peering.cpp @@ -8,6 +8,7 @@ void osd_t::init_primary() { // Initial test version of clustering code requires exactly 2 peers + // FIXME Hardcode if (config["peer1"] == "" || config["peer2"] == "") throw std::runtime_error("run_primary requires two peers"); peers.push_back(parse_peer(config["peer1"])); @@ -16,8 +17,9 @@ void osd_t::init_primary() throw std::runtime_error("peer1 and peer2 osd numbers are the same"); pgs.push_back((pg_t){ .state = PG_OFFLINE, + .pg_cursize = 2, // or 3 .pg_num = 1, - .target_set = { 1, 2, 3 }, + .target_set = { 1, 0, 3 }, // or { 1, 2, 3 } }); pg_count = 1; peering_state = 1; diff --git a/osd_primary.cpp b/osd_primary.cpp index dd04aff57..96a35148d 100644 --- a/osd_primary.cpp +++ b/osd_primary.cpp @@ -3,10 +3,10 @@ // read: read directly or read paired stripe(s), reconstruct, return // write: read paired stripe(s), modify, write +// // nuance: take care to read the same version from paired stripes! -// if there are no write requests in progress we're good (stripes must be in sync) -// and... remember the last readable version during a write request -// and... 
postpone other write requests to the same stripe until the completion of previous ones +// to do so, we remember "last readable" version until a write request completes +// and we postpone other write requests to the same stripe until completion of previous ones // // sync: sync peers, get unstable versions from somewhere, stabilize them @@ -79,7 +79,7 @@ void osd_t::exec_primary_read(osd_op_t *cur_op) auto vo_it = pgs[pg_num].ver_override.find(oid); op_data->target_ver = vo_it != pgs[pg_num].ver_override.end() ? vo_it->second : UINT64_MAX; } - if (pgs[pg_num].pg_cursize == 3) + if (pgs[pg_num].pg_cursize == pgs[pg_num].pg_size) { // Fast happy-path submit_read_subops(pgs[pg_num].pg_minsize, pgs[pg_num].target_set.data(), cur_op); @@ -162,8 +162,9 @@ int osd_t::extend_missing_stripes(osd_read_stripe_t *stripes, osd_num_t *target_ { for (int role = 0; role < minsize; role++) { - if (stripes[role*2+1].end != 0 && target_set[role] == 0) + if (stripes[role].end != 0 && target_set[role] == 0) { + stripes[role].real_start = stripes[role].real_end = 0; // Stripe is missing. Extend read to other stripes. // We need at least pg_minsize stripes to recover the lost part. 
int exist = 0; @@ -212,6 +213,11 @@ void osd_t::submit_read_subops(int read_pg_size, const uint64_t* target_set, osd stripes[role].pos = buf_size; buf_size += stripes[role].real_end - stripes[role].real_start; } + else if (stripes[role].end != 0) + { + stripes[role].pos = buf_size; + buf_size += stripes[role].end - stripes[role].start; + } } osd_op_t *subops = new osd_op_t[n_subops]; cur_op->buf = memalign(MEM_ALIGNMENT, buf_size); @@ -227,7 +233,6 @@ void osd_t::submit_read_subops(int read_pg_size, const uint64_t* target_set, osd auto role_osd_num = target_set[role]; if (role_osd_num != 0) { - printf("Read subop from %lu: %lu / %lu\n", role_osd_num, op_data->oid.inode, op_data->oid.stripe | role); if (role_osd_num == this->osd_num) { subops[subop].bs_op = { diff --git a/test_osd.cpp b/test_osd.cpp index 15ffb3f39..8fa2b2c6f 100644 --- a/test_osd.cpp +++ b/test_osd.cpp @@ -22,7 +22,7 @@ uint64_t test_write(int connect_fd, uint64_t inode, uint64_t stripe, uint64_t ve void* test_primary_read(int connect_fd, uint64_t inode, uint64_t offset, uint64_t len); -bool check_pattern(void *buf, uint64_t len, uint64_t pattern); +bool check_pattern(void *buf, uint64_t offset, uint64_t len, uint64_t pattern); #define PATTERN0 0x8c4641acc762840e #define PATTERN1 0x70a549add9a2280a @@ -51,13 +51,13 @@ int main(int narg, char *args[]) // Cluster read connect_fd = connect_osd("127.0.0.1", 11203); data = test_primary_read(connect_fd, 2, 0, 128*1024); - if (data && check_pattern(data, 128*1024, PATTERN0)) + if (data && check_pattern(data, 0, 128*1024, PATTERN0)) printf("inode=2 0-128K OK\n"); if (data) free(data); data = test_primary_read(connect_fd, 2, 0, 256*1024); - if (data && check_pattern(data, 128*1024, PATTERN0) && - check_pattern(data+128*1024, 128*1024, PATTERN1)) + if (data && check_pattern(data, 0, 128*1024, PATTERN0) && + check_pattern(data, 128*1024, 128*1024, PATTERN1)) printf("inode=2 0-256K OK\n"); if (data) free(data); @@ -186,13 +186,13 @@ void* test_primary_read(int 
connect_fd, uint64_t inode, uint64_t offset, uint64_ return data; } -bool check_pattern(void *buf, uint64_t len, uint64_t pattern) +bool check_pattern(void *buf, uint64_t offset, uint64_t len, uint64_t pattern) { for (int i = 0; i < len/sizeof(uint64_t); i++) { - if (((uint64_t*)buf)[i] != pattern) + if (((uint64_t*)(buf+offset))[i] != pattern) { - printf("(result[%d] = %lu) != %lu\n", i, ((uint64_t*)buf)[i], pattern); + printf("(result + %lu bytes = %lx) != %lx\n", i*sizeof(uint64_t)+offset, ((uint64_t*)(buf+offset))[i], pattern); return false; } }