From e19d9fde5f3ae50949356c8c16834926e4697406 Mon Sep 17 00:00:00 2001 From: Vitaliy Filippov Date: Thu, 5 Mar 2020 20:58:52 +0300 Subject: [PATCH] Fix peering_pg, begin tests --- osd_peering_pg.cpp | 191 ++++++++++++++++++++++------------------ osd_peering_pg.h | 1 + osd_peering_pg_test.cpp | 53 +++++++++++ test.cpp | 39 -------- 4 files changed, 161 insertions(+), 123 deletions(-) create mode 100644 osd_peering_pg_test.cpp diff --git a/osd_peering_pg.cpp b/osd_peering_pg.cpp index 4babecf0..4d9dcc47 100644 --- a/osd_peering_pg.cpp +++ b/osd_peering_pg.cpp @@ -5,43 +5,61 @@ void pg_t::remember_object(pg_obj_state_check_t &st, std::vector & auto & pg = *this; // Remember the decision uint64_t state = 0; - if (st.n_roles == pg.pg_cursize) + if (st.target_ver == 0) { - if (st.n_matched == pg.pg_cursize) - state = OBJ_CLEAN; - else - { - state = OBJ_MISPLACED; - pg.state = pg.state | PG_HAS_MISPLACED; - } - } - else if (st.n_roles < pg.pg_minsize) - { - printf("Object is unfound: inode=%lu stripe=%lu version=%lu/%lu\n", st.oid.inode, st.oid.stripe, st.target_ver, st.max_ver); - state = OBJ_INCOMPLETE; - pg.state = pg.state | PG_HAS_UNFOUND; + st.has_roles = st.n_copies = st.n_roles = st.n_stable = st.n_matched = 0; + st.ver_start = st.ver_end = st.obj_end; + state = OBJ_CLEAN; } else { - printf("Object is degraded: inode=%lu stripe=%lu version=%lu/%lu\n", st.oid.inode, st.oid.stripe, st.target_ver, st.max_ver); - state = OBJ_DEGRADED; - pg.state = pg.state | PG_HAS_DEGRADED; - } - if (st.n_copies > pg.pg_size) - { - state |= OBJ_OVERCOPIED; - pg.state = pg.state | PG_HAS_UNCLEAN; - } - if (st.n_stable < st.n_copies) - { - state |= OBJ_NEEDS_STABLE; - pg.state = pg.state | PG_HAS_UNCLEAN; + if (st.n_roles == pg.pg_cursize) + { + if (st.n_matched == pg.pg_cursize) + { + state = OBJ_CLEAN; + } + else + { + state = OBJ_MISPLACED; + pg.state = pg.state | PG_HAS_MISPLACED; + } + } + else if (st.n_roles < pg.pg_minsize) + { + printf("Object is unfound: inode=%lu stripe=%lu version=%lu/%lu\n", st.oid.inode, st.oid.stripe, st.target_ver, st.max_ver); + for (int i = st.ver_start; i < st.ver_end; i++) + { + printf("Present on: osd %lu, role %ld%s\n", all[i].osd_num, (all[i].oid.stripe & STRIPE_MASK), all[i].is_stable ? " (stable)" : ""); + } + state = OBJ_INCOMPLETE; + pg.state = pg.state | PG_HAS_UNFOUND; + } + else + { + printf("Object is degraded: inode=%lu stripe=%lu version=%lu/%lu\n", st.oid.inode, st.oid.stripe, st.target_ver, st.max_ver); + for (int i = st.ver_start; i < st.ver_end; i++) + { + printf("Present on: osd %lu, role %ld%s\n", all[i].osd_num, (all[i].oid.stripe & STRIPE_MASK), all[i].is_stable ? " (stable)" : ""); + } + state = OBJ_DEGRADED; + pg.state = pg.state | PG_HAS_DEGRADED; + } + if (st.n_copies > pg.pg_size) + { + state |= OBJ_OVERCOPIED; + pg.state = pg.state | PG_HAS_UNCLEAN; + } + if (st.n_stable < st.n_copies) + { + state |= OBJ_NEEDS_STABLE; + pg.state = pg.state | PG_HAS_UNCLEAN; + } } if (st.target_ver < st.max_ver || st.has_old_unstable) { state |= OBJ_NEEDS_ROLLBACK; pg.state = pg.state | PG_HAS_UNCLEAN; - pg.ver_override[st.oid] = st.target_ver; } if (st.is_buggy) { @@ -49,7 +67,12 @@ void pg_t::remember_object(pg_obj_state_check_t &st, std::vector & // FIXME: bring pg offline throw std::runtime_error("buggy object state"); } - if (state != OBJ_CLEAN) + pg.total_count++; + if (state == OBJ_CLEAN) + { + pg.clean_count++; + } + else { st.osd_set.clear(); for (int i = st.ver_start; i < st.ver_end; i++) @@ -131,9 +154,6 @@ void pg_t::remember_object(pg_obj_state_check_t &st, std::vector & } } } - else - pg.clean_count++; - pg.total_count++; } // FIXME: Write at least some tests for this function @@ -180,72 +200,75 @@ void pg_t::calc_object_states() if (st.oid.inode != 0) { // Remember object state - st.obj_end = st.ver_end = i; - remember_object(st, all); - } - st.obj_start = st.ver_start = i; - st.oid = { .inode = all[i].oid.inode, .stripe = all[i].oid.stripe & ~STRIPE_MASK }; - st.max_ver = st.target_ver = all[i].version; - st.has_roles = st.n_copies = st.n_roles = st.n_stable = st.n_matched = 0; - st.is_buggy = st.has_old_unstable = false; - } - else if (st.target_ver != all[i].version) - { - if (st.n_stable > 0 || st.n_roles >= pg.pg_minsize) - { - // Last processed version is either recoverable or stable, choose it as target and skip previous versions - st.ver_end = i; - i++; - while (i < all.size() && st.oid.inode == all[i].oid.inode && - st.oid.stripe == (all[i].oid.stripe & ~STRIPE_MASK)) + if (!st.target_ver && (st.n_stable > 0 || st.n_roles >= pg.pg_minsize)) { - if (!all[i].is_stable) - { - st.has_old_unstable = true; - } - i++; + // Version is either stable or recoverable + st.target_ver = st.last_ver; + st.ver_end = i; } st.obj_end = i; - i--; - continue; + remember_object(st, all); + } + st.obj_start = i; + st.oid = { .inode = all[i].oid.inode, .stripe = all[i].oid.stripe & ~STRIPE_MASK }; + st.last_ver = st.max_ver = all[i].version; + st.target_ver = 0; + st.ver_start = i; + st.has_roles = st.n_copies = st.n_roles = st.n_stable = st.n_matched = 0; + } + if (!st.target_ver && st.last_ver != all[i].version && (st.n_stable > 0 || st.n_roles >= pg.pg_minsize)) + { + // Version is either stable or recoverable + st.target_ver = st.last_ver; + st.ver_end = i; + } + if (!st.target_ver) + { + if (st.last_ver != all[i].version) + { + st.ver_start = i; + st.has_roles = st.n_copies = st.n_roles = st.n_stable = st.n_matched = 0; + st.last_ver = all[i].version; + } + replica = (all[i].oid.stripe & STRIPE_MASK); + st.n_copies++; + if (replica >= pg.pg_size) + { + // FIXME In the future, check it against the PG epoch number to handle replication factor/scheme changes + st.is_buggy = true; } else { - // Last processed version is unstable and unrecoverable - // We'll know that because target_ver < max_ver - st.ver_start = i; - st.target_ver = all[i].version; - st.has_roles = st.n_copies = st.n_roles = st.n_stable = st.n_matched = 0; + if (all[i].is_stable) + { + st.n_stable++; + } + if (pg.cur_set[replica] == all[i].osd_num) + { + st.n_matched++; + } + if (!(st.has_roles & (1 << replica))) + { + st.has_roles = st.has_roles | (1 << replica); + st.n_roles++; + } } } - replica = (all[i].oid.stripe & STRIPE_MASK); - st.n_copies++; - if (replica >= pg.pg_size) + else if (!all[i].is_stable) { - // FIXME In the future, check it against the PG epoch number to handle replication factor/scheme changes - st.is_buggy = true; - } - else - { - if (all[i].is_stable) - { - st.n_stable++; - } - if (pg.cur_set[replica] == all[i].osd_num) - { - st.n_matched++; - } - if (!(st.has_roles & (1 << replica))) - { - st.has_roles = st.has_roles | (1 << replica); - st.n_roles++; - } + st.has_old_unstable = true; } } if (st.oid.inode != 0) { // Remember object state - st.obj_end = st.ver_end = all.size(); + if (!st.target_ver && (st.n_stable > 0 || st.n_roles >= pg.pg_minsize)) + { + // Version is either stable or recoverable + st.target_ver = st.last_ver; + st.ver_end = all.size(); + } + st.obj_end = all.size(); remember_object(st, all); } if (pg.pg_cursize < pg.pg_size) diff --git a/osd_peering_pg.h b/osd_peering_pg.h index 571d9edc..344d4ec6 100644 --- a/osd_peering_pg.h +++ b/osd_peering_pg.h @@ -75,6 +75,7 @@ struct pg_obj_state_check_t int obj_start = 0, obj_end = 0, ver_start = 0, ver_end = 0; object_id oid = { 0 }; uint64_t max_ver = 0; + uint64_t last_ver = 0; uint64_t target_ver = 0; uint64_t n_copies = 0, has_roles = 0, n_roles = 0, n_stable = 0, n_matched = 0; bool is_buggy = false, has_old_unstable = false; diff --git a/osd_peering_pg_test.cpp b/osd_peering_pg_test.cpp new file mode 100644 index 00000000..f4b890c8 --- /dev/null +++ b/osd_peering_pg_test.cpp @@ -0,0 +1,53 @@ +#define _LARGEFILE64_SOURCE + +#include "osd_peering_pg.h" + +/** + * TODO tests for object & pg state calculation. + * + * 1) pg=1,2,3. objects: + * v1=1s,2s,3s -> clean + * v1=1s,2s,3 v2=1s,2s,_ -> degraded + needs_rollback + * v1=1s,2s,_ -> degraded + * v1=1s,2s,3s v2=1,6,_ -> degraded + needs_stabilize + * v1=2s,1s,3s -> misplaced + * v1=4,5,6 -> misplaced + needs_stabilize + * v1=1s,2s,6s -> misplaced + * 2) ... + */ +int main(int argc, char *argv[]) +{ + pg_t pg = { + .state = PG_PEERING, + .pg_num = 1, + .target_set = { 1, 2, 3 }, + .cur_set = { 1, 2, 3 }, + .peering_state = new pg_peering_state_t(), + }; + for (uint64_t osd_num = 1; osd_num <= 3; osd_num++) + { + pg_list_result_t r = { + .buf = (obj_ver_id*)malloc(sizeof(obj_ver_id) * 1024*1024*8), + .total_count = 1024*1024*8, + .stable_count = (uint64_t)(1024*1024*8 - (osd_num == 1 ? 10 : 0)), + }; + for (uint64_t i = 0; i < r.total_count; i++) + { + r.buf[i] = { + .oid = { + .inode = 1, + .stripe = (i << STRIPE_SHIFT) | (osd_num-1), + }, + .version = (uint64_t)(osd_num == 1 && i >= r.total_count - 10 ? 2 : 1), + }; + } + pg.peering_state->list_results[osd_num] = r; + } + pg.calc_object_states(); + printf("deviation variants=%ld clean=%lu\n", pg.state_dict.size(), pg.clean_count); + for (auto it: pg.state_dict) + { + printf("dev: state=%lx\n", it.second.state); + } + return 0; +} diff --git a/test.cpp b/test.cpp index 92b3fa5c..df31c5ec 100644 --- a/test.cpp +++ b/test.cpp @@ -26,7 +26,6 @@ #include "blockstore.h" #include "blockstore_impl.h" -#include "osd_peering_pg.h" //#include "cpp-btree/btree_map.h" static int setup_context(unsigned entries, struct io_uring *ring) @@ -337,44 +336,6 @@ int main04(int argc, char *argv[]) return 0; } -int main05(int argc, char *argv[]) -{ - // FIXME extract this into a test - pg_t pg = { - .state = PG_PEERING, - .pg_num = 1, - .target_set = { 1, 2, 3 }, - .cur_set = { 1, 2, 3 }, - .peering_state = new pg_peering_state_t(), - }; - for (uint64_t osd_num = 1; osd_num <= 3; osd_num++) - { - pg_list_result_t r = { - .buf = (obj_ver_id*)malloc(sizeof(obj_ver_id) * 1024*1024*8), - .total_count = 1024*1024*8, - .stable_count = (uint64_t)(1024*1024*8 - (osd_num == 1 ? 10 : 0)), - }; - for (uint64_t i = 0; i < r.total_count; i++) - { - r.buf[i] = { - .oid = { - .inode = 1, - .stripe = (i << STRIPE_SHIFT) | (osd_num-1), - }, - .version = (uint64_t)(osd_num == 1 && i >= r.total_count - 10 ? 2 : 1), - }; - } - pg.peering_state->list_results[osd_num] = r; - } - pg.calc_object_states(); - printf("deviation variants=%ld clean=%lu\n", pg.state_dict.size(), pg.clean_count); - for (auto it: pg.state_dict) - { - printf("dev: state=%lx\n", it.second.state); - } - return 0; -} - int main(int argc, char *argv[]) { timeval fill_start, fill_end, filter_end;