diff --git a/mon/mon.js b/mon/mon.js index 02ced4b3..d15683c8 100644 --- a/mon/mon.js +++ b/mon/mon.js @@ -266,7 +266,7 @@ const etcd_tree = { : { primary: osd_num_t, state: ("starting"|"peering"|"incomplete"|"active"|"repeering"|"stopping"|"offline"| - "degraded"|"has_incomplete"|"has_degraded"|"has_misplaced"|"has_unclean"| + "degraded"|"has_corrupted"|"has_incomplete"|"has_degraded"|"has_misplaced"|"has_unclean"| "has_invalid"|"left_on_dead")[], } }, */ diff --git a/src/osd.cpp b/src/osd.cpp index ada003cf..06ac4d82 100644 --- a/src/osd.cpp +++ b/src/osd.cpp @@ -431,6 +431,10 @@ void osd_t::print_stats() recovery_stat_bytes[1][i] = recovery_stat_bytes[0][i]; } } + if (corrupted_objects > 0) + { + printf("[OSD %lu] %lu object(s) corrupted\n", osd_num, corrupted_objects); + } if (incomplete_objects > 0) { printf("[OSD %lu] %lu object(s) incomplete\n", osd_num, incomplete_objects); diff --git a/src/osd.h b/src/osd.h index 461fef38..90a9f515 100644 --- a/src/osd.h +++ b/src/osd.h @@ -135,7 +135,7 @@ class osd_t std::set dirty_pgs; std::set dirty_osds; int copies_to_delete_after_sync_count = 0; - uint64_t misplaced_objects = 0, degraded_objects = 0, incomplete_objects = 0; + uint64_t misplaced_objects = 0, degraded_objects = 0, incomplete_objects = 0, corrupted_objects = 0; int peering_state = 0; std::map recovery_ops; bool recovery_last_degraded = true; diff --git a/src/osd_cluster.cpp b/src/osd_cluster.cpp index 0aeba06a..96b59e4d 100644 --- a/src/osd_cluster.cpp +++ b/src/osd_cluster.cpp @@ -337,6 +337,8 @@ void osd_t::report_statistics() pg_stats["misplaced_count"] = pg.misplaced_objects.size(); pg_stats["degraded_count"] = pg.degraded_objects.size(); pg_stats["incomplete_count"] = pg.incomplete_objects.size(); + if (pg.corrupted_count) + pg_stats["corrupted_count"] = pg.corrupted_count; pg_stats["write_osd_set"] = pg.cur_set; txn.push_back(json11::Json::object { { "request_put", json11::Json::object { diff --git a/src/osd_peering.cpp b/src/osd_peering.cpp index 
6a021e32..80e92b4c 100644 --- a/src/osd_peering.cpp +++ b/src/osd_peering.cpp @@ -128,9 +128,11 @@ void osd_t::reset_pg(pg_t & pg) pg.state_dict.clear(); copies_to_delete_after_sync_count -= pg.copies_to_delete_after_sync.size(); pg.copies_to_delete_after_sync.clear(); + corrupted_objects -= pg.corrupted_count; incomplete_objects -= pg.incomplete_objects.size(); misplaced_objects -= pg.misplaced_objects.size(); degraded_objects -= pg.degraded_objects.size(); + pg.corrupted_count = 0; pg.incomplete_objects.clear(); pg.misplaced_objects.clear(); pg.degraded_objects.clear(); @@ -206,7 +208,7 @@ void osd_t::start_pg_peering(pg_t & pg) pg.cur_loc_set.push_back({ .role = (uint64_t)role, .osd_num = pg.cur_set[role], - .outdated = false, + .loc_bad = 0, }); } } diff --git a/src/osd_peering_pg.cpp b/src/osd_peering_pg.cpp index 249f268b..74e3234a 100644 --- a/src/osd_peering_pg.cpp +++ b/src/osd_peering_pg.cpp @@ -280,7 +280,7 @@ void pg_obj_state_check_t::finish_object() osd_set.push_back((pg_obj_loc_t){ .role = (list[i].oid.stripe & STRIPE_MASK), .osd_num = list[i].osd_num, - .outdated = false, + .loc_bad = 0, }); } } @@ -302,7 +302,7 @@ void pg_obj_state_check_t::finish_object() osd_set.push_back((pg_obj_loc_t){ .role = (list[i].oid.stripe & STRIPE_MASK), .osd_num = list[i].osd_num, - .outdated = true, + .loc_bad = LOC_OUTDATED, }); if (!(state & (OBJ_INCOMPLETE | OBJ_DEGRADED))) { @@ -330,7 +330,7 @@ void pg_obj_state_check_t::finish_object() { for (auto & o: osd_set) { - if (!o.outdated) + if (!(o.loc_bad & LOC_OUTDATED)) { read_target.push_back(o.osd_num); } @@ -350,7 +350,7 @@ void pg_obj_state_check_t::finish_object() } for (auto & o: osd_set) { - if (!o.outdated) + if (!(o.loc_bad & LOC_OUTDATED)) { read_target[o.role] = o.osd_num; } @@ -446,7 +446,8 @@ void pg_t::calc_object_states(int log_level) osd_set_desc += (osd_set_desc == "" ? "" : ", ")+ std::to_string(loc.osd_num)+ (st.replicated ? "" : "("+std::to_string(loc.role)+")")+ - (loc.outdated ? 
"(old)" : ""); + (loc.loc_bad & LOC_OUTDATED ? "(old)" : "")+ + (loc.loc_bad & LOC_CORRUPTED ? "(corrupted)" : ""); } printf("[PG %u/%u] %lu objects on OSD set %s\n", pool_id, pg_num, stp.second.object_count, osd_set_desc.c_str()); } @@ -456,7 +457,7 @@ void pg_t::calc_object_states(int log_level) void pg_t::print_state() { printf( - "[PG %u/%u] is %s%s%s%s%s%s%s%s%s%s%s%s%s%s (%lu objects)\n", pool_id, pg_num, + "[PG %u/%u] is %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s (%lu objects)\n", pool_id, pg_num, (state & PG_STARTING) ? "starting" : "", (state & PG_OFFLINE) ? "offline" : "", (state & PG_PEERING) ? "peering" : "", @@ -465,6 +466,7 @@ void pg_t::print_state() (state & PG_REPEERING) ? "repeering" : "", (state & PG_STOPPING) ? "stopping" : "", (state & PG_DEGRADED) ? " + degraded" : "", + (state & PG_HAS_CORRUPTED) ? " + has_corrupted" : "", (state & PG_HAS_INCOMPLETE) ? " + has_incomplete" : "", (state & PG_HAS_DEGRADED) ? " + has_degraded" : "", (state & PG_HAS_MISPLACED) ? " + has_misplaced" : "", diff --git a/src/osd_peering_pg.h b/src/osd_peering_pg.h index d0e729ee..3fbd0c55 100644 --- a/src/osd_peering_pg.h +++ b/src/osd_peering_pg.h @@ -13,11 +13,14 @@ #define PG_EPOCH_BITS 48 +#define LOC_OUTDATED 1 +#define LOC_CORRUPTED 2 + struct pg_obj_loc_t { uint64_t role; osd_num_t osd_num; - bool outdated; + uint32_t loc_bad; // LOC_OUTDATED / LOC_CORRUPTED }; typedef std::vector pg_osd_set_t; @@ -106,6 +109,7 @@ struct pg_t // it may consume up to ~ (raw storage / object size) * 24 bytes in the worst case scenario // which is up to ~192 MB per 1 TB in the worst case scenario std::map state_dict; + uint64_t corrupted_count = 0; btree::btree_map incomplete_objects, misplaced_objects, degraded_objects; std::map flush_actions; std::vector copies_to_delete_after_sync; @@ -122,9 +126,9 @@ struct pg_t inline bool operator < (const pg_obj_loc_t &a, const pg_obj_loc_t &b) { - return a.outdated < b.outdated || - a.outdated == b.outdated && a.role < b.role || - a.outdated == b.outdated 
&& a.role == b.role && a.osd_num < b.osd_num; + return a.loc_bad < b.loc_bad || + a.loc_bad == b.loc_bad && a.role < b.role || + a.loc_bad == b.loc_bad && a.role == b.role && a.osd_num < b.osd_num; } inline bool operator == (const obj_piece_id_t & a, const obj_piece_id_t & b) diff --git a/src/pg_states.cpp b/src/pg_states.cpp index f480dbbf..f20bec0f 100644 --- a/src/pg_states.cpp +++ b/src/pg_states.cpp @@ -3,9 +3,9 @@ #include "pg_states.h" -const int pg_state_bit_count = 14; +const int pg_state_bit_count = 15; -const int pg_state_bits[14] = { +const int pg_state_bits[15] = { PG_STARTING, PG_PEERING, PG_INCOMPLETE, @@ -14,6 +14,7 @@ const int pg_state_bits[14] = { PG_STOPPING, PG_OFFLINE, PG_DEGRADED, + PG_HAS_CORRUPTED, PG_HAS_INCOMPLETE, PG_HAS_DEGRADED, PG_HAS_MISPLACED, @@ -22,7 +23,7 @@ const int pg_state_bits[14] = { PG_LEFT_ON_DEAD, }; -const char *pg_state_names[14] = { +const char *pg_state_names[15] = { "starting", "peering", "incomplete", @@ -31,6 +32,7 @@ const char *pg_state_names[14] = { "stopping", "offline", "degraded", + "has_corrupted", "has_incomplete", "has_degraded", "has_misplaced", diff --git a/src/pg_states.h b/src/pg_states.h index 605c817a..7cee257c 100644 --- a/src/pg_states.h +++ b/src/pg_states.h @@ -22,7 +22,8 @@ #define PG_HAS_MISPLACED (1<<10) #define PG_HAS_UNCLEAN (1<<11) #define PG_HAS_INVALID (1<<12) -#define PG_LEFT_ON_DEAD (1<<13) +#define PG_HAS_CORRUPTED (1<<13) +#define PG_LEFT_ON_DEAD (1<<14) // Lower bits that represent object role (EC 0/1/2... or always 0 with replication) // 12 bits is a safe default that doesn't depend on pg_stripe_size or pg_block_size @@ -32,6 +33,8 @@ #define OBJ_DEGRADED 0x02 #define OBJ_INCOMPLETE 0x04 #define OBJ_MISPLACED 0x08 +// OBJ_CORRUPTED is always set with one of OBJ_INCOMPLETE/OBJ_DEGRADED/OBJ_MISPLACED +#define OBJ_CORRUPTED 0x10 #define OBJ_NEEDS_STABLE 0x10000 #define OBJ_NEEDS_ROLLBACK 0x20000