Add corrupted object state

test-double-alloc
Vitaliy Filippov 2023-01-21 01:35:31 +03:00
parent 97720fa6b4
commit 0538a484b3
9 changed files with 36 additions and 17 deletions

View File

@@ -266,7 +266,7 @@ const etcd_tree = {
<pg_id>: { <pg_id>: {
primary: osd_num_t, primary: osd_num_t,
state: ("starting"|"peering"|"incomplete"|"active"|"repeering"|"stopping"|"offline"| state: ("starting"|"peering"|"incomplete"|"active"|"repeering"|"stopping"|"offline"|
"degraded"|"has_incomplete"|"has_degraded"|"has_misplaced"|"has_unclean"| "degraded"|"has_corrupted"|"has_incomplete"|"has_degraded"|"has_misplaced"|"has_unclean"|
"has_invalid"|"left_on_dead")[], "has_invalid"|"left_on_dead")[],
} }
}, */ }, */

View File

@@ -431,6 +431,10 @@ void osd_t::print_stats()
recovery_stat_bytes[1][i] = recovery_stat_bytes[0][i]; recovery_stat_bytes[1][i] = recovery_stat_bytes[0][i];
} }
} }
if (corrupted_objects > 0)
{
printf("[OSD %lu] %lu object(s) corrupted\n", osd_num, corrupted_objects);
}
if (incomplete_objects > 0) if (incomplete_objects > 0)
{ {
printf("[OSD %lu] %lu object(s) incomplete\n", osd_num, incomplete_objects); printf("[OSD %lu] %lu object(s) incomplete\n", osd_num, incomplete_objects);

View File

@@ -135,7 +135,7 @@ class osd_t
std::set<pool_pg_num_t> dirty_pgs; std::set<pool_pg_num_t> dirty_pgs;
std::set<osd_num_t> dirty_osds; std::set<osd_num_t> dirty_osds;
int copies_to_delete_after_sync_count = 0; int copies_to_delete_after_sync_count = 0;
uint64_t misplaced_objects = 0, degraded_objects = 0, incomplete_objects = 0; uint64_t misplaced_objects = 0, degraded_objects = 0, incomplete_objects = 0, corrupted_objects = 0;
int peering_state = 0; int peering_state = 0;
std::map<object_id, osd_recovery_op_t> recovery_ops; std::map<object_id, osd_recovery_op_t> recovery_ops;
bool recovery_last_degraded = true; bool recovery_last_degraded = true;

View File

@@ -337,6 +337,8 @@ void osd_t::report_statistics()
pg_stats["misplaced_count"] = pg.misplaced_objects.size(); pg_stats["misplaced_count"] = pg.misplaced_objects.size();
pg_stats["degraded_count"] = pg.degraded_objects.size(); pg_stats["degraded_count"] = pg.degraded_objects.size();
pg_stats["incomplete_count"] = pg.incomplete_objects.size(); pg_stats["incomplete_count"] = pg.incomplete_objects.size();
if (pg.corrupted_count)
pg_stats["corrupted_count"] = pg.corrupted_count;
pg_stats["write_osd_set"] = pg.cur_set; pg_stats["write_osd_set"] = pg.cur_set;
txn.push_back(json11::Json::object { txn.push_back(json11::Json::object {
{ "request_put", json11::Json::object { { "request_put", json11::Json::object {

View File

@@ -128,9 +128,11 @@ void osd_t::reset_pg(pg_t & pg)
pg.state_dict.clear(); pg.state_dict.clear();
copies_to_delete_after_sync_count -= pg.copies_to_delete_after_sync.size(); copies_to_delete_after_sync_count -= pg.copies_to_delete_after_sync.size();
pg.copies_to_delete_after_sync.clear(); pg.copies_to_delete_after_sync.clear();
corrupted_objects -= pg.corrupted_count;
incomplete_objects -= pg.incomplete_objects.size(); incomplete_objects -= pg.incomplete_objects.size();
misplaced_objects -= pg.misplaced_objects.size(); misplaced_objects -= pg.misplaced_objects.size();
degraded_objects -= pg.degraded_objects.size(); degraded_objects -= pg.degraded_objects.size();
pg.corrupted_count = 0;
pg.incomplete_objects.clear(); pg.incomplete_objects.clear();
pg.misplaced_objects.clear(); pg.misplaced_objects.clear();
pg.degraded_objects.clear(); pg.degraded_objects.clear();
@@ -206,7 +208,7 @@ void osd_t::start_pg_peering(pg_t & pg)
pg.cur_loc_set.push_back({ pg.cur_loc_set.push_back({
.role = (uint64_t)role, .role = (uint64_t)role,
.osd_num = pg.cur_set[role], .osd_num = pg.cur_set[role],
.outdated = false, .loc_bad = 0,
}); });
} }
} }

View File

@@ -280,7 +280,7 @@ void pg_obj_state_check_t::finish_object()
osd_set.push_back((pg_obj_loc_t){ osd_set.push_back((pg_obj_loc_t){
.role = (list[i].oid.stripe & STRIPE_MASK), .role = (list[i].oid.stripe & STRIPE_MASK),
.osd_num = list[i].osd_num, .osd_num = list[i].osd_num,
.outdated = false, .loc_bad = 0,
}); });
} }
} }
@@ -302,7 +302,7 @@ void pg_obj_state_check_t::finish_object()
osd_set.push_back((pg_obj_loc_t){ osd_set.push_back((pg_obj_loc_t){
.role = (list[i].oid.stripe & STRIPE_MASK), .role = (list[i].oid.stripe & STRIPE_MASK),
.osd_num = list[i].osd_num, .osd_num = list[i].osd_num,
.outdated = true, .loc_bad = LOC_OUTDATED,
}); });
if (!(state & (OBJ_INCOMPLETE | OBJ_DEGRADED))) if (!(state & (OBJ_INCOMPLETE | OBJ_DEGRADED)))
{ {
@@ -330,7 +330,7 @@ void pg_obj_state_check_t::finish_object()
{ {
for (auto & o: osd_set) for (auto & o: osd_set)
{ {
if (!o.outdated) if (!(o.loc_bad & LOC_OUTDATED))
{ {
read_target.push_back(o.osd_num); read_target.push_back(o.osd_num);
} }
@@ -350,7 +350,7 @@ void pg_obj_state_check_t::finish_object()
} }
for (auto & o: osd_set) for (auto & o: osd_set)
{ {
if (!o.outdated) if (!(o.loc_bad & LOC_OUTDATED))
{ {
read_target[o.role] = o.osd_num; read_target[o.role] = o.osd_num;
} }
@@ -446,7 +446,8 @@ void pg_t::calc_object_states(int log_level)
osd_set_desc += (osd_set_desc == "" ? "" : ", ")+ osd_set_desc += (osd_set_desc == "" ? "" : ", ")+
std::to_string(loc.osd_num)+ std::to_string(loc.osd_num)+
(st.replicated ? "" : "("+std::to_string(loc.role)+")")+ (st.replicated ? "" : "("+std::to_string(loc.role)+")")+
(loc.outdated ? "(old)" : ""); (loc.loc_bad & LOC_OUTDATED ? "(old)" : "")+
(loc.loc_bad & LOC_CORRUPTED ? "(corrupted)" : "");
} }
printf("[PG %u/%u] %lu objects on OSD set %s\n", pool_id, pg_num, stp.second.object_count, osd_set_desc.c_str()); printf("[PG %u/%u] %lu objects on OSD set %s\n", pool_id, pg_num, stp.second.object_count, osd_set_desc.c_str());
} }
@@ -456,7 +457,7 @@ void pg_t::calc_object_states(int log_level)
void pg_t::print_state() void pg_t::print_state()
{ {
printf( printf(
"[PG %u/%u] is %s%s%s%s%s%s%s%s%s%s%s%s%s%s (%lu objects)\n", pool_id, pg_num, "[PG %u/%u] is %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s (%lu objects)\n", pool_id, pg_num,
(state & PG_STARTING) ? "starting" : "", (state & PG_STARTING) ? "starting" : "",
(state & PG_OFFLINE) ? "offline" : "", (state & PG_OFFLINE) ? "offline" : "",
(state & PG_PEERING) ? "peering" : "", (state & PG_PEERING) ? "peering" : "",
@@ -465,6 +466,7 @@ void pg_t::print_state()
(state & PG_REPEERING) ? "repeering" : "", (state & PG_REPEERING) ? "repeering" : "",
(state & PG_STOPPING) ? "stopping" : "", (state & PG_STOPPING) ? "stopping" : "",
(state & PG_DEGRADED) ? " + degraded" : "", (state & PG_DEGRADED) ? " + degraded" : "",
(state & PG_HAS_CORRUPTED) ? " + has_corrupted" : "",
(state & PG_HAS_INCOMPLETE) ? " + has_incomplete" : "", (state & PG_HAS_INCOMPLETE) ? " + has_incomplete" : "",
(state & PG_HAS_DEGRADED) ? " + has_degraded" : "", (state & PG_HAS_DEGRADED) ? " + has_degraded" : "",
(state & PG_HAS_MISPLACED) ? " + has_misplaced" : "", (state & PG_HAS_MISPLACED) ? " + has_misplaced" : "",

View File

@@ -13,11 +13,14 @@
#define PG_EPOCH_BITS 48 #define PG_EPOCH_BITS 48
#define LOC_OUTDATED 1
#define LOC_CORRUPTED 2
struct pg_obj_loc_t struct pg_obj_loc_t
{ {
uint64_t role; uint64_t role;
osd_num_t osd_num; osd_num_t osd_num;
bool outdated; uint32_t loc_bad; // LOC_OUTDATED / LOC_CORRUPTED
}; };
typedef std::vector<pg_obj_loc_t> pg_osd_set_t; typedef std::vector<pg_obj_loc_t> pg_osd_set_t;
@@ -106,6 +109,7 @@ struct pg_t
// it may consume up to ~ (raw storage / object size) * 24 bytes in the worst case scenario // it may consume up to ~ (raw storage / object size) * 24 bytes in the worst case scenario
// which is up to ~192 MB per 1 TB in the worst case scenario // which is up to ~192 MB per 1 TB in the worst case scenario
std::map<pg_osd_set_t, pg_osd_set_state_t> state_dict; std::map<pg_osd_set_t, pg_osd_set_state_t> state_dict;
uint64_t corrupted_count;
btree::btree_map<object_id, pg_osd_set_state_t*> incomplete_objects, misplaced_objects, degraded_objects; btree::btree_map<object_id, pg_osd_set_state_t*> incomplete_objects, misplaced_objects, degraded_objects;
std::map<obj_piece_id_t, flush_action_t> flush_actions; std::map<obj_piece_id_t, flush_action_t> flush_actions;
std::vector<obj_ver_osd_t> copies_to_delete_after_sync; std::vector<obj_ver_osd_t> copies_to_delete_after_sync;
@@ -122,9 +126,9 @@ struct pg_t
inline bool operator < (const pg_obj_loc_t &a, const pg_obj_loc_t &b) inline bool operator < (const pg_obj_loc_t &a, const pg_obj_loc_t &b)
{ {
return a.outdated < b.outdated || return a.loc_bad < b.loc_bad ||
a.outdated == b.outdated && a.role < b.role || a.loc_bad == b.loc_bad && a.role < b.role ||
a.outdated == b.outdated && a.role == b.role && a.osd_num < b.osd_num; a.loc_bad == b.loc_bad && a.role == b.role && a.osd_num < b.osd_num;
} }
inline bool operator == (const obj_piece_id_t & a, const obj_piece_id_t & b) inline bool operator == (const obj_piece_id_t & a, const obj_piece_id_t & b)

View File

@@ -3,9 +3,9 @@
#include "pg_states.h" #include "pg_states.h"
const int pg_state_bit_count = 14; const int pg_state_bit_count = 15;
const int pg_state_bits[14] = { const int pg_state_bits[15] = {
PG_STARTING, PG_STARTING,
PG_PEERING, PG_PEERING,
PG_INCOMPLETE, PG_INCOMPLETE,
@@ -14,6 +14,7 @@ const int pg_state_bits[14] = {
PG_STOPPING, PG_STOPPING,
PG_OFFLINE, PG_OFFLINE,
PG_DEGRADED, PG_DEGRADED,
PG_HAS_CORRUPTED,
PG_HAS_INCOMPLETE, PG_HAS_INCOMPLETE,
PG_HAS_DEGRADED, PG_HAS_DEGRADED,
PG_HAS_MISPLACED, PG_HAS_MISPLACED,
@@ -22,7 +23,7 @@ const int pg_state_bits[14] = {
PG_LEFT_ON_DEAD, PG_LEFT_ON_DEAD,
}; };
const char *pg_state_names[14] = { const char *pg_state_names[15] = {
"starting", "starting",
"peering", "peering",
"incomplete", "incomplete",
@@ -31,6 +32,7 @@ const char *pg_state_names[14] = {
"stopping", "stopping",
"offline", "offline",
"degraded", "degraded",
"has_corrupted",
"has_incomplete", "has_incomplete",
"has_degraded", "has_degraded",
"has_misplaced", "has_misplaced",

View File

@@ -22,7 +22,8 @@
#define PG_HAS_MISPLACED (1<<10) #define PG_HAS_MISPLACED (1<<10)
#define PG_HAS_UNCLEAN (1<<11) #define PG_HAS_UNCLEAN (1<<11)
#define PG_HAS_INVALID (1<<12) #define PG_HAS_INVALID (1<<12)
#define PG_LEFT_ON_DEAD (1<<13) #define PG_HAS_CORRUPTED (1<<13)
#define PG_LEFT_ON_DEAD (1<<14)
// Lower bits that represent object role (EC 0/1/2... or always 0 with replication) // Lower bits that represent object role (EC 0/1/2... or always 0 with replication)
// 12 bits is a safe default that doesn't depend on pg_stripe_size or pg_block_size // 12 bits is a safe default that doesn't depend on pg_stripe_size or pg_block_size
@@ -32,6 +33,8 @@
#define OBJ_DEGRADED 0x02 #define OBJ_DEGRADED 0x02
#define OBJ_INCOMPLETE 0x04 #define OBJ_INCOMPLETE 0x04
#define OBJ_MISPLACED 0x08 #define OBJ_MISPLACED 0x08
// OBJ_CORRUPTED is always set with one of OBJ_INCOMPLETE/OBJ_DEGRADED/OBJ_MISPLACED
#define OBJ_CORRUPTED 0x10
#define OBJ_NEEDS_STABLE 0x10000 #define OBJ_NEEDS_STABLE 0x10000
#define OBJ_NEEDS_ROLLBACK 0x20000 #define OBJ_NEEDS_ROLLBACK 0x20000