forked from vitalif/vitastor
Add corrupted object state
parent
97720fa6b4
commit
0538a484b3
|
@ -266,7 +266,7 @@ const etcd_tree = {
|
||||||
<pg_id>: {
|
<pg_id>: {
|
||||||
primary: osd_num_t,
|
primary: osd_num_t,
|
||||||
state: ("starting"|"peering"|"incomplete"|"active"|"repeering"|"stopping"|"offline"|
|
state: ("starting"|"peering"|"incomplete"|"active"|"repeering"|"stopping"|"offline"|
|
||||||
"degraded"|"has_incomplete"|"has_degraded"|"has_misplaced"|"has_unclean"|
|
"degraded"|"has_corrupted"|"has_incomplete"|"has_degraded"|"has_misplaced"|"has_unclean"|
|
||||||
"has_invalid"|"left_on_dead")[],
|
"has_invalid"|"left_on_dead")[],
|
||||||
}
|
}
|
||||||
}, */
|
}, */
|
||||||
|
|
|
@ -431,6 +431,10 @@ void osd_t::print_stats()
|
||||||
recovery_stat_bytes[1][i] = recovery_stat_bytes[0][i];
|
recovery_stat_bytes[1][i] = recovery_stat_bytes[0][i];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (corrupted_objects > 0)
|
||||||
|
{
|
||||||
|
printf("[OSD %lu] %lu object(s) corrupted\n", osd_num, corrupted_objects);
|
||||||
|
}
|
||||||
if (incomplete_objects > 0)
|
if (incomplete_objects > 0)
|
||||||
{
|
{
|
||||||
printf("[OSD %lu] %lu object(s) incomplete\n", osd_num, incomplete_objects);
|
printf("[OSD %lu] %lu object(s) incomplete\n", osd_num, incomplete_objects);
|
||||||
|
|
|
@ -135,7 +135,7 @@ class osd_t
|
||||||
std::set<pool_pg_num_t> dirty_pgs;
|
std::set<pool_pg_num_t> dirty_pgs;
|
||||||
std::set<osd_num_t> dirty_osds;
|
std::set<osd_num_t> dirty_osds;
|
||||||
int copies_to_delete_after_sync_count = 0;
|
int copies_to_delete_after_sync_count = 0;
|
||||||
uint64_t misplaced_objects = 0, degraded_objects = 0, incomplete_objects = 0;
|
uint64_t misplaced_objects = 0, degraded_objects = 0, incomplete_objects = 0, corrupted_objects = 0;
|
||||||
int peering_state = 0;
|
int peering_state = 0;
|
||||||
std::map<object_id, osd_recovery_op_t> recovery_ops;
|
std::map<object_id, osd_recovery_op_t> recovery_ops;
|
||||||
bool recovery_last_degraded = true;
|
bool recovery_last_degraded = true;
|
||||||
|
|
|
@ -337,6 +337,8 @@ void osd_t::report_statistics()
|
||||||
pg_stats["misplaced_count"] = pg.misplaced_objects.size();
|
pg_stats["misplaced_count"] = pg.misplaced_objects.size();
|
||||||
pg_stats["degraded_count"] = pg.degraded_objects.size();
|
pg_stats["degraded_count"] = pg.degraded_objects.size();
|
||||||
pg_stats["incomplete_count"] = pg.incomplete_objects.size();
|
pg_stats["incomplete_count"] = pg.incomplete_objects.size();
|
||||||
|
if (pg.corrupted_count)
|
||||||
|
pg_stats["corrupted_count"] = pg.corrupted_count;
|
||||||
pg_stats["write_osd_set"] = pg.cur_set;
|
pg_stats["write_osd_set"] = pg.cur_set;
|
||||||
txn.push_back(json11::Json::object {
|
txn.push_back(json11::Json::object {
|
||||||
{ "request_put", json11::Json::object {
|
{ "request_put", json11::Json::object {
|
||||||
|
|
|
@ -128,9 +128,11 @@ void osd_t::reset_pg(pg_t & pg)
|
||||||
pg.state_dict.clear();
|
pg.state_dict.clear();
|
||||||
copies_to_delete_after_sync_count -= pg.copies_to_delete_after_sync.size();
|
copies_to_delete_after_sync_count -= pg.copies_to_delete_after_sync.size();
|
||||||
pg.copies_to_delete_after_sync.clear();
|
pg.copies_to_delete_after_sync.clear();
|
||||||
|
corrupted_objects -= pg.corrupted_count;
|
||||||
incomplete_objects -= pg.incomplete_objects.size();
|
incomplete_objects -= pg.incomplete_objects.size();
|
||||||
misplaced_objects -= pg.misplaced_objects.size();
|
misplaced_objects -= pg.misplaced_objects.size();
|
||||||
degraded_objects -= pg.degraded_objects.size();
|
degraded_objects -= pg.degraded_objects.size();
|
||||||
|
pg.corrupted_count = 0;
|
||||||
pg.incomplete_objects.clear();
|
pg.incomplete_objects.clear();
|
||||||
pg.misplaced_objects.clear();
|
pg.misplaced_objects.clear();
|
||||||
pg.degraded_objects.clear();
|
pg.degraded_objects.clear();
|
||||||
|
@ -206,7 +208,7 @@ void osd_t::start_pg_peering(pg_t & pg)
|
||||||
pg.cur_loc_set.push_back({
|
pg.cur_loc_set.push_back({
|
||||||
.role = (uint64_t)role,
|
.role = (uint64_t)role,
|
||||||
.osd_num = pg.cur_set[role],
|
.osd_num = pg.cur_set[role],
|
||||||
.outdated = false,
|
.loc_bad = 0,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -280,7 +280,7 @@ void pg_obj_state_check_t::finish_object()
|
||||||
osd_set.push_back((pg_obj_loc_t){
|
osd_set.push_back((pg_obj_loc_t){
|
||||||
.role = (list[i].oid.stripe & STRIPE_MASK),
|
.role = (list[i].oid.stripe & STRIPE_MASK),
|
||||||
.osd_num = list[i].osd_num,
|
.osd_num = list[i].osd_num,
|
||||||
.outdated = false,
|
.loc_bad = 0,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -302,7 +302,7 @@ void pg_obj_state_check_t::finish_object()
|
||||||
osd_set.push_back((pg_obj_loc_t){
|
osd_set.push_back((pg_obj_loc_t){
|
||||||
.role = (list[i].oid.stripe & STRIPE_MASK),
|
.role = (list[i].oid.stripe & STRIPE_MASK),
|
||||||
.osd_num = list[i].osd_num,
|
.osd_num = list[i].osd_num,
|
||||||
.outdated = true,
|
.loc_bad = LOC_OUTDATED,
|
||||||
});
|
});
|
||||||
if (!(state & (OBJ_INCOMPLETE | OBJ_DEGRADED)))
|
if (!(state & (OBJ_INCOMPLETE | OBJ_DEGRADED)))
|
||||||
{
|
{
|
||||||
|
@ -330,7 +330,7 @@ void pg_obj_state_check_t::finish_object()
|
||||||
{
|
{
|
||||||
for (auto & o: osd_set)
|
for (auto & o: osd_set)
|
||||||
{
|
{
|
||||||
if (!o.outdated)
|
if (!(o.loc_bad & LOC_OUTDATED))
|
||||||
{
|
{
|
||||||
read_target.push_back(o.osd_num);
|
read_target.push_back(o.osd_num);
|
||||||
}
|
}
|
||||||
|
@ -350,7 +350,7 @@ void pg_obj_state_check_t::finish_object()
|
||||||
}
|
}
|
||||||
for (auto & o: osd_set)
|
for (auto & o: osd_set)
|
||||||
{
|
{
|
||||||
if (!o.outdated)
|
if (!(o.loc_bad & LOC_OUTDATED))
|
||||||
{
|
{
|
||||||
read_target[o.role] = o.osd_num;
|
read_target[o.role] = o.osd_num;
|
||||||
}
|
}
|
||||||
|
@ -446,7 +446,8 @@ void pg_t::calc_object_states(int log_level)
|
||||||
osd_set_desc += (osd_set_desc == "" ? "" : ", ")+
|
osd_set_desc += (osd_set_desc == "" ? "" : ", ")+
|
||||||
std::to_string(loc.osd_num)+
|
std::to_string(loc.osd_num)+
|
||||||
(st.replicated ? "" : "("+std::to_string(loc.role)+")")+
|
(st.replicated ? "" : "("+std::to_string(loc.role)+")")+
|
||||||
(loc.outdated ? "(old)" : "");
|
(loc.loc_bad & LOC_OUTDATED ? "(old)" : "")+
|
||||||
|
(loc.loc_bad & LOC_CORRUPTED ? "(corrupted)" : "");
|
||||||
}
|
}
|
||||||
printf("[PG %u/%u] %lu objects on OSD set %s\n", pool_id, pg_num, stp.second.object_count, osd_set_desc.c_str());
|
printf("[PG %u/%u] %lu objects on OSD set %s\n", pool_id, pg_num, stp.second.object_count, osd_set_desc.c_str());
|
||||||
}
|
}
|
||||||
|
@ -456,7 +457,7 @@ void pg_t::calc_object_states(int log_level)
|
||||||
void pg_t::print_state()
|
void pg_t::print_state()
|
||||||
{
|
{
|
||||||
printf(
|
printf(
|
||||||
"[PG %u/%u] is %s%s%s%s%s%s%s%s%s%s%s%s%s%s (%lu objects)\n", pool_id, pg_num,
|
"[PG %u/%u] is %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s (%lu objects)\n", pool_id, pg_num,
|
||||||
(state & PG_STARTING) ? "starting" : "",
|
(state & PG_STARTING) ? "starting" : "",
|
||||||
(state & PG_OFFLINE) ? "offline" : "",
|
(state & PG_OFFLINE) ? "offline" : "",
|
||||||
(state & PG_PEERING) ? "peering" : "",
|
(state & PG_PEERING) ? "peering" : "",
|
||||||
|
@ -465,6 +466,7 @@ void pg_t::print_state()
|
||||||
(state & PG_REPEERING) ? "repeering" : "",
|
(state & PG_REPEERING) ? "repeering" : "",
|
||||||
(state & PG_STOPPING) ? "stopping" : "",
|
(state & PG_STOPPING) ? "stopping" : "",
|
||||||
(state & PG_DEGRADED) ? " + degraded" : "",
|
(state & PG_DEGRADED) ? " + degraded" : "",
|
||||||
|
(state & PG_HAS_CORRUPTED) ? " + has_corrupted" : "",
|
||||||
(state & PG_HAS_INCOMPLETE) ? " + has_incomplete" : "",
|
(state & PG_HAS_INCOMPLETE) ? " + has_incomplete" : "",
|
||||||
(state & PG_HAS_DEGRADED) ? " + has_degraded" : "",
|
(state & PG_HAS_DEGRADED) ? " + has_degraded" : "",
|
||||||
(state & PG_HAS_MISPLACED) ? " + has_misplaced" : "",
|
(state & PG_HAS_MISPLACED) ? " + has_misplaced" : "",
|
||||||
|
|
|
@ -13,11 +13,14 @@
|
||||||
|
|
||||||
#define PG_EPOCH_BITS 48
|
#define PG_EPOCH_BITS 48
|
||||||
|
|
||||||
|
#define LOC_OUTDATED 1
|
||||||
|
#define LOC_CORRUPTED 2
|
||||||
|
|
||||||
struct pg_obj_loc_t
|
struct pg_obj_loc_t
|
||||||
{
|
{
|
||||||
uint64_t role;
|
uint64_t role;
|
||||||
osd_num_t osd_num;
|
osd_num_t osd_num;
|
||||||
bool outdated;
|
uint32_t loc_bad; // LOC_OUTDATED / LOC_CORRUPTED
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef std::vector<pg_obj_loc_t> pg_osd_set_t;
|
typedef std::vector<pg_obj_loc_t> pg_osd_set_t;
|
||||||
|
@ -106,6 +109,7 @@ struct pg_t
|
||||||
// it may consume up to ~ (raw storage / object size) * 24 bytes in the worst case scenario
|
// it may consume up to ~ (raw storage / object size) * 24 bytes in the worst case scenario
|
||||||
// which is up to ~192 MB per 1 TB in the worst case scenario
|
// which is up to ~192 MB per 1 TB in the worst case scenario
|
||||||
std::map<pg_osd_set_t, pg_osd_set_state_t> state_dict;
|
std::map<pg_osd_set_t, pg_osd_set_state_t> state_dict;
|
||||||
|
uint64_t corrupted_count;
|
||||||
btree::btree_map<object_id, pg_osd_set_state_t*> incomplete_objects, misplaced_objects, degraded_objects;
|
btree::btree_map<object_id, pg_osd_set_state_t*> incomplete_objects, misplaced_objects, degraded_objects;
|
||||||
std::map<obj_piece_id_t, flush_action_t> flush_actions;
|
std::map<obj_piece_id_t, flush_action_t> flush_actions;
|
||||||
std::vector<obj_ver_osd_t> copies_to_delete_after_sync;
|
std::vector<obj_ver_osd_t> copies_to_delete_after_sync;
|
||||||
|
@ -122,9 +126,9 @@ struct pg_t
|
||||||
|
|
||||||
inline bool operator < (const pg_obj_loc_t &a, const pg_obj_loc_t &b)
|
inline bool operator < (const pg_obj_loc_t &a, const pg_obj_loc_t &b)
|
||||||
{
|
{
|
||||||
return a.outdated < b.outdated ||
|
return a.loc_bad < b.loc_bad ||
|
||||||
a.outdated == b.outdated && a.role < b.role ||
|
a.loc_bad == b.loc_bad && a.role < b.role ||
|
||||||
a.outdated == b.outdated && a.role == b.role && a.osd_num < b.osd_num;
|
a.loc_bad == b.loc_bad && a.role == b.role && a.osd_num < b.osd_num;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline bool operator == (const obj_piece_id_t & a, const obj_piece_id_t & b)
|
inline bool operator == (const obj_piece_id_t & a, const obj_piece_id_t & b)
|
||||||
|
|
|
@ -3,9 +3,9 @@
|
||||||
|
|
||||||
#include "pg_states.h"
|
#include "pg_states.h"
|
||||||
|
|
||||||
const int pg_state_bit_count = 14;
|
const int pg_state_bit_count = 15;
|
||||||
|
|
||||||
const int pg_state_bits[14] = {
|
const int pg_state_bits[15] = {
|
||||||
PG_STARTING,
|
PG_STARTING,
|
||||||
PG_PEERING,
|
PG_PEERING,
|
||||||
PG_INCOMPLETE,
|
PG_INCOMPLETE,
|
||||||
|
@ -14,6 +14,7 @@ const int pg_state_bits[14] = {
|
||||||
PG_STOPPING,
|
PG_STOPPING,
|
||||||
PG_OFFLINE,
|
PG_OFFLINE,
|
||||||
PG_DEGRADED,
|
PG_DEGRADED,
|
||||||
|
PG_HAS_CORRUPTED,
|
||||||
PG_HAS_INCOMPLETE,
|
PG_HAS_INCOMPLETE,
|
||||||
PG_HAS_DEGRADED,
|
PG_HAS_DEGRADED,
|
||||||
PG_HAS_MISPLACED,
|
PG_HAS_MISPLACED,
|
||||||
|
@ -22,7 +23,7 @@ const int pg_state_bits[14] = {
|
||||||
PG_LEFT_ON_DEAD,
|
PG_LEFT_ON_DEAD,
|
||||||
};
|
};
|
||||||
|
|
||||||
const char *pg_state_names[14] = {
|
const char *pg_state_names[15] = {
|
||||||
"starting",
|
"starting",
|
||||||
"peering",
|
"peering",
|
||||||
"incomplete",
|
"incomplete",
|
||||||
|
@ -31,6 +32,7 @@ const char *pg_state_names[14] = {
|
||||||
"stopping",
|
"stopping",
|
||||||
"offline",
|
"offline",
|
||||||
"degraded",
|
"degraded",
|
||||||
|
"has_corrupted",
|
||||||
"has_incomplete",
|
"has_incomplete",
|
||||||
"has_degraded",
|
"has_degraded",
|
||||||
"has_misplaced",
|
"has_misplaced",
|
||||||
|
|
|
@ -22,7 +22,8 @@
|
||||||
#define PG_HAS_MISPLACED (1<<10)
|
#define PG_HAS_MISPLACED (1<<10)
|
||||||
#define PG_HAS_UNCLEAN (1<<11)
|
#define PG_HAS_UNCLEAN (1<<11)
|
||||||
#define PG_HAS_INVALID (1<<12)
|
#define PG_HAS_INVALID (1<<12)
|
||||||
#define PG_LEFT_ON_DEAD (1<<13)
|
#define PG_HAS_CORRUPTED (1<<13)
|
||||||
|
#define PG_LEFT_ON_DEAD (1<<14)
|
||||||
|
|
||||||
// Lower bits that represent object role (EC 0/1/2... or always 0 with replication)
|
// Lower bits that represent object role (EC 0/1/2... or always 0 with replication)
|
||||||
// 12 bits is a safe default that doesn't depend on pg_stripe_size or pg_block_size
|
// 12 bits is a safe default that doesn't depend on pg_stripe_size or pg_block_size
|
||||||
|
@ -32,6 +33,8 @@
|
||||||
#define OBJ_DEGRADED 0x02
|
#define OBJ_DEGRADED 0x02
|
||||||
#define OBJ_INCOMPLETE 0x04
|
#define OBJ_INCOMPLETE 0x04
|
||||||
#define OBJ_MISPLACED 0x08
|
#define OBJ_MISPLACED 0x08
|
||||||
|
// OBJ_CORRUPTED is always set with one of OBJ_INCOMPLETE/OBJ_DEGRADED/OBJ_MISPLACED
|
||||||
|
#define OBJ_CORRUPTED 0x10
|
||||||
#define OBJ_NEEDS_STABLE 0x10000
|
#define OBJ_NEEDS_STABLE 0x10000
|
||||||
#define OBJ_NEEDS_ROLLBACK 0x20000
|
#define OBJ_NEEDS_ROLLBACK 0x20000
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue