2020-09-17 23:02:40 +03:00
|
|
|
// Copyright (c) Vitaliy Filippov, 2019+
|
2021-02-06 01:26:07 +03:00
|
|
|
// License: VNPL-1.1 (see README.md for details)
|
2020-09-17 23:02:40 +03:00
|
|
|
|
2020-01-24 02:23:27 +03:00
|
|
|
#include <map>
|
|
|
|
#include <vector>
|
|
|
|
#include <algorithm>
|
|
|
|
|
2020-03-04 17:12:27 +03:00
|
|
|
#include "cpp-btree/btree_map.h"
|
|
|
|
|
2020-01-24 02:23:27 +03:00
|
|
|
#include "object_id.h"
|
2020-02-03 12:35:02 +03:00
|
|
|
#include "osd_ops.h"
|
2020-05-21 21:00:54 +03:00
|
|
|
#include "pg_states.h"
|
2020-04-14 14:37:50 +03:00
|
|
|
|
2020-07-02 00:41:28 +03:00
|
|
|
// NOTE(review): not referenced anywhere in this part of the file; presumably the
// number of bits reserved for the PG epoch (see pg_t::epoch) - confirm against its users
#define PG_EPOCH_BITS 48
|
|
|
|
|
2020-01-24 02:23:27 +03:00
|
|
|
struct pg_obj_loc_t
|
|
|
|
{
|
|
|
|
uint64_t role;
|
2020-02-03 12:35:02 +03:00
|
|
|
osd_num_t osd_num;
|
2023-04-10 01:05:41 +03:00
|
|
|
uint32_t loc_bad; // LOC_OUTDATED / LOC_CORRUPTED / LOC_INCONSISTENT
|
2020-01-24 02:23:27 +03:00
|
|
|
};
|
|
|
|
|
|
|
|
// A set of object locations (one pg_obj_loc_t per entry); used as the key of pg_t::state_dict
using pg_osd_set_t = std::vector<pg_obj_loc_t>;
|
|
|
|
|
|
|
|
struct pg_osd_set_state_t
|
|
|
|
{
|
2020-02-11 02:30:46 +03:00
|
|
|
// (role -> osd_num_t) map, as in pg.target_set and pg.cur_set
|
2020-02-03 12:35:02 +03:00
|
|
|
std::vector<osd_num_t> read_target;
|
2020-02-11 02:30:46 +03:00
|
|
|
// full OSD set including additional OSDs where the object is misplaced
|
2020-01-24 02:23:27 +03:00
|
|
|
pg_osd_set_t osd_set;
|
|
|
|
uint64_t state = 0;
|
|
|
|
uint64_t object_count = 0;
|
2023-01-24 02:26:52 +03:00
|
|
|
uint64_t ref_count = 0;
|
2020-01-24 02:23:27 +03:00
|
|
|
};
|
|
|
|
|
|
|
|
struct pg_list_result_t
|
|
|
|
{
|
2020-02-24 01:01:34 +03:00
|
|
|
obj_ver_id *buf = NULL;
|
2020-01-24 02:23:27 +03:00
|
|
|
uint64_t total_count;
|
|
|
|
uint64_t stable_count;
|
|
|
|
};
|
|
|
|
|
2020-02-11 02:30:46 +03:00
|
|
|
// forward declaration: this file only stores pointers to osd_op_t (in maps), so the full definition is not needed here
struct osd_op_t;
|
|
|
|
|
2020-01-24 02:23:27 +03:00
|
|
|
struct pg_peering_state_t
|
|
|
|
{
|
|
|
|
// osd_num -> list result
|
2020-10-18 01:41:06 +03:00
|
|
|
std::map<osd_num_t, osd_op_t*> list_ops;
|
|
|
|
std::map<osd_num_t, pg_list_result_t> list_results;
|
2020-09-04 10:54:21 +03:00
|
|
|
pool_id_t pool_id = 0;
|
2020-04-19 00:20:18 +03:00
|
|
|
pg_num_t pg_num = 0;
|
2020-01-24 02:23:27 +03:00
|
|
|
};
|
|
|
|
|
2020-01-27 01:34:24 +03:00
|
|
|
struct obj_piece_id_t
|
|
|
|
{
|
|
|
|
object_id oid;
|
|
|
|
uint64_t osd_num;
|
|
|
|
};
|
|
|
|
|
2021-03-15 02:26:39 +03:00
|
|
|
struct obj_ver_osd_t
|
|
|
|
{
|
|
|
|
uint64_t osd_num;
|
|
|
|
object_id oid;
|
|
|
|
uint64_t version;
|
|
|
|
};
|
|
|
|
|
2020-03-13 21:41:54 +03:00
|
|
|
// Pending flush action for one object piece (mapped value of pg_t::flush_actions).
// Everything defaults to "nothing to do": flags are false, versions are 0.
struct flush_action_t
{
    // which actions are requested
    bool rollback = false;
    bool make_stable = false;

    // versions associated with the two actions above
    uint64_t stable_to = 0;
    uint64_t rollback_to = 0;

    // submission flag
    bool submitted = false;
};
|
|
|
|
|
2020-03-15 18:39:31 +03:00
|
|
|
struct pg_flush_batch_t
|
|
|
|
{
|
|
|
|
std::map<osd_num_t, std::vector<obj_ver_id>> rollback_lists;
|
|
|
|
std::map<osd_num_t, std::vector<obj_ver_id>> stable_lists;
|
|
|
|
int flush_ops = 0, flush_done = 0;
|
|
|
|
int flush_objects = 0;
|
|
|
|
};
|
2020-03-13 21:41:54 +03:00
|
|
|
|
2020-01-24 02:23:27 +03:00
|
|
|
struct pg_t
|
|
|
|
{
|
2020-04-27 14:32:59 +03:00
|
|
|
int state = 0;
|
2020-09-04 10:54:21 +03:00
|
|
|
uint64_t scheme = 0;
|
2021-03-07 14:29:29 +03:00
|
|
|
uint64_t pg_cursize = 0, pg_size = 0, pg_minsize = 0, pg_data_size = 0;
|
2020-09-04 10:54:21 +03:00
|
|
|
pool_id_t pool_id = 0;
|
|
|
|
pg_num_t pg_num = 0;
|
2020-02-26 18:32:00 +03:00
|
|
|
uint64_t clean_count = 0, total_count = 0;
|
2020-07-02 00:41:28 +03:00
|
|
|
// epoch number - should increase with each non-clean activation of the PG
|
|
|
|
uint64_t epoch = 0, reported_epoch = 0;
|
2020-05-04 01:32:24 +03:00
|
|
|
// target history and all potential peers
|
2020-04-19 00:20:18 +03:00
|
|
|
std::vector<std::vector<osd_num_t>> target_history;
|
2020-05-04 01:32:24 +03:00
|
|
|
std::vector<osd_num_t> all_peers;
|
2023-04-18 02:08:43 +03:00
|
|
|
// next scrub time
|
|
|
|
uint64_t next_scrub = 0;
|
2020-05-04 01:32:24 +03:00
|
|
|
bool history_changed = false;
|
|
|
|
// peer list from the last peering event
|
|
|
|
std::vector<osd_num_t> cur_peers;
|
2020-02-11 02:30:46 +03:00
|
|
|
// target_set is the "correct" peer OSD set for this PG
|
2020-02-03 12:35:02 +03:00
|
|
|
std::vector<osd_num_t> target_set;
|
2020-02-11 02:30:46 +03:00
|
|
|
// cur_set is the current set of connected peer OSDs for this PG
|
|
|
|
// cur_set = (role => osd_num or UINT64_MAX if missing). role numbers begin with zero
|
|
|
|
std::vector<osd_num_t> cur_set;
|
2020-05-05 00:16:01 +03:00
|
|
|
// same thing in state_dict-like format
|
|
|
|
pg_osd_set_t cur_loc_set;
|
2020-12-28 02:07:53 +03:00
|
|
|
// moved object map. by default, each object is considered to reside on cur_set.
|
2020-01-24 02:23:27 +03:00
|
|
|
// this map stores all objects that differ.
|
2020-01-27 01:34:24 +03:00
|
|
|
// it may consume up to ~ (raw storage / object size) * 24 bytes in the worst case scenario
|
2020-01-24 02:23:27 +03:00
|
|
|
// which is up to ~192 MB per 1 TB in the worst case scenario
|
|
|
|
std::map<pg_osd_set_t, pg_osd_set_state_t> state_dict;
|
2023-01-21 01:35:31 +03:00
|
|
|
uint64_t corrupted_count;
|
2023-04-10 01:05:41 +03:00
|
|
|
btree::btree_map<object_id, pg_osd_set_state_t*> inconsistent_objects, incomplete_objects, misplaced_objects, degraded_objects;
|
2020-03-13 21:41:54 +03:00
|
|
|
std::map<obj_piece_id_t, flush_action_t> flush_actions;
|
2021-03-15 02:26:39 +03:00
|
|
|
std::vector<obj_ver_osd_t> copies_to_delete_after_sync;
|
2020-03-04 17:12:27 +03:00
|
|
|
btree::btree_map<object_id, uint64_t> ver_override;
|
2020-01-24 02:23:27 +03:00
|
|
|
pg_peering_state_t *peering_state = NULL;
|
2020-03-13 21:41:54 +03:00
|
|
|
pg_flush_batch_t *flush_batch = NULL;
|
2020-01-24 02:23:27 +03:00
|
|
|
|
2020-04-03 13:03:42 +03:00
|
|
|
int inflight = 0; // including write_queue
|
2020-02-25 01:20:45 +03:00
|
|
|
std::multimap<object_id, osd_op_t*> write_queue;
|
|
|
|
|
2023-01-24 02:26:52 +03:00
|
|
|
pg_osd_set_state_t* add_object_to_state(const object_id oid, const uint64_t state, const pg_osd_set_t & osd_set);
|
2020-04-27 14:32:59 +03:00
|
|
|
void calc_object_states(int log_level);
|
2020-03-14 22:19:45 +03:00
|
|
|
void print_state();
|
2020-01-24 02:23:27 +03:00
|
|
|
};
|
|
|
|
|
|
|
|
inline bool operator < (const pg_obj_loc_t &a, const pg_obj_loc_t &b)
|
|
|
|
{
|
2023-01-21 01:35:31 +03:00
|
|
|
return a.loc_bad < b.loc_bad ||
|
|
|
|
a.loc_bad == b.loc_bad && a.role < b.role ||
|
|
|
|
a.loc_bad == b.loc_bad && a.role == b.role && a.osd_num < b.osd_num;
|
2020-01-24 02:23:27 +03:00
|
|
|
}
|
|
|
|
|
2020-01-27 01:34:24 +03:00
|
|
|
inline bool operator == (const obj_piece_id_t & a, const obj_piece_id_t & b)
|
|
|
|
{
|
|
|
|
return a.oid == b.oid && a.osd_num == b.osd_num;
|
|
|
|
}
|
|
|
|
|
|
|
|
inline bool operator < (const obj_piece_id_t & a, const obj_piece_id_t & b)
|
|
|
|
{
|
|
|
|
return a.oid < b.oid || a.oid == b.oid && a.osd_num < b.osd_num;
|
|
|
|
}
|
|
|
|
|
2020-01-24 02:23:27 +03:00
|
|
|
namespace std
|
|
|
|
{
|
|
|
|
template<> struct hash<pg_osd_set_t>
|
|
|
|
{
|
|
|
|
inline size_t operator()(const pg_osd_set_t &s) const
|
|
|
|
{
|
|
|
|
size_t seed = 0;
|
|
|
|
for (auto e: s)
|
|
|
|
{
|
|
|
|
// Copy-pasted from spp::hash_combine()
|
|
|
|
seed ^= (e.role + 0xc6a4a7935bd1e995 + (seed << 6) + (seed >> 2));
|
|
|
|
seed ^= (e.osd_num + 0xc6a4a7935bd1e995 + (seed << 6) + (seed >> 2));
|
|
|
|
}
|
|
|
|
return seed;
|
|
|
|
}
|
|
|
|
};
|
2020-01-27 01:34:24 +03:00
|
|
|
|
|
|
|
template<> struct hash<obj_piece_id_t>
|
|
|
|
{
|
|
|
|
inline size_t operator()(const obj_piece_id_t &s) const
|
|
|
|
{
|
|
|
|
size_t seed = std::hash<object_id>()(s.oid);
|
|
|
|
// Copy-pasted from spp::hash_combine()
|
|
|
|
seed ^= (s.osd_num + 0xc6a4a7935bd1e995 + (seed << 6) + (seed >> 2));
|
|
|
|
return seed;
|
|
|
|
}
|
|
|
|
};
|
2020-01-24 02:23:27 +03:00
|
|
|
}
|