2020-09-17 23:02:40 +03:00
|
|
|
// Copyright (c) Vitaliy Filippov, 2019+
|
2021-02-06 01:26:07 +03:00
|
|
|
// License: VNPL-1.1 or GNU GPL-2.0+ (see README.md for details)
|
2020-09-17 23:02:40 +03:00
|
|
|
|
2020-06-06 01:39:58 +03:00
|
|
|
#pragma once
|
|
|
|
|
|
|
|
#include "messenger.h"
|
|
|
|
#include "etcd_state_client.h"
|
|
|
|
|
2021-04-06 01:57:23 +03:00
|
|
|
// Default values for the client limits.
// NOTE(review): presumably these are the fallbacks for the cluster_client_t::client_max_*
// settings - confirm against the config parsing code.
// Compound expressions are parenthesized so that the macros expand correctly
// inside larger expressions (for example `x / DEFAULT_CLIENT_MAX_DIRTY_BYTES`).
#define DEFAULT_CLIENT_MAX_DIRTY_BYTES (32*1024*1024)
#define DEFAULT_CLIENT_MAX_DIRTY_OPS 1024
#define DEFAULT_CLIENT_MAX_BUFFERED_BYTES (32*1024*1024)
#define DEFAULT_CLIENT_MAX_BUFFERED_OPS 1024
#define DEFAULT_CLIENT_MAX_WRITEBACK_IODEPTH 256

// NOTE(review): presumably bit flags for the `status` argument of the
// list_inode_start() callback - confirm against the listing implementation.
#define INODE_LIST_DONE 1
#define INODE_LIST_HAS_UNSTABLE 2

// Additional values accepted in cluster_op_t::opcode
#define OSD_OP_READ_BITMAP OSD_OP_SEC_READ_BMP
#define OSD_OP_READ_CHAIN_BITMAP 0x102

// Value for cluster_op_t::flags
#define OSD_OP_IGNORE_READONLY 0x08
|
|
|
|
|
2020-06-06 01:39:58 +03:00
|
|
|
struct cluster_op_t;
|
|
|
|
|
|
|
|
struct cluster_op_part_t
|
|
|
|
{
|
|
|
|
cluster_op_t *parent;
|
|
|
|
uint64_t offset;
|
|
|
|
uint32_t len;
|
|
|
|
pg_num_t pg_num;
|
|
|
|
osd_num_t osd_num;
|
2020-06-24 01:31:48 +03:00
|
|
|
osd_op_buf_list_t iov;
|
2021-04-06 01:57:23 +03:00
|
|
|
unsigned flags;
|
2020-06-06 01:39:58 +03:00
|
|
|
osd_op_t op;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct cluster_op_t
|
|
|
|
{
|
2023-01-06 17:33:49 +03:00
|
|
|
uint64_t opcode; // OSD_OP_READ, OSD_OP_WRITE, OSD_OP_SYNC, OSD_OP_DELETE, OSD_OP_READ_BITMAP, OSD_OP_READ_CHAIN_BITMAP
|
2020-06-06 01:39:58 +03:00
|
|
|
uint64_t inode;
|
|
|
|
uint64_t offset;
|
|
|
|
uint64_t len;
|
2021-06-14 22:42:49 +03:00
|
|
|
// for reads and writes within a single object (stripe),
|
|
|
|
// reads can return current version and writes can use "CAS" semantics
|
|
|
|
uint64_t version = 0;
|
2021-09-26 11:32:42 +03:00
|
|
|
// now only OSD_OP_IGNORE_READONLY is supported
|
|
|
|
uint64_t flags = 0;
|
2023-01-06 17:33:49 +03:00
|
|
|
// negative retval is an error number
|
|
|
|
// write and read return len on success
|
|
|
|
// sync and delete return 0 on success
|
|
|
|
// read_bitmap and read_chain_bitmap return the length of bitmap in bits(!)
|
2020-06-06 01:39:58 +03:00
|
|
|
int retval;
|
2020-06-24 01:31:48 +03:00
|
|
|
osd_op_buf_list_t iov;
|
2023-01-06 17:33:49 +03:00
|
|
|
// READ, READ_BITMAP, READ_CHAIN_BITMAP return the bitmap here
|
2021-07-28 02:13:48 +03:00
|
|
|
void *bitmap_buf = NULL;
|
2020-06-06 01:39:58 +03:00
|
|
|
std::function<void(cluster_op_t*)> callback;
|
2021-01-30 01:35:58 +03:00
|
|
|
~cluster_op_t();
|
2020-06-06 01:39:58 +03:00
|
|
|
protected:
|
2021-04-03 14:51:52 +03:00
|
|
|
int state = 0;
|
2021-01-30 01:35:58 +03:00
|
|
|
uint64_t cur_inode; // for snapshot reads
|
2020-06-24 01:31:48 +03:00
|
|
|
void *buf = NULL;
|
2020-06-14 23:57:58 +03:00
|
|
|
cluster_op_t *orig_op = NULL;
|
2020-06-06 01:39:58 +03:00
|
|
|
bool needs_reslice = false;
|
2020-09-05 22:05:21 +03:00
|
|
|
bool up_wait = false;
|
2021-04-06 01:57:23 +03:00
|
|
|
int inflight_count = 0, done_count = 0;
|
2020-06-06 01:39:58 +03:00
|
|
|
std::vector<cluster_op_part_t> parts;
|
2021-07-28 02:13:48 +03:00
|
|
|
void *part_bitmaps = NULL;
|
2021-01-30 01:35:58 +03:00
|
|
|
unsigned bitmap_buf_size = 0;
|
2021-04-28 01:39:27 +03:00
|
|
|
cluster_op_t *prev = NULL, *next = NULL;
|
2021-04-28 02:46:17 +03:00
|
|
|
int prev_wait = 0;
|
2020-06-06 01:39:58 +03:00
|
|
|
friend class cluster_client_t;
|
2023-08-12 20:26:40 +03:00
|
|
|
friend class writeback_cache_t;
|
2021-04-03 14:51:52 +03:00
|
|
|
};
|
|
|
|
|
2021-07-10 21:47:23 +03:00
|
|
|
struct inode_list_t;
|
|
|
|
struct inode_list_osd_t;
|
2023-08-12 20:26:40 +03:00
|
|
|
class writeback_cache_t;
|
2021-07-10 21:47:23 +03:00
|
|
|
|
2021-04-03 14:51:52 +03:00
|
|
|
// FIXME: Split into public and private interfaces
|
2020-06-06 01:39:58 +03:00
|
|
|
class cluster_client_t
|
|
|
|
{
|
|
|
|
timerfd_manager_t *tfd;
|
|
|
|
ring_loop_t *ringloop;
|
|
|
|
|
2020-09-03 00:52:41 +03:00
|
|
|
std::map<pool_id_t, uint64_t> pg_counts;
|
2023-08-12 20:26:40 +03:00
|
|
|
// client_max_dirty_* is actually "max unsynced", for the case when immediate_commit is off
|
2021-04-06 01:57:23 +03:00
|
|
|
uint64_t client_max_dirty_bytes = 0;
|
|
|
|
uint64_t client_max_dirty_ops = 0;
|
2023-08-12 20:26:40 +03:00
|
|
|
// writeback improves (1) small consecutive writes and (2) Q1 writes without fsync
|
|
|
|
bool enable_writeback = false;
|
|
|
|
// client_max_buffered_* is the real "dirty limit" - maximum amount of writes buffered in memory
|
|
|
|
uint64_t client_max_buffered_bytes = 0;
|
|
|
|
uint64_t client_max_buffered_ops = 0;
|
|
|
|
uint64_t client_max_writeback_iodepth = 0;
|
|
|
|
|
2020-06-06 01:39:58 +03:00
|
|
|
int log_level;
|
2020-06-14 23:57:58 +03:00
|
|
|
int up_wait_retry_interval = 500; // ms
|
2020-06-06 01:39:58 +03:00
|
|
|
|
2020-06-14 23:57:58 +03:00
|
|
|
int retry_timeout_id = 0;
|
|
|
|
std::vector<cluster_op_t*> offline_ops;
|
2021-04-28 01:39:27 +03:00
|
|
|
cluster_op_t *op_queue_head = NULL, *op_queue_tail = NULL;
|
2023-08-12 20:26:40 +03:00
|
|
|
writeback_cache_t *wb = NULL;
|
2021-04-03 14:51:52 +03:00
|
|
|
std::set<osd_num_t> dirty_osds;
|
2021-04-06 01:57:23 +03:00
|
|
|
uint64_t dirty_bytes = 0, dirty_ops = 0;
|
2020-06-06 01:39:58 +03:00
|
|
|
|
2021-01-30 01:35:58 +03:00
|
|
|
void *scrap_buffer = NULL;
|
|
|
|
unsigned scrap_buffer_size = 0;
|
2021-01-13 00:19:04 +03:00
|
|
|
|
2020-10-10 00:56:47 +03:00
|
|
|
bool pgs_loaded = false;
|
2021-04-03 14:51:52 +03:00
|
|
|
ring_consumer_t consumer;
|
2020-10-10 00:56:47 +03:00
|
|
|
std::vector<std::function<void(void)>> on_ready_hooks;
|
2021-07-10 21:47:23 +03:00
|
|
|
std::vector<inode_list_t*> lists;
|
2023-04-18 01:51:18 +03:00
|
|
|
std::multimap<osd_num_t, osd_op_t*> raw_ops;
|
2021-04-08 10:52:21 +03:00
|
|
|
int continuing_ops = 0;
|
2023-02-18 02:27:08 +03:00
|
|
|
bool msgr_initialized = false;
|
2020-10-10 00:56:47 +03:00
|
|
|
|
2020-06-06 01:39:58 +03:00
|
|
|
public:
|
2020-10-10 00:56:47 +03:00
|
|
|
etcd_state_client_t st_cli;
|
2023-02-18 02:27:08 +03:00
|
|
|
|
2020-10-10 00:56:47 +03:00
|
|
|
osd_messenger_t msgr;
|
2023-02-18 02:27:08 +03:00
|
|
|
void init_msgr();
|
|
|
|
|
2023-03-26 17:50:35 +03:00
|
|
|
json11::Json::object cli_config, file_config, etcd_global_config;
|
|
|
|
json11::Json::object config;
|
2020-10-10 00:56:47 +03:00
|
|
|
|
2023-10-01 12:43:31 +03:00
|
|
|
cluster_client_t(ring_loop_t *ringloop, timerfd_manager_t *tfd, json11::Json config);
|
2020-06-23 20:10:33 +03:00
|
|
|
~cluster_client_t();
|
2020-06-06 01:39:58 +03:00
|
|
|
void execute(cluster_op_t *op);
|
2023-04-18 01:51:18 +03:00
|
|
|
void execute_raw(osd_num_t osd_num, osd_op_t *op);
|
2021-04-03 01:54:52 +03:00
|
|
|
bool is_ready();
|
2020-10-10 00:56:47 +03:00
|
|
|
void on_ready(std::function<void(void)> fn);
|
2023-08-12 20:26:40 +03:00
|
|
|
bool flush();
|
2020-06-06 01:39:58 +03:00
|
|
|
|
2022-08-09 02:27:02 +03:00
|
|
|
bool get_immediate_commit(uint64_t inode);
|
2022-02-12 01:30:50 +03:00
|
|
|
|
2020-09-05 22:05:21 +03:00
|
|
|
void continue_ops(bool up_retry = false);
|
2021-07-10 21:47:23 +03:00
|
|
|
inode_list_t *list_inode_start(inode_t inode,
|
2021-07-28 02:13:48 +03:00
|
|
|
std::function<void(inode_list_t* lst, std::set<object_id>&& objects, pg_num_t pg_num, osd_num_t primary_osd, int status)> callback);
|
2021-07-10 21:47:23 +03:00
|
|
|
int list_pg_count(inode_list_t *lst);
|
2023-06-30 00:05:04 +03:00
|
|
|
const std::vector<osd_num_t> & list_inode_get_inactive_osds(inode_list_t *lst);
|
2021-07-10 21:47:23 +03:00
|
|
|
void list_inode_next(inode_list_t *lst, int next_pgs);
|
2022-08-09 02:27:02 +03:00
|
|
|
//inline uint32_t get_bs_bitmap_granularity() { return st_cli.global_bitmap_granularity; }
|
|
|
|
//inline uint64_t get_bs_block_size() { return st_cli.global_block_size; }
|
2021-07-10 21:47:23 +03:00
|
|
|
uint64_t next_op_id();
|
|
|
|
|
2021-04-03 14:51:52 +03:00
|
|
|
protected:
|
|
|
|
bool affects_osd(uint64_t inode, uint64_t offset, uint64_t len, osd_num_t osd);
|
2020-06-14 23:57:58 +03:00
|
|
|
void on_load_config_hook(json11::Json::object & config);
|
2020-06-06 01:39:58 +03:00
|
|
|
void on_load_pgs_hook(bool success);
|
2021-03-23 00:09:00 +03:00
|
|
|
void on_change_hook(std::map<std::string, etcd_kv_t> & changes);
|
2020-06-06 01:39:58 +03:00
|
|
|
void on_change_osd_state_hook(uint64_t peer_osd);
|
2023-08-12 20:26:40 +03:00
|
|
|
void execute_internal(cluster_op_t *op);
|
|
|
|
void unshift_op(cluster_op_t *op);
|
2021-04-03 14:51:52 +03:00
|
|
|
int continue_rw(cluster_op_t *op);
|
2023-08-12 11:12:14 +03:00
|
|
|
bool check_rw(cluster_op_t *op);
|
2020-06-14 23:57:58 +03:00
|
|
|
void slice_rw(cluster_op_t *op);
|
2021-04-03 14:51:52 +03:00
|
|
|
bool try_send(cluster_op_t *op, int i);
|
|
|
|
int continue_sync(cluster_op_t *op);
|
2020-06-14 23:57:58 +03:00
|
|
|
void send_sync(cluster_op_t *op, cluster_op_part_t *part);
|
2020-06-06 01:39:58 +03:00
|
|
|
void handle_op_part(cluster_op_part_t *part);
|
2021-01-30 01:35:58 +03:00
|
|
|
void copy_part_bitmap(cluster_op_t *op, cluster_op_part_t *part);
|
2021-04-28 01:39:27 +03:00
|
|
|
void erase_op(cluster_op_t *op);
|
2021-04-28 02:46:17 +03:00
|
|
|
void calc_wait(cluster_op_t *op);
|
|
|
|
void inc_wait(uint64_t opcode, uint64_t flags, cluster_op_t *next, int inc);
|
2021-07-10 21:47:23 +03:00
|
|
|
void continue_lists();
|
|
|
|
void continue_listing(inode_list_t *lst);
|
|
|
|
void send_list(inode_list_osd_t *cur_list);
|
2023-04-18 01:51:18 +03:00
|
|
|
void continue_raw_ops(osd_num_t peer_osd);
|
2023-08-12 20:26:40 +03:00
|
|
|
|
|
|
|
friend class writeback_cache_t;
|
2020-06-06 01:39:58 +03:00
|
|
|
};
|