Add consul & connect timeouts, report state before loading PGs, move init_primary to osd_cluster
parent
663153713b
commit
ff38b464a5
9
osd.cpp
9
osd.cpp
|
@ -44,9 +44,6 @@ osd_t::osd_t(blockstore_config_t & config, blockstore_t *bs, ring_loop_t *ringlo
|
||||||
|
|
||||||
this->tfd = new timerfd_manager_t(ringloop);
|
this->tfd = new timerfd_manager_t(ringloop);
|
||||||
|
|
||||||
if (run_primary)
|
|
||||||
init_primary();
|
|
||||||
|
|
||||||
init_cluster();
|
init_cluster();
|
||||||
|
|
||||||
consumer.loop = [this]() { loop(); };
|
consumer.loop = [this]() { loop(); };
|
||||||
|
@ -141,6 +138,12 @@ void osd_t::parse_config(blockstore_config_t & config)
|
||||||
peer_connect_interval = strtoull(config["peer_connect_interval"].c_str(), NULL, 10);
|
peer_connect_interval = strtoull(config["peer_connect_interval"].c_str(), NULL, 10);
|
||||||
if (!peer_connect_interval)
|
if (!peer_connect_interval)
|
||||||
peer_connect_interval = 5;
|
peer_connect_interval = 5;
|
||||||
|
http_request_timeout = strtoull(config["http_request_timeout"].c_str(), NULL, 10);
|
||||||
|
if (!http_request_timeout)
|
||||||
|
http_request_timeout = 5;
|
||||||
|
peer_connect_timeout = strtoull(config["peer_connect_timeout"].c_str(), NULL, 10);
|
||||||
|
if (!peer_connect_timeout)
|
||||||
|
peer_connect_timeout = 5;
|
||||||
}
|
}
|
||||||
|
|
||||||
void osd_t::bind_socket()
|
void osd_t::bind_socket()
|
||||||
|
|
3
osd.h
3
osd.h
|
@ -136,6 +136,7 @@ struct osd_client_t
|
||||||
int peer_fd;
|
int peer_fd;
|
||||||
int peer_state;
|
int peer_state;
|
||||||
std::function<void(osd_num_t, int)> connect_callback;
|
std::function<void(osd_num_t, int)> connect_callback;
|
||||||
|
int connect_timeout_id = -1;
|
||||||
osd_num_t osd_num = 0;
|
osd_num_t osd_num = 0;
|
||||||
|
|
||||||
void *in_buf = NULL;
|
void *in_buf = NULL;
|
||||||
|
@ -211,6 +212,8 @@ class osd_t
|
||||||
int autosync_interval = DEFAULT_AUTOSYNC_INTERVAL; // sync every 5 seconds
|
int autosync_interval = DEFAULT_AUTOSYNC_INTERVAL; // sync every 5 seconds
|
||||||
int recovery_queue_depth = DEFAULT_RECOVERY_QUEUE;
|
int recovery_queue_depth = DEFAULT_RECOVERY_QUEUE;
|
||||||
int peer_connect_interval = 5;
|
int peer_connect_interval = 5;
|
||||||
|
int http_request_timeout = 5;
|
||||||
|
int peer_connect_timeout = 5;
|
||||||
|
|
||||||
// peer OSDs
|
// peer OSDs
|
||||||
|
|
||||||
|
|
105
osd_cluster.cpp
105
osd_cluster.cpp
|
@ -4,10 +4,17 @@
|
||||||
|
|
||||||
void osd_t::init_cluster()
|
void osd_t::init_cluster()
|
||||||
{
|
{
|
||||||
|
if (run_primary)
|
||||||
|
{
|
||||||
|
init_primary();
|
||||||
|
}
|
||||||
if (consul_address != "")
|
if (consul_address != "")
|
||||||
{
|
{
|
||||||
|
if (!run_primary)
|
||||||
|
{
|
||||||
|
report_status();
|
||||||
|
}
|
||||||
printf("OSD %lu reporting to Consul at %s each %d seconds\n", osd_num, consul_address.c_str(), consul_report_interval);
|
printf("OSD %lu reporting to Consul at %s each %d seconds\n", osd_num, consul_address.c_str(), consul_report_interval);
|
||||||
report_status();
|
|
||||||
this->consul_tfd = new timerfd_interval(ringloop, consul_report_interval, [this]()
|
this->consul_tfd = new timerfd_interval(ringloop, consul_report_interval, [this]()
|
||||||
{
|
{
|
||||||
report_status();
|
report_status();
|
||||||
|
@ -15,6 +22,74 @@ void osd_t::init_cluster()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void osd_t::init_primary()
|
||||||
|
{
|
||||||
|
if (consul_address == "")
|
||||||
|
{
|
||||||
|
// Test version of clustering code with 1 PG and 2 peers
|
||||||
|
// Example: peers = 2:127.0.0.1:11204,3:127.0.0.1:11205
|
||||||
|
std::string peerstr = config["peers"];
|
||||||
|
while (peerstr.size())
|
||||||
|
{
|
||||||
|
int pos = peerstr.find(',');
|
||||||
|
parse_test_peer(pos < 0 ? peerstr : peerstr.substr(0, pos));
|
||||||
|
peerstr = pos < 0 ? std::string("") : peerstr.substr(pos+1);
|
||||||
|
}
|
||||||
|
if (peer_states.size() < 2)
|
||||||
|
{
|
||||||
|
throw std::runtime_error("run_primary requires at least 2 peers");
|
||||||
|
}
|
||||||
|
pgs[1] = (pg_t){
|
||||||
|
.state = PG_PEERING,
|
||||||
|
.pg_cursize = 0,
|
||||||
|
.pg_num = 1,
|
||||||
|
.target_set = { 1, 2, 3 },
|
||||||
|
.cur_set = { 0, 0, 0 },
|
||||||
|
};
|
||||||
|
pgs[1].print_state();
|
||||||
|
pg_count = 1;
|
||||||
|
peering_state = OSD_CONNECTING_PEERS;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
peering_state = OSD_LOADING_PGS;
|
||||||
|
load_pgs();
|
||||||
|
}
|
||||||
|
if (autosync_interval > 0)
|
||||||
|
{
|
||||||
|
this->sync_tfd = new timerfd_interval(ringloop, autosync_interval, [this]()
|
||||||
|
{
|
||||||
|
autosync();
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void osd_t::parse_test_peer(std::string peer)
|
||||||
|
{
|
||||||
|
// OSD_NUM:IP:PORT
|
||||||
|
int pos1 = peer.find(':');
|
||||||
|
int pos2 = peer.find(':', pos1+1);
|
||||||
|
if (pos1 < 0 || pos2 < 0)
|
||||||
|
throw new std::runtime_error("OSD peer string must be in the form OSD_NUM:IP:PORT");
|
||||||
|
std::string addr = peer.substr(pos1+1, pos2-pos1-1);
|
||||||
|
std::string osd_num_str = peer.substr(0, pos1);
|
||||||
|
std::string port_str = peer.substr(pos2+1);
|
||||||
|
osd_num_t osd_num = strtoull(osd_num_str.c_str(), NULL, 10);
|
||||||
|
if (!osd_num)
|
||||||
|
throw new std::runtime_error("Could not parse OSD peer osd_num");
|
||||||
|
else if (peer_states.find(osd_num) != peer_states.end())
|
||||||
|
throw std::runtime_error("Same osd number "+std::to_string(osd_num)+" specified twice in peers");
|
||||||
|
int port = strtoull(port_str.c_str(), NULL, 10);
|
||||||
|
if (!port)
|
||||||
|
throw new std::runtime_error("Could not parse OSD peer port");
|
||||||
|
peer_states[osd_num] = json11::Json::object {
|
||||||
|
{ "state", "up" },
|
||||||
|
{ "addresses", json11::Json::array { addr } },
|
||||||
|
{ "port", port },
|
||||||
|
};
|
||||||
|
wanted_peers[osd_num] = { 0 };
|
||||||
|
}
|
||||||
|
|
||||||
json11::Json osd_t::get_status()
|
json11::Json osd_t::get_status()
|
||||||
{
|
{
|
||||||
json11::Json::object st;
|
json11::Json::object st;
|
||||||
|
@ -142,21 +217,6 @@ void osd_t::consul_txn(json11::Json txn, std::function<void(std::string, json11:
|
||||||
http_request_json(consul_address, req, callback);
|
http_request_json(consul_address, req, callback);
|
||||||
}
|
}
|
||||||
|
|
||||||
uint64_t stoull_full(std::string str, int base = 10)
|
|
||||||
{
|
|
||||||
if (isspace(str[0]))
|
|
||||||
{
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
size_t end = -1;
|
|
||||||
uint64_t r = std::stoull(str, &end, base);
|
|
||||||
if (end < str.length())
|
|
||||||
{
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
return r;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Start -> Load PGs -> Load peers -> Connect to peers -> Peer PGs
|
// Start -> Load PGs -> Load peers -> Connect to peers -> Peer PGs
|
||||||
// Wait for PG changes -> Start/Stop PGs when requested
|
// Wait for PG changes -> Start/Stop PGs when requested
|
||||||
// Peer connection is lost -> Reload connection data -> Try to reconnect -> Repeat
|
// Peer connection is lost -> Reload connection data -> Try to reconnect -> Repeat
|
||||||
|
@ -164,6 +224,14 @@ void osd_t::load_pgs()
|
||||||
{
|
{
|
||||||
assert(this->pgs.size() == 0);
|
assert(this->pgs.size() == 0);
|
||||||
json11::Json::array txn = {
|
json11::Json::array txn = {
|
||||||
|
// Update OSD state when loading PGs to allow "monitors" do CAS transactions when moving PGs
|
||||||
|
json11::Json::object {
|
||||||
|
{ "KV", json11::Json::object {
|
||||||
|
{ "Verb", "set" },
|
||||||
|
{ "Key", consul_prefix+"/osd/state/"+std::to_string(osd_num)+"." },
|
||||||
|
{ "Value", base64_encode(get_status().dump()) },
|
||||||
|
} }
|
||||||
|
},
|
||||||
json11::Json::object {
|
json11::Json::object {
|
||||||
{ "KV", json11::Json::object {
|
{ "KV", json11::Json::object {
|
||||||
{ "Verb", "get" },
|
{ "Verb", "get" },
|
||||||
|
@ -188,6 +256,7 @@ void osd_t::load_pgs()
|
||||||
});
|
});
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
peering_state &= ~OSD_LOADING_PGS;
|
||||||
json11::Json pg_config;
|
json11::Json pg_config;
|
||||||
std::map<pg_num_t, json11::Json> pg_history;
|
std::map<pg_num_t, json11::Json> pg_history;
|
||||||
for (auto & res: data["Results"].array_items())
|
for (auto & res: data["Results"].array_items())
|
||||||
|
@ -203,10 +272,10 @@ void osd_t::load_pgs()
|
||||||
{
|
{
|
||||||
pg_config = value;
|
pg_config = value;
|
||||||
}
|
}
|
||||||
else
|
else if (key.substr(0, consul_prefix.length()+12) == consul_prefix+"/pg/history/")
|
||||||
{
|
{
|
||||||
// <consul_prefix>/pg/history/%d.
|
// <consul_prefix>/pg/history/%d.
|
||||||
pg_num_t pg_num = stoull_full(key.substr(consul_prefix.length()+13, key.length()-consul_prefix.length()-14));
|
pg_num_t pg_num = stoull_full(key.substr(consul_prefix.length()+12, key.length()-consul_prefix.length()-13));
|
||||||
if (pg_num)
|
if (pg_num)
|
||||||
{
|
{
|
||||||
pg_history[pg_num] = value;
|
pg_history[pg_num] = value;
|
||||||
|
|
36
osd_http.cpp
36
osd_http.cpp
|
@ -25,7 +25,7 @@ static int extract_port(std::string & host)
|
||||||
return port;
|
return port;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<std::string> getifaddr_list()
|
std::vector<std::string> getifaddr_list(bool include_v6)
|
||||||
{
|
{
|
||||||
std::vector<std::string> addresses;
|
std::vector<std::string> addresses;
|
||||||
ifaddrs *list, *ifa;
|
ifaddrs *list, *ifa;
|
||||||
|
@ -40,7 +40,7 @@ std::vector<std::string> getifaddr_list()
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
int family = ifa->ifa_addr->sa_family;
|
int family = ifa->ifa_addr->sa_family;
|
||||||
if ((family == AF_INET || family == AF_INET6) &&
|
if ((family == AF_INET || family == AF_INET6 && include_v6) &&
|
||||||
(ifa->ifa_flags & (IFF_UP | IFF_RUNNING | IFF_LOOPBACK)) == (IFF_UP | IFF_RUNNING))
|
(ifa->ifa_flags & (IFF_UP | IFF_RUNNING | IFF_LOOPBACK)) == (IFF_UP | IFF_RUNNING))
|
||||||
{
|
{
|
||||||
void *addr_ptr;
|
void *addr_ptr;
|
||||||
|
@ -70,6 +70,7 @@ struct http_co_t
|
||||||
|
|
||||||
int st = 0;
|
int st = 0;
|
||||||
int peer_fd = -1;
|
int peer_fd = -1;
|
||||||
|
int timeout_id = -1;
|
||||||
int epoll_events = 0;
|
int epoll_events = 0;
|
||||||
int code = 0;
|
int code = 0;
|
||||||
int sent = 0, received = 0;
|
int sent = 0, received = 0;
|
||||||
|
@ -181,6 +182,11 @@ http_response_t *parse_http_response(std::string res)
|
||||||
|
|
||||||
http_co_t::~http_co_t()
|
http_co_t::~http_co_t()
|
||||||
{
|
{
|
||||||
|
if (timeout_id >= 0)
|
||||||
|
{
|
||||||
|
osd->tfd->clear_timer(timeout_id);
|
||||||
|
timeout_id = -1;
|
||||||
|
}
|
||||||
callback(code, response);
|
callback(code, response);
|
||||||
if (peer_fd >= 0)
|
if (peer_fd >= 0)
|
||||||
{
|
{
|
||||||
|
@ -214,6 +220,17 @@ void http_co_t::resume()
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
fcntl(peer_fd, F_SETFL, fcntl(peer_fd, F_GETFL, 0) | O_NONBLOCK);
|
fcntl(peer_fd, F_SETFL, fcntl(peer_fd, F_GETFL, 0) | O_NONBLOCK);
|
||||||
|
if (osd->http_request_timeout > 0)
|
||||||
|
{
|
||||||
|
timeout_id = osd->tfd->set_timer(1000*osd->http_request_timeout, false, [this](int timer_id)
|
||||||
|
{
|
||||||
|
if (response.length() == 0)
|
||||||
|
{
|
||||||
|
code = EIO;
|
||||||
|
}
|
||||||
|
delete this;
|
||||||
|
});
|
||||||
|
}
|
||||||
r = connect(peer_fd, (sockaddr*)&addr, sizeof(addr));
|
r = connect(peer_fd, (sockaddr*)&addr, sizeof(addr));
|
||||||
if (r < 0 && errno != EINPROGRESS)
|
if (r < 0 && errno != EINPROGRESS)
|
||||||
{
|
{
|
||||||
|
@ -366,3 +383,18 @@ void http_co_t::resume()
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
uint64_t stoull_full(std::string str, int base)
|
||||||
|
{
|
||||||
|
if (isspace(str[0]))
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
size_t end = -1;
|
||||||
|
uint64_t r = std::stoull(str, &end, base);
|
||||||
|
if (end < str.length())
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
|
@ -13,4 +13,5 @@ struct http_response_t
|
||||||
};
|
};
|
||||||
|
|
||||||
http_response_t *parse_http_response(std::string res);
|
http_response_t *parse_http_response(std::string res);
|
||||||
std::vector<std::string> getifaddr_list();
|
std::vector<std::string> getifaddr_list(bool include_v6 = false);
|
||||||
|
uint64_t stoull_full(std::string str, int base = 10);
|
||||||
|
|
|
@ -6,74 +6,6 @@
|
||||||
#include "base64.h"
|
#include "base64.h"
|
||||||
#include "osd.h"
|
#include "osd.h"
|
||||||
|
|
||||||
void osd_t::init_primary()
|
|
||||||
{
|
|
||||||
if (consul_address == "")
|
|
||||||
{
|
|
||||||
// Test version of clustering code with 1 PG and 2 peers
|
|
||||||
// Example: peers = 2:127.0.0.1:11204,3:127.0.0.1:11205
|
|
||||||
std::string peerstr = config["peers"];
|
|
||||||
while (peerstr.size())
|
|
||||||
{
|
|
||||||
int pos = peerstr.find(',');
|
|
||||||
parse_test_peer(pos < 0 ? peerstr : peerstr.substr(0, pos));
|
|
||||||
peerstr = pos < 0 ? std::string("") : peerstr.substr(pos+1);
|
|
||||||
}
|
|
||||||
if (peer_states.size() < 2)
|
|
||||||
{
|
|
||||||
throw std::runtime_error("run_primary requires at least 2 peers");
|
|
||||||
}
|
|
||||||
pgs[1] = (pg_t){
|
|
||||||
.state = PG_PEERING,
|
|
||||||
.pg_cursize = 0,
|
|
||||||
.pg_num = 1,
|
|
||||||
.target_set = { 1, 2, 3 },
|
|
||||||
.cur_set = { 0, 0, 0 },
|
|
||||||
};
|
|
||||||
pgs[1].print_state();
|
|
||||||
pg_count = 1;
|
|
||||||
peering_state = OSD_CONNECTING_PEERS;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
peering_state = OSD_LOADING_PGS;
|
|
||||||
load_pgs();
|
|
||||||
}
|
|
||||||
if (autosync_interval > 0)
|
|
||||||
{
|
|
||||||
this->sync_tfd = new timerfd_interval(ringloop, 3, [this]()
|
|
||||||
{
|
|
||||||
autosync();
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void osd_t::parse_test_peer(std::string peer)
|
|
||||||
{
|
|
||||||
// OSD_NUM:IP:PORT
|
|
||||||
int pos1 = peer.find(':');
|
|
||||||
int pos2 = peer.find(':', pos1+1);
|
|
||||||
if (pos1 < 0 || pos2 < 0)
|
|
||||||
throw new std::runtime_error("OSD peer string must be in the form OSD_NUM:IP:PORT");
|
|
||||||
std::string addr = peer.substr(pos1+1, pos2-pos1-1);
|
|
||||||
std::string osd_num_str = peer.substr(0, pos1);
|
|
||||||
std::string port_str = peer.substr(pos2+1);
|
|
||||||
osd_num_t osd_num = strtoull(osd_num_str.c_str(), NULL, 10);
|
|
||||||
if (!osd_num)
|
|
||||||
throw new std::runtime_error("Could not parse OSD peer osd_num");
|
|
||||||
else if (peer_states.find(osd_num) != peer_states.end())
|
|
||||||
throw std::runtime_error("Same osd number "+std::to_string(osd_num)+" specified twice in peers");
|
|
||||||
int port = strtoull(port_str.c_str(), NULL, 10);
|
|
||||||
if (!port)
|
|
||||||
throw new std::runtime_error("Could not parse OSD peer port");
|
|
||||||
peer_states[osd_num] = json11::Json::object {
|
|
||||||
{ "state", "up" },
|
|
||||||
{ "addresses", json11::Json::array { addr } },
|
|
||||||
{ "port", port },
|
|
||||||
};
|
|
||||||
wanted_peers[osd_num] = { 0 };
|
|
||||||
}
|
|
||||||
|
|
||||||
void osd_t::connect_peer(osd_num_t osd_num, const char *peer_host, int peer_port, std::function<void(osd_num_t, int)> callback)
|
void osd_t::connect_peer(osd_num_t osd_num, const char *peer_host, int peer_port, std::function<void(osd_num_t, int)> callback)
|
||||||
{
|
{
|
||||||
struct sockaddr_in addr;
|
struct sockaddr_in addr;
|
||||||
|
@ -92,6 +24,18 @@ void osd_t::connect_peer(osd_num_t osd_num, const char *peer_host, int peer_port
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
fcntl(peer_fd, F_SETFL, fcntl(peer_fd, F_GETFL, 0) | O_NONBLOCK);
|
fcntl(peer_fd, F_SETFL, fcntl(peer_fd, F_GETFL, 0) | O_NONBLOCK);
|
||||||
|
int timeout_id = -1;
|
||||||
|
if (peer_connect_timeout > 0)
|
||||||
|
{
|
||||||
|
tfd->set_timer(1000*peer_connect_timeout, false, [this, peer_fd](int timer_id)
|
||||||
|
{
|
||||||
|
auto callback = clients[peer_fd].connect_callback;
|
||||||
|
osd_num_t osd_num = clients[peer_fd].osd_num;
|
||||||
|
stop_client(peer_fd);
|
||||||
|
callback(osd_num, -EIO);
|
||||||
|
return;
|
||||||
|
});
|
||||||
|
}
|
||||||
r = connect(peer_fd, (sockaddr*)&addr, sizeof(addr));
|
r = connect(peer_fd, (sockaddr*)&addr, sizeof(addr));
|
||||||
if (r < 0 && errno != EINPROGRESS)
|
if (r < 0 && errno != EINPROGRESS)
|
||||||
{
|
{
|
||||||
|
@ -105,6 +49,7 @@ void osd_t::connect_peer(osd_num_t osd_num, const char *peer_host, int peer_port
|
||||||
.peer_fd = peer_fd,
|
.peer_fd = peer_fd,
|
||||||
.peer_state = PEER_CONNECTING,
|
.peer_state = PEER_CONNECTING,
|
||||||
.connect_callback = callback,
|
.connect_callback = callback,
|
||||||
|
.connect_timeout_id = timeout_id,
|
||||||
.osd_num = osd_num,
|
.osd_num = osd_num,
|
||||||
.in_buf = malloc(receive_buffer_size),
|
.in_buf = malloc(receive_buffer_size),
|
||||||
};
|
};
|
||||||
|
@ -122,6 +67,11 @@ void osd_t::connect_peer(osd_num_t osd_num, const char *peer_host, int peer_port
|
||||||
void osd_t::handle_connect_result(int peer_fd)
|
void osd_t::handle_connect_result(int peer_fd)
|
||||||
{
|
{
|
||||||
auto & cl = clients[peer_fd];
|
auto & cl = clients[peer_fd];
|
||||||
|
if (cl.connect_timeout_id >= 0)
|
||||||
|
{
|
||||||
|
tfd->clear_timer(cl.connect_timeout_id);
|
||||||
|
cl.connect_timeout_id = -1;
|
||||||
|
}
|
||||||
osd_num_t osd_num = cl.osd_num;
|
osd_num_t osd_num = cl.osd_num;
|
||||||
auto callback = cl.connect_callback;
|
auto callback = cl.connect_callback;
|
||||||
int result = 0;
|
int result = 0;
|
||||||
|
|
|
@ -110,7 +110,6 @@ void pg_obj_state_check_t::handle_version()
|
||||||
n_copies++;
|
n_copies++;
|
||||||
if (replica >= pg->pg_size)
|
if (replica >= pg->pg_size)
|
||||||
{
|
{
|
||||||
// FIXME In the future, check it against the PG epoch number to handle replication factor/scheme changes
|
|
||||||
n_buggy++;
|
n_buggy++;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
|
Loading…
Reference in New Issue