Retry consul connection attempts and then die
parent
d78ce509c6
commit
089b4eb208
2
Makefile
2
Makefile
|
@ -30,7 +30,7 @@ libfio_blockstore.so: ./libblockstore.so fio_engine.cpp json11.o
|
||||||
g++ $(CXXFLAGS) -shared -o libfio_blockstore.so fio_engine.cpp json11.o ./libblockstore.so -ltcmalloc_minimal -luring
|
g++ $(CXXFLAGS) -shared -o libfio_blockstore.so fio_engine.cpp json11.o ./libblockstore.so -ltcmalloc_minimal -luring
|
||||||
|
|
||||||
OSD_OBJS := osd.o osd_secondary.o osd_receive.o osd_send.o osd_peering.o osd_flush.o osd_peering_pg.o \
|
OSD_OBJS := osd.o osd_secondary.o osd_receive.o osd_send.o osd_peering.o osd_flush.o osd_peering_pg.o \
|
||||||
osd_primary.o osd_cluster.o osd_rmw.o json11.o timerfd_interval.o base64.o
|
osd_primary.o osd_cluster.o osd_rmw.o json11.o timerfd_interval.o base64.o timerfd_manager.o
|
||||||
base64.o: base64.cpp base64.h
|
base64.o: base64.cpp base64.h
|
||||||
g++ $(CXXFLAGS) -c -o $@ $<
|
g++ $(CXXFLAGS) -c -o $@ $<
|
||||||
osd_secondary.o: osd_secondary.cpp osd.h osd_ops.h ringloop.h
|
osd_secondary.o: osd_secondary.cpp osd.h osd_ops.h ringloop.h
|
||||||
|
|
7
osd.cpp
7
osd.cpp
|
@ -42,6 +42,8 @@ osd_t::osd_t(blockstore_config_t & config, blockstore_t *bs, ring_loop_t *ringlo
|
||||||
print_stats();
|
print_stats();
|
||||||
});
|
});
|
||||||
|
|
||||||
|
this->tfd = new timerfd_manager_t(ringloop);
|
||||||
|
|
||||||
if (run_primary)
|
if (run_primary)
|
||||||
init_primary();
|
init_primary();
|
||||||
|
|
||||||
|
@ -51,6 +53,11 @@ osd_t::osd_t(blockstore_config_t & config, blockstore_t *bs, ring_loop_t *ringlo
|
||||||
|
|
||||||
osd_t::~osd_t()
|
osd_t::~osd_t()
|
||||||
{
|
{
|
||||||
|
if (tfd)
|
||||||
|
{
|
||||||
|
delete tfd;
|
||||||
|
tfd = NULL;
|
||||||
|
}
|
||||||
if (stats_tfd)
|
if (stats_tfd)
|
||||||
{
|
{
|
||||||
delete stats_tfd;
|
delete stats_tfd;
|
||||||
|
|
7
osd.h
7
osd.h
|
@ -16,6 +16,7 @@
|
||||||
#include "blockstore.h"
|
#include "blockstore.h"
|
||||||
#include "ringloop.h"
|
#include "ringloop.h"
|
||||||
#include "timerfd_interval.h"
|
#include "timerfd_interval.h"
|
||||||
|
#include "timerfd_manager.h"
|
||||||
#include "osd_ops.h"
|
#include "osd_ops.h"
|
||||||
#include "osd_peering_pg.h"
|
#include "osd_peering_pg.h"
|
||||||
#include "json11/json11.hpp"
|
#include "json11/json11.hpp"
|
||||||
|
@ -48,6 +49,9 @@
|
||||||
#define MAX_RECOVERY_QUEUE 2048
|
#define MAX_RECOVERY_QUEUE 2048
|
||||||
#define DEFAULT_RECOVERY_QUEUE 4
|
#define DEFAULT_RECOVERY_QUEUE 4
|
||||||
|
|
||||||
|
#define MAX_CONSUL_ATTEMPTS 5
|
||||||
|
#define CONSUL_RETRY_INTERVAL 1000
|
||||||
|
|
||||||
//#define OSD_STUB
|
//#define OSD_STUB
|
||||||
|
|
||||||
extern const char* osd_op_names[];
|
extern const char* osd_op_names[];
|
||||||
|
@ -210,6 +214,8 @@ class osd_t
|
||||||
// peer OSDs
|
// peer OSDs
|
||||||
|
|
||||||
std::vector<std::string> bind_addresses;
|
std::vector<std::string> bind_addresses;
|
||||||
|
int consul_failed_attempts = 0;
|
||||||
|
|
||||||
std::map<uint64_t, int> osd_peer_fds;
|
std::map<uint64_t, int> osd_peer_fds;
|
||||||
std::map<pg_num_t, pg_t> pgs;
|
std::map<pg_num_t, pg_t> pgs;
|
||||||
std::set<pg_num_t> dirty_pgs;
|
std::set<pg_num_t> dirty_pgs;
|
||||||
|
@ -233,6 +239,7 @@ class osd_t
|
||||||
uint64_t pg_stripe_size = 4*1024*1024; // 4 MB by default
|
uint64_t pg_stripe_size = 4*1024*1024; // 4 MB by default
|
||||||
ring_loop_t *ringloop;
|
ring_loop_t *ringloop;
|
||||||
timerfd_interval *stats_tfd = NULL, *sync_tfd = NULL, *consul_tfd = NULL;
|
timerfd_interval *stats_tfd = NULL, *sync_tfd = NULL, *consul_tfd = NULL;
|
||||||
|
timerfd_manager_t *tfd = NULL;
|
||||||
|
|
||||||
int wait_state = 0;
|
int wait_state = 0;
|
||||||
int epoll_fd = 0;
|
int epoll_fd = 0;
|
||||||
|
|
|
@ -132,9 +132,27 @@ void osd_t::report_status()
|
||||||
{
|
{
|
||||||
int pos = res.find("\r\n\r\n");
|
int pos = res.find("\r\n\r\n");
|
||||||
if (pos >= 0)
|
if (pos >= 0)
|
||||||
|
{
|
||||||
res = res.substr(pos+4);
|
res = res.substr(pos+4);
|
||||||
|
}
|
||||||
if (err != 0 || res != "true")
|
if (err != 0 || res != "true")
|
||||||
|
{
|
||||||
|
consul_failed_attempts++;
|
||||||
printf("Error reporting state to Consul: code %d (%s), response text: %s\n", err, strerror(err), res.c_str());
|
printf("Error reporting state to Consul: code %d (%s), response text: %s\n", err, strerror(err), res.c_str());
|
||||||
|
if (consul_failed_attempts > MAX_CONSUL_ATTEMPTS)
|
||||||
|
{
|
||||||
|
throw std::runtime_error("Cluster connection failed");
|
||||||
|
}
|
||||||
|
// Retry
|
||||||
|
tfd->set_timer(CONSUL_RETRY_INTERVAL, false, [this](int timer_id)
|
||||||
|
{
|
||||||
|
report_status();
|
||||||
|
});
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
consul_failed_attempts = 0;
|
||||||
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue