2020-01-21 14:04:58 +03:00
|
|
|
#include <netinet/tcp.h>
|
|
|
|
#include <sys/epoll.h>
|
2020-01-23 21:43:45 +03:00
|
|
|
|
|
|
|
#include <algorithm>
|
|
|
|
|
2020-04-16 23:22:32 +03:00
|
|
|
#include "base64.h"
|
2020-01-21 14:04:58 +03:00
|
|
|
#include "osd.h"
|
|
|
|
|
2020-04-20 17:44:03 +03:00
|
|
|
/**
 * Start a non-blocking TCP connection to a peer OSD.
 *
 * The outcome is reported through `callback(peer_osd, result)`. Failures that
 * are detected here are reported synchronously with a negative errno-style
 * code. Otherwise the client is registered in PEER_CONNECTING state and the
 * socket is added to epoll; the connect timeout timer (if enabled) reports
 * -EIO through the same callback.
 *
 * @param peer_osd   number of the OSD to connect to; asserted to differ from our own osd_num
 * @param peer_host  IPv4 address in text form (parsed with inet_pton())
 * @param peer_port  TCP port; 0 selects the default port 11203
 * @param callback   completion callback, stored in the client record
 * @throws std::runtime_error if the socket can't be registered in epoll
 */
void osd_t::connect_peer(osd_num_t peer_osd, const char *peer_host, int peer_port, std::function<void(osd_num_t, int)> callback)
{
    // Zero-initialise so that the padding bytes stored in the client record are well-defined
    struct sockaddr_in addr = {};
    int r;
    if ((r = inet_pton(AF_INET, peer_host, &addr.sin_addr)) != 1)
    {
        callback(peer_osd, -EINVAL);
        return;
    }
    addr.sin_family = AF_INET;
    addr.sin_port = htons(peer_port ? peer_port : 11203);
    int peer_fd = socket(AF_INET, SOCK_STREAM, 0);
    if (peer_fd < 0)
    {
        callback(peer_osd, -errno);
        return;
    }
    fcntl(peer_fd, F_SETFL, fcntl(peer_fd, F_GETFL, 0) | O_NONBLOCK);
    r = connect(peer_fd, (sockaddr*)&addr, sizeof(addr));
    if (r < 0 && errno != EINPROGRESS)
    {
        // Remember the error before close() gets a chance to overwrite errno
        int connect_errno = errno;
        close(peer_fd);
        callback(peer_osd, -connect_errno);
        return;
    }
    // Arm the timeout only after connect() has been accepted: an immediate failure
    // above must not leave a timer behind that would later fire for a closed FD
    int timeout_id = -1;
    if (peer_connect_timeout > 0)
    {
        timeout_id = tfd->set_timer(1000*peer_connect_timeout, false, [this, peer_fd](int timer_id)
        {
            // Take copies before stop_client() is called for this FD
            auto callback = clients[peer_fd].connect_callback;
            osd_num_t peer_osd = clients[peer_fd].osd_num;
            stop_client(peer_fd);
            callback(peer_osd, -EIO);
            return;
        });
    }
    assert(peer_osd != osd_num);
    clients[peer_fd] = (osd_client_t){
        .peer_addr = addr,
        .peer_port = peer_port,
        .peer_fd = peer_fd,
        .peer_state = PEER_CONNECTING,
        .connect_callback = callback,
        .connect_timeout_id = timeout_id,
        .osd_num = peer_osd,
        .in_buf = malloc(receive_buffer_size),
    };
    osd_peer_fds[peer_osd] = peer_fd;
    // Add FD to epoll (EPOLLOUT for tracking connect() result)
    epoll_event ev;
    ev.data.fd = peer_fd;
    ev.events = EPOLLOUT | EPOLLIN | EPOLLRDHUP | EPOLLET;
    if (epoll_ctl(epoll_fd, EPOLL_CTL_ADD, peer_fd, &ev) < 0)
    {
        throw std::runtime_error(std::string("epoll_ctl: ") + strerror(errno));
    }
}
|
|
|
|
|
|
|
|
void osd_t::handle_connect_result(int peer_fd)
|
|
|
|
{
|
|
|
|
auto & cl = clients[peer_fd];
|
2020-04-20 15:43:07 +03:00
|
|
|
if (cl.connect_timeout_id >= 0)
|
|
|
|
{
|
|
|
|
tfd->clear_timer(cl.connect_timeout_id);
|
|
|
|
cl.connect_timeout_id = -1;
|
|
|
|
}
|
2020-04-20 17:44:03 +03:00
|
|
|
osd_num_t peer_osd = cl.osd_num;
|
2020-02-09 18:22:29 +03:00
|
|
|
auto callback = cl.connect_callback;
|
2020-01-21 14:04:58 +03:00
|
|
|
int result = 0;
|
|
|
|
socklen_t result_len = sizeof(result);
|
|
|
|
if (getsockopt(peer_fd, SOL_SOCKET, SO_ERROR, &result, &result_len) < 0)
|
|
|
|
{
|
|
|
|
result = errno;
|
|
|
|
}
|
|
|
|
if (result != 0)
|
|
|
|
{
|
|
|
|
stop_client(peer_fd);
|
2020-04-20 17:44:03 +03:00
|
|
|
callback(peer_osd, -result);
|
2020-01-21 14:04:58 +03:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
int one = 1;
|
|
|
|
setsockopt(peer_fd, SOL_TCP, TCP_NODELAY, &one, sizeof(one));
|
|
|
|
// Disable EPOLLOUT on this fd
|
|
|
|
cl.connect_callback = NULL;
|
|
|
|
cl.peer_state = PEER_CONNECTED;
|
|
|
|
epoll_event ev;
|
|
|
|
ev.data.fd = peer_fd;
|
2020-02-29 01:46:12 +03:00
|
|
|
ev.events = EPOLLIN | EPOLLRDHUP | EPOLLET;
|
2020-01-21 14:04:58 +03:00
|
|
|
if (epoll_ctl(epoll_fd, EPOLL_CTL_MOD, peer_fd, &ev) < 0)
|
|
|
|
{
|
|
|
|
throw std::runtime_error(std::string("epoll_ctl: ") + strerror(errno));
|
|
|
|
}
|
2020-04-20 17:44:03 +03:00
|
|
|
callback(peer_osd, peer_fd);
|
2020-01-21 14:04:58 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
// Peering loop
|
|
|
|
void osd_t::handle_peers()
|
|
|
|
{
|
2020-03-13 21:41:54 +03:00
|
|
|
if (peering_state & OSD_CONNECTING_PEERS)
|
2020-01-21 14:04:58 +03:00
|
|
|
{
|
2020-04-16 23:22:32 +03:00
|
|
|
load_and_connect_peers();
|
2020-01-21 14:04:58 +03:00
|
|
|
}
|
2020-02-11 02:30:46 +03:00
|
|
|
if (peering_state & OSD_PEERING_PGS)
|
2020-01-21 14:04:58 +03:00
|
|
|
{
|
2020-03-13 21:41:54 +03:00
|
|
|
bool still = false;
|
|
|
|
for (auto & p: pgs)
|
2020-01-21 14:04:58 +03:00
|
|
|
{
|
2020-03-13 21:41:54 +03:00
|
|
|
if (p.second.state == PG_PEERING)
|
2020-01-21 14:04:58 +03:00
|
|
|
{
|
2020-03-13 21:41:54 +03:00
|
|
|
if (!p.second.peering_state->list_ops.size())
|
2020-01-22 02:36:14 +03:00
|
|
|
{
|
2020-03-13 21:41:54 +03:00
|
|
|
p.second.calc_object_states();
|
2020-03-31 17:50:50 +03:00
|
|
|
incomplete_objects += p.second.incomplete_objects.size();
|
|
|
|
misplaced_objects += p.second.misplaced_objects.size();
|
2020-04-04 02:18:29 +03:00
|
|
|
// FIXME: degraded objects may currently include misplaced, too! Report them separately?
|
2020-03-31 17:50:50 +03:00
|
|
|
degraded_objects += p.second.degraded_objects.size();
|
2020-03-13 21:41:54 +03:00
|
|
|
if (p.second.state & PG_HAS_UNCLEAN)
|
|
|
|
peering_state = peering_state | OSD_FLUSHING_PGS;
|
2020-03-25 02:17:12 +03:00
|
|
|
else
|
|
|
|
peering_state = peering_state | OSD_RECOVERING;
|
2020-01-22 02:36:14 +03:00
|
|
|
}
|
2020-02-11 02:30:46 +03:00
|
|
|
else
|
2020-01-22 02:36:14 +03:00
|
|
|
{
|
2020-03-13 21:41:54 +03:00
|
|
|
still = true;
|
2020-01-22 02:36:14 +03:00
|
|
|
}
|
2020-01-21 14:04:58 +03:00
|
|
|
}
|
|
|
|
}
|
2020-03-13 21:41:54 +03:00
|
|
|
if (!still)
|
2020-02-11 02:30:46 +03:00
|
|
|
{
|
|
|
|
// Done all PGs
|
|
|
|
peering_state = peering_state & ~OSD_PEERING_PGS;
|
|
|
|
}
|
|
|
|
}
|
2020-04-11 12:05:28 +03:00
|
|
|
if ((peering_state & OSD_FLUSHING_PGS) && !readonly)
|
2020-03-13 21:41:54 +03:00
|
|
|
{
|
|
|
|
bool still = false;
|
|
|
|
for (auto & p: pgs)
|
|
|
|
{
|
|
|
|
if (p.second.state & PG_HAS_UNCLEAN)
|
|
|
|
{
|
|
|
|
if (!p.second.flush_batch)
|
|
|
|
{
|
|
|
|
submit_pg_flush_ops(p.first);
|
|
|
|
}
|
|
|
|
still = true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (!still)
|
|
|
|
{
|
2020-03-24 01:13:04 +03:00
|
|
|
peering_state = peering_state & ~OSD_FLUSHING_PGS | OSD_RECOVERING;
|
|
|
|
}
|
|
|
|
}
|
2020-04-11 12:05:28 +03:00
|
|
|
if ((peering_state & OSD_RECOVERING) && !readonly)
|
2020-03-24 01:13:04 +03:00
|
|
|
{
|
|
|
|
if (!continue_recovery())
|
|
|
|
{
|
|
|
|
peering_state = peering_state & ~OSD_RECOVERING;
|
2020-03-13 21:41:54 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-04-20 17:44:03 +03:00
|
|
|
/**
 * Restart peering of every non-offline PG that lists `peer_osd` among its
 * peers, and raise OSD_PEERING_PGS for each of them.
 *
 * @param peer_osd  number of the OSD that triggered re-peering
 */
void osd_t::repeer_pgs(osd_num_t peer_osd)
{
    // Re-peer affected PGs
    for (auto & pg_entry: pgs)
    {
        auto & pg = pg_entry.second;
        if (pg.state == PG_OFFLINE)
        {
            continue;
        }
        bool affected = false;
        for (osd_num_t pg_osd: pg.all_peers)
        {
            if (pg_osd == peer_osd)
            {
                affected = true;
                break;
            }
        }
        if (!affected)
        {
            continue;
        }
        // Repeer this pg
        printf("Repeer PG %d because of OSD %lu\n", pg.pg_num, peer_osd);
        start_pg_peering(pg.pg_num);
        peering_state |= OSD_PEERING_PGS;
    }
}
|
2020-01-22 02:36:14 +03:00
|
|
|
|
2020-02-11 02:30:46 +03:00
|
|
|
// Repeer on each connect/disconnect peer event
|
2020-03-13 21:41:54 +03:00
|
|
|
void osd_t::start_pg_peering(pg_num_t pg_num)
|
2020-01-22 02:36:14 +03:00
|
|
|
{
|
2020-03-13 21:41:54 +03:00
|
|
|
auto & pg = pgs[pg_num];
|
2020-02-11 02:30:46 +03:00
|
|
|
pg.state = PG_PEERING;
|
2020-03-14 22:19:45 +03:00
|
|
|
pg.print_state();
|
2020-04-03 13:03:42 +03:00
|
|
|
// Reset PG state
|
2020-02-11 02:30:46 +03:00
|
|
|
pg.state_dict.clear();
|
2020-03-31 17:50:50 +03:00
|
|
|
incomplete_objects -= pg.incomplete_objects.size();
|
|
|
|
misplaced_objects -= pg.misplaced_objects.size();
|
|
|
|
degraded_objects -= pg.degraded_objects.size();
|
2020-03-30 19:43:12 +03:00
|
|
|
pg.incomplete_objects.clear();
|
2020-03-24 01:13:04 +03:00
|
|
|
pg.misplaced_objects.clear();
|
|
|
|
pg.degraded_objects.clear();
|
2020-03-13 21:41:54 +03:00
|
|
|
pg.flush_actions.clear();
|
2020-03-30 19:43:12 +03:00
|
|
|
pg.ver_override.clear();
|
2020-03-13 21:41:54 +03:00
|
|
|
if (pg.flush_batch)
|
2020-03-30 19:43:12 +03:00
|
|
|
{
|
2020-03-13 21:41:54 +03:00
|
|
|
delete pg.flush_batch;
|
2020-03-30 19:43:12 +03:00
|
|
|
}
|
2020-03-13 21:41:54 +03:00
|
|
|
pg.flush_batch = NULL;
|
2020-03-30 19:43:12 +03:00
|
|
|
for (auto p: pg.write_queue)
|
|
|
|
{
|
|
|
|
cancel_op(p.second);
|
|
|
|
}
|
|
|
|
pg.write_queue.clear();
|
2020-03-31 02:09:25 +03:00
|
|
|
for (auto it = unstable_writes.begin(); it != unstable_writes.end(); )
|
|
|
|
{
|
2020-04-03 13:03:42 +03:00
|
|
|
// Forget this PG's unstable writes
|
2020-03-31 02:09:25 +03:00
|
|
|
pg_num_t n = (it->first.oid.inode + it->first.oid.stripe / pg_stripe_size) % pg_count + 1;
|
|
|
|
if (n == pg.pg_num)
|
|
|
|
unstable_writes.erase(it++);
|
|
|
|
else
|
|
|
|
it++;
|
|
|
|
}
|
2020-04-03 13:03:42 +03:00
|
|
|
pg.inflight = 0;
|
|
|
|
dirty_pgs.erase(pg.pg_num);
|
2020-04-19 00:20:18 +03:00
|
|
|
// Calculate current write OSD set
|
2020-02-11 02:30:46 +03:00
|
|
|
pg.pg_cursize = 0;
|
2020-04-17 02:33:44 +03:00
|
|
|
pg.cur_set.resize(pg.target_set.size());
|
|
|
|
for (int role = 0; role < pg.target_set.size(); role++)
|
2020-01-22 02:36:14 +03:00
|
|
|
{
|
2020-04-17 02:33:44 +03:00
|
|
|
pg.cur_set[role] = pg.target_set[role] == this->osd_num ||
|
|
|
|
osd_peer_fds.find(pg.target_set[role]) != osd_peer_fds.end() ? pg.target_set[role] : 0;
|
2020-02-11 02:30:46 +03:00
|
|
|
if (pg.cur_set[role] != 0)
|
2020-01-22 02:36:14 +03:00
|
|
|
{
|
2020-02-11 02:30:46 +03:00
|
|
|
pg.pg_cursize++;
|
|
|
|
}
|
|
|
|
}
|
2020-04-19 00:20:18 +03:00
|
|
|
if (pg.target_history.size())
|
|
|
|
{
|
|
|
|
// Refuse to start PG if no peers are available from any of the historical OSD sets
|
|
|
|
// (PG history is kept up to the latest active+clean state)
|
|
|
|
for (auto & history_set: pg.target_history)
|
|
|
|
{
|
|
|
|
bool found = false;
|
|
|
|
for (auto history_osd: history_set)
|
|
|
|
{
|
|
|
|
if (history_osd != 0 && osd_peer_fds.find(history_osd) != osd_peer_fds.end())
|
|
|
|
{
|
|
|
|
found = true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (!found)
|
|
|
|
{
|
|
|
|
pg.state = PG_INCOMPLETE;
|
|
|
|
pg.print_state();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2020-02-11 02:30:46 +03:00
|
|
|
if (pg.pg_cursize < pg.pg_minsize)
|
|
|
|
{
|
|
|
|
pg.state = PG_INCOMPLETE;
|
2020-03-14 22:19:45 +03:00
|
|
|
pg.print_state();
|
2020-02-11 02:30:46 +03:00
|
|
|
}
|
2020-04-19 00:20:18 +03:00
|
|
|
std::set<osd_num_t> cur_peers;
|
|
|
|
for (auto peer_osd: pg.all_peers)
|
|
|
|
{
|
2020-04-20 17:44:03 +03:00
|
|
|
if (peer_osd == this->osd_num || osd_peer_fds.find(peer_osd) != osd_peer_fds.end())
|
2020-04-19 00:20:18 +03:00
|
|
|
{
|
|
|
|
cur_peers.insert(peer_osd);
|
|
|
|
}
|
2020-04-19 01:01:26 +03:00
|
|
|
else if (wanted_peers.find(peer_osd) == wanted_peers.end())
|
|
|
|
{
|
|
|
|
wanted_peers[peer_osd] = { 0 };
|
|
|
|
peering_state |= OSD_CONNECTING_PEERS;
|
|
|
|
}
|
2020-04-19 00:20:18 +03:00
|
|
|
}
|
2020-02-11 02:30:46 +03:00
|
|
|
if (pg.peering_state)
|
|
|
|
{
|
2020-04-19 00:20:18 +03:00
|
|
|
// Adjust the peering operation that's still in progress - discard unneeded results
|
2020-04-20 17:44:03 +03:00
|
|
|
for (auto it = pg.peering_state->list_ops.begin(); it != pg.peering_state->list_ops.end();)
|
2020-02-11 02:30:46 +03:00
|
|
|
{
|
2020-04-19 00:20:18 +03:00
|
|
|
if (pg.state == PG_INCOMPLETE || cur_peers.find(it->first) == cur_peers.end())
|
2020-02-11 02:30:46 +03:00
|
|
|
{
|
|
|
|
// Discard the result after completion, which, chances are, will be unsuccessful
|
2020-02-13 19:13:16 +03:00
|
|
|
auto list_op = it->second;
|
2020-02-11 02:30:46 +03:00
|
|
|
if (list_op->peer_fd == 0)
|
|
|
|
{
|
|
|
|
// Self
|
2020-02-23 23:19:11 +03:00
|
|
|
list_op->bs_op->callback = [list_op](blockstore_op_t *bs_op)
|
2020-02-11 02:30:46 +03:00
|
|
|
{
|
2020-02-23 23:19:11 +03:00
|
|
|
if (list_op->bs_op->buf)
|
|
|
|
free(list_op->bs_op->buf);
|
2020-02-11 02:30:46 +03:00
|
|
|
delete list_op;
|
|
|
|
};
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
// Peer
|
|
|
|
list_op->callback = [](osd_op_t *list_op)
|
|
|
|
{
|
|
|
|
delete list_op;
|
|
|
|
};
|
|
|
|
}
|
2020-02-13 19:13:16 +03:00
|
|
|
pg.peering_state->list_ops.erase(it);
|
|
|
|
it = pg.peering_state->list_ops.begin();
|
2020-02-11 02:30:46 +03:00
|
|
|
}
|
2020-04-20 17:44:03 +03:00
|
|
|
else
|
|
|
|
it++;
|
2020-02-11 02:30:46 +03:00
|
|
|
}
|
2020-04-20 17:44:03 +03:00
|
|
|
for (auto it = pg.peering_state->list_results.begin(); it != pg.peering_state->list_results.end();)
|
2020-02-11 02:30:46 +03:00
|
|
|
{
|
2020-04-19 00:20:18 +03:00
|
|
|
if (pg.state == PG_INCOMPLETE || cur_peers.find(it->first) == cur_peers.end())
|
2020-02-11 02:30:46 +03:00
|
|
|
{
|
2020-02-24 01:01:34 +03:00
|
|
|
if (it->second.buf)
|
|
|
|
{
|
|
|
|
free(it->second.buf);
|
|
|
|
}
|
|
|
|
pg.peering_state->list_results.erase(it);
|
|
|
|
it = pg.peering_state->list_results.begin();
|
2020-02-11 02:30:46 +03:00
|
|
|
}
|
2020-04-20 17:44:03 +03:00
|
|
|
else
|
|
|
|
it++;
|
2020-02-11 02:30:46 +03:00
|
|
|
}
|
2020-01-22 02:36:14 +03:00
|
|
|
}
|
2020-02-11 02:30:46 +03:00
|
|
|
if (pg.state == PG_INCOMPLETE)
|
2020-01-22 02:36:14 +03:00
|
|
|
{
|
2020-02-11 02:30:46 +03:00
|
|
|
if (pg.peering_state)
|
|
|
|
{
|
|
|
|
delete pg.peering_state;
|
|
|
|
pg.peering_state = NULL;
|
|
|
|
}
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
if (!pg.peering_state)
|
|
|
|
{
|
|
|
|
pg.peering_state = new pg_peering_state_t();
|
2020-04-19 00:20:18 +03:00
|
|
|
pg.peering_state->pg_num = pg.pg_num;
|
2020-02-11 02:30:46 +03:00
|
|
|
}
|
2020-04-19 00:20:18 +03:00
|
|
|
for (osd_num_t peer_osd: cur_peers)
|
2020-02-11 02:30:46 +03:00
|
|
|
{
|
2020-04-19 00:20:18 +03:00
|
|
|
if (pg.peering_state->list_ops.find(peer_osd) != pg.peering_state->list_ops.end() ||
|
|
|
|
pg.peering_state->list_results.find(peer_osd) != pg.peering_state->list_results.end())
|
2020-02-11 02:30:46 +03:00
|
|
|
{
|
|
|
|
continue;
|
|
|
|
}
|
2020-04-19 00:20:18 +03:00
|
|
|
submit_list_subop(peer_osd, pg.peering_state);
|
|
|
|
}
|
|
|
|
ringloop->wakeup();
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
 * Submit an object listing operation for the PG described by `ps` to one OSD
 * and register it in `ps->list_ops`.
 *
 * For the local OSD a BS_OP_LIST blockstore operation is enqueued; for a peer
 * an OSD_OP_SECONDARY_LIST request is pushed to that peer's outbox. On
 * completion the result is stored in `ps->list_results[role_osd]` and the
 * operation is removed from `ps->list_ops`.
 *
 * @param role_osd  OSD to list objects from
 * @param ps        peering state of the PG being listed
 */
void osd_t::submit_list_subop(osd_num_t role_osd, pg_peering_state_t *ps)
{
    if (role_osd != this->osd_num)
    {
        // Peer
        auto & cl = clients[osd_peer_fds[role_osd]];
        osd_op_t *peer_op = new osd_op_t();
        peer_op->op_type = OSD_OP_OUT;
        peer_op->send_list.push_back(peer_op->req.buf, OSD_PACKET_SIZE);
        peer_op->peer_fd = cl.peer_fd;
        peer_op->req = {
            .sec_list = {
                .header = {
                    .magic = SECONDARY_OSD_OP_MAGIC,
                    .id = this->next_subop_id++,
                    .opcode = OSD_OP_SECONDARY_LIST,
                },
                .list_pg = ps->pg_num,
                .pg_count = pg_count,
                .pg_stripe_size = pg_stripe_size,
            },
        };
        peer_op->callback = [this, ps, role_osd](osd_op_t *op)
        {
            if (op->reply.hdr.retval < 0)
            {
                printf("Failed to get object list from OSD %lu (retval=%ld), disconnecting peer\n", role_osd, op->reply.hdr.retval);
                ps->list_ops.erase(role_osd);
                stop_client(op->peer_fd);
                delete op;
                return;
            }
            printf(
                "[PG %u] Got object list from OSD %lu: %ld object versions (%lu of them stable)\n",
                ps->pg_num, role_osd, op->reply.hdr.retval, op->reply.sec_list.stable_count
            );
            ps->list_results[role_osd] = {
                .buf = (obj_ver_id*)op->buf,
                .total_count = (uint64_t)op->reply.hdr.retval,
                .stable_count = op->reply.sec_list.stable_count,
            };
            // set op->buf to NULL so it doesn't get freed
            op->buf = NULL;
            ps->list_ops.erase(role_osd);
            delete op;
        };
        outbox_push(cl, peer_op);
        ps->list_ops[role_osd] = peer_op;
        return;
    }
    // Self
    osd_op_t *local_op = new osd_op_t();
    local_op->op_type = 0;
    // peer_fd == 0 is what start_pg_peering() uses to recognise a local list operation
    local_op->peer_fd = 0;
    local_op->bs_op = new blockstore_op_t();
    local_op->bs_op->opcode = BS_OP_LIST;
    local_op->bs_op->oid.stripe = pg_stripe_size;
    local_op->bs_op->len = pg_count;
    local_op->bs_op->offset = ps->pg_num-1;
    local_op->bs_op->callback = [ps, local_op, role_osd](blockstore_op_t *bs_op)
    {
        if (local_op->bs_op->retval < 0)
        {
            throw std::runtime_error("local OP_LIST failed");
        }
        printf(
            "[PG %u] Got object list from OSD %lu (local): %d object versions (%lu of them stable)\n",
            ps->pg_num, role_osd, bs_op->retval, bs_op->version
        );
        ps->list_results[role_osd] = {
            .buf = (obj_ver_id*)local_op->bs_op->buf,
            .total_count = (uint64_t)local_op->bs_op->retval,
            .stable_count = local_op->bs_op->version,
        };
        ps->list_ops.erase(role_osd);
        delete local_op;
    };
    bs->enqueue_op(local_op->bs_op);
    ps->list_ops[role_osd] = local_op;
}
|
2020-04-03 13:03:42 +03:00
|
|
|
|
|
|
|
/**
 * Request a PG to stop.
 *
 * Replaces the PG_ACTIVE flag with PG_STOPPING and, when the PG has no
 * inflight operations, finishes stopping right away.
 *
 * @param pg_num  number of the PG to stop
 * @return false if the PG is unknown or not active, true otherwise
 */
bool osd_t::stop_pg(pg_num_t pg_num)
{
    auto found = pgs.find(pg_num);
    if (found == pgs.end() || !(found->second.state & PG_ACTIVE))
    {
        return false;
    }
    auto & pg = found->second;
    pg.state = (pg.state & ~PG_ACTIVE) | PG_STOPPING;
    if (pg.inflight == 0)
    {
        finish_stop_pg(pg);
    }
    return true;
}
|
|
|
|
|
|
|
|
/**
 * Complete stopping of a PG by switching it to the PG_OFFLINE state.
 *
 * @param pg  PG to mark offline
 */
void osd_t::finish_stop_pg(pg_t & pg)
{
    pg.state = PG_OFFLINE;
}
|