Compare commits
8 Commits
Author | SHA1 | Date |
---|---|---|
|
df2847df2d | |
|
327c98a4b6 | |
|
3cc0abfd81 | |
|
80e5f8ba76 | |
|
4b660f1ce8 | |
|
dfde0e60f0 | |
|
013f688ffe | |
|
cf9738ddbe |
docker
etc/apt/sources.list.d
mon
|
@ -3,7 +3,7 @@ VITASTOR_VERSION ?= v2.1.0
|
|||
all: build push
|
||||
|
||||
build:
|
||||
@docker build --rm -t vitalif/vitastor:$(VITASTOR_VERSION) .
|
||||
@docker build --no-cache --rm -t vitalif/vitastor:$(VITASTOR_VERSION) .
|
||||
|
||||
push:
|
||||
@docker push vitalif/vitastor:$(VITASTOR_VERSION)
|
||||
|
|
|
@ -1 +1,2 @@
|
|||
deb http://vitastor.io/debian bookworm main
|
||||
deb http://http.debian.net/debian/ bookworm-backports main
|
||||
|
|
|
@ -342,7 +342,7 @@ function filter_tree_by_rules(osd_tree, rules, selected)
|
|||
// Convert from
|
||||
// node_list = { id: string|number, level: string, size?: number, parent?: string|number }[]
|
||||
// to
|
||||
// node_tree = { [node_id]: { id, level, size?, parent?, children?: child_node_id[], levels: { [level]: id, ... } } }
|
||||
// node_tree = { [node_id]: { id, level, size?, parent?, children?: child_node[], levels: { [level]: id, ... } } }
|
||||
function index_tree(node_list)
|
||||
{
|
||||
const tree = { '': { children: [], levels: {} } };
|
||||
|
@ -357,7 +357,7 @@ function index_tree(node_list)
|
|||
tree[parent_id].children = tree[parent_id].children || [];
|
||||
tree[parent_id].children.push(tree[node.id]);
|
||||
}
|
||||
const cur = tree[''].children;
|
||||
const cur = [ ...tree[''].children ];
|
||||
for (let i = 0; i < cur.length; i++)
|
||||
{
|
||||
cur[i].levels[cur[i].level] = cur[i].id;
|
||||
|
|
|
@ -15,7 +15,7 @@ function get_osd_tree(global_config, state)
|
|||
const stat = state.osd.stats[osd_num];
|
||||
const osd_cfg = state.config.osd[osd_num];
|
||||
let reweight = osd_cfg == null ? 1 : Number(osd_cfg.reweight);
|
||||
if (reweight < 0 || isNaN(reweight))
|
||||
if (isNaN(reweight) || reweight < 0 || reweight > 0)
|
||||
reweight = 1;
|
||||
if (stat && stat.size && reweight && (state.osd.state[osd_num] || Number(stat.time) >= down_time ||
|
||||
osd_cfg && osd_cfg.noout))
|
||||
|
|
|
@ -40,6 +40,11 @@ async function run()
|
|||
console.log("/etc/systemd/system/vitastor-etcd.service already exists");
|
||||
process.exit(1);
|
||||
}
|
||||
if (!in_docker && fs.existsSync("/etc/systemd/system/etcd.service"))
|
||||
{
|
||||
console.log("/etc/systemd/system/etcd.service already exists");
|
||||
process.exit(1);
|
||||
}
|
||||
const config = JSON.parse(fs.readFileSync(config_path, { encoding: 'utf-8' }));
|
||||
if (!config.etcd_address)
|
||||
{
|
||||
|
@ -97,8 +102,8 @@ WantedBy=multi-user.target
|
|||
`);
|
||||
await system(`useradd etcd`);
|
||||
await system(`systemctl daemon-reload`);
|
||||
await system(`systemctl enable etcd`);
|
||||
await system(`systemctl start etcd`);
|
||||
await system(`systemctl enable vitastor-etcd`);
|
||||
await system(`systemctl start vitastor-etcd`);
|
||||
process.exit(0);
|
||||
}
|
||||
|
||||
|
|
|
@ -628,7 +628,7 @@ void osd_messenger_t::check_peer_config(osd_client_t *cl)
|
|||
},
|
||||
};
|
||||
#ifdef WITH_RDMA
|
||||
if (rdma_contexts.size())
|
||||
if (!use_rdmacm && rdma_contexts.size())
|
||||
{
|
||||
// Choose the right context for the selected network
|
||||
msgr_rdma_context_t *selected_ctx = choose_rdma_context(cl);
|
||||
|
@ -701,7 +701,7 @@ void osd_messenger_t::check_peer_config(osd_client_t *cl)
|
|||
return;
|
||||
}
|
||||
#ifdef WITH_RDMA
|
||||
if (cl->rdma_conn && config["rdma_address"].is_string())
|
||||
if (!use_rdmacm && cl->rdma_conn && config["rdma_address"].is_string())
|
||||
{
|
||||
msgr_rdma_address_t addr;
|
||||
if (!msgr_rdma_address_t::from_string(config["rdma_address"].string_value().c_str(), &addr) ||
|
||||
|
@ -800,7 +800,8 @@ bool osd_messenger_t::is_rdma_enabled()
|
|||
{
|
||||
return rdma_contexts.size() > 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
#ifdef WITH_RDMACM
|
||||
bool osd_messenger_t::is_use_rdmacm()
|
||||
{
|
||||
return use_rdmacm;
|
||||
|
|
|
@ -97,6 +97,7 @@ struct osd_wanted_peer_t
|
|||
json11::Json raw_address_list;
|
||||
json11::Json address_list;
|
||||
int port = 0;
|
||||
// FIXME: Remove separate WITH_RDMACM?
|
||||
#ifdef WITH_RDMACM
|
||||
int rdmacm_port = 0;
|
||||
#endif
|
||||
|
@ -286,6 +287,7 @@ protected:
|
|||
msgr_rdma_context_t* rdmacm_create_qp(rdma_cm_id *cmid);
|
||||
void rdmacm_accept(rdma_cm_event *ev);
|
||||
void rdmacm_try_connect_peer(uint64_t peer_osd, const std::string & addr, int rdmacm_port, int fallback_tcp_port);
|
||||
void rdmacm_set_conn_timeout(rdmacm_connecting_t *conn);
|
||||
void rdmacm_on_connect_peer_error(rdma_cm_id *cmid, int res);
|
||||
void rdmacm_address_resolved(rdma_cm_event *ev);
|
||||
void rdmacm_route_resolved(rdma_cm_event *ev);
|
||||
|
|
|
@ -70,6 +70,7 @@ msgr_rdma_context_t::~msgr_rdma_context_t()
|
|||
msgr_rdma_connection_t::~msgr_rdma_connection_t()
|
||||
{
|
||||
ctx->reserve_cqe(-max_send-max_recv);
|
||||
#ifdef WITH_RDMACM
|
||||
if (qp && !cmid)
|
||||
ibv_destroy_qp(qp);
|
||||
if (cmid)
|
||||
|
@ -79,6 +80,10 @@ msgr_rdma_connection_t::~msgr_rdma_connection_t()
|
|||
rdma_destroy_qp(cmid);
|
||||
rdma_destroy_id(cmid);
|
||||
}
|
||||
#else
|
||||
if (qp)
|
||||
ibv_destroy_qp(qp);
|
||||
#endif
|
||||
if (recv_buffers.size())
|
||||
{
|
||||
for (auto b: recv_buffers)
|
||||
|
|
|
@ -70,7 +70,7 @@ void osd_messenger_t::rdmacm_destroy_listener(rdma_cm_id *listener)
|
|||
|
||||
void osd_messenger_t::handle_rdmacm_events()
|
||||
{
|
||||
// rdma_destroy_id infinitely waits for pthread_cond if called before all events are acked :-(
|
||||
// rdma_destroy_id infinitely waits for pthread_cond if called before all events are acked :-(...
|
||||
std::vector<rdma_cm_event> events_copy;
|
||||
while (1)
|
||||
{
|
||||
|
@ -83,7 +83,15 @@ void osd_messenger_t::handle_rdmacm_events()
|
|||
fprintf(stderr, "Failed to get RDMA-CM event: %s (code %d)\n", strerror(errno), errno);
|
||||
exit(1);
|
||||
}
|
||||
events_copy.push_back(*ev);
|
||||
// ...so we save a copy of all events EXCEPT connection requests, otherwise they sometimes fail with EVENT_DISCONNECT
|
||||
if (ev->event == RDMA_CM_EVENT_CONNECT_REQUEST)
|
||||
{
|
||||
rdmacm_accept(ev);
|
||||
}
|
||||
else
|
||||
{
|
||||
events_copy.push_back(*ev);
|
||||
}
|
||||
r = rdma_ack_cm_event(ev);
|
||||
if (r != 0)
|
||||
{
|
||||
|
@ -96,7 +104,7 @@ void osd_messenger_t::handle_rdmacm_events()
|
|||
auto ev = &evl;
|
||||
if (ev->event == RDMA_CM_EVENT_CONNECT_REQUEST)
|
||||
{
|
||||
rdmacm_accept(ev);
|
||||
// Do nothing, handled above
|
||||
}
|
||||
else if (ev->event == RDMA_CM_EVENT_CONNECT_ERROR ||
|
||||
ev->event == RDMA_CM_EVENT_REJECTED ||
|
||||
|
@ -287,29 +295,34 @@ void osd_messenger_t::rdmacm_accept(rdma_cm_event *ev)
|
|||
rdma_destroy_id(ev->id);
|
||||
return;
|
||||
}
|
||||
rdma_context->cm_refs++;
|
||||
// Wrap into a new msgr_rdma_connection_t
|
||||
msgr_rdma_connection_t *conn = new msgr_rdma_connection_t;
|
||||
conn->ctx = rdma_context;
|
||||
conn->max_send = rdma_max_send;
|
||||
conn->max_recv = rdma_max_recv;
|
||||
conn->max_sge = rdma_max_sge > rdma_context->attrx.orig_attr.max_sge
|
||||
? rdma_context->attrx.orig_attr.max_sge : rdma_max_sge;
|
||||
conn->max_msg = rdma_max_msg;
|
||||
// Wait for RDMA_CM_ESTABLISHED, and enable the connection only after it
|
||||
auto conn = new rdmacm_connecting_t;
|
||||
conn->cmid = ev->id;
|
||||
conn->qp = ev->id->qp;
|
||||
auto cl = new osd_client_t();
|
||||
cl->peer_fd = fake_fd;
|
||||
cl->peer_state = PEER_RDMA;
|
||||
cl->peer_addr = *(sockaddr_storage*)rdma_get_peer_addr(ev->id);
|
||||
cl->in_buf = malloc_or_die(receive_buffer_size);
|
||||
cl->rdma_conn = conn;
|
||||
clients[fake_fd] = cl;
|
||||
rdmacm_connections[ev->id] = cl;
|
||||
// Add initial receive request(s)
|
||||
try_recv_rdma(cl);
|
||||
fprintf(stderr, "[OSD %ju] new client %d: connection from %s via RDMA-CM\n", this->osd_num, fake_fd,
|
||||
addr_to_string(cl->peer_addr).c_str());
|
||||
conn->peer_fd = fake_fd;
|
||||
conn->parsed_addr = *(sockaddr_storage*)rdma_get_peer_addr(ev->id);
|
||||
conn->rdma_context = rdma_context;
|
||||
rdmacm_set_conn_timeout(conn);
|
||||
rdmacm_connecting[ev->id] = conn;
|
||||
fprintf(stderr, "[OSD %ju] new client %d: connection from %s via RDMA-CM\n", this->osd_num, conn->peer_fd,
|
||||
addr_to_string(conn->parsed_addr).c_str());
|
||||
}
|
||||
|
||||
void osd_messenger_t::rdmacm_set_conn_timeout(rdmacm_connecting_t *conn)
|
||||
{
|
||||
conn->timeout_ms = peer_connect_timeout*1000;
|
||||
if (peer_connect_timeout > 0)
|
||||
{
|
||||
conn->timeout_id = tfd->set_timer(1000*peer_connect_timeout, false, [this, cmid = conn->cmid](int timer_id)
|
||||
{
|
||||
auto conn = rdmacm_connecting.at(cmid);
|
||||
conn->timeout_id = -1;
|
||||
if (conn->peer_osd)
|
||||
fprintf(stderr, "RDMA-CM connection to %s timed out\n", conn->addr.c_str());
|
||||
else
|
||||
fprintf(stderr, "Incoming RDMA-CM connection from %s timed out\n", addr_to_string(conn->parsed_addr).c_str());
|
||||
rdmacm_on_connect_peer_error(cmid, -EPIPE);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
void osd_messenger_t::rdmacm_on_connect_peer_error(rdma_cm_id *cmid, int res)
|
||||
|
@ -332,15 +345,18 @@ void osd_messenger_t::rdmacm_on_connect_peer_error(rdma_cm_id *cmid, int res)
|
|||
}
|
||||
rdmacm_connecting.erase(cmid);
|
||||
delete conn;
|
||||
if (!disable_tcp)
|
||||
if (peer_osd)
|
||||
{
|
||||
// Fall back to TCP instead of just reporting the error to on_connect_peer()
|
||||
try_connect_peer_tcp(peer_osd, addr.c_str(), tcp_port);
|
||||
}
|
||||
else
|
||||
{
|
||||
// TCP is disabled
|
||||
on_connect_peer(peer_osd, res == 0 ? -EINVAL : (res > 0 ? -res : res));
|
||||
if (!disable_tcp)
|
||||
{
|
||||
// Fall back to TCP instead of just reporting the error to on_connect_peer()
|
||||
try_connect_peer_tcp(peer_osd, addr.c_str(), tcp_port);
|
||||
}
|
||||
else
|
||||
{
|
||||
// TCP is disabled
|
||||
on_connect_peer(peer_osd, res == 0 ? -EINVAL : (res > 0 ? -res : res));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -374,6 +390,8 @@ void osd_messenger_t::rdmacm_try_connect_peer(uint64_t peer_osd, const std::stri
|
|||
on_connect_peer(peer_osd, res);
|
||||
return;
|
||||
}
|
||||
if (log_level > 0)
|
||||
fprintf(stderr, "Trying to connect to OSD %ju at %s:%d via RDMA-CM\n", peer_osd, addr.c_str(), rdmacm_port);
|
||||
auto conn = new rdmacm_connecting_t;
|
||||
rdmacm_connecting[cmid] = conn;
|
||||
conn->cmid = cmid;
|
||||
|
@ -383,19 +401,7 @@ void osd_messenger_t::rdmacm_try_connect_peer(uint64_t peer_osd, const std::stri
|
|||
conn->parsed_addr = sa;
|
||||
conn->rdmacm_port = rdmacm_port;
|
||||
conn->tcp_port = fallback_tcp_port;
|
||||
conn->timeout_ms = peer_connect_timeout*1000;
|
||||
conn->timeout_id = -1;
|
||||
if (peer_connect_timeout > 0)
|
||||
{
|
||||
conn->timeout_id = tfd->set_timer(1000*peer_connect_timeout, false, [this, cmid](int timer_id)
|
||||
{
|
||||
auto conn = rdmacm_connecting.at(cmid);
|
||||
conn->timeout_id = -1;
|
||||
fprintf(stderr, "RDMA-CM connection to %s timed out\n", conn->addr.c_str());
|
||||
rdmacm_on_connect_peer_error(cmid, -EPIPE);
|
||||
return;
|
||||
});
|
||||
}
|
||||
rdmacm_set_conn_timeout(conn);
|
||||
if (rdma_resolve_addr(cmid, NULL, (sockaddr*)&conn->parsed_addr, conn->timeout_ms) != 0)
|
||||
{
|
||||
auto res = -errno;
|
||||
|
@ -494,7 +500,7 @@ void osd_messenger_t::rdmacm_established(rdma_cm_event *ev)
|
|||
// Wrap into a new msgr_rdma_connection_t
|
||||
msgr_rdma_connection_t *rc = new msgr_rdma_connection_t;
|
||||
rc->ctx = conn->rdma_context;
|
||||
rc->ctx->cm_refs++;
|
||||
rc->ctx->cm_refs++; // FIXME now unused, count also connecting_t's when used
|
||||
rc->max_send = rdma_max_send;
|
||||
rc->max_recv = rdma_max_recv;
|
||||
rc->max_sge = rdma_max_sge > rc->ctx->attrx.orig_attr.max_sge
|
||||
|
@ -514,14 +520,20 @@ void osd_messenger_t::rdmacm_established(rdma_cm_event *ev)
|
|||
cl->rdma_conn = rc;
|
||||
clients[conn->peer_fd] = cl;
|
||||
if (conn->timeout_id >= 0)
|
||||
{
|
||||
tfd->clear_timer(conn->timeout_id);
|
||||
}
|
||||
delete conn;
|
||||
rdmacm_connecting.erase(cmid);
|
||||
rdmacm_connections[cmid] = cl;
|
||||
if (log_level > 0)
|
||||
if (log_level > 0 && peer_osd)
|
||||
{
|
||||
fprintf(stderr, "Successfully connected with OSD %ju using RDMA-CM\n", peer_osd);
|
||||
}
|
||||
// Add initial receive request(s)
|
||||
try_recv_rdma(cl);
|
||||
osd_peer_fds[peer_osd] = cl->peer_fd;
|
||||
on_connect_peer(peer_osd, cl->peer_fd);
|
||||
if (peer_osd)
|
||||
{
|
||||
check_peer_config(cl);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -58,6 +58,12 @@ struct osd_changer_t
|
|||
state = 100;
|
||||
return;
|
||||
}
|
||||
if (set_reweight && new_reweight > 1)
|
||||
{
|
||||
result = (cli_result_t){ .err = EINVAL, .text = "Reweight can't be larger than 1" };
|
||||
state = 100;
|
||||
return;
|
||||
}
|
||||
parent->etcd_txn(json11::Json::object {
|
||||
{ "success", json11::Json::array {
|
||||
json11::Json::object {
|
||||
|
|
|
@ -22,8 +22,8 @@ int nfs3_fsstat_proc(void *opaque, rpc_op_t *rop)
|
|||
{
|
||||
auto ttb = pst_it->second["total_raw_tb"].number_value();
|
||||
auto ftb = (pst_it->second["total_raw_tb"].number_value() - pst_it->second["used_raw_tb"].number_value());
|
||||
tbytes = ttb / pst_it->second["raw_to_usable"].number_value() * ((uint64_t)2<<40);
|
||||
fbytes = ftb / pst_it->second["raw_to_usable"].number_value() * ((uint64_t)2<<40);
|
||||
tbytes = ttb / pst_it->second["raw_to_usable"].number_value() * ((uint64_t)1<<40);
|
||||
fbytes = ftb / pst_it->second["raw_to_usable"].number_value() * ((uint64_t)1<<40);
|
||||
}
|
||||
*reply = (FSSTAT3res){
|
||||
.status = NFS3_OK,
|
||||
|
|
Loading…
Reference in New Issue