Compare commits

...

3 Commits

Author SHA1 Message Date
Vitaliy Filippov 60a300621e Support scattered write in node.js binding 2024-07-31 01:15:38 +03:00
Test / test_rebalance_verify (push) Successful in 1m27s
Test / test_rebalance_verify_imm (push) Successful in 1m26s
Test / test_root_node (push) Successful in 6s
Test / test_rebalance_verify_ec (push) Successful in 1m34s
Test / test_rebalance_verify_ec_imm (push) Successful in 1m35s
Test / test_write_no_same (push) Successful in 7s
Test / test_switch_primary (push) Successful in 31s
Test / test_write (push) Successful in 30s
Test / test_write_xor (push) Successful in 33s
Test / test_heal_pg_size_2 (push) Successful in 2m15s
Test / test_heal_antietcd (push) Successful in 2m16s
Test / test_heal_csum_32k_dmj (push) Successful in 2m18s
Test / test_heal_csum_32k_dj (push) Successful in 2m18s
Test / test_heal_csum_32k (push) Successful in 2m19s
Test / test_heal_csum_4k_dmj (push) Successful in 2m13s
Test / test_osd_tags (push) Successful in 9s
Test / test_enospc (push) Successful in 10s
Test / test_enospc_xor (push) Successful in 11s
Test / test_enospc_imm (push) Successful in 10s
Test / test_enospc_imm_xor (push) Successful in 13s
Test / test_scrub (push) Successful in 13s
Test / test_scrub_zero_osd_2 (push) Successful in 12s
Test / test_scrub_xor (push) Successful in 12s
Test / test_scrub_pg_size_3 (push) Successful in 14s
Test / test_heal_csum_4k_dj (push) Successful in 2m14s
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 17s
Test / test_heal_csum_4k (push) Successful in 2m13s
Test / test_nfs (push) Successful in 11s
Test / test_scrub_ec (push) Successful in 16s
Test / test_heal_ec (push) Failing after 10m7s
Vitaliy Filippov 731a902cd0 Fix a rare client hang on PG state changes 2024-07-31 01:15:38 +03:00
Vitaliy Filippov 94a13d1d2e Always continue operations to not miss resuming after the lack of PG primary
Should fix spurious client hangs during PG primary switchover
2024-07-31 01:15:37 +03:00
3 changed files with 38 additions and 15 deletions

View File

@@ -24,6 +24,7 @@ public:
     }
     iovec iov;
+    std::vector<iovec> iov_list;
     NodeVitastorImage *img = NULL;
     int op = 0;
     uint64_t offset = 0, len = 0, version = 0;
@@ -141,10 +142,8 @@ NAN_METHOD(NodeVitastor::Read)
 static NodeVitastorRequest* getWriteRequest(const Nan::FunctionCallbackInfo<v8::Value> & info, int argpos)
 {
     uint64_t offset = Nan::To<int64_t>(info[argpos+0]).FromJust();
-    char *buf = node::Buffer::Data(info[argpos+1]);
-    uint64_t len = node::Buffer::Length(info[argpos+1]);
+    const auto & bufarg = info[argpos+1];
     uint64_t version = 0;
     if (!info[argpos+2].IsEmpty() && info[argpos+2]->IsObject())
     {
         auto key = Nan::New<v8::String>("version").ToLocalChecked();
@@ -159,14 +158,33 @@ static NodeVitastorRequest* getWriteRequest(const Nan::FunctionCallbackInfo<v8::Value> & info, int argpos)
     auto req = new NodeVitastorRequest(callback);

     req->offset = offset;
-    req->len = len;
     req->version = version;
-    req->iov = { .iov_base = buf, .iov_len = req->len };
+    if (bufarg->IsArray())
+    {
+        auto buffers = bufarg.As<v8::Array>();
+        req->len = 0;
+        for (uint32_t i = 0; i < buffers->Length(); i++)
+        {
+            auto buffer_obj = Nan::Get(buffers, i).ToLocalChecked();
+            char *buf = node::Buffer::Data(buffer_obj);
+            uint64_t len = node::Buffer::Length(buffer_obj);
+            req->iov_list.push_back({ .iov_base = buf, .iov_len = len });
+            req->len += len;
+        }
+    }
+    else
+    {
+        char *buf = node::Buffer::Data(bufarg);
+        uint64_t len = node::Buffer::Length(bufarg);
+        req->iov = { .iov_base = buf, .iov_len = req->len };
+        req->len = len;
+    }

     return req;
 }

-// write(pool, inode, offset, buffer, { version }?, callback(err))
+// write(pool, inode, offset, buf: Buffer | Buffer[], { version }?, callback(err))
 NAN_METHOD(NodeVitastor::Write)
 {
     TRACE("NodeVitastor::Write");
@@ -179,7 +197,10 @@ NAN_METHOD(NodeVitastor::Write)

     auto req = getWriteRequest(info, 2);
     std::unique_lock<std::mutex> lock(self->mu);
-    vitastor_c_write(self->c, ((pool << (64-POOL_ID_BITS)) | inode), req->offset, req->len, req->version, &req->iov, 1, on_write_finish, req);
+    vitastor_c_write(self->c, ((pool << (64-POOL_ID_BITS)) | inode), req->offset, req->len, req->version,
+        req->iov_list.size() ? req->iov_list.data() : &req->iov,
+        req->iov_list.size() ? req->iov_list.size() : 1,
+        on_write_finish, req);
 }

 // sync(callback(err))
@@ -421,7 +442,10 @@ void NodeVitastorImage::exec_request(NodeVitastorRequest *req)
     else if (req->op == NODE_VITASTOR_WRITE)
     {
         uint64_t ino = vitastor_c_inode_get_num(watch);
-        vitastor_c_write(cli->c, ino, req->offset, req->len, req->version, &req->iov, 1, NodeVitastor::on_write_finish, req);
+        vitastor_c_write(cli->c, ino, req->offset, req->len, req->version,
+            req->iov_list.size() ? req->iov_list.data() : &req->iov,
+            req->iov_list.size() ? req->iov_list.size() : 1,
+            NodeVitastor::on_write_finish, req);
     }
     else if (req->op == NODE_VITASTOR_SYNC)
     {
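
Below is a minimal JavaScript sketch of how the new scattered write could be called through the client-level binding. Only the write(pool, inode, offset, buf: Buffer | Buffer[], { version }?, callback(err)) signature comes from the comments in this diff; the module name and the Client constructor with its options are assumptions and may differ from the actual binding.

// Hedged usage sketch, not taken from the repository: the 'vitastor' module name
// and Client construction are assumed; only the write() signature is documented above.
const vitastor = require('vitastor');

const cli = new vitastor.Client({ config_path: '/etc/vitastor/vitastor.conf' });

const pool = 1, inode = 2;
const head = Buffer.alloc(4096, 0xaa);
const tail = Buffer.alloc(8192, 0xbb);

// Passing an array of Buffers takes the new iov_list path: each Buffer becomes
// one iovec and the lengths are summed into the request length.
cli.write(pool, inode, 0, [ head, tail ], (err) =>
{
    if (err)
        throw err;
    console.log('scattered write of', head.length + tail.length, 'bytes done');
});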

View File

@@ -19,7 +19,7 @@ public:
     static NAN_METHOD(Create);
     // read(pool, inode, offset, len, callback(err, buffer, version))
     static NAN_METHOD(Read);
-    // write(pool, inode, offset, buffer, { version }?, callback(err))
+    // write(pool, inode, offset, buf: Buffer | Buffer[], { version }?, callback(err))
     static NAN_METHOD(Write);
     // sync(callback(err))
     static NAN_METHOD(Sync);
@@ -56,7 +56,7 @@ public:
     static NAN_METHOD(Create);
     // read(offset, len, callback(err, buffer, version))
     static NAN_METHOD(Read);
-    // write(offset, buffer, { version }?, callback(err))
+    // write(offset, buf: Buffer | Buffer[], { version }?, callback(err))
     static NAN_METHOD(Write);
     // sync(callback(err))
     static NAN_METHOD(Sync);
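
The image-level API gets the same Buffer | Buffer[] overload. A hedged sketch of a scattered write followed by a read-back, again assuming how the client and image objects are constructed (only the read()/write() signatures come from the header comments above):

// Sketch only: vitastor.Client / vitastor.Image construction is an assumption;
// read() and write() follow the signatures documented in this header.
const vitastor = require('vitastor');

const cli = new vitastor.Client({ config_path: '/etc/vitastor/vitastor.conf' });
const img = new vitastor.Image(cli, 'testimg');

const chunks = [ Buffer.alloc(4096, 1), Buffer.alloc(4096, 2) ];

img.write(0, chunks, (err) =>
{
    if (err)
        throw err;
    // Read the 8 KiB back in one piece and compare with the concatenated input
    img.read(0, 8192, (err, data, version) =>
    {
        if (err)
            throw err;
        console.log('read-back matches:', Buffer.concat(chunks).equals(data));
    });
});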

View File

@@ -452,11 +452,10 @@ void cluster_client_t::on_change_pg_state_hook(pool_id_t pool_id, pg_num_t pg_num
     if (pg_cfg.cur_primary != prev_primary)
     {
         // Repeat this PG operations because an OSD which stopped being primary may not fsync operations
-        if (wb->repeat_ops_for(this, 0, pool_id, pg_num) > 0)
-        {
-            continue_ops();
-        }
+        wb->repeat_ops_for(this, 0, pool_id, pg_num);
     }
+    // Always continue to resume operations hung because of lack of the primary OSD
+    continue_ops();
 }

 bool cluster_client_t::get_immediate_commit(uint64_t inode)
@@ -1066,11 +1065,11 @@ bool cluster_client_t::try_send(cluster_op_t *op, int i)
         !pg_it->second.pause && pg_it->second.cur_primary)
     {
         osd_num_t primary_osd = pg_it->second.cur_primary;
+        part->osd_num = primary_osd;
         auto peer_it = msgr.osd_peer_fds.find(primary_osd);
         if (peer_it != msgr.osd_peer_fds.end())
         {
             int peer_fd = peer_it->second;
-            part->osd_num = primary_osd;
             part->flags |= PART_SENT;
             op->inflight_count++;
             uint64_t pg_bitmap_size = (pool_cfg.data_block_size / pool_cfg.bitmap_granularity / 8) * (