forked from vitalif/vitastor
Compare commits
14 Commits
master
...
hotfix-1.0
Author | SHA1 | Date |
---|---|---|
Vitaliy Filippov | fc83e3821c | |
Vitaliy Filippov | 0d707fc83b | |
Vitaliy Filippov | 4f99f78430 | |
Vitaliy Filippov | f926f8c2e0 | |
Vitaliy Filippov | d73ad12c56 | |
Vitaliy Filippov | d68cec10e2 | |
Vitaliy Filippov | 9afa200a33 | |
Vitaliy Filippov | aff6f3e970 | |
Vitaliy Filippov | 49fca80f1c | |
Vitaliy Filippov | a028b4fa4c | |
Vitaliy Filippov | da2bfd0b1e | |
Vitaliy Filippov | cec5ceab77 | |
Vitaliy Filippov | dc90faec7e | |
Vitaliy Filippov | 20c62a4244 |
16
mon/mon.js
16
mon/mon.js
|
@ -539,10 +539,18 @@ class Mon
|
||||||
{
|
{
|
||||||
retries = 1;
|
retries = 1;
|
||||||
}
|
}
|
||||||
|
const tried = {};
|
||||||
while (retries < 0 || retry < retries)
|
while (retries < 0 || retry < retries)
|
||||||
{
|
{
|
||||||
const cur_addr = this.pick_next_etcd();
|
const cur_addr = this.pick_next_etcd();
|
||||||
const base = 'ws'+cur_addr.substr(4);
|
const base = 'ws'+cur_addr.substr(4);
|
||||||
|
let now = Date.now();
|
||||||
|
if (tried[base] && now-tried[base] < timeout)
|
||||||
|
{
|
||||||
|
await new Promise(ok => setTimeout(ok, timeout-(now-tried[base])));
|
||||||
|
now = Date.now();
|
||||||
|
}
|
||||||
|
tried[base] = now;
|
||||||
const ok = await new Promise((ok, no) =>
|
const ok = await new Promise((ok, no) =>
|
||||||
{
|
{
|
||||||
const timer_id = setTimeout(() =>
|
const timer_id = setTimeout(() =>
|
||||||
|
@ -1788,10 +1796,18 @@ class Mon
|
||||||
{
|
{
|
||||||
retries = 1;
|
retries = 1;
|
||||||
}
|
}
|
||||||
|
const tried = {};
|
||||||
while (retries < 0 || retry < retries)
|
while (retries < 0 || retry < retries)
|
||||||
{
|
{
|
||||||
retry++;
|
retry++;
|
||||||
const base = this.pick_next_etcd();
|
const base = this.pick_next_etcd();
|
||||||
|
let now = Date.now();
|
||||||
|
if (tried[base] && now-tried[base] < timeout)
|
||||||
|
{
|
||||||
|
await new Promise(ok => setTimeout(ok, timeout-(now-tried[base])));
|
||||||
|
now = Date.now();
|
||||||
|
}
|
||||||
|
tried[base] = now;
|
||||||
const res = await POST(base+path, body, timeout);
|
const res = await POST(base+path, body, timeout);
|
||||||
if (res.error)
|
if (res.error)
|
||||||
{
|
{
|
||||||
|
|
|
@ -19,8 +19,8 @@ bool string_to_addr(std::string str, bool parse_port, int default_port, struct s
|
||||||
if (p != std::string::npos && !(str.length() > 0 && str[p-1] == ']')) // "[ipv6]" which contains ':'
|
if (p != std::string::npos && !(str.length() > 0 && str[p-1] == ']')) // "[ipv6]" which contains ':'
|
||||||
{
|
{
|
||||||
char null_byte = 0;
|
char null_byte = 0;
|
||||||
int n = sscanf(str.c_str()+p+1, "%d%c", &default_port, &null_byte);
|
int scanned = sscanf(str.c_str()+p+1, "%d%c", &default_port, &null_byte);
|
||||||
if (n != 1 || default_port >= 0x10000)
|
if (scanned != 1 || default_port >= 0x10000)
|
||||||
return false;
|
return false;
|
||||||
str = str.substr(0, p);
|
str = str.substr(0, p);
|
||||||
}
|
}
|
||||||
|
|
|
@ -393,6 +393,7 @@ void blockstore_impl_t::init_op(blockstore_op_t *op)
|
||||||
{
|
{
|
||||||
// Call constructor without allocating memory. We'll call destructor before returning op back
|
// Call constructor without allocating memory. We'll call destructor before returning op back
|
||||||
new ((void*)op->private_data) blockstore_op_private_t;
|
new ((void*)op->private_data) blockstore_op_private_t;
|
||||||
|
PRIV(op)->min_flushed_journal_sector = PRIV(op)->max_flushed_journal_sector = 0;
|
||||||
PRIV(op)->wait_for = 0;
|
PRIV(op)->wait_for = 0;
|
||||||
PRIV(op)->op_state = 0;
|
PRIV(op)->op_state = 0;
|
||||||
PRIV(op)->pending_ops = 0;
|
PRIV(op)->pending_ops = 0;
|
||||||
|
|
|
@ -210,7 +210,7 @@ struct blockstore_op_private_t
|
||||||
std::vector<copy_buffer_t> read_vec;
|
std::vector<copy_buffer_t> read_vec;
|
||||||
|
|
||||||
// Sync, write
|
// Sync, write
|
||||||
int min_flushed_journal_sector, max_flushed_journal_sector;
|
uint64_t min_flushed_journal_sector, max_flushed_journal_sector;
|
||||||
|
|
||||||
// Write
|
// Write
|
||||||
struct iovec iov_zerofill[3];
|
struct iovec iov_zerofill[3];
|
||||||
|
@ -220,7 +220,6 @@ struct blockstore_op_private_t
|
||||||
|
|
||||||
// Sync
|
// Sync
|
||||||
std::vector<obj_ver_id> sync_big_writes, sync_small_writes;
|
std::vector<obj_ver_id> sync_big_writes, sync_small_writes;
|
||||||
int sync_small_checked, sync_big_checked;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef uint32_t pool_id_t;
|
typedef uint32_t pool_id_t;
|
||||||
|
|
|
@ -198,6 +198,7 @@ void blockstore_impl_t::prepare_journal_sector_write(int cur_sector, blockstore_
|
||||||
priv->pending_ops++;
|
priv->pending_ops++;
|
||||||
if (!priv->min_flushed_journal_sector)
|
if (!priv->min_flushed_journal_sector)
|
||||||
priv->min_flushed_journal_sector = 1+cur_sector;
|
priv->min_flushed_journal_sector = 1+cur_sector;
|
||||||
|
assert(priv->min_flushed_journal_sector <= journal.sector_count);
|
||||||
priv->max_flushed_journal_sector = 1+cur_sector;
|
priv->max_flushed_journal_sector = 1+cur_sector;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -27,8 +27,6 @@ int blockstore_impl_t::continue_sync(blockstore_op_t *op)
|
||||||
unsynced_big_write_count -= unsynced_big_writes.size();
|
unsynced_big_write_count -= unsynced_big_writes.size();
|
||||||
PRIV(op)->sync_big_writes.swap(unsynced_big_writes);
|
PRIV(op)->sync_big_writes.swap(unsynced_big_writes);
|
||||||
PRIV(op)->sync_small_writes.swap(unsynced_small_writes);
|
PRIV(op)->sync_small_writes.swap(unsynced_small_writes);
|
||||||
PRIV(op)->sync_small_checked = 0;
|
|
||||||
PRIV(op)->sync_big_checked = 0;
|
|
||||||
unsynced_big_writes.clear();
|
unsynced_big_writes.clear();
|
||||||
unsynced_small_writes.clear();
|
unsynced_small_writes.clear();
|
||||||
if (PRIV(op)->sync_big_writes.size() > 0)
|
if (PRIV(op)->sync_big_writes.size() > 0)
|
||||||
|
|
|
@ -286,14 +286,19 @@ int blockstore_impl_t::dequeue_write(blockstore_op_t *op)
|
||||||
printf("Restoring %lx:%lx version: v%lu -> v%lu\n", op->oid.inode, op->oid.stripe, op->version, PRIV(op)->real_version);
|
printf("Restoring %lx:%lx version: v%lu -> v%lu\n", op->oid.inode, op->oid.stripe, op->version, PRIV(op)->real_version);
|
||||||
#endif
|
#endif
|
||||||
auto prev_it = dirty_it;
|
auto prev_it = dirty_it;
|
||||||
|
if (prev_it != dirty_db.begin())
|
||||||
|
{
|
||||||
prev_it--;
|
prev_it--;
|
||||||
if (prev_it->first.oid == op->oid && prev_it->first.version >= PRIV(op)->real_version)
|
if (prev_it->first.oid == op->oid && prev_it->first.version >= PRIV(op)->real_version)
|
||||||
{
|
{
|
||||||
// Original version is still invalid
|
// Original version is still invalid
|
||||||
// All subsequent writes to the same object must be canceled too
|
// All subsequent writes to the same object must be canceled too
|
||||||
|
printf("Tried to write %lx:%lx v%lu after delete (old version v%lu), but already have v%lu\n",
|
||||||
|
op->oid.inode, op->oid.stripe, PRIV(op)->real_version, op->version, prev_it->first.version);
|
||||||
cancel_all_writes(op, dirty_it, -EEXIST);
|
cancel_all_writes(op, dirty_it, -EEXIST);
|
||||||
return 2;
|
return 2;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
op->version = PRIV(op)->real_version;
|
op->version = PRIV(op)->real_version;
|
||||||
PRIV(op)->real_version = 0;
|
PRIV(op)->real_version = 0;
|
||||||
dirty_entry e = dirty_it->second;
|
dirty_entry e = dirty_it->second;
|
||||||
|
@ -378,7 +383,6 @@ int blockstore_impl_t::dequeue_write(blockstore_op_t *op)
|
||||||
sqe, dsk.data_fd, PRIV(op)->iov_zerofill, vcnt, dsk.data_offset + (loc << dsk.block_order) + op->offset - stripe_offset
|
sqe, dsk.data_fd, PRIV(op)->iov_zerofill, vcnt, dsk.data_offset + (loc << dsk.block_order) + op->offset - stripe_offset
|
||||||
);
|
);
|
||||||
PRIV(op)->pending_ops = 1;
|
PRIV(op)->pending_ops = 1;
|
||||||
PRIV(op)->min_flushed_journal_sector = PRIV(op)->max_flushed_journal_sector = 0;
|
|
||||||
if (immediate_commit != IMMEDIATE_ALL)
|
if (immediate_commit != IMMEDIATE_ALL)
|
||||||
{
|
{
|
||||||
// Increase the counter, but don't save into unsynced_writes yet (can't sync until the write is finished)
|
// Increase the counter, but don't save into unsynced_writes yet (can't sync until the write is finished)
|
||||||
|
@ -415,17 +419,11 @@ int blockstore_impl_t::dequeue_write(blockstore_op_t *op)
|
||||||
write_iodepth++;
|
write_iodepth++;
|
||||||
// Got SQEs. Prepare previous journal sector write if required
|
// Got SQEs. Prepare previous journal sector write if required
|
||||||
auto cb = [this, op](ring_data_t *data) { handle_write_event(data, op); };
|
auto cb = [this, op](ring_data_t *data) { handle_write_event(data, op); };
|
||||||
if (immediate_commit == IMMEDIATE_NONE)
|
if (immediate_commit == IMMEDIATE_NONE &&
|
||||||
{
|
!journal.entry_fits(sizeof(journal_entry_small_write) + dyn_size))
|
||||||
if (!journal.entry_fits(sizeof(journal_entry_small_write) + dyn_size))
|
|
||||||
{
|
{
|
||||||
prepare_journal_sector_write(journal.cur_sector, op);
|
prepare_journal_sector_write(journal.cur_sector, op);
|
||||||
}
|
}
|
||||||
else
|
|
||||||
{
|
|
||||||
PRIV(op)->min_flushed_journal_sector = PRIV(op)->max_flushed_journal_sector = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Then pre-fill journal entry
|
// Then pre-fill journal entry
|
||||||
journal_entry_small_write *je = (journal_entry_small_write*)prefill_single_journal_entry(
|
journal_entry_small_write *je = (journal_entry_small_write*)prefill_single_journal_entry(
|
||||||
journal, op->opcode == BS_OP_WRITE_STABLE ? JE_SMALL_WRITE_INSTANT : JE_SMALL_WRITE,
|
journal, op->opcode == BS_OP_WRITE_STABLE ? JE_SMALL_WRITE_INSTANT : JE_SMALL_WRITE,
|
||||||
|
@ -750,18 +748,12 @@ int blockstore_impl_t::dequeue_del(blockstore_op_t *op)
|
||||||
}
|
}
|
||||||
write_iodepth++;
|
write_iodepth++;
|
||||||
// Prepare journal sector write
|
// Prepare journal sector write
|
||||||
if (immediate_commit == IMMEDIATE_NONE)
|
if (immediate_commit == IMMEDIATE_NONE &&
|
||||||
{
|
(dsk.journal_block_size - journal.in_sector_pos) < sizeof(journal_entry_del) &&
|
||||||
if ((dsk.journal_block_size - journal.in_sector_pos) < sizeof(journal_entry_del) &&
|
|
||||||
journal.sector_info[journal.cur_sector].dirty)
|
journal.sector_info[journal.cur_sector].dirty)
|
||||||
{
|
{
|
||||||
prepare_journal_sector_write(journal.cur_sector, op);
|
prepare_journal_sector_write(journal.cur_sector, op);
|
||||||
}
|
}
|
||||||
else
|
|
||||||
{
|
|
||||||
PRIV(op)->min_flushed_journal_sector = PRIV(op)->max_flushed_journal_sector = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Pre-fill journal entry
|
// Pre-fill journal entry
|
||||||
journal_entry_del *je = (journal_entry_del*)prefill_single_journal_entry(
|
journal_entry_del *je = (journal_entry_del*)prefill_single_journal_entry(
|
||||||
journal, JE_DELETE, sizeof(struct journal_entry_del)
|
journal, JE_DELETE, sizeof(struct journal_entry_del)
|
||||||
|
|
|
@ -357,6 +357,8 @@ static int run(cli_tool_t *p, json11::Json::object cfg)
|
||||||
p->ringloop = NULL;
|
p->ringloop = NULL;
|
||||||
}
|
}
|
||||||
// Print result
|
// Print result
|
||||||
|
fflush(stderr);
|
||||||
|
fflush(stdout);
|
||||||
if (p->json_output && !result.data.is_null())
|
if (p->json_output && !result.data.is_null())
|
||||||
{
|
{
|
||||||
printf("%s\n", result.data.dump().c_str());
|
printf("%s\n", result.data.dump().c_str());
|
||||||
|
|
|
@ -77,8 +77,8 @@ struct alloc_osd_t
|
||||||
std::string key = base64_decode(kv["key"].string_value());
|
std::string key = base64_decode(kv["key"].string_value());
|
||||||
osd_num_t cur_osd;
|
osd_num_t cur_osd;
|
||||||
char null_byte = 0;
|
char null_byte = 0;
|
||||||
sscanf(key.c_str() + parent->cli->st_cli.etcd_prefix.length(), "/osd/stats/%lu%c", &cur_osd, &null_byte);
|
int scanned = sscanf(key.c_str() + parent->cli->st_cli.etcd_prefix.length(), "/osd/stats/%lu%c", &cur_osd, &null_byte);
|
||||||
if (!cur_osd || null_byte != 0)
|
if (scanned != 1 || !cur_osd)
|
||||||
{
|
{
|
||||||
fprintf(stderr, "Invalid key in etcd: %s\n", key.c_str());
|
fprintf(stderr, "Invalid key in etcd: %s\n", key.c_str());
|
||||||
continue;
|
continue;
|
||||||
|
|
|
@ -67,8 +67,8 @@ resume_1:
|
||||||
// pool ID
|
// pool ID
|
||||||
pool_id_t pool_id;
|
pool_id_t pool_id;
|
||||||
char null_byte = 0;
|
char null_byte = 0;
|
||||||
sscanf(kv.key.substr(parent->cli->st_cli.etcd_prefix.length()).c_str(), "/pool/stats/%u%c", &pool_id, &null_byte);
|
int scanned = sscanf(kv.key.substr(parent->cli->st_cli.etcd_prefix.length()).c_str(), "/pool/stats/%u%c", &pool_id, &null_byte);
|
||||||
if (!pool_id || pool_id >= POOL_ID_MAX || null_byte != 0)
|
if (scanned != 1 || !pool_id || pool_id >= POOL_ID_MAX)
|
||||||
{
|
{
|
||||||
fprintf(stderr, "Invalid key in etcd: %s\n", kv.key.c_str());
|
fprintf(stderr, "Invalid key in etcd: %s\n", kv.key.c_str());
|
||||||
continue;
|
continue;
|
||||||
|
@ -82,8 +82,8 @@ resume_1:
|
||||||
// osd ID
|
// osd ID
|
||||||
osd_num_t osd_num;
|
osd_num_t osd_num;
|
||||||
char null_byte = 0;
|
char null_byte = 0;
|
||||||
sscanf(kv.key.substr(parent->cli->st_cli.etcd_prefix.length()).c_str(), "/osd/stats/%lu%c", &osd_num, &null_byte);
|
int scanned = sscanf(kv.key.substr(parent->cli->st_cli.etcd_prefix.length()).c_str(), "/osd/stats/%lu%c", &osd_num, &null_byte);
|
||||||
if (!osd_num || osd_num >= POOL_ID_MAX || null_byte != 0)
|
if (scanned != 1 || !osd_num || osd_num >= POOL_ID_MAX)
|
||||||
{
|
{
|
||||||
fprintf(stderr, "Invalid key in etcd: %s\n", kv.key.c_str());
|
fprintf(stderr, "Invalid key in etcd: %s\n", kv.key.c_str());
|
||||||
continue;
|
continue;
|
||||||
|
|
|
@ -133,8 +133,8 @@ resume_1:
|
||||||
// pool ID
|
// pool ID
|
||||||
pool_id_t pool_id;
|
pool_id_t pool_id;
|
||||||
char null_byte = 0;
|
char null_byte = 0;
|
||||||
sscanf(kv.key.substr(parent->cli->st_cli.etcd_prefix.length()).c_str(), "/pool/stats/%u%c", &pool_id, &null_byte);
|
int scanned = sscanf(kv.key.substr(parent->cli->st_cli.etcd_prefix.length()).c_str(), "/pool/stats/%u%c", &pool_id, &null_byte);
|
||||||
if (!pool_id || pool_id >= POOL_ID_MAX || null_byte != 0)
|
if (scanned != 1 || !pool_id || pool_id >= POOL_ID_MAX)
|
||||||
{
|
{
|
||||||
fprintf(stderr, "Invalid key in etcd: %s\n", kv.key.c_str());
|
fprintf(stderr, "Invalid key in etcd: %s\n", kv.key.c_str());
|
||||||
continue;
|
continue;
|
||||||
|
@ -149,9 +149,9 @@ resume_1:
|
||||||
pool_id_t pool_id;
|
pool_id_t pool_id;
|
||||||
inode_t only_inode_num;
|
inode_t only_inode_num;
|
||||||
char null_byte = 0;
|
char null_byte = 0;
|
||||||
sscanf(kv.key.substr(parent->cli->st_cli.etcd_prefix.length()).c_str(),
|
int scanned = sscanf(kv.key.substr(parent->cli->st_cli.etcd_prefix.length()).c_str(),
|
||||||
"/inode/stats/%u/%lu%c", &pool_id, &only_inode_num, &null_byte);
|
"/inode/stats/%u/%lu%c", &pool_id, &only_inode_num, &null_byte);
|
||||||
if (!pool_id || pool_id >= POOL_ID_MAX || INODE_POOL(only_inode_num) != 0 || null_byte != 0)
|
if (scanned != 2 || !pool_id || pool_id >= POOL_ID_MAX || INODE_POOL(only_inode_num) != 0)
|
||||||
{
|
{
|
||||||
fprintf(stderr, "Invalid key in etcd: %s\n", kv.key.c_str());
|
fprintf(stderr, "Invalid key in etcd: %s\n", kv.key.c_str());
|
||||||
continue;
|
continue;
|
||||||
|
@ -174,7 +174,7 @@ resume_1:
|
||||||
{ "size", 0 },
|
{ "size", 0 },
|
||||||
{ "readonly", false },
|
{ "readonly", false },
|
||||||
{ "pool_id", (uint64_t)INODE_POOL(inode_num) },
|
{ "pool_id", (uint64_t)INODE_POOL(inode_num) },
|
||||||
{ "pool_name", pool_it == parent->cli->st_cli.pool_config.end()
|
{ "pool_name", pool_it != parent->cli->st_cli.pool_config.end()
|
||||||
? (pool_it->second.name == "" ? "<Unnamed>" : pool_it->second.name) : "?" },
|
? (pool_it->second.name == "" ? "<Unnamed>" : pool_it->second.name) : "?" },
|
||||||
{ "inode_num", INODE_NO_POOL(inode_num) },
|
{ "inode_num", INODE_NO_POOL(inode_num) },
|
||||||
{ "inode_id", inode_num },
|
{ "inode_id", inode_num },
|
||||||
|
|
|
@ -384,8 +384,8 @@ resume_100:
|
||||||
pool_id_t pool_id = 0;
|
pool_id_t pool_id = 0;
|
||||||
inode_t inode = 0;
|
inode_t inode = 0;
|
||||||
char null_byte = 0;
|
char null_byte = 0;
|
||||||
sscanf(kv.key.c_str() + parent->cli->st_cli.etcd_prefix.length()+13, "%u/%lu%c", &pool_id, &inode, &null_byte);
|
int scanned = sscanf(kv.key.c_str() + parent->cli->st_cli.etcd_prefix.length()+13, "%u/%lu%c", &pool_id, &inode, &null_byte);
|
||||||
if (!inode || null_byte != 0)
|
if (scanned != 2 || !inode)
|
||||||
{
|
{
|
||||||
result = (cli_result_t){ .err = EIO, .text = "Bad key returned from etcd: "+kv.key };
|
result = (cli_result_t){ .err = EIO, .text = "Bad key returned from etcd: "+kv.key };
|
||||||
state = 100;
|
state = 100;
|
||||||
|
|
|
@ -132,8 +132,8 @@ resume_2:
|
||||||
auto kv = parent->cli->st_cli.parse_etcd_kv(osd_stats[i]);
|
auto kv = parent->cli->st_cli.parse_etcd_kv(osd_stats[i]);
|
||||||
osd_num_t stat_osd_num = 0;
|
osd_num_t stat_osd_num = 0;
|
||||||
char null_byte = 0;
|
char null_byte = 0;
|
||||||
sscanf(kv.key.c_str() + parent->cli->st_cli.etcd_prefix.size(), "/osd/stats/%lu%c", &stat_osd_num, &null_byte);
|
int scanned = sscanf(kv.key.c_str() + parent->cli->st_cli.etcd_prefix.size(), "/osd/stats/%lu%c", &stat_osd_num, &null_byte);
|
||||||
if (!stat_osd_num || null_byte != 0)
|
if (scanned != 1 || !stat_osd_num)
|
||||||
{
|
{
|
||||||
fprintf(stderr, "Invalid key in etcd: %s\n", kv.key.c_str());
|
fprintf(stderr, "Invalid key in etcd: %s\n", kv.key.c_str());
|
||||||
continue;
|
continue;
|
||||||
|
|
|
@ -565,11 +565,11 @@ void cluster_client_t::copy_write(cluster_op_t *op, std::map<object_id, cluster_
|
||||||
while (len > 0)
|
while (len > 0)
|
||||||
{
|
{
|
||||||
uint64_t new_len = 0;
|
uint64_t new_len = 0;
|
||||||
if (dirty_it == dirty_buffers.end())
|
if (dirty_it == dirty_buffers.end() || dirty_it->first.inode != op->inode)
|
||||||
{
|
{
|
||||||
new_len = len;
|
new_len = len;
|
||||||
}
|
}
|
||||||
else if (dirty_it->first.inode != op->inode || dirty_it->first.stripe > pos)
|
else if (dirty_it->first.stripe > pos)
|
||||||
{
|
{
|
||||||
new_len = dirty_it->first.stripe - pos;
|
new_len = dirty_it->first.stripe - pos;
|
||||||
if (new_len > len)
|
if (new_len > len)
|
||||||
|
@ -881,6 +881,7 @@ void cluster_client_t::slice_rw(cluster_op_t *op)
|
||||||
uint64_t end = (op->offset + op->len) > (stripe + pg_block_size)
|
uint64_t end = (op->offset + op->len) > (stripe + pg_block_size)
|
||||||
? (stripe + pg_block_size) : (op->offset + op->len);
|
? (stripe + pg_block_size) : (op->offset + op->len);
|
||||||
op->parts[i].iov.reset();
|
op->parts[i].iov.reset();
|
||||||
|
op->parts[i].flags = 0;
|
||||||
if (op->cur_inode != op->inode)
|
if (op->cur_inode != op->inode)
|
||||||
{
|
{
|
||||||
// Read remaining parts from upper layers
|
// Read remaining parts from upper layers
|
||||||
|
@ -918,7 +919,10 @@ void cluster_client_t::slice_rw(cluster_op_t *op)
|
||||||
else
|
else
|
||||||
add_iov(cur-prev, skip_prev, op, iov_idx, iov_pos, op->parts[i].iov, scrap_buffer, scrap_buffer_size);
|
add_iov(cur-prev, skip_prev, op, iov_idx, iov_pos, op->parts[i].iov, scrap_buffer, scrap_buffer_size);
|
||||||
if (end == begin)
|
if (end == begin)
|
||||||
|
{
|
||||||
op->done_count++;
|
op->done_count++;
|
||||||
|
op->parts[i].flags = PART_DONE;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else if (op->opcode != OSD_OP_READ_BITMAP && op->opcode != OSD_OP_READ_CHAIN_BITMAP && op->opcode != OSD_OP_DELETE)
|
else if (op->opcode != OSD_OP_READ_BITMAP && op->opcode != OSD_OP_READ_CHAIN_BITMAP && op->opcode != OSD_OP_DELETE)
|
||||||
{
|
{
|
||||||
|
@ -930,7 +934,6 @@ void cluster_client_t::slice_rw(cluster_op_t *op)
|
||||||
op->opcode == OSD_OP_DELETE ? 0 : (uint32_t)(end - begin);
|
op->opcode == OSD_OP_DELETE ? 0 : (uint32_t)(end - begin);
|
||||||
op->parts[i].pg_num = pg_num;
|
op->parts[i].pg_num = pg_num;
|
||||||
op->parts[i].osd_num = 0;
|
op->parts[i].osd_num = 0;
|
||||||
op->parts[i].flags = 0;
|
|
||||||
i++;
|
i++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -684,8 +684,8 @@ void etcd_state_client_t::parse_state(const etcd_kv_t & kv)
|
||||||
// ID
|
// ID
|
||||||
pool_id_t pool_id;
|
pool_id_t pool_id;
|
||||||
char null_byte = 0;
|
char null_byte = 0;
|
||||||
sscanf(pool_item.first.c_str(), "%u%c", &pool_id, &null_byte);
|
int scanned = sscanf(pool_item.first.c_str(), "%u%c", &pool_id, &null_byte);
|
||||||
if (!pool_id || pool_id >= POOL_ID_MAX || null_byte != 0)
|
if (scanned != 1 || !pool_id || pool_id >= POOL_ID_MAX)
|
||||||
{
|
{
|
||||||
fprintf(stderr, "Pool ID %s is invalid (must be a number less than 0x%x), skipping pool\n", pool_item.first.c_str(), POOL_ID_MAX);
|
fprintf(stderr, "Pool ID %s is invalid (must be a number less than 0x%x), skipping pool\n", pool_item.first.c_str(), POOL_ID_MAX);
|
||||||
continue;
|
continue;
|
||||||
|
@ -829,8 +829,8 @@ void etcd_state_client_t::parse_state(const etcd_kv_t & kv)
|
||||||
{
|
{
|
||||||
pool_id_t pool_id;
|
pool_id_t pool_id;
|
||||||
char null_byte = 0;
|
char null_byte = 0;
|
||||||
sscanf(pool_item.first.c_str(), "%u%c", &pool_id, &null_byte);
|
int scanned = sscanf(pool_item.first.c_str(), "%u%c", &pool_id, &null_byte);
|
||||||
if (!pool_id || pool_id >= POOL_ID_MAX || null_byte != 0)
|
if (scanned != 1 || !pool_id || pool_id >= POOL_ID_MAX)
|
||||||
{
|
{
|
||||||
fprintf(stderr, "Pool ID %s is invalid in PG configuration (must be a number less than 0x%x), skipping pool\n", pool_item.first.c_str(), POOL_ID_MAX);
|
fprintf(stderr, "Pool ID %s is invalid in PG configuration (must be a number less than 0x%x), skipping pool\n", pool_item.first.c_str(), POOL_ID_MAX);
|
||||||
continue;
|
continue;
|
||||||
|
@ -838,8 +838,8 @@ void etcd_state_client_t::parse_state(const etcd_kv_t & kv)
|
||||||
for (auto & pg_item: pool_item.second.object_items())
|
for (auto & pg_item: pool_item.second.object_items())
|
||||||
{
|
{
|
||||||
pg_num_t pg_num = 0;
|
pg_num_t pg_num = 0;
|
||||||
sscanf(pg_item.first.c_str(), "%u%c", &pg_num, &null_byte);
|
int scanned = sscanf(pg_item.first.c_str(), "%u%c", &pg_num, &null_byte);
|
||||||
if (!pg_num || null_byte != 0)
|
if (scanned != 1 || !pg_num)
|
||||||
{
|
{
|
||||||
fprintf(stderr, "Bad key in pool %u PG configuration: %s (must be a number), skipped\n", pool_id, pg_item.first.c_str());
|
fprintf(stderr, "Bad key in pool %u PG configuration: %s (must be a number), skipped\n", pool_id, pg_item.first.c_str());
|
||||||
continue;
|
continue;
|
||||||
|
@ -889,8 +889,8 @@ void etcd_state_client_t::parse_state(const etcd_kv_t & kv)
|
||||||
pool_id_t pool_id = 0;
|
pool_id_t pool_id = 0;
|
||||||
pg_num_t pg_num = 0;
|
pg_num_t pg_num = 0;
|
||||||
char null_byte = 0;
|
char null_byte = 0;
|
||||||
sscanf(key.c_str() + etcd_prefix.length()+12, "%u/%u%c", &pool_id, &pg_num, &null_byte);
|
int scanned = sscanf(key.c_str() + etcd_prefix.length()+12, "%u/%u%c", &pool_id, &pg_num, &null_byte);
|
||||||
if (!pool_id || pool_id >= POOL_ID_MAX || !pg_num || null_byte != 0)
|
if (scanned != 2 || !pool_id || pool_id >= POOL_ID_MAX || !pg_num)
|
||||||
{
|
{
|
||||||
fprintf(stderr, "Bad etcd key %s, ignoring\n", key.c_str());
|
fprintf(stderr, "Bad etcd key %s, ignoring\n", key.c_str());
|
||||||
}
|
}
|
||||||
|
@ -944,8 +944,8 @@ void etcd_state_client_t::parse_state(const etcd_kv_t & kv)
|
||||||
pool_id_t pool_id = 0;
|
pool_id_t pool_id = 0;
|
||||||
pg_num_t pg_num = 0;
|
pg_num_t pg_num = 0;
|
||||||
char null_byte = 0;
|
char null_byte = 0;
|
||||||
sscanf(key.c_str() + etcd_prefix.length()+10, "%u/%u%c", &pool_id, &pg_num, &null_byte);
|
int scanned = sscanf(key.c_str() + etcd_prefix.length()+10, "%u/%u%c", &pool_id, &pg_num, &null_byte);
|
||||||
if (!pool_id || pool_id >= POOL_ID_MAX || !pg_num || null_byte != 0)
|
if (scanned != 2 || !pool_id || pool_id >= POOL_ID_MAX || !pg_num)
|
||||||
{
|
{
|
||||||
fprintf(stderr, "Bad etcd key %s, ignoring\n", key.c_str());
|
fprintf(stderr, "Bad etcd key %s, ignoring\n", key.c_str());
|
||||||
}
|
}
|
||||||
|
@ -1015,8 +1015,8 @@ void etcd_state_client_t::parse_state(const etcd_kv_t & kv)
|
||||||
uint64_t pool_id = 0;
|
uint64_t pool_id = 0;
|
||||||
uint64_t inode_num = 0;
|
uint64_t inode_num = 0;
|
||||||
char null_byte = 0;
|
char null_byte = 0;
|
||||||
sscanf(key.c_str() + etcd_prefix.length()+14, "%lu/%lu%c", &pool_id, &inode_num, &null_byte);
|
int scanned = sscanf(key.c_str() + etcd_prefix.length()+14, "%lu/%lu%c", &pool_id, &inode_num, &null_byte);
|
||||||
if (!pool_id || pool_id >= POOL_ID_MAX || !inode_num || (inode_num >> (64-POOL_ID_BITS)) || null_byte != 0)
|
if (scanned != 2 || !pool_id || pool_id >= POOL_ID_MAX || !inode_num || (inode_num >> (64-POOL_ID_BITS)))
|
||||||
{
|
{
|
||||||
fprintf(stderr, "Bad etcd key %s, ignoring\n", key.c_str());
|
fprintf(stderr, "Bad etcd key %s, ignoring\n", key.c_str());
|
||||||
}
|
}
|
||||||
|
|
|
@ -242,6 +242,7 @@ static enum fio_q_status sec_queue(struct thread_data *td, struct io_u *io)
|
||||||
op.sec_rw.version = UINT64_MAX; // last unstable
|
op.sec_rw.version = UINT64_MAX; // last unstable
|
||||||
op.sec_rw.offset = io->offset % bsd->block_size;
|
op.sec_rw.offset = io->offset % bsd->block_size;
|
||||||
op.sec_rw.len = io->xfer_buflen;
|
op.sec_rw.len = io->xfer_buflen;
|
||||||
|
op.sec_rw.attr_len = 0;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -263,6 +264,7 @@ static enum fio_q_status sec_queue(struct thread_data *td, struct io_u *io)
|
||||||
op.sec_rw.version = 0; // assign automatically
|
op.sec_rw.version = 0; // assign automatically
|
||||||
op.sec_rw.offset = io->offset % bsd->block_size;
|
op.sec_rw.offset = io->offset % bsd->block_size;
|
||||||
op.sec_rw.len = io->xfer_buflen;
|
op.sec_rw.len = io->xfer_buflen;
|
||||||
|
op.sec_rw.attr_len = 0;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
|
|
@ -19,12 +19,12 @@ std::string msgr_rdma_address_t::to_string()
|
||||||
bool msgr_rdma_address_t::from_string(const char *str, msgr_rdma_address_t *dest)
|
bool msgr_rdma_address_t::from_string(const char *str, msgr_rdma_address_t *dest)
|
||||||
{
|
{
|
||||||
uint64_t* gid = (uint64_t*)&dest->gid;
|
uint64_t* gid = (uint64_t*)&dest->gid;
|
||||||
int n = sscanf(
|
int scanned = sscanf(
|
||||||
str, "%hx:%x:%x:%16lx%16lx", &dest->lid, &dest->qpn, &dest->psn, gid, gid+1
|
str, "%hx:%x:%x:%16lx%16lx", &dest->lid, &dest->qpn, &dest->psn, gid, gid+1
|
||||||
);
|
);
|
||||||
gid[0] = be64toh(gid[0]);
|
gid[0] = be64toh(gid[0]);
|
||||||
gid[1] = be64toh(gid[1]);
|
gid[1] = be64toh(gid[1]);
|
||||||
return n == 5;
|
return scanned == 5;
|
||||||
}
|
}
|
||||||
|
|
||||||
msgr_rdma_context_t::~msgr_rdma_context_t()
|
msgr_rdma_context_t::~msgr_rdma_context_t()
|
||||||
|
|
|
@ -346,8 +346,8 @@ void nfs_proxy_t::parse_stats(etcd_kv_t & kv)
|
||||||
pool_id_t pool_id = 0;
|
pool_id_t pool_id = 0;
|
||||||
inode_t inode_num = 0;
|
inode_t inode_num = 0;
|
||||||
char null_byte = 0;
|
char null_byte = 0;
|
||||||
sscanf(key.c_str() + cli->st_cli.etcd_prefix.length()+13, "%u/%lu%c", &pool_id, &inode_num, &null_byte);
|
int scanned = sscanf(key.c_str() + cli->st_cli.etcd_prefix.length()+13, "%u/%lu%c", &pool_id, &inode_num, &null_byte);
|
||||||
if (!pool_id || pool_id >= POOL_ID_MAX || !inode_num || null_byte != 0)
|
if (scanned != 2 || !pool_id || pool_id >= POOL_ID_MAX || !inode_num)
|
||||||
{
|
{
|
||||||
fprintf(stderr, "Bad etcd key %s, ignoring\n", key.c_str());
|
fprintf(stderr, "Bad etcd key %s, ignoring\n", key.c_str());
|
||||||
}
|
}
|
||||||
|
@ -360,8 +360,8 @@ void nfs_proxy_t::parse_stats(etcd_kv_t & kv)
|
||||||
{
|
{
|
||||||
pool_id_t pool_id = 0;
|
pool_id_t pool_id = 0;
|
||||||
char null_byte = 0;
|
char null_byte = 0;
|
||||||
sscanf(key.c_str() + cli->st_cli.etcd_prefix.length()+12, "%u%c", &pool_id, &null_byte);
|
int scanned = sscanf(key.c_str() + cli->st_cli.etcd_prefix.length()+12, "%u%c", &pool_id, &null_byte);
|
||||||
if (!pool_id || pool_id >= POOL_ID_MAX)
|
if (scanned != 1 || !pool_id || pool_id >= POOL_ID_MAX)
|
||||||
{
|
{
|
||||||
fprintf(stderr, "Bad etcd key %s, ignoring\n", key.c_str());
|
fprintf(stderr, "Bad etcd key %s, ignoring\n", key.c_str());
|
||||||
}
|
}
|
||||||
|
|
|
@ -649,7 +649,7 @@ void osd_t::apply_pg_config()
|
||||||
auto pg_it = this->pgs.find({ .pool_id = pool_id, .pg_num = pg_num });
|
auto pg_it = this->pgs.find({ .pool_id = pool_id, .pg_num = pg_num });
|
||||||
bool currently_taken = pg_it != this->pgs.end() && pg_it->second.state != PG_OFFLINE;
|
bool currently_taken = pg_it != this->pgs.end() && pg_it->second.state != PG_OFFLINE;
|
||||||
// Check pool block size and bitmap granularity
|
// Check pool block size and bitmap granularity
|
||||||
if (this->bs_block_size != pool_item.second.data_block_size ||
|
if (take && this->bs_block_size != pool_item.second.data_block_size ||
|
||||||
this->bs_bitmap_granularity != pool_item.second.bitmap_granularity)
|
this->bs_bitmap_granularity != pool_item.second.bitmap_granularity)
|
||||||
{
|
{
|
||||||
if (!warned_block_size)
|
if (!warned_block_size)
|
||||||
|
@ -967,8 +967,8 @@ void osd_t::report_pg_states()
|
||||||
pool_id_t pool_id = 0;
|
pool_id_t pool_id = 0;
|
||||||
pg_num_t pg_num = 0;
|
pg_num_t pg_num = 0;
|
||||||
char null_byte = 0;
|
char null_byte = 0;
|
||||||
sscanf(kv.key.c_str() + st_cli.etcd_prefix.length()+10, "%u/%u%c", &pool_id, &pg_num, &null_byte);
|
int scanned = sscanf(kv.key.c_str() + st_cli.etcd_prefix.length()+10, "%u/%u%c", &pool_id, &pg_num, &null_byte);
|
||||||
if (null_byte == 0)
|
if (scanned == 2)
|
||||||
{
|
{
|
||||||
auto pg_it = pgs.find({ .pool_id = pool_id, .pg_num = pg_num });
|
auto pg_it = pgs.find({ .pool_id = pool_id, .pg_num = pg_num });
|
||||||
if (pg_it != pgs.end() && pg_it->second.state != PG_OFFLINE && pg_it->second.state != PG_STARTING &&
|
if (pg_it != pgs.end() && pg_it->second.state != PG_OFFLINE && pg_it->second.state != PG_STARTING &&
|
||||||
|
|
|
@ -197,7 +197,11 @@ static void vitastor_parse_filename(const char *filename, QDict *options, Error
|
||||||
!strcmp(name, "rdma-mtu"))
|
!strcmp(name, "rdma-mtu"))
|
||||||
{
|
{
|
||||||
unsigned long long num_val;
|
unsigned long long num_val;
|
||||||
|
#if QEMU_VERSION_MAJOR < 8 || QEMU_VERSION_MAJOR == 8 && QEMU_VERSION_MINOR < 1
|
||||||
if (parse_uint_full(value, &num_val, 0))
|
if (parse_uint_full(value, &num_val, 0))
|
||||||
|
#else
|
||||||
|
if (parse_uint_full(value, 0, &num_val))
|
||||||
|
#endif
|
||||||
{
|
{
|
||||||
error_setg(errp, "Illegal %s: %s", name, value);
|
error_setg(errp, "Illegal %s: %s", name, value);
|
||||||
goto out;
|
goto out;
|
||||||
|
@ -320,7 +324,7 @@ static void vitastor_aio_fd_write(void *fddv)
|
||||||
static void universal_aio_set_fd_handler(AioContext *ctx, int fd, IOHandler *fd_read, IOHandler *fd_write, void *opaque)
|
static void universal_aio_set_fd_handler(AioContext *ctx, int fd, IOHandler *fd_read, IOHandler *fd_write, void *opaque)
|
||||||
{
|
{
|
||||||
aio_set_fd_handler(ctx, fd,
|
aio_set_fd_handler(ctx, fd,
|
||||||
#if QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 5 || QEMU_VERSION_MAJOR >= 3
|
#if QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 5 || QEMU_VERSION_MAJOR >= 3 && (QEMU_VERSION_MAJOR < 8 || QEMU_VERSION_MAJOR == 8 && QEMU_VERSION_MINOR < 1)
|
||||||
0 /*is_external*/,
|
0 /*is_external*/,
|
||||||
#endif
|
#endif
|
||||||
fd_read,
|
fd_read,
|
||||||
|
@ -489,6 +493,10 @@ static int vitastor_file_open(BlockDriverState *bs, QDict *options, int flags, E
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
bs->total_sectors = client->size / BDRV_SECTOR_SIZE;
|
bs->total_sectors = client->size / BDRV_SECTOR_SIZE;
|
||||||
|
#if QEMU_VERSION_MAJOR > 5 || QEMU_VERSION_MAJOR == 5 && QEMU_VERSION_MINOR >= 1
|
||||||
|
/* When extending regular files, we get zeros from the OS */
|
||||||
|
bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE;
|
||||||
|
#endif
|
||||||
//client->aio_context = bdrv_get_aio_context(bs);
|
//client->aio_context = bdrv_get_aio_context(bs);
|
||||||
qdict_del(options, "use-rdma");
|
qdict_del(options, "use-rdma");
|
||||||
qdict_del(options, "rdma-mtu");
|
qdict_del(options, "rdma-mtu");
|
||||||
|
@ -585,7 +593,11 @@ static int coroutine_fn vitastor_co_truncate(BlockDriverState *bs, int64_t offse
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: Resize inode to <offset> bytes
|
// TODO: Resize inode to <offset> bytes
|
||||||
client->size = offset / BDRV_SECTOR_SIZE;
|
#if QEMU_VERSION_MAJOR >= 4
|
||||||
|
client->size = exact || client->size < offset ? offset : client->size;
|
||||||
|
#else
|
||||||
|
client->size = offset;
|
||||||
|
#endif
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue