forked from vitalif/vitastor
Test and fix degraded-read
parent
97d3fc593c
commit
1513d0490a
|
@ -8,6 +8,7 @@
|
||||||
void osd_t::init_primary()
|
void osd_t::init_primary()
|
||||||
{
|
{
|
||||||
// Initial test version of clustering code requires exactly 2 peers
|
// Initial test version of clustering code requires exactly 2 peers
|
||||||
|
// FIXME Hardcode
|
||||||
if (config["peer1"] == "" || config["peer2"] == "")
|
if (config["peer1"] == "" || config["peer2"] == "")
|
||||||
throw std::runtime_error("run_primary requires two peers");
|
throw std::runtime_error("run_primary requires two peers");
|
||||||
peers.push_back(parse_peer(config["peer1"]));
|
peers.push_back(parse_peer(config["peer1"]));
|
||||||
|
@ -16,8 +17,9 @@ void osd_t::init_primary()
|
||||||
throw std::runtime_error("peer1 and peer2 osd numbers are the same");
|
throw std::runtime_error("peer1 and peer2 osd numbers are the same");
|
||||||
pgs.push_back((pg_t){
|
pgs.push_back((pg_t){
|
||||||
.state = PG_OFFLINE,
|
.state = PG_OFFLINE,
|
||||||
|
.pg_cursize = 2, // or 3
|
||||||
.pg_num = 1,
|
.pg_num = 1,
|
||||||
.target_set = { 1, 2, 3 },
|
.target_set = { 1, 0, 3 }, // or { 1, 2, 3 }
|
||||||
});
|
});
|
||||||
pg_count = 1;
|
pg_count = 1;
|
||||||
peering_state = 1;
|
peering_state = 1;
|
||||||
|
|
|
@ -3,10 +3,10 @@
|
||||||
|
|
||||||
// read: read directly or read paired stripe(s), reconstruct, return
|
// read: read directly or read paired stripe(s), reconstruct, return
|
||||||
// write: read paired stripe(s), modify, write
|
// write: read paired stripe(s), modify, write
|
||||||
|
//
|
||||||
// nuance: take care to read the same version from paired stripes!
|
// nuance: take care to read the same version from paired stripes!
|
||||||
// if there are no write requests in progress we're good (stripes must be in sync)
|
// to do so, we remember "last readable" version until a write request completes
|
||||||
// and... remember the last readable version during a write request
|
// and we postpone other write requests to the same stripe until completion of previous ones
|
||||||
// and... postpone other write requests to the same stripe until the completion of previous ones
|
|
||||||
//
|
//
|
||||||
// sync: sync peers, get unstable versions from somewhere, stabilize them
|
// sync: sync peers, get unstable versions from somewhere, stabilize them
|
||||||
|
|
||||||
|
@ -79,7 +79,7 @@ void osd_t::exec_primary_read(osd_op_t *cur_op)
|
||||||
auto vo_it = pgs[pg_num].ver_override.find(oid);
|
auto vo_it = pgs[pg_num].ver_override.find(oid);
|
||||||
op_data->target_ver = vo_it != pgs[pg_num].ver_override.end() ? vo_it->second : UINT64_MAX;
|
op_data->target_ver = vo_it != pgs[pg_num].ver_override.end() ? vo_it->second : UINT64_MAX;
|
||||||
}
|
}
|
||||||
if (pgs[pg_num].pg_cursize == 3)
|
if (pgs[pg_num].pg_cursize == pgs[pg_num].pg_size)
|
||||||
{
|
{
|
||||||
// Fast happy-path
|
// Fast happy-path
|
||||||
submit_read_subops(pgs[pg_num].pg_minsize, pgs[pg_num].target_set.data(), cur_op);
|
submit_read_subops(pgs[pg_num].pg_minsize, pgs[pg_num].target_set.data(), cur_op);
|
||||||
|
@ -162,8 +162,9 @@ int osd_t::extend_missing_stripes(osd_read_stripe_t *stripes, osd_num_t *target_
|
||||||
{
|
{
|
||||||
for (int role = 0; role < minsize; role++)
|
for (int role = 0; role < minsize; role++)
|
||||||
{
|
{
|
||||||
if (stripes[role*2+1].end != 0 && target_set[role] == 0)
|
if (stripes[role].end != 0 && target_set[role] == 0)
|
||||||
{
|
{
|
||||||
|
stripes[role].real_start = stripes[role].real_end = 0;
|
||||||
// Stripe is missing. Extend read to other stripes.
|
// Stripe is missing. Extend read to other stripes.
|
||||||
// We need at least pg_minsize stripes to recover the lost part.
|
// We need at least pg_minsize stripes to recover the lost part.
|
||||||
int exist = 0;
|
int exist = 0;
|
||||||
|
@ -212,6 +213,11 @@ void osd_t::submit_read_subops(int read_pg_size, const uint64_t* target_set, osd
|
||||||
stripes[role].pos = buf_size;
|
stripes[role].pos = buf_size;
|
||||||
buf_size += stripes[role].real_end - stripes[role].real_start;
|
buf_size += stripes[role].real_end - stripes[role].real_start;
|
||||||
}
|
}
|
||||||
|
else if (stripes[role].end != 0)
|
||||||
|
{
|
||||||
|
stripes[role].pos = buf_size;
|
||||||
|
buf_size += stripes[role].end - stripes[role].start;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
osd_op_t *subops = new osd_op_t[n_subops];
|
osd_op_t *subops = new osd_op_t[n_subops];
|
||||||
cur_op->buf = memalign(MEM_ALIGNMENT, buf_size);
|
cur_op->buf = memalign(MEM_ALIGNMENT, buf_size);
|
||||||
|
@ -227,7 +233,6 @@ void osd_t::submit_read_subops(int read_pg_size, const uint64_t* target_set, osd
|
||||||
auto role_osd_num = target_set[role];
|
auto role_osd_num = target_set[role];
|
||||||
if (role_osd_num != 0)
|
if (role_osd_num != 0)
|
||||||
{
|
{
|
||||||
printf("Read subop from %lu: %lu / %lu\n", role_osd_num, op_data->oid.inode, op_data->oid.stripe | role);
|
|
||||||
if (role_osd_num == this->osd_num)
|
if (role_osd_num == this->osd_num)
|
||||||
{
|
{
|
||||||
subops[subop].bs_op = {
|
subops[subop].bs_op = {
|
||||||
|
|
14
test_osd.cpp
14
test_osd.cpp
|
@ -22,7 +22,7 @@ uint64_t test_write(int connect_fd, uint64_t inode, uint64_t stripe, uint64_t ve
|
||||||
|
|
||||||
void* test_primary_read(int connect_fd, uint64_t inode, uint64_t offset, uint64_t len);
|
void* test_primary_read(int connect_fd, uint64_t inode, uint64_t offset, uint64_t len);
|
||||||
|
|
||||||
bool check_pattern(void *buf, uint64_t len, uint64_t pattern);
|
bool check_pattern(void *buf, uint64_t offset, uint64_t len, uint64_t pattern);
|
||||||
|
|
||||||
#define PATTERN0 0x8c4641acc762840e
|
#define PATTERN0 0x8c4641acc762840e
|
||||||
#define PATTERN1 0x70a549add9a2280a
|
#define PATTERN1 0x70a549add9a2280a
|
||||||
|
@ -51,13 +51,13 @@ int main(int narg, char *args[])
|
||||||
// Cluster read
|
// Cluster read
|
||||||
connect_fd = connect_osd("127.0.0.1", 11203);
|
connect_fd = connect_osd("127.0.0.1", 11203);
|
||||||
data = test_primary_read(connect_fd, 2, 0, 128*1024);
|
data = test_primary_read(connect_fd, 2, 0, 128*1024);
|
||||||
if (data && check_pattern(data, 128*1024, PATTERN0))
|
if (data && check_pattern(data, 0, 128*1024, PATTERN0))
|
||||||
printf("inode=2 0-128K OK\n");
|
printf("inode=2 0-128K OK\n");
|
||||||
if (data)
|
if (data)
|
||||||
free(data);
|
free(data);
|
||||||
data = test_primary_read(connect_fd, 2, 0, 256*1024);
|
data = test_primary_read(connect_fd, 2, 0, 256*1024);
|
||||||
if (data && check_pattern(data, 128*1024, PATTERN0) &&
|
if (data && check_pattern(data, 0, 128*1024, PATTERN0) &&
|
||||||
check_pattern(data+128*1024, 128*1024, PATTERN1))
|
check_pattern(data, 128*1024, 128*1024, PATTERN1))
|
||||||
printf("inode=2 0-256K OK\n");
|
printf("inode=2 0-256K OK\n");
|
||||||
if (data)
|
if (data)
|
||||||
free(data);
|
free(data);
|
||||||
|
@ -186,13 +186,13 @@ void* test_primary_read(int connect_fd, uint64_t inode, uint64_t offset, uint64_
|
||||||
return data;
|
return data;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool check_pattern(void *buf, uint64_t len, uint64_t pattern)
|
bool check_pattern(void *buf, uint64_t offset, uint64_t len, uint64_t pattern)
|
||||||
{
|
{
|
||||||
for (int i = 0; i < len/sizeof(uint64_t); i++)
|
for (int i = 0; i < len/sizeof(uint64_t); i++)
|
||||||
{
|
{
|
||||||
if (((uint64_t*)buf)[i] != pattern)
|
if (((uint64_t*)(buf+offset))[i] != pattern)
|
||||||
{
|
{
|
||||||
printf("(result[%d] = %lu) != %lu\n", i, ((uint64_t*)buf)[i], pattern);
|
printf("(result + %lu bytes = %lx) != %lx\n", i*sizeof(uint64_t)+offset, ((uint64_t*)buf+offset)[i], pattern);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue