forked from vitalif/vitastor
(Almost) Implement misplaced recovery, integrating it into calc_rmw()
parent
6212195440
commit
cf7de0f181
|
@ -204,6 +204,22 @@ bool osd_t::pick_next_recovery(osd_recovery_op_t &op)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
for (auto pg_it = pgs.begin(); pg_it != pgs.end(); pg_it++)
|
||||||
|
{
|
||||||
|
if ((pg_it->second.state & (PG_ACTIVE | PG_HAS_MISPLACED)) == (PG_ACTIVE | PG_HAS_MISPLACED))
|
||||||
|
{
|
||||||
|
for (auto obj_it = pg_it->second.misplaced_objects.begin(); obj_it != pg_it->second.misplaced_objects.end(); obj_it++)
|
||||||
|
{
|
||||||
|
if (recovery_ops.find(obj_it->first) == recovery_ops.end())
|
||||||
|
{
|
||||||
|
op.degraded = false;
|
||||||
|
op.pg_num = pg_it->first;
|
||||||
|
op.oid = obj_it->first;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -266,15 +282,7 @@ void osd_t::submit_recovery_op(osd_recovery_op_t *op)
|
||||||
pg->print_state();
|
pg->print_state();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (st->state == OBJ_DEGRADED)
|
|
||||||
{
|
|
||||||
pg->clean_count++;
|
pg->clean_count++;
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
assert(st->state == (OBJ_DEGRADED|OBJ_MISPLACED));
|
|
||||||
pg->misplaced_objects[op->oid] = change_osd_set(st, pg);
|
|
||||||
}
|
|
||||||
st->object_count--;
|
st->object_count--;
|
||||||
if (!st->object_count)
|
if (!st->object_count)
|
||||||
{
|
{
|
||||||
|
@ -305,58 +313,3 @@ bool osd_t::continue_recovery()
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// This is likely not needed at all, because we'll always recover objects to the clean state
|
|
||||||
pg_osd_set_state_t* osd_t::change_osd_set(pg_osd_set_state_t *st, pg_t *pg)
|
|
||||||
{
|
|
||||||
pg_osd_set_state_t *new_st;
|
|
||||||
pg_osd_set_t new_set(st->osd_set);
|
|
||||||
for (uint64_t role = 0; role < pg->pg_size; role++)
|
|
||||||
{
|
|
||||||
if (pg->cur_set[role] != 0)
|
|
||||||
{
|
|
||||||
// Maintain order (outdated -> role -> osd_num)
|
|
||||||
int added = 0;
|
|
||||||
for (int j = 0; j < new_set.size(); j++)
|
|
||||||
{
|
|
||||||
if (new_set[j].role == role && new_set[j].osd_num == pg->cur_set[role])
|
|
||||||
{
|
|
||||||
if (new_set[j].outdated)
|
|
||||||
{
|
|
||||||
if (!added)
|
|
||||||
new_set[j].outdated = false;
|
|
||||||
else
|
|
||||||
{
|
|
||||||
new_set.erase(new_set.begin()+j);
|
|
||||||
j--;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
else if (!added && (new_set[j].outdated || new_set[j].role > role ||
|
|
||||||
new_set[j].role == role && new_set[j].osd_num > pg->cur_set[role]))
|
|
||||||
{
|
|
||||||
new_set.insert(new_set.begin()+j, (pg_obj_loc_t){
|
|
||||||
.role = role,
|
|
||||||
.osd_num = pg->cur_set[role],
|
|
||||||
.outdated = false,
|
|
||||||
});
|
|
||||||
added = 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
auto st_it = pg->state_dict.find(new_set);
|
|
||||||
if (st_it != pg->state_dict.end())
|
|
||||||
{
|
|
||||||
st_it = pg->state_dict.emplace(new_set, (pg_osd_set_state_t){
|
|
||||||
.read_target = pg->cur_set,
|
|
||||||
.osd_set = new_set,
|
|
||||||
.state = OBJ_MISPLACED,
|
|
||||||
.object_count = 0,
|
|
||||||
}).first;
|
|
||||||
}
|
|
||||||
new_st = &st_it->second;
|
|
||||||
new_st->object_count++;
|
|
||||||
return new_st;
|
|
||||||
}
|
|
||||||
|
|
105
osd_primary.cpp
105
osd_primary.cpp
|
@ -31,8 +31,9 @@ struct osd_primary_op_data_t
|
||||||
int degraded = 0, pg_size, pg_minsize;
|
int degraded = 0, pg_size, pg_minsize;
|
||||||
osd_rmw_stripe_t *stripes;
|
osd_rmw_stripe_t *stripes;
|
||||||
osd_op_t *subops = NULL;
|
osd_op_t *subops = NULL;
|
||||||
void *recovery_buf = NULL;
|
|
||||||
uint64_t *prev_set = NULL;
|
uint64_t *prev_set = NULL;
|
||||||
|
uint64_t object_state = 0;
|
||||||
|
|
||||||
// for sync. oops, requires freeing
|
// for sync. oops, requires freeing
|
||||||
std::vector<unstable_osd_num_t> *unstable_write_osds = NULL;
|
std::vector<unstable_osd_num_t> *unstable_write_osds = NULL;
|
||||||
pg_num_t *dirty_pgs = NULL;
|
pg_num_t *dirty_pgs = NULL;
|
||||||
|
@ -117,27 +118,32 @@ bool osd_t::prepare_primary_rw(osd_op_t *cur_op)
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint64_t* get_object_osd_set(pg_t &pg, object_id &oid, uint64_t *def)
|
uint64_t* get_object_osd_set(pg_t &pg, object_id &oid, uint64_t *def, uint64_t &object_state)
|
||||||
{
|
{
|
||||||
if (!(pg.state & (PG_HAS_INCOMPLETE | PG_HAS_DEGRADED | PG_HAS_MISPLACED)))
|
if (!(pg.state & (PG_HAS_INCOMPLETE | PG_HAS_DEGRADED | PG_HAS_MISPLACED)))
|
||||||
{
|
{
|
||||||
|
object_state = 0;
|
||||||
return def;
|
return def;
|
||||||
}
|
}
|
||||||
auto st_it = pg.incomplete_objects.find(oid);
|
auto st_it = pg.incomplete_objects.find(oid);
|
||||||
if (st_it != pg.incomplete_objects.end())
|
if (st_it != pg.incomplete_objects.end())
|
||||||
{
|
{
|
||||||
|
object_state = st_it->second->state;
|
||||||
return st_it->second->read_target.data();
|
return st_it->second->read_target.data();
|
||||||
}
|
}
|
||||||
st_it = pg.degraded_objects.find(oid);
|
st_it = pg.degraded_objects.find(oid);
|
||||||
if (st_it != pg.degraded_objects.end())
|
if (st_it != pg.degraded_objects.end())
|
||||||
{
|
{
|
||||||
|
object_state = st_it->second->state;
|
||||||
return st_it->second->read_target.data();
|
return st_it->second->read_target.data();
|
||||||
}
|
}
|
||||||
st_it = pg.misplaced_objects.find(oid);
|
st_it = pg.misplaced_objects.find(oid);
|
||||||
if (st_it != pg.misplaced_objects.end())
|
if (st_it != pg.misplaced_objects.end())
|
||||||
{
|
{
|
||||||
|
object_state = st_it->second->state;
|
||||||
return st_it->second->read_target.data();
|
return st_it->second->read_target.data();
|
||||||
}
|
}
|
||||||
|
object_state = 0;
|
||||||
return def;
|
return def;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -171,7 +177,7 @@ void osd_t::continue_primary_read(osd_op_t *cur_op)
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
// PG may be degraded or have misplaced objects
|
// PG may be degraded or have misplaced objects
|
||||||
uint64_t* cur_set = get_object_osd_set(pg, op_data->oid, pg.cur_set.data());
|
uint64_t* cur_set = get_object_osd_set(pg, op_data->oid, pg.cur_set.data(), op_data->object_state);
|
||||||
if (extend_missing_stripes(op_data->stripes, cur_set, pg.pg_minsize, pg.pg_size) < 0)
|
if (extend_missing_stripes(op_data->stripes, cur_set, pg.pg_minsize, pg.pg_size) < 0)
|
||||||
{
|
{
|
||||||
finish_op(cur_op, -EIO);
|
finish_op(cur_op, -EIO);
|
||||||
|
@ -404,7 +410,6 @@ void osd_t::continue_primary_write(osd_op_t *cur_op)
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
osd_primary_op_data_t *op_data = cur_op->op_data;
|
osd_primary_op_data_t *op_data = cur_op->op_data;
|
||||||
// FIXME: Handle operation cancel
|
|
||||||
auto & pg = pgs[op_data->pg_num];
|
auto & pg = pgs[op_data->pg_num];
|
||||||
if (op_data->st == 1) goto resume_1;
|
if (op_data->st == 1) goto resume_1;
|
||||||
else if (op_data->st == 2) goto resume_2;
|
else if (op_data->st == 2) goto resume_2;
|
||||||
|
@ -413,8 +418,6 @@ void osd_t::continue_primary_write(osd_op_t *cur_op)
|
||||||
else if (op_data->st == 5) goto resume_5;
|
else if (op_data->st == 5) goto resume_5;
|
||||||
else if (op_data->st == 6) goto resume_6;
|
else if (op_data->st == 6) goto resume_6;
|
||||||
else if (op_data->st == 7) goto resume_7;
|
else if (op_data->st == 7) goto resume_7;
|
||||||
else if (op_data->st == 8) goto resume_8;
|
|
||||||
else if (op_data->st == 9) goto resume_9;
|
|
||||||
assert(op_data->st == 0);
|
assert(op_data->st == 0);
|
||||||
// Check if actions are pending for this object
|
// Check if actions are pending for this object
|
||||||
{
|
{
|
||||||
|
@ -441,91 +444,12 @@ void osd_t::continue_primary_write(osd_op_t *cur_op)
|
||||||
}
|
}
|
||||||
pg.write_queue.emplace(op_data->oid, cur_op);
|
pg.write_queue.emplace(op_data->oid, cur_op);
|
||||||
}
|
}
|
||||||
if (pg.state & PG_HAS_DEGRADED)
|
|
||||||
{
|
|
||||||
op_data->prev_set = NULL;
|
|
||||||
{
|
|
||||||
auto st_it = pg.degraded_objects.find(op_data->oid);
|
|
||||||
if (st_it != pg.degraded_objects.end())
|
|
||||||
op_data->prev_set = st_it->second->read_target.data();
|
|
||||||
}
|
|
||||||
if (op_data->prev_set != NULL)
|
|
||||||
{
|
|
||||||
// Read the whole object
|
|
||||||
op_data->recovery_buf = memalign(512, pg.pg_size * bs_block_size);
|
|
||||||
for (int i = 0; i < pg.pg_size; i++)
|
|
||||||
{
|
|
||||||
op_data->stripes[i].read_buf = op_data->recovery_buf + i*bs_block_size;
|
|
||||||
op_data->stripes[i].read_start = 0;
|
|
||||||
op_data->stripes[i].read_end = bs_block_size;
|
|
||||||
op_data->stripes[i].missing = op_data->prev_set[i] == 0;
|
|
||||||
op_data->stripes[i].write_end = 0;
|
|
||||||
}
|
|
||||||
op_data->degraded = 1;
|
|
||||||
submit_primary_subops(SUBMIT_READ, pg.pg_size, op_data->prev_set, cur_op);
|
|
||||||
resume_8:
|
|
||||||
op_data->st = 8;
|
|
||||||
return;
|
|
||||||
resume_9:
|
|
||||||
if (op_data->errors > 0)
|
|
||||||
{
|
|
||||||
pg_cancel_write_queue(pg, op_data->oid, op_data->epipe > 0 ? -EPIPE : -EIO);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
reconstruct_stripes(op_data->stripes, pg.pg_size);
|
|
||||||
if (cur_op->req.rw.len > 0)
|
|
||||||
{
|
|
||||||
memcpy(
|
|
||||||
op_data->recovery_buf + cur_op->req.rw.offset - op_data->oid.stripe,
|
|
||||||
cur_op->buf, cur_op->req.rw.len
|
|
||||||
);
|
|
||||||
}
|
|
||||||
free(cur_op->buf);
|
|
||||||
cur_op->buf = op_data->recovery_buf;
|
|
||||||
op_data->recovery_buf = NULL;
|
|
||||||
if (cur_op->req.rw.len > 0)
|
|
||||||
{
|
|
||||||
// Write modified parts
|
|
||||||
uint32_t start = 0, end = 0;
|
|
||||||
for (int role = 0; role < pg.pg_minsize; role++)
|
|
||||||
{
|
|
||||||
if (op_data->stripes[role].req_end != 0)
|
|
||||||
{
|
|
||||||
start = !end || op_data->stripes[role].req_start < start ? op_data->stripes[role].req_start : start;
|
|
||||||
end = std::max(op_data->stripes[role].req_end, end);
|
|
||||||
op_data->stripes[role].write_start = op_data->stripes[role].req_start;
|
|
||||||
op_data->stripes[role].write_end = op_data->stripes[role].req_end;
|
|
||||||
op_data->stripes[role].write_buf = cur_op->buf + role*bs_block_size + op_data->stripes[role].write_start;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for (int role = pg.pg_minsize; role < pg.pg_size; role++)
|
|
||||||
{
|
|
||||||
op_data->stripes[role].write_start = start;
|
|
||||||
op_data->stripes[role].write_end = end;
|
|
||||||
op_data->stripes[role].write_buf = cur_op->buf + role*bs_block_size + op_data->stripes[role].write_start;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Also write recovered parts
|
|
||||||
uint64_t *cur_set = pg.cur_set.data();
|
|
||||||
for (int role = 0; role < pg.pg_size; role++)
|
|
||||||
{
|
|
||||||
if (cur_set[role] != op_data->prev_set[role])
|
|
||||||
{
|
|
||||||
op_data->stripes[role].write_start = 0;
|
|
||||||
op_data->stripes[role].write_end = bs_block_size;
|
|
||||||
op_data->stripes[role].write_buf = cur_op->buf + role*bs_block_size;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
pg.ver_override[op_data->oid] = op_data->fact_ver;
|
|
||||||
// Send writes
|
|
||||||
submit_primary_subops(SUBMIT_WRITE, pg.pg_size, cur_set, cur_op);
|
|
||||||
op_data->st = 4;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
resume_1:
|
resume_1:
|
||||||
// Determine blocks to read and write
|
// Determine blocks to read and write
|
||||||
cur_op->rmw_buf = calc_rmw_reads(cur_op->buf, op_data->stripes, pg.cur_set.data(), pg.pg_size, pg.pg_minsize, pg.pg_cursize);
|
// Missing chunks are allowed to be overwritten even in incomplete objects
|
||||||
|
op_data->prev_set = get_object_osd_set(pg, op_data->oid, pg.cur_set.data(), op_data->object_state);
|
||||||
|
cur_op->rmw_buf = calc_rmw(cur_op->buf, op_data->stripes, op_data->prev_set,
|
||||||
|
pg.pg_size, pg.pg_minsize, pg.pg_cursize, pg.cur_set.data(), bs_block_size);
|
||||||
// Read required blocks
|
// Read required blocks
|
||||||
submit_primary_subops(SUBMIT_RMW_READ, pg.pg_size, pg.cur_set.data(), cur_op);
|
submit_primary_subops(SUBMIT_RMW_READ, pg.pg_size, pg.cur_set.data(), cur_op);
|
||||||
resume_2:
|
resume_2:
|
||||||
|
@ -540,7 +464,7 @@ resume_3:
|
||||||
// Save version override for parallel reads
|
// Save version override for parallel reads
|
||||||
pg.ver_override[op_data->oid] = op_data->fact_ver;
|
pg.ver_override[op_data->oid] = op_data->fact_ver;
|
||||||
// Recover missing stripes, calculate parity
|
// Recover missing stripes, calculate parity
|
||||||
calc_rmw_parity(op_data->stripes, pg.pg_size);
|
calc_rmw_parity(op_data->stripes, pg.pg_size, op_data->prev_set, pg.cur_set.data(), bs_block_size);
|
||||||
// Send writes
|
// Send writes
|
||||||
submit_primary_subops(SUBMIT_WRITE, pg.pg_size, pg.cur_set.data(), cur_op);
|
submit_primary_subops(SUBMIT_WRITE, pg.pg_size, pg.cur_set.data(), cur_op);
|
||||||
resume_4:
|
resume_4:
|
||||||
|
@ -693,7 +617,6 @@ resume_1:
|
||||||
syncs_in_progress.push_back(cur_op);
|
syncs_in_progress.push_back(cur_op);
|
||||||
}
|
}
|
||||||
resume_2:
|
resume_2:
|
||||||
// FIXME: Handle operation cancel
|
|
||||||
if (unstable_writes.size() == 0)
|
if (unstable_writes.size() == 0)
|
||||||
{
|
{
|
||||||
// Nothing to sync
|
// Nothing to sync
|
||||||
|
|
165
osd_rmw.cpp
165
osd_rmw.cpp
|
@ -1,4 +1,5 @@
|
||||||
#include <malloc.h>
|
#include <malloc.h>
|
||||||
|
#include <string.h>
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
#include "xor.h"
|
#include "xor.h"
|
||||||
#include "osd_rmw.h"
|
#include "osd_rmw.h"
|
||||||
|
@ -79,18 +80,21 @@ void reconstruct_stripe(osd_rmw_stripe_t *stripes, int pg_size, int role)
|
||||||
}
|
}
|
||||||
else if (prev >= 0)
|
else if (prev >= 0)
|
||||||
{
|
{
|
||||||
|
assert(stripes[role].read_start >= stripes[prev].read_start &&
|
||||||
|
stripes[role].read_start >= stripes[other].read_start);
|
||||||
memxor(
|
memxor(
|
||||||
stripes[prev].read_buf + (stripes[prev].read_start - stripes[role].read_start),
|
stripes[prev].read_buf + (stripes[role].read_start - stripes[prev].read_start),
|
||||||
stripes[other].read_buf + (stripes[other].read_start - stripes[other].read_start),
|
stripes[other].read_buf + (stripes[role].read_start - stripes[other].read_start),
|
||||||
stripes[role].read_buf, stripes[role].read_end - stripes[role].read_start
|
stripes[role].read_buf, stripes[role].read_end - stripes[role].read_start
|
||||||
);
|
);
|
||||||
prev = -1;
|
prev = -1;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
assert(stripes[role].read_start >= stripes[other].read_start);
|
||||||
memxor(
|
memxor(
|
||||||
stripes[role].read_buf,
|
stripes[role].read_buf,
|
||||||
stripes[other].read_buf + (stripes[other].read_start - stripes[role].read_start),
|
stripes[other].read_buf + (stripes[role].read_start - stripes[other].read_start),
|
||||||
stripes[role].read_buf, stripes[role].read_end - stripes[role].read_start
|
stripes[role].read_buf, stripes[role].read_end - stripes[role].read_start
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
@ -156,10 +160,11 @@ void* alloc_read_buffer(osd_rmw_stripe_t *stripes, int read_pg_size, uint64_t ad
|
||||||
return buf;
|
return buf;
|
||||||
}
|
}
|
||||||
|
|
||||||
void* calc_rmw_reads(void *write_buf, osd_rmw_stripe_t *stripes, uint64_t *osd_set, uint64_t pg_size, uint64_t pg_minsize, uint64_t pg_cursize)
|
void* calc_rmw(void *request_buf, osd_rmw_stripe_t *stripes, uint64_t *read_osd_set,
|
||||||
|
uint64_t pg_size, uint64_t pg_minsize, uint64_t pg_cursize, uint64_t *write_osd_set, uint64_t chunk_size)
|
||||||
{
|
{
|
||||||
// Generic parity modification (read-modify-write) algorithm
|
// Generic parity modification (read-modify-write) algorithm
|
||||||
// Reconstruct -> Read -> Calc parity -> Write
|
// Read -> Reconstruct missing chunks -> Calc parity chunks -> Write
|
||||||
// Now we always read continuous ranges. This means that an update of the beginning
|
// Now we always read continuous ranges. This means that an update of the beginning
|
||||||
// of one data stripe and the end of another will lead to a read of full paired stripes.
|
// of one data stripe and the end of another will lead to a read of full paired stripes.
|
||||||
// FIXME: (Maybe) read small individual ranges in that case instead.
|
// FIXME: (Maybe) read small individual ranges in that case instead.
|
||||||
|
@ -174,64 +179,79 @@ void* calc_rmw_reads(void *write_buf, osd_rmw_stripe_t *stripes, uint64_t *osd_s
|
||||||
stripes[role].write_end = stripes[role].req_end;
|
stripes[role].write_end = stripes[role].req_end;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
int write_parity = 0;
|
||||||
|
for (int role = pg_minsize; role < pg_size; role++)
|
||||||
|
{
|
||||||
|
if (write_osd_set[role] != 0)
|
||||||
|
{
|
||||||
|
write_parity = 1;
|
||||||
|
stripes[role].write_start = start;
|
||||||
|
stripes[role].write_end = end;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (write_parity)
|
||||||
|
{
|
||||||
for (int role = 0; role < pg_minsize; role++)
|
for (int role = 0; role < pg_minsize; role++)
|
||||||
{
|
{
|
||||||
cover_read(start, end, stripes[role]);
|
cover_read(start, end, stripes[role]);
|
||||||
}
|
}
|
||||||
int has_parity = 0;
|
|
||||||
for (int role = pg_minsize; role < pg_size; role++)
|
|
||||||
{
|
|
||||||
if (osd_set[role] != 0)
|
|
||||||
{
|
|
||||||
has_parity++;
|
|
||||||
stripes[role].write_start = start;
|
|
||||||
stripes[role].write_end = end;
|
|
||||||
}
|
}
|
||||||
else
|
if (write_osd_set != read_osd_set)
|
||||||
stripes[role].missing = true;
|
{
|
||||||
|
// Object is degraded/misplaced and will be moved to <write_osd_set>
|
||||||
|
for (int role = 0; role < pg_size; role++)
|
||||||
|
{
|
||||||
|
if (write_osd_set[role] != read_osd_set[role])
|
||||||
|
{
|
||||||
|
// We need to get data for any moved / recovered chunk
|
||||||
|
// And we need a continuous write buffer so we'll only optimize
|
||||||
|
// for the case when the whole chunk is ovewritten in the request
|
||||||
|
if (stripes[role].req_start != 0 ||
|
||||||
|
stripes[role].req_end != chunk_size)
|
||||||
|
{
|
||||||
|
stripes[role].read_start = 0;
|
||||||
|
stripes[role].read_end = chunk_size;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (pg_cursize < pg_size)
|
if (pg_cursize < pg_size)
|
||||||
{
|
{
|
||||||
if (has_parity == 0)
|
// Some stripe(s) are missing, so we need to read parity
|
||||||
|
for (int role = 0; role < pg_size; role++)
|
||||||
{
|
{
|
||||||
// Parity is missing, we don't need to read anything
|
if (read_osd_set[role] == 0 && stripes[role].read_end != 0)
|
||||||
for (int role = 0; role < pg_minsize; role++)
|
|
||||||
{
|
|
||||||
stripes[role].read_end = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
// Other stripe(s) are missing
|
|
||||||
for (int role = 0; role < pg_minsize; role++)
|
|
||||||
{
|
|
||||||
if (osd_set[role] == 0 && stripes[role].read_end != 0)
|
|
||||||
{
|
{
|
||||||
stripes[role].missing = true;
|
stripes[role].missing = true;
|
||||||
for (int r2 = 0; r2 < pg_size; r2++)
|
int found = 0;
|
||||||
|
for (int r2 = 0; r2 < pg_size && found < pg_minsize; r2++)
|
||||||
{
|
{
|
||||||
// Read the non-covered range of <role> from all other stripes to reconstruct it
|
// Read the non-covered range of <role> from at least <minsize> other stripes to reconstruct it
|
||||||
if (r2 != role && osd_set[r2] != 0)
|
if (read_osd_set[r2] != 0)
|
||||||
{
|
{
|
||||||
extend_read(stripes[role].read_start, stripes[role].read_end, stripes[r2]);
|
extend_read(stripes[role].read_start, stripes[role].read_end, stripes[r2]);
|
||||||
|
found++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (found < pg_minsize)
|
||||||
|
{
|
||||||
|
// Incomplete object (FIXME)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Allocate read buffers
|
// Allocate read buffers
|
||||||
void *rmw_buf = alloc_read_buffer(stripes, pg_size, has_parity * (end - start));
|
void *rmw_buf = alloc_read_buffer(stripes, pg_size, (write_parity ? pg_size-pg_minsize : 0) * (end - start));
|
||||||
// Position parity & write buffers
|
// Position write buffers
|
||||||
uint64_t buf_pos = 0, in_pos = 0;
|
uint64_t buf_pos = 0, in_pos = 0;
|
||||||
for (int role = 0; role < pg_size; role++)
|
for (int role = 0; role < pg_size; role++)
|
||||||
{
|
{
|
||||||
if (stripes[role].req_end != 0)
|
if (stripes[role].req_end != 0)
|
||||||
{
|
{
|
||||||
stripes[role].write_buf = write_buf + in_pos;
|
stripes[role].write_buf = request_buf + in_pos;
|
||||||
in_pos += stripes[role].req_end - stripes[role].req_start;
|
in_pos += stripes[role].req_end - stripes[role].req_start;
|
||||||
}
|
}
|
||||||
else if (role >= pg_minsize && osd_set[role] != 0)
|
else if (role >= pg_minsize && read_osd_set[role] != 0)
|
||||||
{
|
{
|
||||||
stripes[role].write_buf = rmw_buf + buf_pos;
|
stripes[role].write_buf = rmw_buf + buf_pos;
|
||||||
buf_pos += end - start;
|
buf_pos += end - start;
|
||||||
|
@ -321,8 +341,9 @@ static void xor_multiple_buffers(buf_len_t *xor1, int n1, buf_len_t *xor2, int n
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void reconstruct_stripes(osd_rmw_stripe_t *stripes, int pg_size)
|
void calc_rmw_parity(osd_rmw_stripe_t *stripes, int pg_size, uint64_t *read_osd_set, uint64_t *write_osd_set, uint32_t chunk_size)
|
||||||
{
|
{
|
||||||
|
int pg_minsize = pg_size-1;
|
||||||
for (int role = 0; role < pg_size; role++)
|
for (int role = 0; role < pg_size; role++)
|
||||||
{
|
{
|
||||||
if (stripes[role].read_end != 0 && stripes[role].missing)
|
if (stripes[role].read_end != 0 && stripes[role].missing)
|
||||||
|
@ -332,21 +353,42 @@ void reconstruct_stripes(osd_rmw_stripe_t *stripes, int pg_size)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
uint32_t start = 0, end = 0;
|
||||||
|
if (!stripes[pg_minsize].missing || write_osd_set != read_osd_set)
|
||||||
void calc_rmw_parity(osd_rmw_stripe_t *stripes, int pg_size)
|
|
||||||
{
|
|
||||||
if (stripes[pg_size-1].missing)
|
|
||||||
{
|
{
|
||||||
// Parity OSD is unavailable
|
for (int role = 0; role < pg_minsize; role++)
|
||||||
return;
|
{
|
||||||
|
if (stripes[role].req_end != 0)
|
||||||
|
{
|
||||||
|
start = !end || stripes[role].req_start < start ? stripes[role].req_start : start;
|
||||||
|
end = std::max(stripes[role].req_end, end);
|
||||||
}
|
}
|
||||||
reconstruct_stripes(stripes, pg_size);
|
}
|
||||||
|
}
|
||||||
|
if (write_osd_set != read_osd_set)
|
||||||
|
{
|
||||||
|
for (int role = 0; role < pg_minsize; role++)
|
||||||
|
{
|
||||||
|
if (write_osd_set[role] != read_osd_set[role] &&
|
||||||
|
(stripes[role].req_start != 0 || stripes[role].req_end != chunk_size))
|
||||||
|
{
|
||||||
|
// Copy modified chunk into the read buffer to write it back
|
||||||
|
memcpy(
|
||||||
|
stripes[role].read_buf + stripes[role].req_start,
|
||||||
|
stripes[role].write_buf,
|
||||||
|
stripes[role].req_end - stripes[role].req_start
|
||||||
|
);
|
||||||
|
stripes[role].write_buf = stripes[role].read_buf;
|
||||||
|
stripes[role].write_start = 0;
|
||||||
|
stripes[role].write_end = chunk_size;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!stripes[pg_minsize].missing)
|
||||||
|
{
|
||||||
// Calculate new parity (EC k+1)
|
// Calculate new parity (EC k+1)
|
||||||
int parity = pg_size-1, prev = -2;
|
int parity = pg_minsize, prev = -2;
|
||||||
auto wr_end = stripes[parity].write_end;
|
for (int other = 0; other < pg_minsize; other++)
|
||||||
auto wr_start = stripes[parity].write_start;
|
|
||||||
for (int other = 0; other < pg_size-1; other++)
|
|
||||||
{
|
{
|
||||||
if (prev == -2)
|
if (prev == -2)
|
||||||
{
|
{
|
||||||
|
@ -358,15 +400,34 @@ void calc_rmw_parity(osd_rmw_stripe_t *stripes, int pg_size)
|
||||||
buf_len_t xor1[3], xor2[3];
|
buf_len_t xor1[3], xor2[3];
|
||||||
if (prev == -1)
|
if (prev == -1)
|
||||||
{
|
{
|
||||||
xor1[n1++] = { .buf = stripes[parity].write_buf, .len = wr_end-wr_start };
|
xor1[n1++] = { .buf = stripes[parity].write_buf, .len = end-start };
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
get_old_new_buffers(stripes[prev], wr_start, wr_end, xor1, n1);
|
get_old_new_buffers(stripes[prev], start, end, xor1, n1);
|
||||||
prev = -1;
|
prev = -1;
|
||||||
}
|
}
|
||||||
get_old_new_buffers(stripes[other], wr_start, wr_end, xor2, n2);
|
get_old_new_buffers(stripes[other], start, end, xor2, n2);
|
||||||
xor_multiple_buffers(xor1, n1, xor2, n2, stripes[parity].write_buf, wr_end-wr_start);
|
xor_multiple_buffers(xor1, n1, xor2, n2, stripes[parity].write_buf, end-start);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (write_osd_set != read_osd_set)
|
||||||
|
{
|
||||||
|
for (int role = pg_minsize; role < pg_size; role++)
|
||||||
|
{
|
||||||
|
if (write_osd_set[role] != read_osd_set[role] && (start != 0 || end != chunk_size))
|
||||||
|
{
|
||||||
|
// Copy new parity into the read buffer to write it back
|
||||||
|
memcpy(
|
||||||
|
stripes[role].read_buf + start,
|
||||||
|
stripes[role].write_buf,
|
||||||
|
end - start
|
||||||
|
);
|
||||||
|
stripes[role].write_buf = stripes[role].read_buf;
|
||||||
|
stripes[role].write_start = 0;
|
||||||
|
stripes[role].write_end = chunk_size;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -25,14 +25,13 @@ struct osd_rmw_stripe_t
|
||||||
|
|
||||||
void split_stripes(uint64_t pg_minsize, uint32_t bs_block_size, uint32_t start, uint32_t len, osd_rmw_stripe_t *stripes);
|
void split_stripes(uint64_t pg_minsize, uint32_t bs_block_size, uint32_t start, uint32_t len, osd_rmw_stripe_t *stripes);
|
||||||
|
|
||||||
void reconstruct_stripes(osd_rmw_stripe_t *stripes, int pg_size);
|
|
||||||
|
|
||||||
void reconstruct_stripe(osd_rmw_stripe_t *stripes, int pg_size, int role);
|
void reconstruct_stripe(osd_rmw_stripe_t *stripes, int pg_size, int role);
|
||||||
|
|
||||||
int extend_missing_stripes(osd_rmw_stripe_t *stripes, osd_num_t *osd_set, int minsize, int size);
|
int extend_missing_stripes(osd_rmw_stripe_t *stripes, osd_num_t *osd_set, int minsize, int size);
|
||||||
|
|
||||||
void* alloc_read_buffer(osd_rmw_stripe_t *stripes, int read_pg_size, uint64_t add_size);
|
void* alloc_read_buffer(osd_rmw_stripe_t *stripes, int read_pg_size, uint64_t add_size);
|
||||||
|
|
||||||
void* calc_rmw_reads(void *write_buf, osd_rmw_stripe_t *stripes, uint64_t *osd_set, uint64_t pg_size, uint64_t pg_minsize, uint64_t pg_cursize);
|
void* calc_rmw(void *request_buf, osd_rmw_stripe_t *stripes, uint64_t *read_osd_set,
|
||||||
|
uint64_t pg_size, uint64_t pg_minsize, uint64_t pg_cursize, uint64_t *write_osd_set, uint64_t chunk_size);
|
||||||
|
|
||||||
void calc_rmw_parity(osd_rmw_stripe_t *stripes, int pg_size);
|
void calc_rmw_parity(osd_rmw_stripe_t *stripes, int pg_size, uint64_t *read_osd_set, uint64_t *write_osd_set, uint32_t chunk_size);
|
||||||
|
|
117
osd_rmw_test.cpp
117
osd_rmw_test.cpp
|
@ -2,6 +2,64 @@
|
||||||
#include "osd_rmw.cpp"
|
#include "osd_rmw.cpp"
|
||||||
#include "test_pattern.h"
|
#include "test_pattern.h"
|
||||||
|
|
||||||
|
/***
|
||||||
|
|
||||||
|
Cases:
|
||||||
|
|
||||||
|
1. split(offset=128K-4K, len=8K)
|
||||||
|
= [ [ 128K-4K, 128K ], [ 0, 4K ], [ 0, 0 ] ]
|
||||||
|
|
||||||
|
2. read(offset=128K-4K, len=8K, osd_set=[1,0,3])
|
||||||
|
= { read: [ [ 0, 128K ], [ 0, 4K ], [ 0, 4K ] ] }
|
||||||
|
|
||||||
|
3. cover_read(0, 128K, { req: [ 128K-4K, 4K ] })
|
||||||
|
= { read: [ 0, 128K-4K ] }
|
||||||
|
|
||||||
|
4. write(offset=128K-4K, len=8K, osd_set=[1,0,3])
|
||||||
|
= {
|
||||||
|
read: [ [ 0, 128K ], [ 4K, 128K ], [ 4K, 128K ] ],
|
||||||
|
write: [ [ 128K-4K, 128K ], [ 0, 4K ], [ 0, 128K ] ],
|
||||||
|
input buffer: [ write0, write1 ],
|
||||||
|
rmw buffer: [ write2, read0, read1, read2 ],
|
||||||
|
}
|
||||||
|
+ check write2 buffer
|
||||||
|
|
||||||
|
5. write(offset=0, len=128K+64K, osd_set=[1,0,3])
|
||||||
|
= {
|
||||||
|
req: [ [ 0, 128K ], [ 0, 64K ], [ 0, 0 ] ],
|
||||||
|
read: [ [ 64K, 128K ], [ 64K, 128K ], [ 64K, 128K ] ],
|
||||||
|
write: [ [ 0, 128K ], [ 0, 64K ], [ 0, 128K ] ],
|
||||||
|
input buffer: [ write0, write1 ],
|
||||||
|
rmw buffer: [ write2, read0, read1, read2 ],
|
||||||
|
}
|
||||||
|
|
||||||
|
6. write(offset=0, len=128K+64K, osd_set=[1,2,3])
|
||||||
|
= {
|
||||||
|
req: [ [ 0, 128K ], [ 0, 64K ], [ 0, 0 ] ],
|
||||||
|
read: [ [ 0, 0 ], [ 64K, 128K ], [ 0, 0 ] ],
|
||||||
|
write: [ [ 0, 128K ], [ 0, 64K ], [ 0, 128K ] ],
|
||||||
|
input buffer: [ write0, write1 ],
|
||||||
|
rmw buffer: [ write2, read1 ],
|
||||||
|
}
|
||||||
|
|
||||||
|
7. calc_rmw(offset=128K-4K, len=8K, osd_set=[1,0,3], write_set=[1,2,3])
|
||||||
|
= {
|
||||||
|
read: [ [ 0, 128K ], [ 0, 128K ], [ 0, 128K ] ],
|
||||||
|
write: [ [ 128K-4K, 128K ], [ 0, 4K ], [ 0, 128K ] ],
|
||||||
|
input buffer: [ write0, write1 ],
|
||||||
|
rmw buffer: [ write2, read0, read1, read2 ],
|
||||||
|
}
|
||||||
|
then, after calc_rmw_parity(): {
|
||||||
|
write: [ [ 128K-4K, 128K ], [ 0, 128K ], [ 0, 128K ] ],
|
||||||
|
write1==read1,
|
||||||
|
}
|
||||||
|
+ check write1 buffer
|
||||||
|
+ check write2 buffer
|
||||||
|
|
||||||
|
***/
|
||||||
|
|
||||||
|
void test7();
|
||||||
|
|
||||||
int main(int narg, char *args[])
|
int main(int narg, char *args[])
|
||||||
{
|
{
|
||||||
osd_num_t osd_set[3] = { 1, 0, 3 };
|
osd_num_t osd_set[3] = { 1, 0, 3 };
|
||||||
|
@ -28,10 +86,13 @@ int main(int narg, char *args[])
|
||||||
memset(stripes, 0, sizeof(stripes));
|
memset(stripes, 0, sizeof(stripes));
|
||||||
split_stripes(2, 128*1024, 128*1024-4096, 8192, stripes);
|
split_stripes(2, 128*1024, 128*1024-4096, 8192, stripes);
|
||||||
void* write_buf = malloc(8192);
|
void* write_buf = malloc(8192);
|
||||||
void* rmw_buf = calc_rmw_reads(write_buf, stripes, osd_set, 3, 2, 2);
|
void* rmw_buf = calc_rmw(write_buf, stripes, osd_set, 3, 2, 2, osd_set, 128*1024);
|
||||||
assert(stripes[0].read_start == 0 && stripes[0].read_end == 128*1024);
|
assert(stripes[0].read_start == 0 && stripes[0].read_end == 128*1024);
|
||||||
assert(stripes[1].read_start == 4096 && stripes[1].read_end == 128*1024);
|
assert(stripes[1].read_start == 4096 && stripes[1].read_end == 128*1024);
|
||||||
assert(stripes[2].read_start == 4096 && stripes[2].read_end == 128*1024);
|
assert(stripes[2].read_start == 4096 && stripes[2].read_end == 128*1024);
|
||||||
|
assert(stripes[0].write_start == 128*1024-4096 && stripes[0].write_end == 128*1024);
|
||||||
|
assert(stripes[1].write_start == 0 && stripes[1].write_end == 4096);
|
||||||
|
assert(stripes[2].write_start == 0 && stripes[2].write_end == 128*1024);
|
||||||
assert(stripes[0].read_buf == rmw_buf+128*1024);
|
assert(stripes[0].read_buf == rmw_buf+128*1024);
|
||||||
assert(stripes[1].read_buf == rmw_buf+128*1024*2);
|
assert(stripes[1].read_buf == rmw_buf+128*1024*2);
|
||||||
assert(stripes[2].read_buf == rmw_buf+128*1024*3-4096);
|
assert(stripes[2].read_buf == rmw_buf+128*1024*3-4096);
|
||||||
|
@ -43,7 +104,7 @@ int main(int narg, char *args[])
|
||||||
set_pattern(stripes[0].read_buf, 128*1024, PATTERN1); // old data
|
set_pattern(stripes[0].read_buf, 128*1024, PATTERN1); // old data
|
||||||
set_pattern(stripes[1].read_buf, 128*1024-4096, UINT64_MAX); // didn't read it, it's missing
|
set_pattern(stripes[1].read_buf, 128*1024-4096, UINT64_MAX); // didn't read it, it's missing
|
||||||
set_pattern(stripes[2].read_buf, 128*1024-4096, 0); // old parity = 0
|
set_pattern(stripes[2].read_buf, 128*1024-4096, 0); // old parity = 0
|
||||||
calc_rmw_parity(stripes, 3);
|
calc_rmw_parity(stripes, 3, osd_set, osd_set, 128*1024);
|
||||||
check_pattern(stripes[2].write_buf, 4096, PATTERN0^PATTERN1); // new parity
|
check_pattern(stripes[2].write_buf, 4096, PATTERN0^PATTERN1); // new parity
|
||||||
check_pattern(stripes[2].write_buf+4096, 128*1024-4096*2, 0); // new parity
|
check_pattern(stripes[2].write_buf+4096, 128*1024-4096*2, 0); // new parity
|
||||||
check_pattern(stripes[2].write_buf+128*1024-4096, 4096, PATTERN0^PATTERN1); // new parity
|
check_pattern(stripes[2].write_buf+128*1024-4096, 4096, PATTERN0^PATTERN1); // new parity
|
||||||
|
@ -57,10 +118,13 @@ int main(int narg, char *args[])
|
||||||
assert(stripes[2].req_end == 0);
|
assert(stripes[2].req_end == 0);
|
||||||
// Test 5.2
|
// Test 5.2
|
||||||
write_buf = malloc(64*1024*3);
|
write_buf = malloc(64*1024*3);
|
||||||
rmw_buf = calc_rmw_reads(write_buf, stripes, osd_set, 3, 2, 2);
|
rmw_buf = calc_rmw(write_buf, stripes, osd_set, 3, 2, 2, osd_set, 128*1024);
|
||||||
assert(stripes[0].read_start == 64*1024 && stripes[0].read_end == 128*1024);
|
assert(stripes[0].read_start == 64*1024 && stripes[0].read_end == 128*1024);
|
||||||
assert(stripes[1].read_start == 64*1024 && stripes[1].read_end == 128*1024);
|
assert(stripes[1].read_start == 64*1024 && stripes[1].read_end == 128*1024);
|
||||||
assert(stripes[2].read_start == 64*1024 && stripes[2].read_end == 128*1024);
|
assert(stripes[2].read_start == 64*1024 && stripes[2].read_end == 128*1024);
|
||||||
|
assert(stripes[0].write_start == 0 && stripes[0].write_end == 128*1024);
|
||||||
|
assert(stripes[1].write_start == 0 && stripes[1].write_end == 64*1024);
|
||||||
|
assert(stripes[2].write_start == 0 && stripes[2].write_end == 128*1024);
|
||||||
assert(stripes[0].read_buf == rmw_buf+128*1024);
|
assert(stripes[0].read_buf == rmw_buf+128*1024);
|
||||||
assert(stripes[1].read_buf == rmw_buf+64*3*1024);
|
assert(stripes[1].read_buf == rmw_buf+64*3*1024);
|
||||||
assert(stripes[2].read_buf == rmw_buf+64*4*1024);
|
assert(stripes[2].read_buf == rmw_buf+64*4*1024);
|
||||||
|
@ -74,10 +138,13 @@ int main(int narg, char *args[])
|
||||||
split_stripes(2, 128*1024, 0, 64*1024*3, stripes);
|
split_stripes(2, 128*1024, 0, 64*1024*3, stripes);
|
||||||
osd_set[1] = 2;
|
osd_set[1] = 2;
|
||||||
write_buf = malloc(64*1024*3);
|
write_buf = malloc(64*1024*3);
|
||||||
rmw_buf = calc_rmw_reads(write_buf, stripes, osd_set, 3, 2, 3);
|
rmw_buf = calc_rmw(write_buf, stripes, osd_set, 3, 2, 3, osd_set, 128*1024);
|
||||||
assert(stripes[0].read_end == 0);
|
assert(stripes[0].read_end == 0);
|
||||||
assert(stripes[1].read_start == 64*1024 && stripes[1].read_end == 128*1024);
|
assert(stripes[1].read_start == 64*1024 && stripes[1].read_end == 128*1024);
|
||||||
assert(stripes[2].read_end == 0);
|
assert(stripes[2].read_end == 0);
|
||||||
|
assert(stripes[0].write_start == 0 && stripes[0].write_end == 128*1024);
|
||||||
|
assert(stripes[1].write_start == 0 && stripes[1].write_end == 64*1024);
|
||||||
|
assert(stripes[2].write_start == 0 && stripes[2].write_end == 128*1024);
|
||||||
assert(stripes[0].read_buf == 0);
|
assert(stripes[0].read_buf == 0);
|
||||||
assert(stripes[1].read_buf == rmw_buf+128*1024);
|
assert(stripes[1].read_buf == rmw_buf+128*1024);
|
||||||
assert(stripes[2].read_buf == 0);
|
assert(stripes[2].read_buf == 0);
|
||||||
|
@ -87,7 +154,49 @@ int main(int narg, char *args[])
|
||||||
free(rmw_buf);
|
free(rmw_buf);
|
||||||
free(write_buf);
|
free(write_buf);
|
||||||
osd_set[1] = 0;
|
osd_set[1] = 0;
|
||||||
|
// Test 7
|
||||||
|
test7();
|
||||||
// End
|
// End
|
||||||
printf("all ok\n");
|
printf("all ok\n");
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void test7()
|
||||||
|
{
|
||||||
|
osd_num_t osd_set[3] = { 1, 0, 3 };
|
||||||
|
osd_num_t write_osd_set[3] = { 1, 2, 3 };
|
||||||
|
osd_rmw_stripe_t stripes[3] = { 0 };
|
||||||
|
// Test 7.1
|
||||||
|
split_stripes(2, 128*1024, 128*1024-4096, 8192, stripes);
|
||||||
|
void *write_buf = malloc(8192);
|
||||||
|
void *rmw_buf = calc_rmw(write_buf, stripes, osd_set, 3, 2, 2, write_osd_set, 128*1024);
|
||||||
|
assert(stripes[0].read_start == 0 && stripes[0].read_end == 128*1024);
|
||||||
|
assert(stripes[1].read_start == 0 && stripes[1].read_end == 128*1024);
|
||||||
|
assert(stripes[2].read_start == 0 && stripes[2].read_end == 128*1024);
|
||||||
|
assert(stripes[0].write_start == 128*1024-4096 && stripes[0].write_end == 128*1024);
|
||||||
|
assert(stripes[1].write_start == 0 && stripes[1].write_end == 4096);
|
||||||
|
assert(stripes[2].write_start == 0 && stripes[2].write_end == 128*1024);
|
||||||
|
assert(stripes[0].read_buf == rmw_buf+128*1024);
|
||||||
|
assert(stripes[1].read_buf == rmw_buf+128*1024*2);
|
||||||
|
assert(stripes[2].read_buf == rmw_buf+128*1024*3);
|
||||||
|
assert(stripes[0].write_buf == write_buf);
|
||||||
|
assert(stripes[1].write_buf == write_buf+4096);
|
||||||
|
assert(stripes[2].write_buf == rmw_buf);
|
||||||
|
// Test 7.2
|
||||||
|
set_pattern(write_buf, 8192, PATTERN0);
|
||||||
|
set_pattern(stripes[0].read_buf, 128*1024, PATTERN1); // old data
|
||||||
|
set_pattern(stripes[1].read_buf, 128*1024, UINT64_MAX); // didn't read it, it's missing
|
||||||
|
set_pattern(stripes[2].read_buf, 128*1024, 0); // old parity = 0
|
||||||
|
calc_rmw_parity(stripes, 3, osd_set, write_osd_set, 128*1024);
|
||||||
|
assert(stripes[0].write_start == 128*1024-4096 && stripes[0].write_end == 128*1024);
|
||||||
|
assert(stripes[1].write_start == 0 && stripes[1].write_end == 128*1024);
|
||||||
|
assert(stripes[2].write_start == 0 && stripes[2].write_end == 128*1024);
|
||||||
|
assert(stripes[1].write_buf == stripes[1].read_buf);
|
||||||
|
check_pattern(stripes[1].write_buf, 4096, PATTERN0);
|
||||||
|
check_pattern(stripes[1].write_buf+4096, 128*1024-4096, PATTERN1);
|
||||||
|
check_pattern(stripes[2].write_buf, 4096, PATTERN0^PATTERN1); // new parity
|
||||||
|
check_pattern(stripes[2].write_buf+4096, 128*1024-4096*2, 0); // new parity
|
||||||
|
check_pattern(stripes[2].write_buf+128*1024-4096, 4096, PATTERN0^PATTERN1); // new parity
|
||||||
|
free(rmw_buf);
|
||||||
|
free(write_buf);
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue