forked from vitalif/vitastor
Add a test for snapshots, fix bugs. Now the test passes
parent
9038d42327
commit
d0c2e31312
|
@ -0,0 +1,23 @@
|
|||
const fsp = require('fs').promises;
|
||||
|
||||
async function merge(file1, file2, out)
|
||||
{
|
||||
if (!out)
|
||||
{
|
||||
console.error('USAGE: nodejs merge.js layer1 layer2 output');
|
||||
process.exit();
|
||||
}
|
||||
const layer1 = await fsp.readFile(file1);
|
||||
const layer2 = await fsp.readFile(file2);
|
||||
const zero = Buffer.alloc(4096);
|
||||
for (let i = 0; i < layer2.length; i += 4096)
|
||||
{
|
||||
if (zero.compare(layer2, i, i+4096) != 0)
|
||||
{
|
||||
layer2.copy(layer1, i, i, i+4096);
|
||||
}
|
||||
}
|
||||
await fsp.writeFile(out, layer1);
|
||||
}
|
||||
|
||||
merge(process.argv[2], process.argv[3], process.argv[4]);
|
|
@ -153,6 +153,11 @@ void blockstore_impl_t::parse_config(blockstore_config_t & config)
|
|||
{
|
||||
throw std::runtime_error("meta_offset must be a multiple of meta_block_size = "+std::to_string(meta_block_size));
|
||||
}
|
||||
// FIXME: Due to the recent changes in entry_attr handling rename it back to bitmap
|
||||
if (entry_attr_size > meta_block_size/2)
|
||||
{
|
||||
throw std::runtime_error("entry_attr_size is too big");
|
||||
}
|
||||
if (journal.offset % journal_block_size)
|
||||
{
|
||||
throw std::runtime_error("journal_offset must be a multiple of journal_block_size = "+std::to_string(journal_block_size));
|
||||
|
|
|
@ -8,7 +8,12 @@ bool blockstore_impl_t::enqueue_write(blockstore_op_t *op)
|
|||
// Check or assign version number
|
||||
bool found = false, deleted = false, is_del = (op->opcode == BS_OP_DELETE);
|
||||
bool wait_big = false, wait_del = false;
|
||||
void *bmp = NULL;
|
||||
uint64_t version = 1;
|
||||
if (!is_del && entry_attr_size > sizeof(void*))
|
||||
{
|
||||
bmp = calloc_or_die(1, entry_attr_size);
|
||||
}
|
||||
if (dirty_db.size() > 0)
|
||||
{
|
||||
auto dirty_it = dirty_db.upper_bound((obj_ver_id){
|
||||
|
@ -25,6 +30,10 @@ bool blockstore_impl_t::enqueue_write(blockstore_op_t *op)
|
|||
wait_big = (dirty_it->second.state & BS_ST_TYPE_MASK) == BS_ST_BIG_WRITE
|
||||
? !IS_SYNCED(dirty_it->second.state)
|
||||
: ((dirty_it->second.state & BS_ST_WORKFLOW_MASK) == BS_ST_WAIT_BIG);
|
||||
if (entry_attr_size > sizeof(void*))
|
||||
memcpy(bmp, dirty_it->second.bitmap, entry_attr_size);
|
||||
else
|
||||
bmp = dirty_it->second.bitmap;
|
||||
}
|
||||
}
|
||||
if (!found)
|
||||
|
@ -33,6 +42,8 @@ bool blockstore_impl_t::enqueue_write(blockstore_op_t *op)
|
|||
if (clean_it != clean_db.end())
|
||||
{
|
||||
version = clean_it->second.version + 1;
|
||||
void *bmp_ptr = get_clean_entry_bitmap(clean_it->second.location, clean_entry_bitmap_size);
|
||||
memcpy((entry_attr_size > sizeof(void*) ? bmp : &bmp), bmp_ptr, entry_attr_size);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -72,6 +83,10 @@ bool blockstore_impl_t::enqueue_write(blockstore_op_t *op)
|
|||
{
|
||||
// Invalid version requested
|
||||
op->retval = -EEXIST;
|
||||
if (!is_del && entry_attr_size > sizeof(void*))
|
||||
{
|
||||
free(bmp);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
@ -95,7 +110,6 @@ bool blockstore_impl_t::enqueue_write(blockstore_op_t *op)
|
|||
#endif
|
||||
// FIXME No strict need to add it into dirty_db here, it's just left
|
||||
// from the previous implementation where reads waited for writes
|
||||
void *bmp = NULL;
|
||||
uint32_t state;
|
||||
if (is_del)
|
||||
state = BS_ST_DELETE | BS_ST_IN_FLIGHT;
|
||||
|
@ -110,10 +124,28 @@ bool blockstore_impl_t::enqueue_write(blockstore_op_t *op)
|
|||
state |= BS_ST_IN_FLIGHT;
|
||||
if (op->opcode == BS_OP_WRITE_STABLE)
|
||||
state |= BS_ST_INSTANT;
|
||||
if (entry_attr_size > sizeof(void*))
|
||||
bmp = calloc_or_die(1, entry_attr_size);
|
||||
if (op->bitmap)
|
||||
memcpy((entry_attr_size > sizeof(void*) ? bmp : &bmp), op->bitmap, entry_attr_size);
|
||||
{
|
||||
// Only allow to overwrite part of the object bitmap respective to the write's offset/len
|
||||
uint8_t *bmp_ptr = (uint8_t*)(entry_attr_size > sizeof(void*) ? bmp : &bmp);
|
||||
uint32_t bit = op->offset/bitmap_granularity;
|
||||
uint32_t bits_left = op->len/bitmap_granularity;
|
||||
while (!(bit % 8) && bits_left > 8)
|
||||
{
|
||||
// Copy bytes
|
||||
bmp_ptr[bit/8] = ((uint8_t*)op->bitmap)[bit/8];
|
||||
bit += 8;
|
||||
bits_left -= 8;
|
||||
}
|
||||
while (bits_left > 0)
|
||||
{
|
||||
// Copy bits
|
||||
bmp_ptr[bit/8] = (bmp_ptr[bit/8] & ~(1 << (bit%8)))
|
||||
| (((uint8_t*)op->bitmap)[bit/8] & (1 << bit%8));
|
||||
bit++;
|
||||
bits_left--;
|
||||
}
|
||||
}
|
||||
}
|
||||
dirty_db.emplace((obj_ver_id){
|
||||
.oid = op->oid,
|
||||
|
|
|
@ -737,7 +737,11 @@ void cluster_client_t::slice_rw(cluster_op_t *op)
|
|||
}
|
||||
assert(cur > prev);
|
||||
if (skip_prev)
|
||||
{
|
||||
// Just advance iov_idx & iov_pos
|
||||
add_iov(end-prev, true, op, iov_idx, iov_pos, op->parts[i].iov, NULL, 0);
|
||||
end = prev;
|
||||
}
|
||||
else
|
||||
add_iov(cur-prev, skip_prev, op, iov_idx, iov_pos, op->parts[i].iov, scrap_buffer, scrap_buffer_size);
|
||||
if (end == begin)
|
||||
|
|
|
@ -271,6 +271,12 @@ void etcd_state_client_t::load_pgs()
|
|||
{ "key", base64_encode(etcd_prefix+"/config/pgs") },
|
||||
} }
|
||||
},
|
||||
json11::Json::object {
|
||||
{ "request_range", json11::Json::object {
|
||||
{ "key", base64_encode(etcd_prefix+"/config/inode/") },
|
||||
{ "range_end", base64_encode(etcd_prefix+"/config/inode0") },
|
||||
} }
|
||||
},
|
||||
json11::Json::object {
|
||||
{ "request_range", json11::Json::object {
|
||||
{ "key", base64_encode(etcd_prefix+"/pg/history/") },
|
||||
|
@ -666,7 +672,9 @@ void etcd_state_client_t::parse_state(const std::string & key, const json11::Jso
|
|||
if (parent_inode_num && !(parent_inode_num >> (64-POOL_ID_BITS)))
|
||||
{
|
||||
uint64_t parent_pool_id = value["parent_pool"].uint64_value();
|
||||
if (parent_pool_id >= POOL_ID_MAX)
|
||||
if (!parent_pool_id)
|
||||
parent_inode_num |= pool_id << (64-POOL_ID_BITS);
|
||||
else if (parent_pool_id >= POOL_ID_MAX)
|
||||
{
|
||||
printf(
|
||||
"Inode %lu/%lu parent_pool value is invalid, ignoring parent setting\n",
|
||||
|
|
|
@ -273,11 +273,9 @@ bool osd_messenger_t::handle_reply_hdr(osd_client_t *cl)
|
|||
osd_op_t *op = req_it->second;
|
||||
memcpy(op->reply.buf, cl->read_op->req.buf, OSD_PACKET_SIZE);
|
||||
cl->sent_ops.erase(req_it);
|
||||
if ((op->reply.hdr.opcode == OSD_OP_SEC_READ || op->reply.hdr.opcode == OSD_OP_READ) &&
|
||||
op->reply.hdr.retval > 0)
|
||||
if (op->reply.hdr.opcode == OSD_OP_SEC_READ || op->reply.hdr.opcode == OSD_OP_READ)
|
||||
{
|
||||
// Read data. In this case we assume that the buffer is preallocated by the caller (!)
|
||||
assert(op->iov.count > 0);
|
||||
unsigned bmp_len = (op->reply.hdr.opcode == OSD_OP_SEC_READ ? op->reply.sec_rw.attr_len : op->reply.rw.bitmap_len);
|
||||
if (op->reply.hdr.retval != (op->reply.hdr.opcode == OSD_OP_SEC_READ ? op->req.sec_rw.len : op->req.rw.len) ||
|
||||
bmp_len > op->bitmap_len)
|
||||
|
@ -292,11 +290,19 @@ bool osd_messenger_t::handle_reply_hdr(osd_client_t *cl)
|
|||
{
|
||||
cl->recv_list.push_back(op->bitmap, bmp_len);
|
||||
}
|
||||
if (op->reply.hdr.retval > 0)
|
||||
{
|
||||
assert(op->iov.count > 0);
|
||||
cl->recv_list.append(op->iov);
|
||||
}
|
||||
cl->read_remaining = op->reply.hdr.retval + bmp_len;
|
||||
if (cl->read_remaining == 0)
|
||||
{
|
||||
goto reuse;
|
||||
}
|
||||
delete cl->read_op;
|
||||
cl->read_op = op;
|
||||
cl->read_state = CL_READ_REPLY_DATA;
|
||||
cl->read_remaining = op->reply.hdr.retval + bmp_len;
|
||||
}
|
||||
else if (op->reply.hdr.opcode == OSD_OP_SEC_LIST && op->reply.hdr.retval > 0)
|
||||
{
|
||||
|
@ -320,6 +326,7 @@ bool osd_messenger_t::handle_reply_hdr(osd_client_t *cl)
|
|||
}
|
||||
else
|
||||
{
|
||||
reuse:
|
||||
// It's fine to reuse cl->read_op for the next reply
|
||||
handle_reply_ready(op);
|
||||
cl->recv_list.push_back(cl->read_op->req.buf, OSD_PACKET_SIZE);
|
||||
|
|
|
@ -21,7 +21,7 @@ osd_t::osd_t(blockstore_config_t & config, ring_loop_t *ringloop)
|
|||
|
||||
// Force external bitmap size
|
||||
entry_attr_size = bs_block_size / bs_bitmap_granularity / 8;
|
||||
config["entry_attr_size"] = entry_attr_size;
|
||||
config["entry_attr_size"] = std::to_string(entry_attr_size);
|
||||
|
||||
this->config = config;
|
||||
this->ringloop = ringloop;
|
||||
|
|
|
@ -154,6 +154,7 @@ resume_2:
|
|||
finish_op(cur_op, op_data->epipe > 0 ? -EPIPE : -EIO);
|
||||
return;
|
||||
}
|
||||
cur_op->reply.rw.bitmap_len = op_data->pg_data_size * entry_attr_size;
|
||||
if (op_data->degraded)
|
||||
{
|
||||
// Reconstruct missing stripes
|
||||
|
@ -166,6 +167,7 @@ resume_2:
|
|||
{
|
||||
reconstruct_stripes_jerasure(stripes, op_data->pg_size, op_data->pg_data_size, entry_attr_size);
|
||||
}
|
||||
cur_op->iov.push_back(op_data->stripes[0].bmp_buf, cur_op->reply.rw.bitmap_len);
|
||||
for (int role = 0; role < op_data->pg_size; role++)
|
||||
{
|
||||
if (stripes[role].req_end != 0)
|
||||
|
@ -180,10 +182,9 @@ resume_2:
|
|||
}
|
||||
else
|
||||
{
|
||||
cur_op->iov.push_back(op_data->stripes[0].bmp_buf, cur_op->reply.rw.bitmap_len);
|
||||
cur_op->iov.push_back(cur_op->buf, cur_op->req.rw.len);
|
||||
}
|
||||
cur_op->reply.rw.bitmap_len = op_data->pg_data_size * entry_attr_size;
|
||||
cur_op->iov.push_back(op_data->stripes[0].bmp_buf, cur_op->reply.rw.bitmap_len);
|
||||
finish_op(cur_op, cur_op->req.rw.len);
|
||||
}
|
||||
|
||||
|
|
|
@ -154,6 +154,8 @@ void osd_t::submit_primary_subops(int submit_type, uint64_t op_version, int pg_s
|
|||
{
|
||||
clock_gettime(CLOCK_REALTIME, &subops[i].tv_begin);
|
||||
subops[i].op_type = (uint64_t)cur_op;
|
||||
subops[i].bitmap = stripes[stripe_num].bmp_buf;
|
||||
subops[i].bitmap_len = entry_attr_size;
|
||||
subops[i].bs_op = new blockstore_op_t({
|
||||
.opcode = (uint64_t)(wr ? (rep ? BS_OP_WRITE_STABLE : BS_OP_WRITE) : BS_OP_READ),
|
||||
.callback = [subop = &subops[i], this](blockstore_op_t *bs_subop)
|
||||
|
|
|
@ -0,0 +1,71 @@
|
|||
#!/bin/bash -ex
|
||||
|
||||
. `dirname $0`/common.sh
|
||||
|
||||
dd if=/dev/zero of=./testdata/test_osd1.bin bs=1024 count=1 seek=$((1024*1024-1))
|
||||
dd if=/dev/zero of=./testdata/test_osd2.bin bs=1024 count=1 seek=$((1024*1024-1))
|
||||
dd if=/dev/zero of=./testdata/test_osd3.bin bs=1024 count=1 seek=$((1024*1024-1))
|
||||
|
||||
build/src/vitastor-osd --osd_num 1 --bind_address 127.0.0.1 --etcd_address $ETCD_URL $(node mon/simple-offsets.js --format options --device ./testdata/test_osd1.bin 2>/dev/null) &>./testdata/osd1.log &
|
||||
OSD1_PID=$!
|
||||
build/src/vitastor-osd --osd_num 2 --bind_address 127.0.0.1 --etcd_address $ETCD_URL $(node mon/simple-offsets.js --format options --device ./testdata/test_osd2.bin 2>/dev/null) &>./testdata/osd2.log &
|
||||
OSD2_PID=$!
|
||||
build/src/vitastor-osd --osd_num 3 --bind_address 127.0.0.1 --etcd_address $ETCD_URL $(node mon/simple-offsets.js --format options --device ./testdata/test_osd3.bin 2>/dev/null) &>./testdata/osd3.log &
|
||||
OSD3_PID=$!
|
||||
|
||||
cd mon
|
||||
npm install
|
||||
cd ..
|
||||
node mon/mon-main.js --etcd_url http://$ETCD_URL --etcd_prefix "/vitastor" &>./testdata/mon.log &
|
||||
MON_PID=$!
|
||||
|
||||
$ETCDCTL put /vitastor/config/pools '{"1":{"name":"testpool","scheme":"xor","pg_size":3,"pg_minsize":2,"parity_chunks":1,"pg_count":1,"failure_domain":"osd"}}'
|
||||
|
||||
sleep 2
|
||||
|
||||
if ! ($ETCDCTL get /vitastor/config/pgs --print-value-only | jq -s -e '(. | length) != 0 and (.[0].items["1"]["1"].osd_set | sort) == ["1","2","3"]'); then
|
||||
format_error "FAILED: 1 PG NOT CONFIGURED"
|
||||
fi
|
||||
|
||||
if ! ($ETCDCTL get /vitastor/pg/state/1/1 --print-value-only | jq -s -e '(. | length) != 0 and .[0].state == ["active"]'); then
|
||||
format_error "FAILED: 1 PG NOT UP"
|
||||
fi
|
||||
|
||||
if ! cmp build/src/block-vitastor.so /usr/lib/x86_64-linux-gnu/qemu/block-vitastor.so; then
|
||||
sudo rm -f /usr/lib/x86_64-linux-gnu/qemu/block-vitastor.so
|
||||
sudo ln -s "$(realpath .)/build/src/block-vitastor.so" /usr/lib/x86_64-linux-gnu/qemu/block-vitastor.so
|
||||
fi
|
||||
|
||||
# Test basic write and snapshot
|
||||
|
||||
LD_PRELOAD=libasan.so.5 \
|
||||
fio -thread -name=test -ioengine=build/src/libfio_vitastor.so -bs=4M -direct=1 -iodepth=1 -fsync=1 -rw=write -etcd=$ETCD_URL -pool=1 -inode=2 -size=32M -cluster_log_level=10
|
||||
|
||||
$ETCDCTL put /vitastor/config/inode/1/2 '{"name":"testimg@0"}'
|
||||
$ETCDCTL put /vitastor/config/inode/1/3 '{"parent_id":2,"name":"testimg"}'
|
||||
|
||||
LD_PRELOAD=libasan.so.5 \
|
||||
fio -thread -name=test -ioengine=build/src/libfio_vitastor.so -bs=4k -direct=1 -iodepth=1 -fsync=32 -buffer_pattern=0xdeadface -rw=randwrite -etcd=$ETCD_URL -pool=1 -inode=3 -size=32M -number_ios=1024
|
||||
|
||||
LD_PRELOAD=libasan.so.5 \
|
||||
fio -thread -name=test -ioengine=build/src/libfio_vitastor.so -bs=4M -direct=1 -iodepth=1 -rw=read -etcd=$ETCD_URL -pool=1 -inode=3 -size=32M
|
||||
|
||||
qemu-img convert -S 4096 -p \
|
||||
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:pool=1:inode=3:size=$((32*1024*1024))" \
|
||||
-O raw ./testdata/merged.bin
|
||||
|
||||
qemu-img convert -S 4096 -p \
|
||||
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:pool=1:inode=2:size=$((32*1024*1024))" \
|
||||
-O raw ./testdata/layer0.bin
|
||||
|
||||
$ETCDCTL put /vitastor/config/inode/1/3 '{"name":"testimg"}'
|
||||
|
||||
qemu-img convert -S 4096 -p \
|
||||
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:pool=1:inode=3:size=$((32*1024*1024))" \
|
||||
-O raw ./testdata/layer1.bin
|
||||
|
||||
node mon/merge.js ./testdata/layer0.bin ./testdata/layer1.bin ./testdata/check.bin
|
||||
|
||||
cmp ./testdata/merged.bin ./testdata/check.bin
|
||||
|
||||
format_green OK
|
Loading…
Reference in New Issue