forked from vitalif/vitastor
Use clean_entry_bitmap_size instead of entry_attr_size back because of changed bitmap handling
parent
d0c2e31312
commit
ab39ce2bbb
|
@ -65,9 +65,8 @@ Input:
|
||||||
- offset, len = offset and length within object. length may be zero, in that case
|
- offset, len = offset and length within object. length may be zero, in that case
|
||||||
read operation only returns the version / write operation only bumps the version
|
read operation only returns the version / write operation only bumps the version
|
||||||
- buf = pre-allocated buffer for data (read) / with data (write). may be NULL if len == 0.
|
- buf = pre-allocated buffer for data (read) / with data (write). may be NULL if len == 0.
|
||||||
- bitmap = pointer to <entry_attr_size> bytes long (usually very short) arbitrary data
|
- bitmap = pointer to the new 'external' object bitmap data. Its part which is respective to the
|
||||||
stored for each object in the metadata area.
|
write request is copied into the metadata area bitwise and stored there.
|
||||||
Called "bitmap" because it's used for the "external bitmap" in Vitastor.
|
|
||||||
|
|
||||||
Output:
|
Output:
|
||||||
- retval = number of bytes actually read/written or negative error number (-EINVAL or -ENOSPC)
|
- retval = number of bytes actually read/written or negative error number (-EINVAL or -ENOSPC)
|
||||||
|
|
|
@ -428,7 +428,7 @@ resume_1:
|
||||||
{
|
{
|
||||||
new_clean_bitmap = (bs->inmemory_meta
|
new_clean_bitmap = (bs->inmemory_meta
|
||||||
? meta_new.buf + meta_new.pos*bs->clean_entry_size + sizeof(clean_disk_entry)
|
? meta_new.buf + meta_new.pos*bs->clean_entry_size + sizeof(clean_disk_entry)
|
||||||
: bs->clean_bitmap + (clean_loc >> bs->block_order)*(bs->clean_entry_bitmap_size + bs->entry_attr_size));
|
: bs->clean_bitmap + (clean_loc >> bs->block_order)*(2*bs->clean_entry_bitmap_size));
|
||||||
if (clean_init_bitmap)
|
if (clean_init_bitmap)
|
||||||
{
|
{
|
||||||
memset(new_clean_bitmap, 0, bs->clean_entry_bitmap_size);
|
memset(new_clean_bitmap, 0, bs->clean_entry_bitmap_size);
|
||||||
|
@ -510,11 +510,11 @@ resume_1:
|
||||||
{
|
{
|
||||||
memcpy(&new_entry->bitmap, new_clean_bitmap, bs->clean_entry_bitmap_size);
|
memcpy(&new_entry->bitmap, new_clean_bitmap, bs->clean_entry_bitmap_size);
|
||||||
}
|
}
|
||||||
if (bs->entry_attr_size)
|
// copy latest external bitmap/attributes
|
||||||
|
if (bs->clean_entry_bitmap_size)
|
||||||
{
|
{
|
||||||
// copy latest external bitmap/attributes
|
void *bmp_ptr = bs->clean_entry_bitmap_size > sizeof(void*) ? dirty_end->second.bitmap : &dirty_end->second.bitmap;
|
||||||
void *bmp_ptr = bs->entry_attr_size > sizeof(void*) ? dirty_end->second.bitmap : &dirty_end->second.bitmap;
|
memcpy((void*)(new_entry+1) + bs->clean_entry_bitmap_size, bmp_ptr, bs->clean_entry_bitmap_size);
|
||||||
memcpy((void*)(new_entry+1) + bs->clean_entry_bitmap_size, bmp_ptr, bs->entry_attr_size);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
await_sqe(6);
|
await_sqe(6);
|
||||||
|
|
|
@ -218,7 +218,7 @@ class blockstore_impl_t
|
||||||
|
|
||||||
uint32_t block_order;
|
uint32_t block_order;
|
||||||
uint64_t block_count;
|
uint64_t block_count;
|
||||||
uint32_t clean_entry_bitmap_size = 0, clean_entry_size = 0, entry_attr_size = 0;
|
uint32_t clean_entry_bitmap_size = 0, clean_entry_size = 0;
|
||||||
|
|
||||||
int meta_fd;
|
int meta_fd;
|
||||||
int data_fd;
|
int data_fd;
|
||||||
|
|
|
@ -98,9 +98,9 @@ void blockstore_init_meta::handle_entries(void* entries, unsigned count, int blo
|
||||||
for (unsigned i = 0; i < count; i++)
|
for (unsigned i = 0; i < count; i++)
|
||||||
{
|
{
|
||||||
clean_disk_entry *entry = (clean_disk_entry*)(entries + i*bs->clean_entry_size);
|
clean_disk_entry *entry = (clean_disk_entry*)(entries + i*bs->clean_entry_size);
|
||||||
if (!bs->inmemory_meta && (bs->clean_entry_bitmap_size || bs->entry_attr_size))
|
if (!bs->inmemory_meta && bs->clean_entry_bitmap_size)
|
||||||
{
|
{
|
||||||
memcpy(bs->clean_bitmap + (done_cnt+i)*(bs->clean_entry_bitmap_size + bs->entry_attr_size), &entry->bitmap, (bs->clean_entry_bitmap_size + bs->entry_attr_size));
|
memcpy(bs->clean_bitmap + (done_cnt+i)*2*bs->clean_entry_bitmap_size, &entry->bitmap, 2*bs->clean_entry_bitmap_size);
|
||||||
}
|
}
|
||||||
if (entry->oid.inode > 0)
|
if (entry->oid.inode > 0)
|
||||||
{
|
{
|
||||||
|
@ -550,9 +550,9 @@ int blockstore_init_journal::handle_journal_part(void *buf, uint64_t done_pos, u
|
||||||
.version = je->small_write.version,
|
.version = je->small_write.version,
|
||||||
};
|
};
|
||||||
void *bmp = (void*)je + sizeof(journal_entry_small_write);
|
void *bmp = (void*)je + sizeof(journal_entry_small_write);
|
||||||
if (bs->entry_attr_size <= sizeof(void*))
|
if (bs->clean_entry_bitmap_size <= sizeof(void*))
|
||||||
{
|
{
|
||||||
memcpy(&bmp, bmp, bs->entry_attr_size);
|
memcpy(&bmp, bmp, bs->clean_entry_bitmap_size);
|
||||||
}
|
}
|
||||||
else if (!bs->journal.inmemory)
|
else if (!bs->journal.inmemory)
|
||||||
{
|
{
|
||||||
|
@ -560,8 +560,8 @@ int blockstore_init_journal::handle_journal_part(void *buf, uint64_t done_pos, u
|
||||||
// will result in a lot of small allocations for entry bitmaps. This can
|
// will result in a lot of small allocations for entry bitmaps. This can
|
||||||
// only be fixed by using a patched map with dynamic entry size, but not
|
// only be fixed by using a patched map with dynamic entry size, but not
|
||||||
// the btree_map, because it doesn't keep iterators valid all the time.
|
// the btree_map, because it doesn't keep iterators valid all the time.
|
||||||
void *bmp_cp = malloc_or_die(bs->entry_attr_size);
|
void *bmp_cp = malloc_or_die(bs->clean_entry_bitmap_size);
|
||||||
memcpy(bmp_cp, bmp, bs->entry_attr_size);
|
memcpy(bmp_cp, bmp, bs->clean_entry_bitmap_size);
|
||||||
bmp = bmp_cp;
|
bmp = bmp_cp;
|
||||||
}
|
}
|
||||||
bs->dirty_db.emplace(ov, (dirty_entry){
|
bs->dirty_db.emplace(ov, (dirty_entry){
|
||||||
|
@ -630,9 +630,9 @@ int blockstore_init_journal::handle_journal_part(void *buf, uint64_t done_pos, u
|
||||||
.version = je->big_write.version,
|
.version = je->big_write.version,
|
||||||
};
|
};
|
||||||
void *bmp = (void*)je + sizeof(journal_entry_big_write);
|
void *bmp = (void*)je + sizeof(journal_entry_big_write);
|
||||||
if (bs->entry_attr_size <= sizeof(void*))
|
if (bs->clean_entry_bitmap_size <= sizeof(void*))
|
||||||
{
|
{
|
||||||
memcpy(&bmp, bmp, bs->entry_attr_size);
|
memcpy(&bmp, bmp, bs->clean_entry_bitmap_size);
|
||||||
}
|
}
|
||||||
else if (!bs->journal.inmemory)
|
else if (!bs->journal.inmemory)
|
||||||
{
|
{
|
||||||
|
@ -640,8 +640,8 @@ int blockstore_init_journal::handle_journal_part(void *buf, uint64_t done_pos, u
|
||||||
// will result in a lot of small allocations for entry bitmaps. This can
|
// will result in a lot of small allocations for entry bitmaps. This can
|
||||||
// only be fixed by using a patched map with dynamic entry size, but not
|
// only be fixed by using a patched map with dynamic entry size, but not
|
||||||
// the btree_map, because it doesn't keep iterators valid all the time.
|
// the btree_map, because it doesn't keep iterators valid all the time.
|
||||||
void *bmp_cp = malloc_or_die(bs->entry_attr_size);
|
void *bmp_cp = malloc_or_die(bs->clean_entry_bitmap_size);
|
||||||
memcpy(bmp_cp, bmp, bs->entry_attr_size);
|
memcpy(bmp_cp, bmp, bs->clean_entry_bitmap_size);
|
||||||
bmp = bmp_cp;
|
bmp = bmp_cp;
|
||||||
}
|
}
|
||||||
auto dirty_it = bs->dirty_db.emplace(ov, (dirty_entry){
|
auto dirty_it = bs->dirty_db.emplace(ov, (dirty_entry){
|
||||||
|
|
|
@ -62,7 +62,6 @@ void blockstore_impl_t::parse_config(blockstore_config_t & config)
|
||||||
cfg_data_size = strtoull(config["data_size"].c_str(), NULL, 10);
|
cfg_data_size = strtoull(config["data_size"].c_str(), NULL, 10);
|
||||||
meta_device = config["meta_device"];
|
meta_device = config["meta_device"];
|
||||||
meta_offset = strtoull(config["meta_offset"].c_str(), NULL, 10);
|
meta_offset = strtoull(config["meta_offset"].c_str(), NULL, 10);
|
||||||
entry_attr_size = strtoull(config["entry_attr_size"].c_str(), NULL, 10);
|
|
||||||
block_size = strtoull(config["block_size"].c_str(), NULL, 10);
|
block_size = strtoull(config["block_size"].c_str(), NULL, 10);
|
||||||
inmemory_meta = config["inmemory_metadata"] != "false";
|
inmemory_meta = config["inmemory_metadata"] != "false";
|
||||||
journal_device = config["journal_device"];
|
journal_device = config["journal_device"];
|
||||||
|
@ -153,11 +152,6 @@ void blockstore_impl_t::parse_config(blockstore_config_t & config)
|
||||||
{
|
{
|
||||||
throw std::runtime_error("meta_offset must be a multiple of meta_block_size = "+std::to_string(meta_block_size));
|
throw std::runtime_error("meta_offset must be a multiple of meta_block_size = "+std::to_string(meta_block_size));
|
||||||
}
|
}
|
||||||
// FIXME: Due to the recent changes in entry_attr handling rename it back to bitmap
|
|
||||||
if (entry_attr_size > meta_block_size/2)
|
|
||||||
{
|
|
||||||
throw std::runtime_error("entry_attr_size is too big");
|
|
||||||
}
|
|
||||||
if (journal.offset % journal_block_size)
|
if (journal.offset % journal_block_size)
|
||||||
{
|
{
|
||||||
throw std::runtime_error("journal_offset must be a multiple of journal_block_size = "+std::to_string(journal_block_size));
|
throw std::runtime_error("journal_offset must be a multiple of journal_block_size = "+std::to_string(journal_block_size));
|
||||||
|
@ -188,7 +182,7 @@ void blockstore_impl_t::parse_config(blockstore_config_t & config)
|
||||||
}
|
}
|
||||||
// init some fields
|
// init some fields
|
||||||
clean_entry_bitmap_size = block_size / bitmap_granularity / 8;
|
clean_entry_bitmap_size = block_size / bitmap_granularity / 8;
|
||||||
clean_entry_size = sizeof(clean_disk_entry) + clean_entry_bitmap_size + entry_attr_size;
|
clean_entry_size = sizeof(clean_disk_entry) + 2*clean_entry_bitmap_size;
|
||||||
journal.block_size = journal_block_size;
|
journal.block_size = journal_block_size;
|
||||||
journal.next_free = journal_block_size;
|
journal.next_free = journal_block_size;
|
||||||
journal.used_start = journal_block_size;
|
journal.used_start = journal_block_size;
|
||||||
|
@ -253,9 +247,9 @@ void blockstore_impl_t::calc_lengths()
|
||||||
if (!metadata_buffer)
|
if (!metadata_buffer)
|
||||||
throw std::runtime_error("Failed to allocate memory for the metadata");
|
throw std::runtime_error("Failed to allocate memory for the metadata");
|
||||||
}
|
}
|
||||||
else if (clean_entry_bitmap_size || entry_attr_size)
|
else if (clean_entry_bitmap_size)
|
||||||
{
|
{
|
||||||
clean_bitmap = (uint8_t*)malloc(block_count * (clean_entry_bitmap_size + entry_attr_size));
|
clean_bitmap = (uint8_t*)malloc(block_count * 2*clean_entry_bitmap_size);
|
||||||
if (!clean_bitmap)
|
if (!clean_bitmap)
|
||||||
throw std::runtime_error("Failed to allocate memory for the metadata sparse write bitmap");
|
throw std::runtime_error("Failed to allocate memory for the metadata sparse write bitmap");
|
||||||
}
|
}
|
||||||
|
|
|
@ -105,7 +105,7 @@ uint8_t* blockstore_impl_t::get_clean_entry_bitmap(uint64_t block_loc, int offse
|
||||||
clean_entry_bitmap = (uint8_t*)(metadata_buffer + sector + pos*clean_entry_size + sizeof(clean_disk_entry) + offset);
|
clean_entry_bitmap = (uint8_t*)(metadata_buffer + sector + pos*clean_entry_size + sizeof(clean_disk_entry) + offset);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
clean_entry_bitmap = (uint8_t*)(clean_bitmap + meta_loc*(clean_entry_bitmap_size + entry_attr_size) + offset);
|
clean_entry_bitmap = (uint8_t*)(clean_bitmap + meta_loc*2*clean_entry_bitmap_size + offset);
|
||||||
return clean_entry_bitmap;
|
return clean_entry_bitmap;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -151,8 +151,8 @@ int blockstore_impl_t::dequeue_read(blockstore_op_t *read_op)
|
||||||
result_version = dirty_it->first.version;
|
result_version = dirty_it->first.version;
|
||||||
if (read_op->bitmap)
|
if (read_op->bitmap)
|
||||||
{
|
{
|
||||||
void *bmp_ptr = (entry_attr_size > sizeof(void*) ? dirty_it->second.bitmap : &dirty_it->second.bitmap);
|
void *bmp_ptr = (clean_entry_bitmap_size > sizeof(void*) ? dirty_it->second.bitmap : &dirty_it->second.bitmap);
|
||||||
memcpy(read_op->bitmap, bmp_ptr, entry_attr_size);
|
memcpy(read_op->bitmap, bmp_ptr, clean_entry_bitmap_size);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (!fulfill_read(read_op, fulfilled, dirty.offset, dirty.offset + dirty.len,
|
if (!fulfill_read(read_op, fulfilled, dirty.offset, dirty.offset + dirty.len,
|
||||||
|
@ -178,7 +178,7 @@ int blockstore_impl_t::dequeue_read(blockstore_op_t *read_op)
|
||||||
if (read_op->bitmap)
|
if (read_op->bitmap)
|
||||||
{
|
{
|
||||||
void *bmp_ptr = get_clean_entry_bitmap(clean_it->second.location, clean_entry_bitmap_size);
|
void *bmp_ptr = get_clean_entry_bitmap(clean_it->second.location, clean_entry_bitmap_size);
|
||||||
memcpy(read_op->bitmap, bmp_ptr, entry_attr_size);
|
memcpy(read_op->bitmap, bmp_ptr, clean_entry_bitmap_size);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (fulfilled < read_op->len)
|
if (fulfilled < read_op->len)
|
||||||
|
|
|
@ -268,7 +268,7 @@ void blockstore_impl_t::erase_dirty(blockstore_dirty_db_t::iterator dirty_start,
|
||||||
{
|
{
|
||||||
journal.used_sectors.erase(dirty_it->second.journal_sector);
|
journal.used_sectors.erase(dirty_it->second.journal_sector);
|
||||||
}
|
}
|
||||||
if (entry_attr_size > sizeof(void*))
|
if (clean_entry_bitmap_size > sizeof(void*))
|
||||||
{
|
{
|
||||||
free(dirty_it->second.bitmap);
|
free(dirty_it->second.bitmap);
|
||||||
dirty_it->second.bitmap = NULL;
|
dirty_it->second.bitmap = NULL;
|
||||||
|
|
|
@ -10,9 +10,9 @@ bool blockstore_impl_t::enqueue_write(blockstore_op_t *op)
|
||||||
bool wait_big = false, wait_del = false;
|
bool wait_big = false, wait_del = false;
|
||||||
void *bmp = NULL;
|
void *bmp = NULL;
|
||||||
uint64_t version = 1;
|
uint64_t version = 1;
|
||||||
if (!is_del && entry_attr_size > sizeof(void*))
|
if (!is_del && clean_entry_bitmap_size > sizeof(void*))
|
||||||
{
|
{
|
||||||
bmp = calloc_or_die(1, entry_attr_size);
|
bmp = calloc_or_die(1, clean_entry_bitmap_size);
|
||||||
}
|
}
|
||||||
if (dirty_db.size() > 0)
|
if (dirty_db.size() > 0)
|
||||||
{
|
{
|
||||||
|
@ -30,8 +30,8 @@ bool blockstore_impl_t::enqueue_write(blockstore_op_t *op)
|
||||||
wait_big = (dirty_it->second.state & BS_ST_TYPE_MASK) == BS_ST_BIG_WRITE
|
wait_big = (dirty_it->second.state & BS_ST_TYPE_MASK) == BS_ST_BIG_WRITE
|
||||||
? !IS_SYNCED(dirty_it->second.state)
|
? !IS_SYNCED(dirty_it->second.state)
|
||||||
: ((dirty_it->second.state & BS_ST_WORKFLOW_MASK) == BS_ST_WAIT_BIG);
|
: ((dirty_it->second.state & BS_ST_WORKFLOW_MASK) == BS_ST_WAIT_BIG);
|
||||||
if (entry_attr_size > sizeof(void*))
|
if (clean_entry_bitmap_size > sizeof(void*))
|
||||||
memcpy(bmp, dirty_it->second.bitmap, entry_attr_size);
|
memcpy(bmp, dirty_it->second.bitmap, clean_entry_bitmap_size);
|
||||||
else
|
else
|
||||||
bmp = dirty_it->second.bitmap;
|
bmp = dirty_it->second.bitmap;
|
||||||
}
|
}
|
||||||
|
@ -43,7 +43,7 @@ bool blockstore_impl_t::enqueue_write(blockstore_op_t *op)
|
||||||
{
|
{
|
||||||
version = clean_it->second.version + 1;
|
version = clean_it->second.version + 1;
|
||||||
void *bmp_ptr = get_clean_entry_bitmap(clean_it->second.location, clean_entry_bitmap_size);
|
void *bmp_ptr = get_clean_entry_bitmap(clean_it->second.location, clean_entry_bitmap_size);
|
||||||
memcpy((entry_attr_size > sizeof(void*) ? bmp : &bmp), bmp_ptr, entry_attr_size);
|
memcpy((clean_entry_bitmap_size > sizeof(void*) ? bmp : &bmp), bmp_ptr, clean_entry_bitmap_size);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -83,7 +83,7 @@ bool blockstore_impl_t::enqueue_write(blockstore_op_t *op)
|
||||||
{
|
{
|
||||||
// Invalid version requested
|
// Invalid version requested
|
||||||
op->retval = -EEXIST;
|
op->retval = -EEXIST;
|
||||||
if (!is_del && entry_attr_size > sizeof(void*))
|
if (!is_del && clean_entry_bitmap_size > sizeof(void*))
|
||||||
{
|
{
|
||||||
free(bmp);
|
free(bmp);
|
||||||
}
|
}
|
||||||
|
@ -127,7 +127,7 @@ bool blockstore_impl_t::enqueue_write(blockstore_op_t *op)
|
||||||
if (op->bitmap)
|
if (op->bitmap)
|
||||||
{
|
{
|
||||||
// Only allow to overwrite part of the object bitmap respective to the write's offset/len
|
// Only allow to overwrite part of the object bitmap respective to the write's offset/len
|
||||||
uint8_t *bmp_ptr = (uint8_t*)(entry_attr_size > sizeof(void*) ? bmp : &bmp);
|
uint8_t *bmp_ptr = (uint8_t*)(clean_entry_bitmap_size > sizeof(void*) ? bmp : &bmp);
|
||||||
uint32_t bit = op->offset/bitmap_granularity;
|
uint32_t bit = op->offset/bitmap_granularity;
|
||||||
uint32_t bits_left = op->len/bitmap_granularity;
|
uint32_t bits_left = op->len/bitmap_granularity;
|
||||||
while (!(bit % 8) && bits_left > 8)
|
while (!(bit % 8) && bits_left > 8)
|
||||||
|
@ -166,7 +166,7 @@ void blockstore_impl_t::cancel_all_writes(blockstore_op_t *op, blockstore_dirty_
|
||||||
{
|
{
|
||||||
while (dirty_it != dirty_db.end() && dirty_it->first.oid == op->oid)
|
while (dirty_it != dirty_db.end() && dirty_it->first.oid == op->oid)
|
||||||
{
|
{
|
||||||
if (entry_attr_size > sizeof(void*))
|
if (clean_entry_bitmap_size > sizeof(void*))
|
||||||
free(dirty_it->second.bitmap);
|
free(dirty_it->second.bitmap);
|
||||||
dirty_db.erase(dirty_it++);
|
dirty_db.erase(dirty_it++);
|
||||||
}
|
}
|
||||||
|
@ -345,7 +345,7 @@ int blockstore_impl_t::dequeue_write(blockstore_op_t *op)
|
||||||
// Then pre-fill journal entry
|
// Then pre-fill journal entry
|
||||||
journal_entry_small_write *je = (journal_entry_small_write*)prefill_single_journal_entry(
|
journal_entry_small_write *je = (journal_entry_small_write*)prefill_single_journal_entry(
|
||||||
journal, op->opcode == BS_OP_WRITE_STABLE ? JE_SMALL_WRITE_INSTANT : JE_SMALL_WRITE,
|
journal, op->opcode == BS_OP_WRITE_STABLE ? JE_SMALL_WRITE_INSTANT : JE_SMALL_WRITE,
|
||||||
sizeof(journal_entry_small_write) + entry_attr_size
|
sizeof(journal_entry_small_write) + clean_entry_bitmap_size
|
||||||
);
|
);
|
||||||
dirty_it->second.journal_sector = journal.sector_info[journal.cur_sector].offset;
|
dirty_it->second.journal_sector = journal.sector_info[journal.cur_sector].offset;
|
||||||
journal.used_sectors[journal.sector_info[journal.cur_sector].offset]++;
|
journal.used_sectors[journal.sector_info[journal.cur_sector].offset]++;
|
||||||
|
@ -364,7 +364,7 @@ int blockstore_impl_t::dequeue_write(blockstore_op_t *op)
|
||||||
je->len = op->len;
|
je->len = op->len;
|
||||||
je->data_offset = journal.next_free;
|
je->data_offset = journal.next_free;
|
||||||
je->crc32_data = crc32c(0, op->buf, op->len);
|
je->crc32_data = crc32c(0, op->buf, op->len);
|
||||||
memcpy((void*)(je+1), (entry_attr_size > sizeof(void*) ? dirty_it->second.bitmap : &dirty_it->second.bitmap), entry_attr_size);
|
memcpy((void*)(je+1), (clean_entry_bitmap_size > sizeof(void*) ? dirty_it->second.bitmap : &dirty_it->second.bitmap), clean_entry_bitmap_size);
|
||||||
je->crc32 = je_crc32((journal_entry*)je);
|
je->crc32 = je_crc32((journal_entry*)je);
|
||||||
journal.crc32_last = je->crc32;
|
journal.crc32_last = je->crc32;
|
||||||
if (immediate_commit != IMMEDIATE_NONE)
|
if (immediate_commit != IMMEDIATE_NONE)
|
||||||
|
@ -437,7 +437,7 @@ resume_2:
|
||||||
BS_SUBMIT_GET_SQE_DECL(sqe);
|
BS_SUBMIT_GET_SQE_DECL(sqe);
|
||||||
je = (journal_entry_big_write*)prefill_single_journal_entry(
|
je = (journal_entry_big_write*)prefill_single_journal_entry(
|
||||||
journal, op->opcode == BS_OP_WRITE_STABLE ? JE_BIG_WRITE_INSTANT : JE_BIG_WRITE,
|
journal, op->opcode == BS_OP_WRITE_STABLE ? JE_BIG_WRITE_INSTANT : JE_BIG_WRITE,
|
||||||
sizeof(journal_entry_big_write) + entry_attr_size
|
sizeof(journal_entry_big_write) + clean_entry_bitmap_size
|
||||||
);
|
);
|
||||||
dirty_it->second.journal_sector = journal.sector_info[journal.cur_sector].offset;
|
dirty_it->second.journal_sector = journal.sector_info[journal.cur_sector].offset;
|
||||||
journal.used_sectors[journal.sector_info[journal.cur_sector].offset]++;
|
journal.used_sectors[journal.sector_info[journal.cur_sector].offset]++;
|
||||||
|
@ -453,7 +453,7 @@ resume_2:
|
||||||
je->offset = op->offset;
|
je->offset = op->offset;
|
||||||
je->len = op->len;
|
je->len = op->len;
|
||||||
je->location = dirty_it->second.location;
|
je->location = dirty_it->second.location;
|
||||||
memcpy((void*)(je+1), (entry_attr_size > sizeof(void*) ? dirty_it->second.bitmap : &dirty_it->second.bitmap), entry_attr_size);
|
memcpy((void*)(je+1), (clean_entry_bitmap_size > sizeof(void*) ? dirty_it->second.bitmap : &dirty_it->second.bitmap), clean_entry_bitmap_size);
|
||||||
je->crc32 = je_crc32((journal_entry*)je);
|
je->crc32 = je_crc32((journal_entry*)je);
|
||||||
journal.crc32_last = je->crc32;
|
journal.crc32_last = je->crc32;
|
||||||
prepare_journal_sector_write(journal, journal.cur_sector, sqe,
|
prepare_journal_sector_write(journal, journal.cur_sector, sqe,
|
||||||
|
|
|
@ -18,10 +18,7 @@ osd_t::osd_t(blockstore_config_t & config, ring_loop_t *ringloop)
|
||||||
bs_block_size = DEFAULT_BLOCK_SIZE;
|
bs_block_size = DEFAULT_BLOCK_SIZE;
|
||||||
if (!bs_bitmap_granularity)
|
if (!bs_bitmap_granularity)
|
||||||
bs_bitmap_granularity = DEFAULT_BITMAP_GRANULARITY;
|
bs_bitmap_granularity = DEFAULT_BITMAP_GRANULARITY;
|
||||||
|
clean_entry_bitmap_size = bs_block_size / bs_bitmap_granularity / 8;
|
||||||
// Force external bitmap size
|
|
||||||
entry_attr_size = bs_block_size / bs_bitmap_granularity / 8;
|
|
||||||
config["entry_attr_size"] = std::to_string(entry_attr_size);
|
|
||||||
|
|
||||||
this->config = config;
|
this->config = config;
|
||||||
this->ringloop = ringloop;
|
this->ringloop = ringloop;
|
||||||
|
|
|
@ -126,7 +126,7 @@ class osd_t
|
||||||
bool stopping = false;
|
bool stopping = false;
|
||||||
int inflight_ops = 0;
|
int inflight_ops = 0;
|
||||||
blockstore_t *bs;
|
blockstore_t *bs;
|
||||||
uint32_t bs_block_size, bs_bitmap_granularity, entry_attr_size;
|
uint32_t bs_block_size, bs_bitmap_granularity, clean_entry_bitmap_size;
|
||||||
ring_loop_t *ringloop;
|
ring_loop_t *ringloop;
|
||||||
timerfd_manager_t *tfd = NULL;
|
timerfd_manager_t *tfd = NULL;
|
||||||
epoll_manager_t *epmgr = NULL;
|
epoll_manager_t *epmgr = NULL;
|
||||||
|
|
|
@ -53,7 +53,7 @@ bool osd_t::prepare_primary_rw(osd_op_t *cur_op)
|
||||||
}
|
}
|
||||||
int stripe_count = (pool_cfg.scheme == POOL_SCHEME_REPLICATED ? 1 : pg_it->second.pg_size);
|
int stripe_count = (pool_cfg.scheme == POOL_SCHEME_REPLICATED ? 1 : pg_it->second.pg_size);
|
||||||
osd_primary_op_data_t *op_data = (osd_primary_op_data_t*)calloc_or_die(
|
osd_primary_op_data_t *op_data = (osd_primary_op_data_t*)calloc_or_die(
|
||||||
1, sizeof(osd_primary_op_data_t) + (entry_attr_size + sizeof(osd_rmw_stripe_t)) * stripe_count
|
1, sizeof(osd_primary_op_data_t) + (clean_entry_bitmap_size + sizeof(osd_rmw_stripe_t)) * stripe_count
|
||||||
);
|
);
|
||||||
op_data->pg_num = pg_num;
|
op_data->pg_num = pg_num;
|
||||||
op_data->oid = oid;
|
op_data->oid = oid;
|
||||||
|
@ -65,7 +65,7 @@ bool osd_t::prepare_primary_rw(osd_op_t *cur_op)
|
||||||
// Allocate bitmaps along with stripes to avoid extra allocations and fragmentation
|
// Allocate bitmaps along with stripes to avoid extra allocations and fragmentation
|
||||||
for (int i = 0; i < stripe_count; i++)
|
for (int i = 0; i < stripe_count; i++)
|
||||||
{
|
{
|
||||||
op_data->stripes[i].bmp_buf = (void*)(op_data->stripes+stripe_count) + entry_attr_size*i;
|
op_data->stripes[i].bmp_buf = (void*)(op_data->stripes+stripe_count) + clean_entry_bitmap_size*i;
|
||||||
}
|
}
|
||||||
pg_it->second.inflight++;
|
pg_it->second.inflight++;
|
||||||
return true;
|
return true;
|
||||||
|
@ -154,18 +154,18 @@ resume_2:
|
||||||
finish_op(cur_op, op_data->epipe > 0 ? -EPIPE : -EIO);
|
finish_op(cur_op, op_data->epipe > 0 ? -EPIPE : -EIO);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
cur_op->reply.rw.bitmap_len = op_data->pg_data_size * entry_attr_size;
|
cur_op->reply.rw.bitmap_len = op_data->pg_data_size * clean_entry_bitmap_size;
|
||||||
if (op_data->degraded)
|
if (op_data->degraded)
|
||||||
{
|
{
|
||||||
// Reconstruct missing stripes
|
// Reconstruct missing stripes
|
||||||
osd_rmw_stripe_t *stripes = op_data->stripes;
|
osd_rmw_stripe_t *stripes = op_data->stripes;
|
||||||
if (op_data->scheme == POOL_SCHEME_XOR)
|
if (op_data->scheme == POOL_SCHEME_XOR)
|
||||||
{
|
{
|
||||||
reconstruct_stripes_xor(stripes, op_data->pg_size, entry_attr_size);
|
reconstruct_stripes_xor(stripes, op_data->pg_size, clean_entry_bitmap_size);
|
||||||
}
|
}
|
||||||
else if (op_data->scheme == POOL_SCHEME_JERASURE)
|
else if (op_data->scheme == POOL_SCHEME_JERASURE)
|
||||||
{
|
{
|
||||||
reconstruct_stripes_jerasure(stripes, op_data->pg_size, op_data->pg_data_size, entry_attr_size);
|
reconstruct_stripes_jerasure(stripes, op_data->pg_size, op_data->pg_data_size, clean_entry_bitmap_size);
|
||||||
}
|
}
|
||||||
cur_op->iov.push_back(op_data->stripes[0].bmp_buf, cur_op->reply.rw.bitmap_len);
|
cur_op->iov.push_back(op_data->stripes[0].bmp_buf, cur_op->reply.rw.bitmap_len);
|
||||||
for (int role = 0; role < op_data->pg_size; role++)
|
for (int role = 0; role < op_data->pg_size; role++)
|
||||||
|
|
|
@ -155,7 +155,7 @@ void osd_t::submit_primary_subops(int submit_type, uint64_t op_version, int pg_s
|
||||||
clock_gettime(CLOCK_REALTIME, &subops[i].tv_begin);
|
clock_gettime(CLOCK_REALTIME, &subops[i].tv_begin);
|
||||||
subops[i].op_type = (uint64_t)cur_op;
|
subops[i].op_type = (uint64_t)cur_op;
|
||||||
subops[i].bitmap = stripes[stripe_num].bmp_buf;
|
subops[i].bitmap = stripes[stripe_num].bmp_buf;
|
||||||
subops[i].bitmap_len = entry_attr_size;
|
subops[i].bitmap_len = clean_entry_bitmap_size;
|
||||||
subops[i].bs_op = new blockstore_op_t({
|
subops[i].bs_op = new blockstore_op_t({
|
||||||
.opcode = (uint64_t)(wr ? (rep ? BS_OP_WRITE_STABLE : BS_OP_WRITE) : BS_OP_READ),
|
.opcode = (uint64_t)(wr ? (rep ? BS_OP_WRITE_STABLE : BS_OP_WRITE) : BS_OP_READ),
|
||||||
.callback = [subop = &subops[i], this](blockstore_op_t *bs_subop)
|
.callback = [subop = &subops[i], this](blockstore_op_t *bs_subop)
|
||||||
|
@ -186,7 +186,7 @@ void osd_t::submit_primary_subops(int submit_type, uint64_t op_version, int pg_s
|
||||||
subops[i].op_type = OSD_OP_OUT;
|
subops[i].op_type = OSD_OP_OUT;
|
||||||
subops[i].peer_fd = c_cli.osd_peer_fds.at(role_osd_num);
|
subops[i].peer_fd = c_cli.osd_peer_fds.at(role_osd_num);
|
||||||
subops[i].bitmap = stripes[stripe_num].bmp_buf;
|
subops[i].bitmap = stripes[stripe_num].bmp_buf;
|
||||||
subops[i].bitmap_len = entry_attr_size;
|
subops[i].bitmap_len = clean_entry_bitmap_size;
|
||||||
subops[i].req.sec_rw = {
|
subops[i].req.sec_rw = {
|
||||||
.header = {
|
.header = {
|
||||||
.magic = SECONDARY_OSD_OP_MAGIC,
|
.magic = SECONDARY_OSD_OP_MAGIC,
|
||||||
|
@ -200,7 +200,7 @@ void osd_t::submit_primary_subops(int submit_type, uint64_t op_version, int pg_s
|
||||||
.version = op_version,
|
.version = op_version,
|
||||||
.offset = wr ? stripes[stripe_num].write_start : stripes[stripe_num].read_start,
|
.offset = wr ? stripes[stripe_num].write_start : stripes[stripe_num].read_start,
|
||||||
.len = wr ? stripes[stripe_num].write_end - stripes[stripe_num].write_start : stripes[stripe_num].read_end - stripes[stripe_num].read_start,
|
.len = wr ? stripes[stripe_num].write_end - stripes[stripe_num].write_start : stripes[stripe_num].read_end - stripes[stripe_num].read_start,
|
||||||
.attr_len = wr ? entry_attr_size : 0,
|
.attr_len = wr ? clean_entry_bitmap_size : 0,
|
||||||
};
|
};
|
||||||
#ifdef OSD_DEBUG
|
#ifdef OSD_DEBUG
|
||||||
printf(
|
printf(
|
||||||
|
|
|
@ -78,7 +78,7 @@ resume_1:
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
cur_op->rmw_buf = calc_rmw(cur_op->buf, op_data->stripes, op_data->prev_set,
|
cur_op->rmw_buf = calc_rmw(cur_op->buf, op_data->stripes, op_data->prev_set,
|
||||||
pg.pg_size, op_data->pg_data_size, pg.pg_cursize, pg.cur_set.data(), bs_block_size, entry_attr_size);
|
pg.pg_size, op_data->pg_data_size, pg.pg_cursize, pg.cur_set.data(), bs_block_size, clean_entry_bitmap_size);
|
||||||
if (!cur_op->rmw_buf)
|
if (!cur_op->rmw_buf)
|
||||||
{
|
{
|
||||||
// Refuse partial overwrite of an incomplete object
|
// Refuse partial overwrite of an incomplete object
|
||||||
|
@ -123,11 +123,11 @@ resume_3:
|
||||||
// Recover missing stripes, calculate parity
|
// Recover missing stripes, calculate parity
|
||||||
if (pg.scheme == POOL_SCHEME_XOR)
|
if (pg.scheme == POOL_SCHEME_XOR)
|
||||||
{
|
{
|
||||||
calc_rmw_parity_xor(op_data->stripes, pg.pg_size, op_data->prev_set, pg.cur_set.data(), bs_block_size, entry_attr_size);
|
calc_rmw_parity_xor(op_data->stripes, pg.pg_size, op_data->prev_set, pg.cur_set.data(), bs_block_size, clean_entry_bitmap_size);
|
||||||
}
|
}
|
||||||
else if (pg.scheme == POOL_SCHEME_JERASURE)
|
else if (pg.scheme == POOL_SCHEME_JERASURE)
|
||||||
{
|
{
|
||||||
calc_rmw_parity_jerasure(op_data->stripes, pg.pg_size, op_data->pg_data_size, op_data->prev_set, pg.cur_set.data(), bs_block_size, entry_attr_size);
|
calc_rmw_parity_jerasure(op_data->stripes, pg.pg_size, op_data->pg_data_size, op_data->prev_set, pg.cur_set.data(), bs_block_size, clean_entry_bitmap_size);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Send writes
|
// Send writes
|
||||||
|
|
|
@ -20,7 +20,7 @@ void osd_t::secondary_op_callback(osd_op_t *op)
|
||||||
if (op->req.hdr.opcode == OSD_OP_SEC_READ)
|
if (op->req.hdr.opcode == OSD_OP_SEC_READ)
|
||||||
{
|
{
|
||||||
if (op->bs_op->retval >= 0)
|
if (op->bs_op->retval >= 0)
|
||||||
op->reply.sec_rw.attr_len = entry_attr_size;
|
op->reply.sec_rw.attr_len = clean_entry_bitmap_size;
|
||||||
else
|
else
|
||||||
op->reply.sec_rw.attr_len = 0;
|
op->reply.sec_rw.attr_len = 0;
|
||||||
if (op->bs_op->retval > 0)
|
if (op->bs_op->retval > 0)
|
||||||
|
@ -62,8 +62,8 @@ void osd_t::exec_secondary(osd_op_t *cur_op)
|
||||||
if (cur_op->req.hdr.opcode == OSD_OP_SEC_READ)
|
if (cur_op->req.hdr.opcode == OSD_OP_SEC_READ)
|
||||||
{
|
{
|
||||||
// Allocate memory for the read operation
|
// Allocate memory for the read operation
|
||||||
if (entry_attr_size > sizeof(unsigned))
|
if (clean_entry_bitmap_size > sizeof(unsigned))
|
||||||
cur_op->bitmap = cur_op->rmw_buf = malloc_or_die(entry_attr_size);
|
cur_op->bitmap = cur_op->rmw_buf = malloc_or_die(clean_entry_bitmap_size);
|
||||||
else
|
else
|
||||||
cur_op->bitmap = &cur_op->bmp_data;
|
cur_op->bitmap = &cur_op->bmp_data;
|
||||||
if (cur_op->req.sec_rw.len > 0)
|
if (cur_op->req.sec_rw.len > 0)
|
||||||
|
|
Loading…
Reference in New Issue