Verify data crc32 when reading journal
parent
2039df76a5
commit
9260cd263a
|
@ -91,7 +91,6 @@ void blockstore::loop()
|
||||||
delete journal_init_reader;
|
delete journal_init_reader;
|
||||||
journal_init_reader = NULL;
|
journal_init_reader = NULL;
|
||||||
initialized = 10;
|
initialized = 10;
|
||||||
printf("journal read\n");
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -270,6 +270,8 @@ class blockstore
|
||||||
uint64_t meta_offset, meta_size, meta_area, meta_len;
|
uint64_t meta_offset, meta_size, meta_area, meta_len;
|
||||||
uint64_t data_offset, data_size, data_len;
|
uint64_t data_offset, data_size, data_len;
|
||||||
|
|
||||||
|
// FIXME: add readonly option
|
||||||
|
|
||||||
struct journal_t journal;
|
struct journal_t journal;
|
||||||
journal_flusher_t *flusher;
|
journal_flusher_t *flusher;
|
||||||
|
|
||||||
|
|
|
@ -137,17 +137,18 @@ bool iszero(uint64_t *buf, int len)
|
||||||
|
|
||||||
void blockstore_init_journal::handle_event(ring_data_t *data1)
|
void blockstore_init_journal::handle_event(ring_data_t *data1)
|
||||||
{
|
{
|
||||||
// Step 3: Read journal
|
if (data1->res <= 0)
|
||||||
if (data1->res < 0)
|
|
||||||
{
|
{
|
||||||
throw std::runtime_error(
|
throw std::runtime_error(
|
||||||
std::string("read journal failed at offset ") + std::to_string(journal_pos) +
|
std::string("read journal failed at offset ") + std::to_string(journal_pos) +
|
||||||
std::string(": ") + strerror(-data1->res)
|
std::string(": ") + strerror(-data1->res)
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
done_pos = journal_pos;
|
done.push_back({
|
||||||
done_buf = submitted;
|
.buf = submitted_buf,
|
||||||
done_len = data1->res;
|
.pos = journal_pos,
|
||||||
|
.len = (uint64_t)data1->res,
|
||||||
|
});
|
||||||
journal_pos += data1->res;
|
journal_pos += data1->res;
|
||||||
if (journal_pos >= bs->journal.len)
|
if (journal_pos >= bs->journal.len)
|
||||||
{
|
{
|
||||||
|
@ -155,7 +156,7 @@ void blockstore_init_journal::handle_event(ring_data_t *data1)
|
||||||
journal_pos = 512;
|
journal_pos = 512;
|
||||||
wrapped = true;
|
wrapped = true;
|
||||||
}
|
}
|
||||||
submitted = 0;
|
submitted_buf = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
#define GET_SQE() \
|
#define GET_SQE() \
|
||||||
|
@ -174,21 +175,23 @@ int blockstore_init_journal::loop()
|
||||||
goto resume_3;
|
goto resume_3;
|
||||||
else if (wait_state == 4)
|
else if (wait_state == 4)
|
||||||
goto resume_4;
|
goto resume_4;
|
||||||
|
else if (wait_state == 5)
|
||||||
|
goto resume_5;
|
||||||
printf("Reading blockstore journal\n");
|
printf("Reading blockstore journal\n");
|
||||||
if (!bs->journal.inmemory)
|
if (!bs->journal.inmemory)
|
||||||
{
|
{
|
||||||
journal_buffer = (uint8_t*)memalign(DISK_ALIGNMENT, 2*JOURNAL_BUFFER_SIZE);
|
submitted_buf = memalign(512, 1024);
|
||||||
if (!journal_buffer)
|
if (!submitted_buf)
|
||||||
throw std::bad_alloc();
|
throw std::bad_alloc();
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
journal_buffer = bs->journal.buffer;
|
submitted_buf = bs->journal.buffer;
|
||||||
// Read first block of the journal
|
// Read first block of the journal
|
||||||
sqe = bs->get_sqe();
|
sqe = bs->get_sqe();
|
||||||
if (!sqe)
|
if (!sqe)
|
||||||
throw std::runtime_error("io_uring is full while trying to read journal");
|
throw std::runtime_error("io_uring is full while trying to read journal");
|
||||||
data = ((ring_data_t*)sqe->user_data);
|
data = ((ring_data_t*)sqe->user_data);
|
||||||
data->iov = { journal_buffer, 512 };
|
data->iov = { submitted_buf, 512 };
|
||||||
data->callback = simple_callback;
|
data->callback = simple_callback;
|
||||||
my_uring_prep_readv(sqe, bs->journal.fd, &data->iov, 1, bs->journal.offset);
|
my_uring_prep_readv(sqe, bs->journal.fd, &data->iov, 1, bs->journal.offset);
|
||||||
bs->ringloop->submit();
|
bs->ringloop->submit();
|
||||||
|
@ -199,7 +202,7 @@ resume_1:
|
||||||
wait_state = 1;
|
wait_state = 1;
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
if (iszero((uint64_t*)journal_buffer, 3))
|
if (iszero((uint64_t*)submitted_buf, 3))
|
||||||
{
|
{
|
||||||
// Journal is empty
|
// Journal is empty
|
||||||
// FIXME handle this wrapping to 512 better
|
// FIXME handle this wrapping to 512 better
|
||||||
|
@ -209,8 +212,8 @@ resume_1:
|
||||||
// Cool effect. Same operations result in journal replay.
|
// Cool effect. Same operations result in journal replay.
|
||||||
// FIXME: Randomize initial crc32. Track crc32 when trimming.
|
// FIXME: Randomize initial crc32. Track crc32 when trimming.
|
||||||
GET_SQE();
|
GET_SQE();
|
||||||
memset(journal_buffer, 0, 1024);
|
memset(submitted_buf, 0, 1024);
|
||||||
*((journal_entry_start*)journal_buffer) = {
|
*((journal_entry_start*)submitted_buf) = {
|
||||||
.crc32 = 0,
|
.crc32 = 0,
|
||||||
.magic = JOURNAL_MAGIC,
|
.magic = JOURNAL_MAGIC,
|
||||||
.type = JE_START,
|
.type = JE_START,
|
||||||
|
@ -218,8 +221,8 @@ resume_1:
|
||||||
.reserved = 0,
|
.reserved = 0,
|
||||||
.journal_start = 512,
|
.journal_start = 512,
|
||||||
};
|
};
|
||||||
((journal_entry_start*)journal_buffer)->crc32 = je_crc32((journal_entry*)journal_buffer);
|
((journal_entry_start*)submitted_buf)->crc32 = je_crc32((journal_entry*)submitted_buf);
|
||||||
data->iov = (struct iovec){ journal_buffer, 1024 };
|
data->iov = (struct iovec){ submitted_buf, 1024 };
|
||||||
data->callback = simple_callback;
|
data->callback = simple_callback;
|
||||||
my_uring_prep_writev(sqe, bs->journal.fd, &data->iov, 1, bs->journal.offset);
|
my_uring_prep_writev(sqe, bs->journal.fd, &data->iov, 1, bs->journal.offset);
|
||||||
wait_count++;
|
wait_count++;
|
||||||
|
@ -236,11 +239,13 @@ resume_1:
|
||||||
wait_state = 4;
|
wait_state = 4;
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
if (!bs->journal.inmemory)
|
||||||
|
free(submitted_buf);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
// First block always contains a single JE_START entry
|
// First block always contains a single JE_START entry
|
||||||
je_start = (journal_entry_start*)journal_buffer;
|
je_start = (journal_entry_start*)submitted_buf;
|
||||||
if (je_start->magic != JOURNAL_MAGIC ||
|
if (je_start->magic != JOURNAL_MAGIC ||
|
||||||
je_start->type != JE_START ||
|
je_start->type != JE_START ||
|
||||||
je_start->size != sizeof(journal_entry_start) ||
|
je_start->size != sizeof(journal_entry_start) ||
|
||||||
|
@ -250,12 +255,15 @@ resume_1:
|
||||||
throw std::runtime_error("first entry of the journal is corrupt");
|
throw std::runtime_error("first entry of the journal is corrupt");
|
||||||
}
|
}
|
||||||
next_free = journal_pos = bs->journal.used_start = je_start->journal_start;
|
next_free = journal_pos = bs->journal.used_start = je_start->journal_start;
|
||||||
|
if (!bs->journal.inmemory)
|
||||||
|
free(submitted_buf);
|
||||||
|
submitted_buf = NULL;
|
||||||
crc32_last = 0;
|
crc32_last = 0;
|
||||||
// Read journal
|
// Read journal
|
||||||
while (1)
|
while (1)
|
||||||
{
|
{
|
||||||
resume_2:
|
resume_2:
|
||||||
if (submitted)
|
if (submitted_buf)
|
||||||
{
|
{
|
||||||
wait_state = 2;
|
wait_state = 2;
|
||||||
return 1;
|
return 1;
|
||||||
|
@ -265,31 +273,71 @@ resume_1:
|
||||||
GET_SQE();
|
GET_SQE();
|
||||||
uint64_t end = bs->journal.len;
|
uint64_t end = bs->journal.len;
|
||||||
if (journal_pos < bs->journal.used_start)
|
if (journal_pos < bs->journal.used_start)
|
||||||
{
|
|
||||||
end = bs->journal.used_start;
|
end = bs->journal.used_start;
|
||||||
}
|
if (!bs->journal.inmemory)
|
||||||
|
submitted_buf = memalign(512, JOURNAL_BUFFER_SIZE);
|
||||||
|
else
|
||||||
|
submitted_buf = bs->journal.buffer + journal_pos;
|
||||||
data->iov = {
|
data->iov = {
|
||||||
journal_buffer + (bs->journal.inmemory ? journal_pos : (done_buf == 1 ? JOURNAL_BUFFER_SIZE : 0)),
|
submitted_buf,
|
||||||
end - journal_pos < JOURNAL_BUFFER_SIZE ? end - journal_pos : JOURNAL_BUFFER_SIZE,
|
end - journal_pos < JOURNAL_BUFFER_SIZE ? end - journal_pos : JOURNAL_BUFFER_SIZE,
|
||||||
};
|
};
|
||||||
data->callback = [this](ring_data_t *data1) { handle_event(data1); };
|
data->callback = [this](ring_data_t *data1) { handle_event(data1); };
|
||||||
my_uring_prep_readv(sqe, bs->journal.fd, &data->iov, 1, bs->journal.offset + journal_pos);
|
my_uring_prep_readv(sqe, bs->journal.fd, &data->iov, 1, bs->journal.offset + journal_pos);
|
||||||
bs->ringloop->submit();
|
bs->ringloop->submit();
|
||||||
submitted = done_buf == 1 ? 2 : 1;
|
|
||||||
}
|
}
|
||||||
if (done_buf && handle_journal_part(journal_buffer + (bs->journal.inmemory
|
while (done.size() > 0)
|
||||||
? done_pos
|
|
||||||
: (done_buf == 1 ? 0 : JOURNAL_BUFFER_SIZE)), done_len) == 0)
|
|
||||||
{
|
{
|
||||||
// journal ended. wait for the next read to complete, then stop
|
handle_res = handle_journal_part(done[0].buf, done[0].pos, done[0].len);
|
||||||
resume_3:
|
if (handle_res == 0)
|
||||||
if (submitted)
|
|
||||||
{
|
{
|
||||||
wait_state = 3;
|
// journal ended
|
||||||
return 1;
|
// zero out corrupted entry, if required
|
||||||
|
if (init_write_buf)
|
||||||
|
{
|
||||||
|
GET_SQE();
|
||||||
|
data->iov = { init_write_buf, 512 };
|
||||||
|
data->callback = simple_callback;
|
||||||
|
wait_count++;
|
||||||
|
my_uring_prep_writev(sqe, bs->journal.fd, &data->iov, 1, bs->journal.offset + init_write_sector);
|
||||||
|
bs->ringloop->submit();
|
||||||
|
resume_5:
|
||||||
|
if (wait_count > 0)
|
||||||
|
{
|
||||||
|
wait_state = 5;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// wait for the next read to complete, then stop
|
||||||
|
resume_3:
|
||||||
|
if (submitted_buf)
|
||||||
|
{
|
||||||
|
wait_state = 3;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
// free buffers
|
||||||
|
if (!bs->journal.inmemory)
|
||||||
|
for (auto & e: done)
|
||||||
|
free(e.buf);
|
||||||
|
done.clear();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
else if (handle_res == 1)
|
||||||
|
{
|
||||||
|
// OK, remove it
|
||||||
|
if (!bs->journal.inmemory)
|
||||||
|
{
|
||||||
|
free(done[0].buf);
|
||||||
|
}
|
||||||
|
done.erase(done.begin());
|
||||||
|
}
|
||||||
|
else if (handle_res == 2)
|
||||||
|
{
|
||||||
|
// Need to wait for more reads
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (!submitted)
|
if (!submitted_buf)
|
||||||
{
|
{
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -298,25 +346,30 @@ resume_1:
|
||||||
// Trim journal on start so we don't stall when all entries are older
|
// Trim journal on start so we don't stall when all entries are older
|
||||||
bs->journal.trim();
|
bs->journal.trim();
|
||||||
printf("Journal entries loaded: %lu, free blocks: %lu / %lu\n", entries_loaded, bs->data_alloc->get_free_count(), bs->block_count);
|
printf("Journal entries loaded: %lu, free blocks: %lu / %lu\n", entries_loaded, bs->data_alloc->get_free_count(), bs->block_count);
|
||||||
if (!bs->journal.inmemory)
|
|
||||||
{
|
|
||||||
free(journal_buffer);
|
|
||||||
}
|
|
||||||
bs->journal.crc32_last = crc32_last;
|
bs->journal.crc32_last = crc32_last;
|
||||||
journal_buffer = NULL;
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
int blockstore_init_journal::handle_journal_part(void *buf, uint64_t len)
|
int blockstore_init_journal::handle_journal_part(void *buf, uint64_t done_pos, uint64_t len)
|
||||||
{
|
{
|
||||||
|
uint64_t proc_pos, pos;
|
||||||
|
if (continue_pos != 0)
|
||||||
|
{
|
||||||
|
proc_pos = (continue_pos / 512) * 512;
|
||||||
|
pos = continue_pos % 512;
|
||||||
|
continue_pos = 0;
|
||||||
|
goto resume;
|
||||||
|
}
|
||||||
while (next_free >= done_pos && next_free < done_pos+len)
|
while (next_free >= done_pos && next_free < done_pos+len)
|
||||||
{
|
{
|
||||||
uint64_t proc_pos = next_free, pos = 0;
|
proc_pos = next_free;
|
||||||
|
pos = 0;
|
||||||
next_free += 512;
|
next_free += 512;
|
||||||
if (next_free >= bs->journal.len)
|
if (next_free >= bs->journal.len)
|
||||||
{
|
{
|
||||||
next_free = 512;
|
next_free = 512;
|
||||||
}
|
}
|
||||||
|
resume:
|
||||||
while (pos < 512)
|
while (pos < 512)
|
||||||
{
|
{
|
||||||
journal_entry *je = (journal_entry*)((uint8_t*)buf + proc_pos - done_pos + pos);
|
journal_entry *je = (journal_entry*)((uint8_t*)buf + proc_pos - done_pos + pos);
|
||||||
|
@ -335,12 +388,13 @@ int blockstore_init_journal::handle_journal_part(void *buf, uint64_t len)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
started = true;
|
|
||||||
pos += je->size;
|
|
||||||
crc32_last = je->crc32;
|
|
||||||
if (je->type == JE_SMALL_WRITE)
|
if (je->type == JE_SMALL_WRITE)
|
||||||
{
|
{
|
||||||
|
#ifdef BLOCKSTORE_DEBUG
|
||||||
|
printf("je_small_write oid=%lu:%lu ver=%lu offset=%u len=%u\n", je->small_write.oid.inode, je->small_write.oid.stripe, je->small_write.version, je->small_write.offset, je->small_write.len);
|
||||||
|
#endif
|
||||||
// oid, version, offset, len
|
// oid, version, offset, len
|
||||||
|
uint64_t prev_free = next_free;
|
||||||
if (next_free + je->small_write.len > bs->journal.len)
|
if (next_free + je->small_write.len > bs->journal.len)
|
||||||
{
|
{
|
||||||
// data continues from the beginning of the journal
|
// data continues from the beginning of the journal
|
||||||
|
@ -358,6 +412,45 @@ int blockstore_init_journal::handle_journal_part(void *buf, uint64_t len)
|
||||||
snprintf(err, 1024, "BUG: calculated journal data offset (%lu) != stored journal data offset (%lu)", location, je->small_write.data_offset);
|
snprintf(err, 1024, "BUG: calculated journal data offset (%lu) != stored journal data offset (%lu)", location, je->small_write.data_offset);
|
||||||
throw std::runtime_error(err);
|
throw std::runtime_error(err);
|
||||||
}
|
}
|
||||||
|
uint32_t data_crc32 = 0;
|
||||||
|
if (location >= done_pos && location+je->small_write.len <= done_pos+len)
|
||||||
|
{
|
||||||
|
// data is within this buffer
|
||||||
|
data_crc32 = crc32c(0, buf + location - done_pos, je->small_write.len);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// this case is even more interesting because we must carry data crc32 check to next buffer(s)
|
||||||
|
uint64_t covered = 0;
|
||||||
|
for (int i = 0; i < done.size(); i++)
|
||||||
|
{
|
||||||
|
if (location+je->small_write.len > done[i].pos &&
|
||||||
|
location < done[i].pos+done[i].len)
|
||||||
|
{
|
||||||
|
uint64_t part_end = (location+je->small_write.len < done[i].pos+done[i].len
|
||||||
|
? location+je->small_write.len : done[i].pos+done[i].len);
|
||||||
|
uint64_t part_begin = (location < done[i].pos ? done[i].pos : location);
|
||||||
|
covered += part_end - part_begin;
|
||||||
|
data_crc32 = crc32c(data_crc32, done[i].buf + part_begin - done[i].pos, part_end - part_begin);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (covered < je->small_write.len)
|
||||||
|
{
|
||||||
|
continue_pos = proc_pos+pos;
|
||||||
|
next_free = prev_free;
|
||||||
|
return 2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (data_crc32 != je->small_write.crc32_data)
|
||||||
|
{
|
||||||
|
// journal entry is corrupt, stop here
|
||||||
|
// interesting thing is that we must clear the corrupt entry if we're not readonly
|
||||||
|
memset(buf + proc_pos - done_pos + pos, 0, 512 - pos);
|
||||||
|
bs->journal.next_free = prev_free;
|
||||||
|
init_write_buf = buf + proc_pos - done_pos;
|
||||||
|
init_write_sector = proc_pos;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
auto clean_it = bs->clean_db.find(je->small_write.oid);
|
auto clean_it = bs->clean_db.find(je->small_write.oid);
|
||||||
if (clean_it == bs->clean_db.end() ||
|
if (clean_it == bs->clean_db.end() ||
|
||||||
clean_it->second.version < je->big_write.version)
|
clean_it->second.version < je->big_write.version)
|
||||||
|
@ -366,9 +459,6 @@ int blockstore_init_journal::handle_journal_part(void *buf, uint64_t len)
|
||||||
.oid = je->small_write.oid,
|
.oid = je->small_write.oid,
|
||||||
.version = je->small_write.version,
|
.version = je->small_write.version,
|
||||||
};
|
};
|
||||||
#ifdef BLOCKSTORE_DEBUG
|
|
||||||
printf("je_small_write oid=%lu:%lu ver=%lu offset=%u len=%u\n", ov.oid.inode, ov.oid.stripe, ov.version, je->small_write.offset, je->small_write.len);
|
|
||||||
#endif
|
|
||||||
bs->dirty_db.emplace(ov, (dirty_entry){
|
bs->dirty_db.emplace(ov, (dirty_entry){
|
||||||
.state = ST_J_SYNCED,
|
.state = ST_J_SYNCED,
|
||||||
.flags = 0,
|
.flags = 0,
|
||||||
|
@ -387,6 +477,9 @@ int blockstore_init_journal::handle_journal_part(void *buf, uint64_t len)
|
||||||
}
|
}
|
||||||
else if (je->type == JE_BIG_WRITE)
|
else if (je->type == JE_BIG_WRITE)
|
||||||
{
|
{
|
||||||
|
#ifdef BLOCKSTORE_DEBUG
|
||||||
|
printf("je_big_write oid=%lu:%lu ver=%lu\n", je->big_write.oid.inode, je->big_write.oid.stripe, je->big_write.version);
|
||||||
|
#endif
|
||||||
auto clean_it = bs->clean_db.find(je->big_write.oid);
|
auto clean_it = bs->clean_db.find(je->big_write.oid);
|
||||||
if (clean_it == bs->clean_db.end() ||
|
if (clean_it == bs->clean_db.end() ||
|
||||||
clean_it->second.version < je->big_write.version)
|
clean_it->second.version < je->big_write.version)
|
||||||
|
@ -396,9 +489,6 @@ int blockstore_init_journal::handle_journal_part(void *buf, uint64_t len)
|
||||||
.oid = je->big_write.oid,
|
.oid = je->big_write.oid,
|
||||||
.version = je->big_write.version,
|
.version = je->big_write.version,
|
||||||
};
|
};
|
||||||
#ifdef BLOCKSTORE_DEBUG
|
|
||||||
printf("je_big_write oid=%lu:%lu ver=%lu\n", ov.oid.inode, ov.oid.stripe, ov.version);
|
|
||||||
#endif
|
|
||||||
bs->dirty_db.emplace(ov, (dirty_entry){
|
bs->dirty_db.emplace(ov, (dirty_entry){
|
||||||
.state = ST_D_META_SYNCED,
|
.state = ST_D_META_SYNCED,
|
||||||
.flags = 0,
|
.flags = 0,
|
||||||
|
@ -418,6 +508,9 @@ int blockstore_init_journal::handle_journal_part(void *buf, uint64_t len)
|
||||||
}
|
}
|
||||||
else if (je->type == JE_STABLE)
|
else if (je->type == JE_STABLE)
|
||||||
{
|
{
|
||||||
|
#ifdef BLOCKSTORE_DEBUG
|
||||||
|
printf("je_stable oid=%lu:%lu ver=%lu\n", je->stable.oid.inode, je->stable.oid.stripe, je->stable.version);
|
||||||
|
#endif
|
||||||
// oid, version
|
// oid, version
|
||||||
obj_ver_id ov = {
|
obj_ver_id ov = {
|
||||||
.oid = je->stable.oid,
|
.oid = je->stable.oid,
|
||||||
|
@ -428,13 +521,10 @@ int blockstore_init_journal::handle_journal_part(void *buf, uint64_t len)
|
||||||
{
|
{
|
||||||
// journal contains a legitimate STABLE entry for a non-existing dirty write
|
// journal contains a legitimate STABLE entry for a non-existing dirty write
|
||||||
// this probably means that journal was trimmed between WRITTEN and STABLE entries
|
// this probably means that journal was trimmed between WRITTEN and STABLE entries
|
||||||
// skip for now. but FIXME: maybe warn about it in the future
|
// skip it
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
#ifdef BLOCKSTORE_DEBUG
|
|
||||||
printf("je_stable oid=%lu:%lu ver=%lu\n", ov.oid.inode, ov.oid.stripe, ov.version);
|
|
||||||
#endif
|
|
||||||
while (1)
|
while (1)
|
||||||
{
|
{
|
||||||
it->second.state = (it->second.state == ST_D_META_SYNCED
|
it->second.state = (it->second.state == ST_D_META_SYNCED
|
||||||
|
@ -456,6 +546,9 @@ int blockstore_init_journal::handle_journal_part(void *buf, uint64_t len)
|
||||||
}
|
}
|
||||||
else if (je->type == JE_DELETE)
|
else if (je->type == JE_DELETE)
|
||||||
{
|
{
|
||||||
|
#ifdef BLOCKSTORE_DEBUG
|
||||||
|
printf("je_delete oid=%lu:%lu ver=%lu\n", je->del.oid.inode, je->del.oid.stripe, je->del.version);
|
||||||
|
#endif
|
||||||
// oid, version
|
// oid, version
|
||||||
obj_ver_id ov = {
|
obj_ver_id ov = {
|
||||||
.oid = je->del.oid,
|
.oid = je->del.oid,
|
||||||
|
@ -471,6 +564,9 @@ int blockstore_init_journal::handle_journal_part(void *buf, uint64_t len)
|
||||||
});
|
});
|
||||||
bs->journal.used_sectors[proc_pos]++;
|
bs->journal.used_sectors[proc_pos]++;
|
||||||
}
|
}
|
||||||
|
started = true;
|
||||||
|
pos += je->size;
|
||||||
|
crc32_last = je->crc32;
|
||||||
entries_loaded++;
|
entries_loaded++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -18,23 +18,32 @@ public:
|
||||||
int loop();
|
int loop();
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct bs_init_journal_done
|
||||||
|
{
|
||||||
|
void *buf;
|
||||||
|
uint64_t pos, len;
|
||||||
|
};
|
||||||
|
|
||||||
class blockstore_init_journal
|
class blockstore_init_journal
|
||||||
{
|
{
|
||||||
blockstore *bs;
|
blockstore *bs;
|
||||||
int wait_state = 0, wait_count = 0;
|
int wait_state = 0, wait_count = 0, handle_res = 0;
|
||||||
uint64_t entries_loaded = 0;
|
uint64_t entries_loaded = 0;
|
||||||
void *journal_buffer = NULL;
|
|
||||||
uint32_t crc32_last = 0;
|
uint32_t crc32_last = 0;
|
||||||
bool started = false;
|
bool started = false;
|
||||||
uint64_t done_pos = 0, journal_pos = 0;
|
|
||||||
uint64_t next_free = 512;
|
uint64_t next_free = 512;
|
||||||
|
std::vector<bs_init_journal_done> done;
|
||||||
|
uint64_t journal_pos = 0;
|
||||||
|
uint64_t continue_pos = 0;
|
||||||
|
void *init_write_buf = NULL;
|
||||||
|
uint64_t init_write_sector = 0;
|
||||||
bool wrapped = false;
|
bool wrapped = false;
|
||||||
int submitted = 0, done_buf = 0, done_len = 0;
|
void *submitted_buf;
|
||||||
struct io_uring_sqe *sqe;
|
struct io_uring_sqe *sqe;
|
||||||
struct ring_data_t *data;
|
struct ring_data_t *data;
|
||||||
journal_entry_start *je_start;
|
journal_entry_start *je_start;
|
||||||
std::function<void(ring_data_t*)> simple_callback;
|
std::function<void(ring_data_t*)> simple_callback;
|
||||||
int handle_journal_part(void *buf, uint64_t len);
|
int handle_journal_part(void *buf, uint64_t done_pos, uint64_t len);
|
||||||
void handle_event(ring_data_t *data);
|
void handle_event(ring_data_t *data);
|
||||||
public:
|
public:
|
||||||
blockstore_init_journal(blockstore* bs);
|
blockstore_init_journal(blockstore* bs);
|
||||||
|
|
|
@ -41,7 +41,6 @@ struct __attribute__((__packed__)) journal_entry_small_write
|
||||||
// data_offset is its offset within journal
|
// data_offset is its offset within journal
|
||||||
uint64_t data_offset;
|
uint64_t data_offset;
|
||||||
uint32_t crc32_data;
|
uint32_t crc32_data;
|
||||||
// FIXME verify data crc32c
|
|
||||||
};
|
};
|
||||||
|
|
||||||
struct __attribute__((__packed__)) journal_entry_big_write
|
struct __attribute__((__packed__)) journal_entry_big_write
|
||||||
|
|
Loading…
Reference in New Issue