2019-11-03 01:34:29 +03:00
|
|
|
#pragma once
|
|
|
|
|
2019-11-04 15:46:33 +03:00
|
|
|
#include "crc32c.h"
|
|
|
|
|
2019-11-03 01:34:29 +03:00
|
|
|
// Journal sizing constants.
// The replacement lists are parenthesized so that the macros behave as a
// single value inside larger expressions (e.g. `x % MIN_JOURNAL_SIZE`).

// 4 MiB
#define MIN_JOURNAL_SIZE (4*1024*1024)

// Value stored in the `magic` field of journal entries
#define JOURNAL_MAGIC 0x4A33

// 4 MiB
#define JOURNAL_BUFFER_SIZE (4*1024*1024)
|
2019-11-03 01:34:29 +03:00
|
|
|
|
|
|
|
// Journal entries
// Each journal entry is chained to another one through a crc32 value.
// Because object versions only ever grow, the journal is close to being a blockchain.

// Entry type codes (the names mirror the journal_entry_* structs below)
#define JE_START        0x01
#define JE_SMALL_WRITE  0x02
#define JE_BIG_WRITE    0x03
#define JE_STABLE       0x04
#define JE_DELETE       0x05
|
|
|
|
|
2019-11-04 01:42:40 +03:00
|
|
|
// crc32c comes first to ease calculation and is equal to crc32()
|
2019-11-03 01:34:29 +03:00
|
|
|
// Packed layout of a "start" journal entry (24 bytes, no padding).
// The first five fields occupy the same positions as the common header
// declared in journal_entry.
struct __attribute__((__packed__)) journal_entry_start
{
    // Checksum of the entry; always the first 4 bytes
    uint32_t crc32;
    // Magic number (see JOURNAL_MAGIC)
    uint16_t magic;
    // Entry type code; presumably JE_START for this struct
    uint16_t type;
    // Entry size in bytes
    uint32_t size;
    // NOTE(review): takes the slot that crc32_prev has in other entry types;
    // exact meaning is not visible in this header
    uint32_t crc32_replaced;
    // NOTE(review): presumably the journal position of the first live entry - confirm
    uint64_t journal_start;
};
|
|
|
|
|
|
|
|
struct __attribute__((__packed__)) journal_entry_small_write
|
|
|
|
{
|
2019-11-04 01:42:40 +03:00
|
|
|
uint32_t crc32;
|
2019-11-03 01:34:29 +03:00
|
|
|
uint16_t magic;
|
|
|
|
uint16_t type;
|
|
|
|
uint32_t size;
|
|
|
|
uint32_t crc32_prev;
|
|
|
|
object_id oid;
|
|
|
|
uint64_t version;
|
|
|
|
uint32_t offset;
|
|
|
|
uint32_t len;
|
|
|
|
// small_write entries contain <len> bytes of data, but data is stored in the next journal sector
|
|
|
|
};
|
|
|
|
|
|
|
|
struct __attribute__((__packed__)) journal_entry_big_write
|
|
|
|
{
|
2019-11-04 01:42:40 +03:00
|
|
|
uint32_t crc32;
|
2019-11-03 01:34:29 +03:00
|
|
|
uint16_t magic;
|
|
|
|
uint16_t type;
|
|
|
|
uint32_t size;
|
|
|
|
uint32_t crc32_prev;
|
|
|
|
object_id oid;
|
|
|
|
uint64_t version;
|
2019-11-12 20:55:17 +03:00
|
|
|
uint64_t location;
|
2019-11-03 01:34:29 +03:00
|
|
|
};
|
|
|
|
|
|
|
|
struct __attribute__((__packed__)) journal_entry_stable
|
|
|
|
{
|
2019-11-04 01:42:40 +03:00
|
|
|
uint32_t crc32;
|
2019-11-03 01:34:29 +03:00
|
|
|
uint16_t magic;
|
|
|
|
uint16_t type;
|
|
|
|
uint32_t size;
|
|
|
|
uint32_t crc32_prev;
|
|
|
|
object_id oid;
|
|
|
|
uint64_t version;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct __attribute__((__packed__)) journal_entry_del
|
|
|
|
{
|
2019-11-04 01:42:40 +03:00
|
|
|
uint32_t crc32;
|
2019-11-03 01:34:29 +03:00
|
|
|
uint16_t magic;
|
|
|
|
uint16_t type;
|
|
|
|
uint32_t size;
|
|
|
|
uint32_t crc32_prev;
|
|
|
|
object_id oid;
|
|
|
|
uint64_t version;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct __attribute__((__packed__)) journal_entry
|
|
|
|
{
|
|
|
|
union
|
|
|
|
{
|
|
|
|
struct __attribute__((__packed__))
|
|
|
|
{
|
2019-11-04 01:42:40 +03:00
|
|
|
uint32_t crc32;
|
2019-11-03 01:34:29 +03:00
|
|
|
uint16_t magic;
|
|
|
|
uint16_t type;
|
|
|
|
uint32_t size;
|
2019-11-04 01:42:40 +03:00
|
|
|
uint32_t crc32_prev;
|
2019-11-03 01:34:29 +03:00
|
|
|
};
|
|
|
|
journal_entry_start start;
|
|
|
|
journal_entry_small_write small_write;
|
|
|
|
journal_entry_big_write big_write;
|
|
|
|
journal_entry_stable stable;
|
|
|
|
journal_entry_del del;
|
|
|
|
};
|
|
|
|
};
|
2019-11-04 15:46:33 +03:00
|
|
|
|
|
|
|
// Compute the checksum of a journal entry.
// The leading 4-byte crc32 field is skipped: crc32c_zero4() is given the
// remaining (je->size - 4) bytes of the entry.
// je->size is trusted as is; the caller is expected to have validated it.
inline uint32_t je_crc32(journal_entry *je)
{
    uint8_t *after_crc = ((uint8_t*)je) + 4;
    uint32_t remaining = je->size - 4;
    return crc32c_zero4(after_crc, remaining);
}
|
2019-11-07 16:58:30 +03:00
|
|
|
|
|
|
|
// Bookkeeping for one in-memory journal sector buffer.
struct journal_sector_info_t
{
    // Journal position assigned to this sector
    // (set from journal_t::next_free when the sector is selected)
    uint64_t offset;
    // NOTE(review): presumably the number of operations still using this sector - confirm
    uint64_t usage_count;
};
|
|
|
|
|
|
|
|
struct journal_t
|
|
|
|
{
|
|
|
|
int fd;
|
|
|
|
uint64_t device_size;
|
|
|
|
|
|
|
|
uint64_t offset, len;
|
|
|
|
uint64_t next_free = 512;
|
|
|
|
uint64_t used_start = 512;
|
|
|
|
uint32_t crc32_last = 0;
|
|
|
|
|
|
|
|
// Current sector(s) used for writing
|
|
|
|
uint8_t *sector_buf;
|
|
|
|
journal_sector_info_t *sector_info;
|
|
|
|
uint64_t sector_count;
|
2019-11-11 00:28:14 +03:00
|
|
|
int cur_sector = 0;
|
|
|
|
int in_sector_pos = 0;
|
2019-11-07 16:58:30 +03:00
|
|
|
};
|
2019-11-11 00:28:14 +03:00
|
|
|
|
|
|
|
struct blockstore_journal_check_t
|
|
|
|
{
|
|
|
|
blockstore *bs;
|
|
|
|
uint64_t next_pos, next_sector, next_in_pos;
|
|
|
|
int sectors_required;
|
|
|
|
|
|
|
|
blockstore_journal_check_t(blockstore *bs);
|
|
|
|
int check_available(blockstore_operation *op, int required, int size, int data_after);
|
|
|
|
};
|
|
|
|
|
|
|
|
// Reserve room for one journal entry of <size> bytes in the current sector
// buffer and fill in its common header.
//
// If the entry does not fit into the rest of the current 512-byte sector,
// the next in-memory sector buffer is selected (wrapping around after
// sector_count), zeroed and bound to the journal position next_free;
// next_free then advances by 512, wrapping back to 512 when it would reach
// journal.len.
//
// Sets magic, type, size and crc32_prev. The crc32 field is NOT set here.
// Returns a pointer to the entry inside journal.sector_buf.
inline journal_entry* prefill_single_journal_entry(journal_t & journal, uint16_t type, uint32_t size)
{
    if (512 - journal.in_sector_pos < size)
    {
        // Not enough room left: switch to the next sector buffer
        const uint64_t new_sector_offset = journal.next_free;
        const uint64_t after_new_sector = new_sector_offset + 512;
        journal.cur_sector = ((journal.cur_sector + 1) % journal.sector_count);
        journal.sector_info[journal.cur_sector].offset = new_sector_offset;
        journal.in_sector_pos = 0;
        if (after_new_sector < journal.len)
        {
            journal.next_free = after_new_sector;
        }
        else
        {
            // Wrap around to the first usable position
            journal.next_free = 512;
        }
        memset(journal.sector_buf + 512*journal.cur_sector, 0, 512);
    }
    // Place the entry at the current position of the current sector
    uint8_t *entry_pos = journal.sector_buf + 512*journal.cur_sector + journal.in_sector_pos;
    journal_entry *je = (struct journal_entry*)entry_pos;
    journal.in_sector_pos += size;
    // Fill the common header
    je->magic = JOURNAL_MAGIC;
    je->type = type;
    je->size = size;
    je->crc32_prev = journal.crc32_last;
    return je;
}
|
2019-11-11 18:24:04 +03:00
|
|
|
|
|
|
|
// FIXME: make inline
|
2019-11-13 21:17:04 +03:00
|
|
|
void prepare_journal_sector_write(journal_t & journal, io_uring_sqe *sqe, std::function<void(ring_data_t*)> cb);
|