Compare commits
3 Commits
b760951aa7
...
1d94afbd51
Author | SHA1 | Date |
---|---|---|
|
1d94afbd51 | |
|
3634f005f1 | |
|
263a3b5ad6 |
docs/usage
src
|
@ -14,6 +14,7 @@ It supports the following commands:
|
|||
- [upgrade-simple](#upgrade-simple)
|
||||
- [resize](#resize)
|
||||
- [raw-resize](#raw-resize)
|
||||
- [trim](#trim)
|
||||
- [start/stop/restart/enable/disable](#start/stop/restart/enable/disable)
|
||||
- [purge](#purge)
|
||||
- [read-sb](#read-sb)
|
||||
|
@ -97,6 +98,9 @@ Options (both modes):
|
|||
--data_device_block 4k Override data device block size
|
||||
--meta_device_block 4k Override metadata device block size
|
||||
--journal_device_block 4k Override journal device block size
|
||||
--discard_on_start 0 TRIM unused data device blocks every OSD start (default off)
|
||||
--min_discard_size 1M Minimum TRIM block size
|
||||
--json Enable JSON output
|
||||
```
|
||||
|
||||
[immediate_commit](../config/layout-cluster.en.md#immediate_commit) setting is
|
||||
|
@ -179,6 +183,19 @@ parameters from OSD command line (i.e. from systemd unit or superblock).
|
|||
SIZE may include k/m/g/t suffixes. If any of the new layout parameter
|
||||
options are not specified, old values will be used.
|
||||
|
||||
## trim
|
||||
|
||||
`vitastor-disk trim <osd_num>|<osd_device> [<osd_num>|<osd_device>...]`
|
||||
|
||||
Try to discard unused blocks (SSD TRIM) on the data device of each specified OSD.
|
||||
|
||||
May only be used on stopped OSDs. Options:
|
||||
|
||||
```
|
||||
--min_discard_size 1M Minimum TRIM block size
|
||||
--discard_granularity 0 Override device's discard granularity
|
||||
```
|
||||
|
||||
## start/stop/restart/enable/disable
|
||||
|
||||
`vitastor-disk start|stop|restart|enable|disable [--now] <device> [device2 device3 ...]`
|
||||
|
|
|
@ -99,6 +99,9 @@ vitastor-disk - инструмент командной строки для уп
|
|||
--data_device_block 4k Задать размер блока устройства данных
|
||||
--meta_device_block 4k Задать размер блока метаданных
|
||||
--journal_device_block 4k Задать размер блока журнала
|
||||
--discard_on_start 0 Выполнять TRIM пустых блоков данных при запуске OSD (по умолчанию нет)
|
||||
--min_discard_size 1M Минимальный размер блока для TRIM
|
||||
--json Включить JSON-вывод
|
||||
```
|
||||
|
||||
Настройка [immediate_commit](../config/layout-cluster.ru.md#immediate_commit)
|
||||
|
@ -182,6 +185,20 @@ throttle_target_mbs, throttle_target_parallelism, throttle_threshold_us.
|
|||
`РАЗМЕР` может быть указан с суффиксами k/m/g/t. Если любой из новых параметров
|
||||
расположения не указан, он принимается равным старому значению.
|
||||
|
||||
## trim
|
||||
|
||||
`vitastor-disk trim <osd_num>|<osd_device> [<osd_num>|<osd_device>...]`
|
||||
|
||||
Попробовать пометить пустые блоки дисков данных всех указанных OSD неиспользуемыми
|
||||
(выполнить команду SSD TRIM).
|
||||
|
||||
Можно использовать только с остановленными OSD. Опции:
|
||||
|
||||
```
|
||||
--min_discard_size 1M Минимальный размер блока для TRIM
|
||||
--discard_granularity 0 Кратность размера блока для TRIM
|
||||
```
|
||||
|
||||
## start/stop/restart/enable/disable
|
||||
|
||||
`vitastor-disk start|stop|restart|enable|disable [--now] <device> [device2 device3 ...]`
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
#include "blockstore_impl.h"
|
||||
#include "blockstore_disk.h"
|
||||
#include "str_util.h"
|
||||
#include "allocator.h"
|
||||
|
||||
static uint32_t is_power_of_two(uint64_t value)
|
||||
{
|
||||
|
@ -83,6 +84,12 @@ void blockstore_disk_t::parse_config(std::map<std::string, std::string> & config
|
|||
throw std::runtime_error("data_csum_type="+config["data_csum_type"]+" is unsupported, only \"crc32c\" and \"none\" are supported");
|
||||
}
|
||||
csum_block_size = parse_size(config["csum_block_size"]);
|
||||
discard_on_start = config.find("discard_on_start") != config.end() &&
|
||||
(config["discard_on_start"] == "true" || config["discard_on_start"] == "1" || config["discard_on_start"] == "yes");
|
||||
min_discard_size = parse_size(config["min_discard_size"]);
|
||||
if (!min_discard_size)
|
||||
min_discard_size = 1024*1024;
|
||||
discard_granularity = parse_size(config["discard_granularity"]);
|
||||
// Validate
|
||||
if (!data_block_size)
|
||||
{
|
||||
|
@ -172,10 +179,6 @@ void blockstore_disk_t::parse_config(std::map<std::string, std::string> & config
|
|||
{
|
||||
throw std::runtime_error("journal_offset must be a multiple of journal_block_size = "+std::to_string(journal_block_size));
|
||||
}
|
||||
clean_entry_bitmap_size = data_block_size / bitmap_granularity / 8;
|
||||
clean_dyn_size = clean_entry_bitmap_size*2 + (csum_block_size
|
||||
? data_block_size/csum_block_size*(data_csum_type & 0xFF) : 0);
|
||||
clean_entry_size = sizeof(clean_disk_entry) + clean_dyn_size + 4 /*entry_csum*/;
|
||||
}
|
||||
|
||||
void blockstore_disk_t::calc_lengths(bool skip_meta_check)
|
||||
|
@ -224,9 +227,13 @@ void blockstore_disk_t::calc_lengths(bool skip_meta_check)
|
|||
}
|
||||
// required metadata size
|
||||
block_count = data_len / data_block_size;
|
||||
clean_entry_bitmap_size = data_block_size / bitmap_granularity / 8;
|
||||
clean_dyn_size = clean_entry_bitmap_size*2 + (csum_block_size
|
||||
? data_block_size/csum_block_size*(data_csum_type & 0xFF) : 0);
|
||||
clean_entry_size = sizeof(clean_disk_entry) + clean_dyn_size + 4 /*entry_csum*/;
|
||||
meta_len = (1 + (block_count - 1 + meta_block_size / clean_entry_size) / (meta_block_size / clean_entry_size)) * meta_block_size;
|
||||
if (meta_format == BLOCKSTORE_META_FORMAT_V1 ||
|
||||
!meta_format && !skip_meta_check && meta_area_size < meta_len && !data_csum_type)
|
||||
bool new_doesnt_fit = (!meta_format && !skip_meta_check && meta_area_size < meta_len && !data_csum_type);
|
||||
if (meta_format == BLOCKSTORE_META_FORMAT_V1 || new_doesnt_fit)
|
||||
{
|
||||
uint64_t clean_entry_v0_size = sizeof(clean_disk_entry) + 2*clean_entry_bitmap_size;
|
||||
uint64_t meta_v0_len = (1 + (block_count - 1 + meta_block_size / clean_entry_v0_size)
|
||||
|
@ -234,7 +241,11 @@ void blockstore_disk_t::calc_lengths(bool skip_meta_check)
|
|||
if (meta_format == BLOCKSTORE_META_FORMAT_V1 || meta_area_size >= meta_v0_len)
|
||||
{
|
||||
// Old metadata fits.
|
||||
printf("Warning: Using old metadata format without checksums because the new format doesn't fit into provided area\n");
|
||||
if (new_doesnt_fit)
|
||||
{
|
||||
printf("Warning: Using old metadata format without checksums because the new format"
|
||||
" doesn't fit into provided area (%lu bytes required, %lu bytes available)\n", meta_len, meta_area_size);
|
||||
}
|
||||
clean_entry_size = clean_entry_v0_size;
|
||||
meta_len = meta_v0_len;
|
||||
meta_format = BLOCKSTORE_META_FORMAT_V1;
|
||||
|
@ -246,7 +257,7 @@ void blockstore_disk_t::calc_lengths(bool skip_meta_check)
|
|||
meta_format = BLOCKSTORE_META_FORMAT_V2;
|
||||
if (!skip_meta_check && meta_area_size < meta_len)
|
||||
{
|
||||
throw std::runtime_error("Metadata area is too small, need at least "+std::to_string(meta_len)+" bytes");
|
||||
throw std::runtime_error("Metadata area is too small, need at least "+std::to_string(meta_len)+" bytes, have only "+std::to_string(meta_area_size)+" bytes");
|
||||
}
|
||||
// requested journal size
|
||||
if (!skip_meta_check && cfg_journal_size > journal_len)
|
||||
|
@ -415,3 +426,44 @@ void blockstore_disk_t::close_all()
|
|||
close(journal_fd);
|
||||
data_fd = meta_fd = journal_fd = -1;
|
||||
}
|
||||
|
||||
// Sadly DISCARD only works through ioctl(), but it seems to always block the device queue,
|
||||
// so it's not a big deal that we can only run it synchronously.
|
||||
int blockstore_disk_t::trim_data(allocator_t *alloc)
|
||||
{
|
||||
int r = 0;
|
||||
uint64_t j = 0, i = 0;
|
||||
uint64_t discarded = 0;
|
||||
for (; i <= block_count; i++)
|
||||
{
|
||||
if (i >= block_count || alloc->get(i))
|
||||
{
|
||||
if (i > j && (i-j)*data_block_size >= min_discard_size)
|
||||
{
|
||||
uint64_t range[2] = { data_offset + j*data_block_size, (i-j)*data_block_size };
|
||||
if (discard_granularity)
|
||||
{
|
||||
range[1] += range[0];
|
||||
if (range[1] % discard_granularity)
|
||||
range[1] = range[1] - (range[1] % discard_granularity);
|
||||
if (range[0] % discard_granularity)
|
||||
range[0] = range[0] + discard_granularity - (range[0] % discard_granularity);
|
||||
if (range[0] >= range[1])
|
||||
continue;
|
||||
range[1] -= range[0];
|
||||
}
|
||||
r = ioctl(data_fd, BLKDISCARD, &range);
|
||||
if (r != 0)
|
||||
{
|
||||
fprintf(stderr, "Failed to execute BLKDISCARD %ju+%ju on %s: %s (code %d)\n",
|
||||
range[0], range[1], data_device.c_str(), strerror(-r), r);
|
||||
return -errno;
|
||||
}
|
||||
discarded += range[1];
|
||||
}
|
||||
j = i+1;
|
||||
}
|
||||
}
|
||||
fprintf(stderr, "%s (%ju bytes) of unused data discarded on %s\n", format_size(discarded).c_str(), discarded, data_device.c_str());
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -12,6 +12,8 @@
|
|||
// Lower byte of checksum type is its length
|
||||
#define BLOCKSTORE_CSUM_CRC32C 0x104
|
||||
|
||||
class allocator_t;
|
||||
|
||||
struct blockstore_disk_t
|
||||
{
|
||||
std::string data_device, meta_device, journal_device;
|
||||
|
@ -34,14 +36,18 @@ struct blockstore_disk_t
|
|||
// I/O modes for data, metadata and journal: direct or "" = O_DIRECT, cached = O_SYNC, directsync = O_DIRECT|O_SYNC
|
||||
// O_SYNC without O_DIRECT = use Linux page cache for reads and writes
|
||||
std::string data_io, meta_io, journal_io;
|
||||
// Data discard granularity and minimum size (for the sake of performance)
|
||||
bool discard_on_start = false;
|
||||
uint64_t min_discard_size = 1024*1024;
|
||||
uint64_t discard_granularity = 0;
|
||||
|
||||
int meta_fd = -1, data_fd = -1, journal_fd = -1;
|
||||
uint64_t meta_offset, meta_device_sect, meta_device_size, meta_len, meta_format = 0;
|
||||
uint64_t data_offset, data_device_sect, data_device_size, data_len;
|
||||
uint64_t journal_offset, journal_device_sect, journal_device_size, journal_len;
|
||||
|
||||
uint32_t block_order;
|
||||
uint64_t block_count;
|
||||
uint32_t block_order = 0;
|
||||
uint64_t block_count = 0;
|
||||
uint32_t clean_entry_bitmap_size = 0, clean_entry_size = 0, clean_dyn_size = 0;
|
||||
|
||||
void parse_config(std::map<std::string, std::string> & config);
|
||||
|
@ -50,6 +56,7 @@ struct blockstore_disk_t
|
|||
void open_journal();
|
||||
void calc_lengths(bool skip_meta_check = false);
|
||||
void close_all();
|
||||
int trim_data(allocator_t *alloc);
|
||||
|
||||
inline uint64_t dirty_dyn_size(uint64_t offset, uint64_t len)
|
||||
{
|
||||
|
|
|
@ -20,7 +20,7 @@ blockstore_impl_t::blockstore_impl_t(blockstore_config_t & config, ring_loop_t *
|
|||
dsk.open_meta();
|
||||
dsk.open_journal();
|
||||
calc_lengths();
|
||||
data_alloc = new allocator(dsk.block_count);
|
||||
data_alloc = new allocator_t(dsk.block_count);
|
||||
}
|
||||
catch (std::exception & e)
|
||||
{
|
||||
|
@ -83,14 +83,20 @@ void blockstore_impl_t::loop()
|
|||
{
|
||||
delete journal_init_reader;
|
||||
journal_init_reader = NULL;
|
||||
if (journal.flush_journal)
|
||||
initialized = 3;
|
||||
else
|
||||
initialized = 10;
|
||||
initialized = 3;
|
||||
ringloop->wakeup();
|
||||
}
|
||||
}
|
||||
if (initialized == 3)
|
||||
{
|
||||
if (!readonly && dsk.discard_on_start)
|
||||
dsk.trim_data(data_alloc);
|
||||
if (journal.flush_journal)
|
||||
initialized = 4;
|
||||
else
|
||||
initialized = 10;
|
||||
}
|
||||
if (initialized == 4)
|
||||
{
|
||||
if (readonly)
|
||||
{
|
||||
|
|
|
@ -279,7 +279,7 @@ class blockstore_impl_t
|
|||
std::vector<obj_ver_id> unsynced_big_writes, unsynced_small_writes;
|
||||
int unsynced_big_write_count = 0, unstable_unsynced = 0;
|
||||
int unsynced_queued_ops = 0;
|
||||
allocator *data_alloc = NULL;
|
||||
allocator_t *data_alloc = NULL;
|
||||
uint64_t used_blocks = 0;
|
||||
uint8_t *zero_object;
|
||||
|
||||
|
|
|
@ -138,7 +138,11 @@ resume_1:
|
|||
exit(1);
|
||||
}
|
||||
hdr->header_csum = csum;
|
||||
bs->dsk.meta_format = BLOCKSTORE_META_FORMAT_V2;
|
||||
if (bs->dsk.meta_format != BLOCKSTORE_META_FORMAT_V2)
|
||||
{
|
||||
bs->dsk.meta_format = BLOCKSTORE_META_FORMAT_V2;
|
||||
bs->dsk.calc_lengths();
|
||||
}
|
||||
}
|
||||
else if (hdr->version == BLOCKSTORE_META_FORMAT_V1)
|
||||
{
|
||||
|
@ -146,11 +150,15 @@ resume_1:
|
|||
hdr->csum_block_size = 0;
|
||||
hdr->header_csum = 0;
|
||||
// Enable compatibility mode - entries without checksums
|
||||
bs->dsk.clean_entry_size = sizeof(clean_disk_entry) + bs->dsk.clean_entry_bitmap_size*2;
|
||||
bs->dsk.meta_len = (1 + (bs->dsk.block_count - 1 + bs->dsk.meta_block_size / bs->dsk.clean_entry_size)
|
||||
/ (bs->dsk.meta_block_size / bs->dsk.clean_entry_size)) * bs->dsk.meta_block_size;
|
||||
bs->dsk.meta_format = BLOCKSTORE_META_FORMAT_V1;
|
||||
printf("Warning: Starting with metadata in the old format without checksums, as stored on disk\n");
|
||||
if (bs->dsk.meta_format != BLOCKSTORE_META_FORMAT_V1 ||
|
||||
bs->dsk.data_csum_type != 0 || bs->dsk.csum_block_size != 0)
|
||||
{
|
||||
bs->dsk.data_csum_type = 0;
|
||||
bs->dsk.csum_block_size = 0;
|
||||
bs->dsk.meta_format = BLOCKSTORE_META_FORMAT_V1;
|
||||
bs->dsk.calc_lengths();
|
||||
printf("Warning: Starting with metadata in the old format without checksums, as stored on disk\n");
|
||||
}
|
||||
}
|
||||
else if (hdr->version > BLOCKSTORE_META_FORMAT_V2)
|
||||
{
|
||||
|
@ -338,7 +346,7 @@ bool blockstore_init_meta::handle_meta_block(uint8_t *buf, uint64_t entries_per_
|
|||
uint32_t *entry_csum = (uint32_t*)((uint8_t*)entry + bs->dsk.clean_entry_size - 4);
|
||||
if (*entry_csum != crc32c(0, entry, bs->dsk.clean_entry_size - 4))
|
||||
{
|
||||
printf("Metadata entry %ju is corrupt (checksum mismatch), skipping\n", done_cnt+i);
|
||||
printf("Metadata entry %ju is corrupt (checksum mismatch: %08x vs %08x), skipping\n", done_cnt+i, *entry_csum, crc32c(0, entry, bs->dsk.clean_entry_size - 4));
|
||||
// zero out the invalid entry, otherwise we'll hit "tried to overwrite non-zero metadata entry" later
|
||||
if (bs->inmemory_meta)
|
||||
{
|
||||
|
|
|
@ -5,7 +5,7 @@ project(vitastor)
|
|||
# vitastor-disk
|
||||
add_executable(vitastor-disk
|
||||
disk_tool.cpp disk_simple_offsets.cpp
|
||||
disk_tool_journal.cpp disk_tool_meta.cpp disk_tool_prepare.cpp disk_tool_resize.cpp
|
||||
disk_tool_discard.cpp disk_tool_journal.cpp disk_tool_meta.cpp disk_tool_prepare.cpp disk_tool_resize.cpp
|
||||
disk_tool_resize_auto.cpp disk_tool_udev.cpp disk_tool_utils.cpp disk_tool_upgrade.cpp
|
||||
../util/crc32c.c ../util/str_util.cpp ../util/json_util.cpp ../../json11/json11.cpp ../util/rw_blocking.cpp ../util/allocator.cpp ../util/ringloop.cpp ../blockstore/blockstore_disk.cpp
|
||||
)
|
||||
|
|
|
@ -68,6 +68,8 @@ static const char *help_text =
|
|||
" --data_device_block 4k Override data device block size\n"
|
||||
" --meta_device_block 4k Override metadata device block size\n"
|
||||
" --journal_device_block 4k Override journal device block size\n"
|
||||
" --discard_on_start 0 TRIM unused data device blocks every OSD start (default off)\n"
|
||||
" --min_discard_size 1M Minimum TRIM block size\n"
|
||||
" --json Enable JSON output\n"
|
||||
" \n"
|
||||
" immediate_commit setting is automatically derived from \"disable fsync\" options.\n"
|
||||
|
@ -128,6 +130,12 @@ static const char *help_text =
|
|||
" SIZE may include k/m/g/t suffixes. If any of the new layout parameter\n"
|
||||
" options are not specified, old values will be used.\n"
|
||||
"\n"
|
||||
"vitastor-disk trim <osd_num>|<osd_device> [<osd_num>|<osd_device>...]\n"
|
||||
" Try to discard unused blocks (SSD TRIM) on the data device of each of the OSD(s).\n"
|
||||
" May only be used on stopped OSDs. Options:\n"
|
||||
" --min_discard_size 1M Minimum TRIM block size\n"
|
||||
" --discard_granularity 0 Override device's discard granularity\n"
|
||||
"\n"
|
||||
"vitastor-disk start|stop|restart|enable|disable [--now] <device> [device2 device3 ...]\n"
|
||||
" Manipulate Vitastor OSDs using systemd by their device paths.\n"
|
||||
" Commands are passed to systemctl with vitastor-osd@<num> units as arguments.\n"
|
||||
|
@ -428,6 +436,19 @@ int main(int argc, char *argv[])
|
|||
disk_tool_simple_offsets(self.options, self.json);
|
||||
return 0;
|
||||
}
|
||||
else if (!strcmp(cmd[0], "trim"))
|
||||
{
|
||||
if (cmd.size() < 2)
|
||||
{
|
||||
fprintf(stderr, "OSD number(s) or device path(s) are required\n");
|
||||
return 1;
|
||||
}
|
||||
for (int i = 1; i < cmd.size(); i++)
|
||||
{
|
||||
self.trim_data(cmd[i]);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
else if (!strcmp(cmd[0], "udev"))
|
||||
{
|
||||
if (cmd.size() != 2)
|
||||
|
|
|
@ -60,7 +60,7 @@ struct disk_tool_t
|
|||
|
||||
bool first_block, first_entry;
|
||||
|
||||
allocator *data_alloc;
|
||||
allocator_t *data_alloc;
|
||||
std::map<uint64_t, uint64_t> data_remap;
|
||||
std::map<uint64_t, uint64_t>::iterator remap_it;
|
||||
ring_loop_t *ringloop;
|
||||
|
@ -86,10 +86,10 @@ struct disk_tool_t
|
|||
|
||||
int dump_journal();
|
||||
void dump_journal_entry(int num, journal_entry *je, bool json);
|
||||
int process_journal(std::function<int(void*)> block_fn);
|
||||
int process_journal(std::function<int(void*)> block_fn, bool do_open = true);
|
||||
int process_journal_block(void *buf, std::function<void(int, journal_entry*)> iter_fn);
|
||||
int process_meta(std::function<void(blockstore_meta_header_v2_t *)> hdr_fn,
|
||||
std::function<void(uint64_t, clean_disk_entry*, uint8_t*)> record_fn);
|
||||
std::function<void(uint64_t, clean_disk_entry*, uint8_t*)> record_fn, bool do_open = true);
|
||||
|
||||
int dump_meta();
|
||||
void dump_meta_header(blockstore_meta_header_v2_t *hdr);
|
||||
|
@ -123,6 +123,7 @@ struct disk_tool_t
|
|||
int pre_exec_osd(std::string device);
|
||||
int purge_devices(const std::vector<std::string> & devices);
|
||||
int clear_osd_superblock(const std::string & dev);
|
||||
int trim_data(std::string device);
|
||||
|
||||
json11::Json read_osd_superblock(std::string device, bool expect_exist = true, bool ignore_nonref = false);
|
||||
uint32_t write_osd_superblock(std::string device, json11::Json params);
|
||||
|
|
|
@ -0,0 +1,89 @@
|
|||
// Copyright (c) Vitaliy Filippov, 2019+
|
||||
// License: VNPL-1.1 (see README.md for details)
|
||||
|
||||
#include "disk_tool.h"
|
||||
#include "str_util.h"
|
||||
#include "json_util.h"
|
||||
|
||||
int disk_tool_t::trim_data(std::string device)
|
||||
{
|
||||
int r;
|
||||
// Parse parameters
|
||||
if (stoull_full(device))
|
||||
device = "/dev/vitastor/osd"+device+"-data";
|
||||
json11::Json sb = read_osd_superblock(device, true, false);
|
||||
if (sb.is_null())
|
||||
return 1;
|
||||
auto sb_params = json_to_string_map(sb["params"].object_items());
|
||||
if (options["discard_granularity"] != "")
|
||||
sb_params["discard_granularity"] = options["discard_granularity"];
|
||||
if (options["min_discard_size"] != "")
|
||||
sb_params["min_discard_size"] = options["min_discard_size"];
|
||||
try
|
||||
{
|
||||
dsk.parse_config(sb_params);
|
||||
}
|
||||
catch (std::exception & e)
|
||||
{
|
||||
fprintf(stderr, "Error: %s\n", e.what());
|
||||
return 1;
|
||||
}
|
||||
if (!dsk.discard_granularity && sb["real_data_device"].string_value().substr(0, 5) == "/dev/")
|
||||
{
|
||||
auto dg = read_file("/sys/block/"+sb["real_data_device"].string_value().substr(5)+"/queue/discard_granularity", true);
|
||||
if (dg != "")
|
||||
dsk.discard_granularity = parse_size(trim(dg));
|
||||
}
|
||||
// Open devices
|
||||
try
|
||||
{
|
||||
if (options["io"] != "")
|
||||
dsk.data_io = dsk.meta_io = dsk.journal_io = options["io"];
|
||||
dsk.open_data();
|
||||
dsk.open_meta();
|
||||
dsk.open_journal();
|
||||
dsk.calc_lengths();
|
||||
}
|
||||
catch (std::exception & e)
|
||||
{
|
||||
dsk.close_all();
|
||||
fprintf(stderr, "Error: %s\n", e.what());
|
||||
return 1;
|
||||
}
|
||||
// Fill allocator
|
||||
fprintf(stderr, "Reading metadata\n");
|
||||
data_alloc = new allocator_t(dsk.block_count);
|
||||
r = process_meta(
|
||||
[this](blockstore_meta_header_v2_t *hdr) {},
|
||||
[this](uint64_t block_num, clean_disk_entry *entry, uint8_t *bitmap)
|
||||
{
|
||||
data_alloc->set(block_num, true);
|
||||
},
|
||||
false
|
||||
);
|
||||
if (r != 0)
|
||||
{
|
||||
dsk.close_all();
|
||||
return r;
|
||||
}
|
||||
fprintf(stderr, "Reading journal\n");
|
||||
r = process_journal([this](void *buf)
|
||||
{
|
||||
return process_journal_block(buf, [this](int num, journal_entry *je)
|
||||
{
|
||||
if (je->type == JE_BIG_WRITE || je->type == JE_BIG_WRITE_INSTANT)
|
||||
{
|
||||
data_alloc->set(je->big_write.location / dsk.data_block_size, true);
|
||||
}
|
||||
});
|
||||
}, false);
|
||||
if (r != 0)
|
||||
{
|
||||
dsk.close_all();
|
||||
return r;
|
||||
}
|
||||
// Trim
|
||||
r = dsk.trim_data(data_alloc);
|
||||
dsk.close_all();
|
||||
return r == 0;
|
||||
}
|
|
@ -119,13 +119,21 @@ int disk_tool_t::dump_journal()
|
|||
return 0;
|
||||
}
|
||||
|
||||
int disk_tool_t::process_journal(std::function<int(void*)> block_fn)
|
||||
int disk_tool_t::process_journal(std::function<int(void*)> block_fn, bool do_open)
|
||||
{
|
||||
dsk.journal_fd = open(dsk.journal_device.c_str(), (options["io"] == "cached" ? 0 : O_DIRECT) | O_RDONLY);
|
||||
if (dsk.journal_fd < 0)
|
||||
if (do_open)
|
||||
{
|
||||
fprintf(stderr, "Failed to open journal device %s: %s\n", dsk.journal_device.c_str(), strerror(errno));
|
||||
return 1;
|
||||
if (dsk.journal_fd >= 0)
|
||||
{
|
||||
fprintf(stderr, "Bug: journal device is already opened\n");
|
||||
return 1;
|
||||
}
|
||||
dsk.journal_fd = open(dsk.journal_device.c_str(), (options["io"] == "cached" ? 0 : O_DIRECT) | O_RDONLY);
|
||||
if (dsk.journal_fd < 0)
|
||||
{
|
||||
fprintf(stderr, "Failed to open journal device %s: %s\n", dsk.journal_device.c_str(), strerror(errno));
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
void *data = memalign_or_die(MEM_ALIGNMENT, dsk.journal_block_size);
|
||||
journal_pos = 0;
|
||||
|
@ -170,8 +178,11 @@ int disk_tool_t::process_journal(std::function<int(void*)> block_fn)
|
|||
break;
|
||||
}
|
||||
}
|
||||
close(dsk.journal_fd);
|
||||
dsk.journal_fd = -1;
|
||||
if (do_open)
|
||||
{
|
||||
close(dsk.journal_fd);
|
||||
dsk.journal_fd = -1;
|
||||
}
|
||||
free(data);
|
||||
return r;
|
||||
}
|
||||
|
|
|
@ -7,18 +7,26 @@
|
|||
#include "json_util.h"
|
||||
|
||||
int disk_tool_t::process_meta(std::function<void(blockstore_meta_header_v2_t *)> hdr_fn,
|
||||
std::function<void(uint64_t, clean_disk_entry*, uint8_t*)> record_fn)
|
||||
std::function<void(uint64_t, clean_disk_entry*, uint8_t*)> record_fn, bool do_open)
|
||||
{
|
||||
if (dsk.meta_block_size % DIRECT_IO_ALIGNMENT)
|
||||
{
|
||||
fprintf(stderr, "Invalid metadata block size: is not a multiple of %d\n", DIRECT_IO_ALIGNMENT);
|
||||
return 1;
|
||||
}
|
||||
dsk.meta_fd = open(dsk.meta_device.c_str(), (options["io"] == "cached" ? 0 : O_DIRECT) | O_RDONLY);
|
||||
if (dsk.meta_fd < 0)
|
||||
if (do_open)
|
||||
{
|
||||
fprintf(stderr, "Failed to open metadata device %s: %s\n", dsk.meta_device.c_str(), strerror(errno));
|
||||
return 1;
|
||||
if (dsk.meta_fd >= 0)
|
||||
{
|
||||
fprintf(stderr, "Bug: Metadata device is already opened\n");
|
||||
return 1;
|
||||
}
|
||||
dsk.meta_fd = open(dsk.meta_device.c_str(), (options["io"] == "cached" ? 0 : O_DIRECT) | O_RDONLY);
|
||||
if (dsk.meta_fd < 0)
|
||||
{
|
||||
fprintf(stderr, "Failed to open metadata device %s: %s\n", dsk.meta_device.c_str(), strerror(errno));
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
int buf_size = 1024*1024;
|
||||
if (buf_size % dsk.meta_block_size)
|
||||
|
@ -47,8 +55,11 @@ int disk_tool_t::process_meta(std::function<void(blockstore_meta_header_v2_t *)>
|
|||
{
|
||||
fprintf(stderr, "I don't know checksum format %u, the only supported format is crc32c = %u.\n", hdr->data_csum_type, BLOCKSTORE_CSUM_CRC32C);
|
||||
free(data);
|
||||
close(dsk.meta_fd);
|
||||
dsk.meta_fd = -1;
|
||||
if (do_open)
|
||||
{
|
||||
close(dsk.meta_fd);
|
||||
dsk.meta_fd = -1;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
@ -57,8 +68,11 @@ int disk_tool_t::process_meta(std::function<void(blockstore_meta_header_v2_t *)>
|
|||
// Unsupported version
|
||||
fprintf(stderr, "Metadata format is too new for me (stored version is %ju, max supported %u).\n", hdr->version, BLOCKSTORE_META_FORMAT_V2);
|
||||
free(data);
|
||||
close(dsk.meta_fd);
|
||||
dsk.meta_fd = -1;
|
||||
if (do_open)
|
||||
{
|
||||
close(dsk.meta_fd);
|
||||
dsk.meta_fd = -1;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
if (hdr->meta_block_size != dsk.meta_block_size)
|
||||
|
@ -145,8 +159,11 @@ int disk_tool_t::process_meta(std::function<void(blockstore_meta_header_v2_t *)>
|
|||
}
|
||||
}
|
||||
free(data);
|
||||
close(dsk.meta_fd);
|
||||
dsk.meta_fd = -1;
|
||||
if (do_open)
|
||||
{
|
||||
close(dsk.meta_fd);
|
||||
dsk.meta_fd = -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -26,6 +26,9 @@ int disk_tool_t::prepare_one(std::map<std::string, std::string> options, int is_
|
|||
"throttle_target_mbs",
|
||||
"throttle_target_parallelism",
|
||||
"throttle_threshold_us",
|
||||
"discard_on_start",
|
||||
"min_discard_size",
|
||||
"discard_granularity",
|
||||
};
|
||||
if (options.find("force") == options.end())
|
||||
{
|
||||
|
|
|
@ -25,9 +25,9 @@ int disk_tool_t::raw_resize()
|
|||
r = resize_parse_params();
|
||||
if (r != 0)
|
||||
return r;
|
||||
// Check parameters and fill allocator
|
||||
// Fill allocator
|
||||
fprintf(stderr, "Reading metadata\n");
|
||||
data_alloc = new allocator((new_data_len < dsk.data_len ? dsk.data_len : new_data_len) / dsk.data_block_size);
|
||||
data_alloc = new allocator_t((new_data_len < dsk.data_len ? dsk.data_len : new_data_len) / dsk.data_block_size);
|
||||
r = process_meta(
|
||||
[this](blockstore_meta_header_v2_t *hdr)
|
||||
{
|
||||
|
|
|
@ -79,28 +79,59 @@ int disk_tool_t::upgrade_simple_unit(std::string unit)
|
|||
{
|
||||
// Resize data
|
||||
uint64_t blk = stoull_full(options["block_size"]);
|
||||
blk = blk ? blk : 128*1024;
|
||||
blk = blk ? blk : (1 << DEFAULT_DATA_BLOCK_ORDER);
|
||||
std::map<std::string, uint64_t> resize;
|
||||
if (d_o < 4096 || m_is_d && m_o < 4096 && m_o < d_o || j_is_d && j_o < 4096 && j_o < d_o)
|
||||
{
|
||||
resize["new_data_offset"] = d_o+blk;
|
||||
d_o += blk;
|
||||
if (m_is_d && m_o < d_o)
|
||||
resize["new_meta_offset"] = m_o+blk;
|
||||
m_o += blk;
|
||||
if (j_is_d && j_o < d_o)
|
||||
resize["new_journal_offset"] = j_o+blk;
|
||||
j_o += blk;
|
||||
}
|
||||
if (!m_is_d && m_o < 4096)
|
||||
{
|
||||
resize["new_meta_offset"] = m_o+4096;
|
||||
m_o += 4096;
|
||||
if (j_is_m && m_o < j_o)
|
||||
resize["new_journal_offset"] = j_o+4096;
|
||||
j_o += 4096;
|
||||
}
|
||||
if (!j_is_d && !j_is_m && j_o < 4096)
|
||||
resize["new_journal_offset"] = j_o+4096;
|
||||
j_o += 4096;
|
||||
if (options["meta_format"] == "" || options["meta_format"] == "1")
|
||||
{
|
||||
blockstore_disk_t dsk;
|
||||
options["meta_format"] = std::to_string(BLOCKSTORE_META_FORMAT_V2);
|
||||
try
|
||||
{
|
||||
dsk.parse_config(options);
|
||||
dsk.open_data();
|
||||
dsk.open_meta();
|
||||
dsk.open_journal();
|
||||
dsk.calc_lengths(true);
|
||||
dsk.close_all();
|
||||
}
|
||||
catch (std::exception & e)
|
||||
{
|
||||
dsk.close_all();
|
||||
fprintf(stderr, "Error: %s\n", e.what());
|
||||
return 1;
|
||||
}
|
||||
options.erase("meta_format");
|
||||
if (m_is_d && m_o < d_o && d_o-m_o < dsk.meta_len)
|
||||
d_o += ((dsk.meta_len - (d_o-m_o) + blk-1) / blk) * blk;
|
||||
}
|
||||
resize["new_data_offset"] = d_o;
|
||||
resize["new_meta_offset"] = m_o;
|
||||
resize["new_journal_offset"] = j_o;
|
||||
disk_tool_t resizer;
|
||||
resizer.options = options;
|
||||
for (auto & kv: resize)
|
||||
resizer.options[kv.first] = std::to_string(kv.second);
|
||||
std::string cmd;
|
||||
for (auto & kv: resizer.options)
|
||||
if (kv.second != "")
|
||||
cmd += " "+kv.first+" = "+kv.second+"\n";
|
||||
fprintf(stderr, "Running resize:\n%s", cmd.c_str());
|
||||
if (resizer.raw_resize() != 0)
|
||||
{
|
||||
// FIXME: Resize with backup or journal
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
|
||||
void alloc_all(int size)
|
||||
{
|
||||
allocator *a = new allocator(size);
|
||||
allocator_t *a = new allocator_t(size);
|
||||
for (int i = 0; i < size; i++)
|
||||
{
|
||||
uint64_t x = a->find_free();
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
#include <stdlib.h>
|
||||
#include <malloc.h>
|
||||
|
||||
allocator::allocator(uint64_t blocks)
|
||||
allocator_t::allocator_t(uint64_t blocks)
|
||||
{
|
||||
if (blocks >= 0x80000000 || blocks <= 1)
|
||||
{
|
||||
|
@ -32,12 +32,12 @@ allocator::allocator(uint64_t blocks)
|
|||
}
|
||||
}
|
||||
|
||||
allocator::~allocator()
|
||||
allocator_t::~allocator_t()
|
||||
{
|
||||
delete[] mask;
|
||||
}
|
||||
|
||||
bool allocator::get(uint64_t addr)
|
||||
bool allocator_t::get(uint64_t addr)
|
||||
{
|
||||
if (addr >= size)
|
||||
{
|
||||
|
@ -52,7 +52,7 @@ bool allocator::get(uint64_t addr)
|
|||
return ((mask[offset + addr/64] >> (addr % 64)) & 1);
|
||||
}
|
||||
|
||||
void allocator::set(uint64_t addr, bool value)
|
||||
void allocator_t::set(uint64_t addr, bool value)
|
||||
{
|
||||
if (addr >= size)
|
||||
{
|
||||
|
@ -109,7 +109,7 @@ void allocator::set(uint64_t addr, bool value)
|
|||
}
|
||||
}
|
||||
|
||||
uint64_t allocator::find_free()
|
||||
uint64_t allocator_t::find_free()
|
||||
{
|
||||
uint64_t p2 = 1, offset = 0, addr = 0, f, i;
|
||||
while (p2 < size)
|
||||
|
@ -138,7 +138,7 @@ uint64_t allocator::find_free()
|
|||
return addr;
|
||||
}
|
||||
|
||||
uint64_t allocator::get_free_count()
|
||||
uint64_t allocator_t::get_free_count()
|
||||
{
|
||||
return free;
|
||||
}
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
#include <stdint.h>
|
||||
|
||||
// Hierarchical bitmap allocator
|
||||
class allocator
|
||||
class allocator_t
|
||||
{
|
||||
uint64_t total;
|
||||
uint64_t size;
|
||||
|
@ -14,8 +14,8 @@ class allocator
|
|||
uint64_t last_one_mask;
|
||||
uint64_t *mask;
|
||||
public:
|
||||
allocator(uint64_t blocks);
|
||||
~allocator();
|
||||
allocator_t(uint64_t blocks);
|
||||
~allocator_t();
|
||||
bool get(uint64_t addr);
|
||||
void set(uint64_t addr, bool value);
|
||||
uint64_t find_free();
|
||||
|
|
Loading…
Reference in New Issue