diff --git a/src/blockstore_disk.cpp b/src/blockstore_disk.cpp index 25da21cb..092a09d3 100644 --- a/src/blockstore_disk.cpp +++ b/src/blockstore_disk.cpp @@ -219,7 +219,7 @@ void blockstore_disk_t::open_data() data_fd = open(data_device.c_str(), O_DIRECT|O_RDWR); if (data_fd == -1) { - throw std::runtime_error("Failed to open data device"); + throw std::runtime_error("Failed to open data device "+data_device+": "+std::string(strerror(errno))); } check_size(data_fd, &data_device_size, &data_device_sect, "data device"); if (disk_alignment % data_device_sect) @@ -243,11 +243,10 @@ void blockstore_disk_t::open_meta() { if (meta_device != data_device) { - meta_offset = 0; meta_fd = open(meta_device.c_str(), O_DIRECT|O_RDWR); if (meta_fd == -1) { - throw std::runtime_error("Failed to open metadata device"); + throw std::runtime_error("Failed to open metadata device "+meta_device+": "+std::string(strerror(errno))); } check_size(meta_fd, &meta_device_size, &meta_device_sect, "metadata device"); if (meta_offset >= meta_device_size) @@ -285,7 +284,7 @@ void blockstore_disk_t::open_journal() journal_fd = open(journal_device.c_str(), O_DIRECT|O_RDWR); if (journal_fd == -1) { - throw std::runtime_error("Failed to open journal device"); + throw std::runtime_error("Failed to open journal device "+journal_device+": "+std::string(strerror(errno))); } check_size(journal_fd, &journal_device_size, &journal_device_sect, "journal device"); if (!disable_flock && flock(journal_fd, LOCK_EX|LOCK_NB) != 0) diff --git a/src/disk_tool.cpp b/src/disk_tool.cpp index ba9e678f..886d3f73 100644 --- a/src/disk_tool.cpp +++ b/src/disk_tool.cpp @@ -12,13 +12,13 @@ static const char *help_text = "\n" "vitastor-disk prepare [OPTIONS] [devices...]\n" " Initialize disk(s) for Vitastor OSD(s).\n" - " There are two forms of this command. In the first form, you pass which\n" + " There are two modes of this command. In the first mode, you pass which\n" " must be raw disks (not partitions). They are partitioned automatically and OSDs\n" " are initialized on all of them.\n" - " In the second form, you omit and pass --data_device, --journal_device\n" - " and/or --meta_device which must be already existing partitions. In this case\n" - " a single OSD is created.\n" - " Requires `vitastor-cli`, `blkid`, `sfdisk` and `partprobe` (from parted) utilities.\n" + " In the second mode, you omit and pass --data_device, --journal_device\n" + " and/or --meta_device which must be already existing partitions identified by their\n" + " GPT partition UUIDs. In this case a single OSD is created.\n" + " Requires `vitastor-cli`, `wipefs`, `sfdisk` and `partprobe` (from parted) utilities.\n" " OPTIONS may include:\n" " --hybrid\n" " Prepare hybrid (HDD+SSD) OSDs using provided devices. SSDs will be used for\n" @@ -30,6 +30,7 @@ static const char *help_text = " --data_device Create a single OSD using partition for data\n" " --meta_device Create a single OSD using partition for metadata\n" " --journal_device Create a single OSD using partition for journal\n" + " --force Bypass checks on data/meta/journal partitions\n" " --journal_size 1G/32M Set journal size\n" " --object_size 1M/128k Set blockstore object size\n" " --disable_ssd_cache 1 Disable cache and fsyncs for SSD journal and metadata\n" @@ -43,8 +44,8 @@ static const char *help_text = " metadata size to ease possible future extension. The default is to allocate\n" " 2 times more space and at least 1G. Use this option to override.\n" " --max_other 10%\n" - " Use disks for OSD data even if they already have non-Vitastor partitions,\n" - " but only if these take up no more than this percent of disk space.\n" + " In the automatic mode, use disks for OSD data even if they already have non-Vitastor\n" + " partitions, but only if these take up no more than this percent of disk space.\n" "\n" "vitastor-disk upgrade-simple \n" " Upgrade an OSD created by old (0.7.1 and older) make-osd.sh or make-osd-hybrid.js scripts.\n" @@ -149,7 +150,7 @@ int main(int argc, char *argv[]) { self.options["hybrid"] = "1"; } - else if (!strcmp(argv[i], "--help")) + else if (!strcmp(argv[i], "--help") || !strcmp(argv[i], "-h")) { cmd.insert(cmd.begin(), (char*)"help"); } diff --git a/src/disk_tool_prepare.cpp b/src/disk_tool_prepare.cpp index 69ed22fe..278df3bd 100644 --- a/src/disk_tool_prepare.cpp +++ b/src/disk_tool_prepare.cpp @@ -30,6 +30,12 @@ int disk_tool_t::prepare_one(std::map options, int is_ const auto & dev = all_devs[i]; if (dev == "") continue; + if (dev.substr(0, 22) != "/dev/disk/by-partuuid/") + { + // Partitions should be identified by GPT partition UUID + fprintf(stderr, "%s does not start with /dev/disk/by-partuuid/. Partitions should be identified by GPT partition UUIDs\n", dev.c_str()); + return 1; + } std::string real_dev = realpath_str(dev, false); if (real_dev == "") return 1; @@ -42,11 +48,11 @@ int disk_tool_t::prepare_one(std::map options, int is_ return 1; } if (i == 0 && is_hdd == -1) - is_hdd = read_file("/sys/block/"+parent_dev+"/queue/rotational") == "1"; + is_hdd = trim(read_file("/sys/block/"+parent_dev+"/queue/rotational")) == "1"; std::string out; - if (shell_exec({ "blkid", "-D", "-p", dev }, "", &out, NULL) == 0) + if (shell_exec({ "wipefs", dev }, "", &out, NULL) != 0 || out != "") { - fprintf(stderr, "%s contains data, not creating OSD without --force. blkid -D -p says:\n%s", dev.c_str(), out.c_str()); + fprintf(stderr, "%s contains data, not creating OSD without --force. wipefs shows:\n%s", dev.c_str(), out.c_str()); return 1; } json11::Json sb = read_osd_superblock(dev, false); @@ -143,17 +149,28 @@ int disk_tool_t::prepare_one(std::map options, int is_ } dsk.close_all(); // Write superblocks - if (!write_osd_superblock(options["data_device"], sb) || - options["meta_device"] != "" && - options["meta_device"] != options["data_device"] && - write_osd_superblock(options["meta_device"], sb) || - options["journal_device"] != "" && + bool sep_m = options["meta_device"] != "" && + options["meta_device"] != options["data_device"]; + bool sep_j = options["journal_device"] != "" && options["journal_device"] != options["data_device"] && - options["journal_device"] != options["meta_device"] && - !write_osd_superblock(options["journal_device"], sb)) + options["journal_device"] != options["meta_device"]; + if (!write_osd_superblock(options["data_device"], sb) || + sep_m && !write_osd_superblock(options["meta_device"], sb) || + sep_j && !write_osd_superblock(options["journal_device"], sb)) { return 1; } + auto desc = realpath_str(options["data_device"]); + if (sep_m) + desc += " with metadata on "+realpath_str(options["meta_device"]); + if (sep_j) + desc += (sep_m ? " and journal on " : " with journal on ") + realpath_str(options["journal_device"]); + fprintf(stderr, "Initialized OSD %lu on %s\n", osd_num, desc.c_str()); + if (shell_exec({ "systemctl", "enable", "--now", "vitastor-osd@"+std::to_string(osd_num) }, "", NULL, NULL) != 0) + { + fprintf(stderr, "Failed to enable systemd unit vitastor-osd@%lu\n", osd_num); + return 1; + } return 0; } @@ -190,7 +207,7 @@ std::vector disk_tool_t::collect_devices(const std::vector< return {}; } // Check if the device is an SSD - bool is_hdd = read_file("/sys/block/"+dev.substr(5)+"/queue/rotational") == "1"; + bool is_hdd = trim(read_file("/sys/block/"+dev.substr(5)+"/queue/rotational")) == "1"; // Check if it has a partition table json11::Json pt = read_parttable(dev); if (pt.is_bool() && !pt.bool_value()) @@ -202,8 +219,8 @@ std::vector disk_tool_t::collect_devices(const std::vector< { // No partition table std::string out; - int r = shell_exec({ "blkid", "-p", dev }, "", &out, NULL); - if (r == 0) + int r = shell_exec({ "wipefs", dev }, "", &out, NULL); + if (r != 0 || out != "") { fprintf(stderr, "%s contains data, skipping:\n %s\n", dev.c_str(), str_replace(trim(out), "\n", "\n ").c_str()); continue; @@ -244,9 +261,9 @@ json11::Json disk_tool_t::add_partitions(vitastor_dev_info_t & devinfo, std::vec { if (kv.first != "node") { - script += kv.first+"="+(kv.second.is_string() ? kv.second.string_value() : kv.second.dump()); if (n++) script += ", "; + script += kv.first+"="+(kv.second.is_string() ? kv.second.string_value() : kv.second.dump()); } } script += "\n"; @@ -510,6 +527,11 @@ int disk_tool_t::prepare(std::vector devices) fprintf(stderr, "No SSDs found\n"); return 1; } + else if (ssds.size() == devinfo.size()) + { + fprintf(stderr, "No HDDs found\n"); + return 1; + } if (options["journal_size"] == "") options["journal_size"] = DEFAULT_HYBRID_JOURNAL; } @@ -521,7 +543,7 @@ int disk_tool_t::prepare(std::vector devices) for (const auto & uuid: get_new_data_parts(dev, osd_per_disk, max_other_percent)) { options["force"] = true; - options["data_device"] = "/dev/disk/by-uuid/"+strtolower(uuid); + options["data_device"] = "/dev/disk/by-partuuid/"+strtolower(uuid); if (hybrid) { // Select/create journal and metadata partitions diff --git a/src/disk_tool_udev.cpp b/src/disk_tool_udev.cpp index caa023f3..c5d94e4e 100644 --- a/src/disk_tool_udev.cpp +++ b/src/disk_tool_udev.cpp @@ -110,11 +110,6 @@ uint32_t disk_tool_t::write_osd_superblock(std::string device, json11::Json para free(buf); return 0; } - // Lock the file - if (flock(fd, LOCK_EX|LOCK_NB) < 0) - { - fprintf(stderr, "Warning: Failed to lock %s with flock - udev autodetection may fail. Error: %s\n", device.c_str(), strerror(errno)); - } int r = write_blocking(fd, buf, buf_len); if (r < 0) { @@ -125,6 +120,7 @@ uint32_t disk_tool_t::write_osd_superblock(std::string device, json11::Json para } close(fd); free(buf); + shell_exec({ "udevadm", "trigger", "--settle", device }, "", NULL, NULL); return sb_size; } diff --git a/src/osd_cluster.cpp b/src/osd_cluster.cpp index 8ee2e608..f6716ad1 100644 --- a/src/osd_cluster.cpp +++ b/src/osd_cluster.cpp @@ -663,9 +663,9 @@ void osd_t::apply_pg_config() { printf( "[OSD %lu] My block_size and bitmap_granularity are %u/%u" - ", but pool has %u/%u. Refusing to start PGs of this pool\n", + ", but pool %u has %u/%u. Refusing to start PGs of this pool\n", this->osd_num, bs_block_size, bs_bitmap_granularity, - pool_item.second.data_block_size, pool_item.second.bitmap_granularity + pool_id, pool_item.second.data_block_size, pool_item.second.bitmap_granularity ); } warned_block_size = true;