forked from vitalif/vitastor
Implement inode removal tool. Removes multiple objects from multiple OSDs in parallel
parent
3d05aa9362
commit
15dba96375
7
Makefile
7
Makefile
|
@ -2,7 +2,7 @@ BLOCKSTORE_OBJS := allocator.o blockstore.o blockstore_impl.o blockstore_init.o
|
||||||
blockstore_write.o blockstore_sync.o blockstore_stable.o blockstore_rollback.o blockstore_flush.o crc32c.o ringloop.o
|
blockstore_write.o blockstore_sync.o blockstore_stable.o blockstore_rollback.o blockstore_flush.o crc32c.o ringloop.o
|
||||||
# -fsanitize=address
|
# -fsanitize=address
|
||||||
CXXFLAGS := -g -O3 -Wall -Wno-sign-compare -Wno-comment -Wno-parentheses -Wno-pointer-arith -fPIC -fdiagnostics-color=always
|
CXXFLAGS := -g -O3 -Wall -Wno-sign-compare -Wno-comment -Wno-parentheses -Wno-pointer-arith -fPIC -fdiagnostics-color=always
|
||||||
all: libfio_blockstore.so osd libfio_sec_osd.so libfio_cluster.so stub_osd stub_uring_osd stub_bench osd_test dump_journal qemu_driver.so nbd_proxy
|
all: libfio_blockstore.so osd libfio_sec_osd.so libfio_cluster.so stub_osd stub_uring_osd stub_bench osd_test dump_journal qemu_driver.so nbd_proxy rm_inode
|
||||||
clean:
|
clean:
|
||||||
rm -f *.o
|
rm -f *.o
|
||||||
|
|
||||||
|
@ -44,6 +44,9 @@ libfio_cluster.so: fio_cluster.o $(FIO_CLUSTER_OBJS)
|
||||||
nbd_proxy: nbd_proxy.o $(FIO_CLUSTER_OBJS)
|
nbd_proxy: nbd_proxy.o $(FIO_CLUSTER_OBJS)
|
||||||
g++ $(CXXFLAGS) -ltcmalloc_minimal -o $@ $< $(FIO_CLUSTER_OBJS) -luring
|
g++ $(CXXFLAGS) -ltcmalloc_minimal -o $@ $< $(FIO_CLUSTER_OBJS) -luring
|
||||||
|
|
||||||
|
rm_inode: rm_inode.o $(FIO_CLUSTER_OBJS)
|
||||||
|
g++ $(CXXFLAGS) -ltcmalloc_minimal -o $@ $< $(FIO_CLUSTER_OBJS) -luring
|
||||||
|
|
||||||
qemu_driver.o: qemu_driver.c qemu_proxy.h
|
qemu_driver.o: qemu_driver.c qemu_proxy.h
|
||||||
gcc -I qemu/b/qemu `pkg-config glib-2.0 --cflags` \
|
gcc -I qemu/b/qemu `pkg-config glib-2.0 --cflags` \
|
||||||
-I qemu/include $(CXXFLAGS) -c -o $@ $<
|
-I qemu/include $(CXXFLAGS) -c -o $@ $<
|
||||||
|
@ -149,6 +152,8 @@ qemu_proxy.o: qemu_proxy.cpp cluster_client.h etcd_state_client.h http_client.h
|
||||||
g++ $(CXXFLAGS) -c -o $@ $<
|
g++ $(CXXFLAGS) -c -o $@ $<
|
||||||
ringloop.o: ringloop.cpp ringloop.h
|
ringloop.o: ringloop.cpp ringloop.h
|
||||||
g++ $(CXXFLAGS) -c -o $@ $<
|
g++ $(CXXFLAGS) -c -o $@ $<
|
||||||
|
rm_inode.o: rm_inode.cpp cluster_client.h etcd_state_client.h http_client.h json11/json11.hpp malloc_or_die.h messenger.h object_id.h osd_id.h osd_ops.h ringloop.h timerfd_manager.h
|
||||||
|
g++ $(CXXFLAGS) -c -o $@ $<
|
||||||
rw_blocking.o: rw_blocking.cpp rw_blocking.h
|
rw_blocking.o: rw_blocking.cpp rw_blocking.h
|
||||||
g++ $(CXXFLAGS) -c -o $@ $<
|
g++ $(CXXFLAGS) -c -o $@ $<
|
||||||
stub_bench.o: stub_bench.cpp object_id.h osd_id.h osd_ops.h rw_blocking.h
|
stub_bench.o: stub_bench.cpp object_id.h osd_id.h osd_ops.h rw_blocking.h
|
||||||
|
|
19
README.md
19
README.md
|
@ -31,12 +31,12 @@ breaking changes in the future. However, the following is implemented:
|
||||||
- QEMU driver (built out-of-tree)
|
- QEMU driver (built out-of-tree)
|
||||||
- Loadable fio engine for benchmarks (also built out-of-tree)
|
- Loadable fio engine for benchmarks (also built out-of-tree)
|
||||||
- NBD proxy for kernel mounts
|
- NBD proxy for kernel mounts
|
||||||
|
- Inode removal tool (./rm_inode)
|
||||||
|
|
||||||
## Roadmap
|
## Roadmap
|
||||||
|
|
||||||
- Packaging for Debian and, probably, CentOS too
|
- Packaging for Debian and, probably, CentOS too
|
||||||
- OSD creation tool (OSDs currently have to be created by hand)
|
- OSD creation tool (OSDs currently have to be created by hand)
|
||||||
- Inode deletion tool (currently you can't delete anything :))
|
|
||||||
- Other administrative tools
|
- Other administrative tools
|
||||||
- Per-inode I/O and space usage statistics
|
- Per-inode I/O and space usage statistics
|
||||||
- jerasure EC support with any number of data and parity drives in a group
|
- jerasure EC support with any number of data and parity drives in a group
|
||||||
|
@ -333,19 +333,22 @@ and calculate disk offsets almost by hand. This will be fixed in near future.
|
||||||
-device virtio-blk-pci,scsi=off,bus=pci.0,addr=0x5,drive=drive-virtio-disk0,id=virtio-disk0,bootindex=1,write-cache=off,physical_block_size=4096,logical_block_size=512
|
-device virtio-blk-pci,scsi=off,bus=pci.0,addr=0x5,drive=drive-virtio-disk0,id=virtio-disk0,bootindex=1,write-cache=off,physical_block_size=4096,logical_block_size=512
|
||||||
-vnc 0.0.0.0:0
|
-vnc 0.0.0.0:0
|
||||||
```
|
```
|
||||||
|
- Remove inode with (for example):
|
||||||
|
```
|
||||||
|
./rm_inode --etcd_address 10.115.0.10:2379/v3 --pool 1 --inode 1 --parallel_osds 16 --iodepth 32
|
||||||
|
```
|
||||||
|
|
||||||
## Known Problems
|
## Known Problems
|
||||||
|
|
||||||
- OSDs may currently crash with "can't get SQE, will fall out of sync with EPOLLET"
|
- OSDs may currently crash with "can't get SQE, will fall out of sync with EPOLLET"
|
||||||
if you try to load them with very long iodepths because io_uring queue (ring) is limited
|
if you try to load them with very long iodepths because io_uring queue (ring) is limited
|
||||||
and OSDs don't check if it fills up.
|
and OSDs don't check if it fills up.
|
||||||
- Object deletion requests may currently lead to unfound objects on crashes because
|
- Object deletion requests may currently lead to 'incomplete' objects if your OSDs crash during
|
||||||
proper handling of deletions in a cluster requires a "three-phase cleanup process"
|
deletion because proper handling of object cleanup in a cluster should be "three-phase"
|
||||||
and it's currently not implemented. In fact, even though deletion requests are
|
and it's currently not implemented. Inode removal tool currently can't handle unclean
|
||||||
implemented, there's no user tool to delete anything from the cluster yet :).
|
objects, so incomplete objects become undeletable. This will be fixed in near future
|
||||||
Of course I'll create such tool, but its first implementation will be vulnerable to this issue.
|
by allowing the inode removal tool to delete unclean objects. With this problem fixed
|
||||||
It's not a big deal though, because you'll be able to just repeat the deletion request
|
you'll be able just to repeat the removal again.
|
||||||
in this case.
|
|
||||||
|
|
||||||
## Implementation Principles
|
## Implementation Principles
|
||||||
|
|
||||||
|
|
|
@ -203,6 +203,12 @@ void cluster_client_t::on_load_pgs_hook(bool success)
|
||||||
{
|
{
|
||||||
pg_counts[pool_item.first] = pool_item.second.real_pg_count;
|
pg_counts[pool_item.first] = pool_item.second.real_pg_count;
|
||||||
}
|
}
|
||||||
|
pgs_loaded = true;
|
||||||
|
for (auto fn: on_ready_hooks)
|
||||||
|
{
|
||||||
|
fn();
|
||||||
|
}
|
||||||
|
on_ready_hooks.clear();
|
||||||
for (auto op: offline_ops)
|
for (auto op: offline_ops)
|
||||||
{
|
{
|
||||||
execute(op);
|
execute(op);
|
||||||
|
@ -254,6 +260,18 @@ void cluster_client_t::on_change_osd_state_hook(uint64_t peer_osd)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void cluster_client_t::on_ready(std::function<void(void)> fn)
|
||||||
|
{
|
||||||
|
if (pgs_loaded)
|
||||||
|
{
|
||||||
|
fn();
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
on_ready_hooks.push_back(fn);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* How writes are synced when immediate_commit is false
|
* How writes are synced when immediate_commit is false
|
||||||
*
|
*
|
||||||
|
@ -284,7 +302,7 @@ void cluster_client_t::on_change_osd_state_hook(uint64_t peer_osd)
|
||||||
|
|
||||||
void cluster_client_t::execute(cluster_op_t *op)
|
void cluster_client_t::execute(cluster_op_t *op)
|
||||||
{
|
{
|
||||||
if (!bs_disk_alignment)
|
if (!pgs_loaded)
|
||||||
{
|
{
|
||||||
// We're offline
|
// We're offline
|
||||||
offline_ops.push_back(op);
|
offline_ops.push_back(op);
|
||||||
|
|
|
@ -63,8 +63,6 @@ class cluster_client_t
|
||||||
int up_wait_retry_interval = 500; // ms
|
int up_wait_retry_interval = 500; // ms
|
||||||
|
|
||||||
uint64_t op_id = 1;
|
uint64_t op_id = 1;
|
||||||
etcd_state_client_t st_cli;
|
|
||||||
osd_messenger_t msgr;
|
|
||||||
ring_consumer_t consumer;
|
ring_consumer_t consumer;
|
||||||
// operations currently in progress
|
// operations currently in progress
|
||||||
std::set<cluster_op_t*> cur_ops;
|
std::set<cluster_op_t*> cur_ops;
|
||||||
|
@ -78,10 +76,17 @@ class cluster_client_t
|
||||||
std::vector<cluster_op_t*> offline_ops;
|
std::vector<cluster_op_t*> offline_ops;
|
||||||
uint64_t queued_bytes = 0;
|
uint64_t queued_bytes = 0;
|
||||||
|
|
||||||
|
bool pgs_loaded = false;
|
||||||
|
std::vector<std::function<void(void)>> on_ready_hooks;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
etcd_state_client_t st_cli;
|
||||||
|
osd_messenger_t msgr;
|
||||||
|
|
||||||
cluster_client_t(ring_loop_t *ringloop, timerfd_manager_t *tfd, json11::Json & config);
|
cluster_client_t(ring_loop_t *ringloop, timerfd_manager_t *tfd, json11::Json & config);
|
||||||
~cluster_client_t();
|
~cluster_client_t();
|
||||||
void execute(cluster_op_t *op);
|
void execute(cluster_op_t *op);
|
||||||
|
void on_ready(std::function<void(void)> fn);
|
||||||
void stop();
|
void stop();
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
|
|
@ -107,7 +107,7 @@ public:
|
||||||
{
|
{
|
||||||
printf(
|
printf(
|
||||||
"Vitastor NBD proxy\n"
|
"Vitastor NBD proxy\n"
|
||||||
"(c) Vitaliy Filippov, 2020 (VNPL-1.0 or GNU GPL 2.0+)\n\n"
|
"(c) Vitaliy Filippov, 2020 (VNPL-1.0)\n\n"
|
||||||
"USAGE:\n"
|
"USAGE:\n"
|
||||||
" %s map --etcd_address <etcd_address> --pool <pool> --inode <inode> --size <size in bytes>\n"
|
" %s map --etcd_address <etcd_address> --pool <pool> --inode <inode> --size <size in bytes>\n"
|
||||||
" %s unmap /dev/nbd0\n"
|
" %s unmap /dev/nbd0\n"
|
||||||
|
|
|
@ -0,0 +1,325 @@
|
||||||
|
// Copyright (c) Vitaliy Filippov, 2019+
|
||||||
|
// License: VNPL-1.0 (see README.md for details)
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Inode removal tool
|
||||||
|
* May be included into a bigger "command-line management interface" in the future
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
|
||||||
|
#include "epoll_manager.h"
|
||||||
|
#include "cluster_client.h"
|
||||||
|
#include "pg_states.h"
|
||||||
|
|
||||||
|
#define RM_NO_LIST 1
|
||||||
|
#define RM_LIST_SENT 2
|
||||||
|
#define RM_REMOVING 3
|
||||||
|
#define RM_END 4
|
||||||
|
|
||||||
|
const char *exe_name = NULL;
|
||||||
|
|
||||||
|
struct rm_pg_osd_t
|
||||||
|
{
|
||||||
|
pg_num_t pg_num;
|
||||||
|
osd_num_t osd_num;
|
||||||
|
int state = 0;
|
||||||
|
obj_ver_id *obj_list = NULL;
|
||||||
|
uint64_t obj_count = 0, obj_pos = 0, obj_done = 0, obj_prev_done = 0;
|
||||||
|
int in_flight = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
class rm_inode_t
|
||||||
|
{
|
||||||
|
protected:
|
||||||
|
uint64_t inode = 0, pool_id = 0;
|
||||||
|
uint64_t iodepth = 0, parallel_osds = 0;
|
||||||
|
|
||||||
|
ring_loop_t *ringloop = NULL;
|
||||||
|
epoll_manager_t *epmgr = NULL;
|
||||||
|
cluster_client_t *cli = NULL;
|
||||||
|
ring_consumer_t consumer;
|
||||||
|
|
||||||
|
std::vector<rm_pg_osd_t*> lists;
|
||||||
|
uint64_t total_count = 0, total_done = 0, total_prev_pct = 0;
|
||||||
|
uint64_t pgs_to_list = 0;
|
||||||
|
bool started = false;
|
||||||
|
bool progress = true;
|
||||||
|
int log_level = 0;
|
||||||
|
|
||||||
|
public:
|
||||||
|
static json11::Json::object parse_args(int narg, const char *args[])
|
||||||
|
{
|
||||||
|
json11::Json::object cfg;
|
||||||
|
cfg["progress"] = "1";
|
||||||
|
for (int i = 1; i < narg; i++)
|
||||||
|
{
|
||||||
|
if (!strcmp(args[i], "-h") || !strcmp(args[i], "--help"))
|
||||||
|
{
|
||||||
|
help();
|
||||||
|
}
|
||||||
|
else if (args[i][0] == '-' && args[i][1] == '-')
|
||||||
|
{
|
||||||
|
const char *opt = args[i]+2;
|
||||||
|
cfg[opt] = !strcmp(opt, "json") || i == narg-1 ? "1" : args[++i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return cfg;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void help()
|
||||||
|
{
|
||||||
|
printf(
|
||||||
|
"Vitastor inode removal tool\n"
|
||||||
|
"(c) Vitaliy Filippov, 2020 (VNPL-1.0)\n\n"
|
||||||
|
"USAGE:\n"
|
||||||
|
" %s --etcd_address <etcd_address> --pool <pool> --inode <inode>\n",
|
||||||
|
exe_name
|
||||||
|
);
|
||||||
|
exit(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
void run(json11::Json cfg)
|
||||||
|
{
|
||||||
|
if (cfg["etcd_address"].string_value() == "")
|
||||||
|
{
|
||||||
|
fprintf(stderr, "etcd_address is missing\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
inode = cfg["inode"].uint64_value();
|
||||||
|
pool_id = cfg["pool"].uint64_value();
|
||||||
|
if (pool_id)
|
||||||
|
inode = (inode & ((1l << (64-POOL_ID_BITS)) - 1)) | (pool_id << (64-POOL_ID_BITS));
|
||||||
|
pool_id = INODE_POOL(inode);
|
||||||
|
if (!pool_id)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "pool is missing");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
iodepth = cfg["iodepth"].uint64_value();
|
||||||
|
if (!iodepth)
|
||||||
|
iodepth = 32;
|
||||||
|
parallel_osds = cfg["parallel_osds"].uint64_value();
|
||||||
|
if (!parallel_osds)
|
||||||
|
parallel_osds = 4;
|
||||||
|
log_level = cfg["log_level"].uint64_value();
|
||||||
|
progress = cfg["progress"].uint64_value() ? true : false;
|
||||||
|
// Create client
|
||||||
|
ringloop = new ring_loop_t(512);
|
||||||
|
epmgr = new epoll_manager_t(ringloop);
|
||||||
|
cli = new cluster_client_t(ringloop, epmgr->tfd, cfg);
|
||||||
|
cli->on_ready([this]() { start_delete(); });
|
||||||
|
// Initialize job
|
||||||
|
consumer.loop = [this]()
|
||||||
|
{
|
||||||
|
if (started)
|
||||||
|
continue_delete();
|
||||||
|
ringloop->submit();
|
||||||
|
};
|
||||||
|
ringloop->register_consumer(&consumer);
|
||||||
|
// Loop until it completes
|
||||||
|
while (1)
|
||||||
|
{
|
||||||
|
ringloop->loop();
|
||||||
|
ringloop->wait();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void start_delete()
|
||||||
|
{
|
||||||
|
if (cli->st_cli.pool_config.find(pool_id) == cli->st_cli.pool_config.end())
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Pool %lu does not exist\n", pool_id);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
auto pool_cfg = cli->st_cli.pool_config[pool_id];
|
||||||
|
for (auto & pg_item: pool_cfg.pg_config)
|
||||||
|
{
|
||||||
|
auto & pg = pg_item.second;
|
||||||
|
if (pg.pause || !pg.cur_primary || pg.cur_state != PG_ACTIVE)
|
||||||
|
{
|
||||||
|
// FIXME Support deletion in non-clean active PGs by introducing a "primary-list" command
|
||||||
|
fprintf(stderr, "PG %u is not active+clean, skipping\n", pg_item.first);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
rm_pg_osd_t *r = new rm_pg_osd_t();
|
||||||
|
r->pg_num = pg_item.first;
|
||||||
|
r->osd_num = pg.cur_primary;
|
||||||
|
r->state = RM_NO_LIST;
|
||||||
|
lists.push_back(r);
|
||||||
|
}
|
||||||
|
std::sort(lists.begin(), lists.end(), [](rm_pg_osd_t *a, rm_pg_osd_t *b)
|
||||||
|
{
|
||||||
|
return a->osd_num < b->osd_num ? true : false;
|
||||||
|
});
|
||||||
|
pgs_to_list = lists.size();
|
||||||
|
started = true;
|
||||||
|
continue_delete();
|
||||||
|
}
|
||||||
|
|
||||||
|
void send_list(rm_pg_osd_t *cur_list)
|
||||||
|
{
|
||||||
|
if (cli->msgr.osd_peer_fds.find(cur_list->osd_num) ==
|
||||||
|
cli->msgr.osd_peer_fds.end())
|
||||||
|
{
|
||||||
|
// Initiate connection
|
||||||
|
cli->msgr.connect_peer(cur_list->osd_num, cli->st_cli.peer_states[cur_list->osd_num]);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
osd_op_t *op = new osd_op_t();
|
||||||
|
op->op_type = OSD_OP_OUT;
|
||||||
|
op->peer_fd = cli->msgr.osd_peer_fds[cur_list->osd_num];
|
||||||
|
op->req = {
|
||||||
|
.sec_list = {
|
||||||
|
.header = {
|
||||||
|
.magic = SECONDARY_OSD_OP_MAGIC,
|
||||||
|
.id = cli->msgr.next_subop_id++,
|
||||||
|
.opcode = OSD_OP_SEC_LIST,
|
||||||
|
},
|
||||||
|
.list_pg = cur_list->pg_num,
|
||||||
|
.pg_count = (pg_num_t)cli->st_cli.pool_config[pool_id].real_pg_count,
|
||||||
|
.pg_stripe_size = cli->st_cli.pool_config[pool_id].pg_stripe_size,
|
||||||
|
.min_inode = inode,
|
||||||
|
.max_inode = inode,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
op->callback = [this, cur_list](osd_op_t *op)
|
||||||
|
{
|
||||||
|
pgs_to_list--;
|
||||||
|
if (op->reply.hdr.retval < 0)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Failed to get object list from OSD %lu (retval=%ld), skipping the PG\n",
|
||||||
|
cur_list->osd_num, op->reply.hdr.retval);
|
||||||
|
cli->msgr.stop_client(cur_list->osd_num);
|
||||||
|
delete op;
|
||||||
|
cur_list->state = RM_END;
|
||||||
|
continue_delete();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (log_level > 0)
|
||||||
|
{
|
||||||
|
printf(
|
||||||
|
"[PG %u] Got inode object list from OSD %lu: %ld object versions\n",
|
||||||
|
cur_list->pg_num, cur_list->osd_num, op->reply.hdr.retval
|
||||||
|
);
|
||||||
|
}
|
||||||
|
cur_list->obj_list = (obj_ver_id*)op->buf;
|
||||||
|
cur_list->obj_count = (uint64_t)op->reply.hdr.retval;
|
||||||
|
cur_list->obj_done = cur_list->obj_prev_done = cur_list->obj_pos = 0;
|
||||||
|
total_count += cur_list->obj_count;
|
||||||
|
total_prev_pct = 0;
|
||||||
|
// set op->buf to NULL so it doesn't get freed
|
||||||
|
op->buf = NULL;
|
||||||
|
delete op;
|
||||||
|
cur_list->state = RM_REMOVING;
|
||||||
|
continue_delete();
|
||||||
|
};
|
||||||
|
cur_list->state = RM_LIST_SENT;
|
||||||
|
cli->msgr.outbox_push(op);
|
||||||
|
}
|
||||||
|
|
||||||
|
void send_ops(rm_pg_osd_t *cur_list)
|
||||||
|
{
|
||||||
|
if (cli->msgr.osd_peer_fds.find(cur_list->osd_num) ==
|
||||||
|
cli->msgr.osd_peer_fds.end())
|
||||||
|
{
|
||||||
|
// Initiate connection
|
||||||
|
cli->msgr.connect_peer(cur_list->osd_num, cli->st_cli.peer_states[cur_list->osd_num]);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
while (cur_list->in_flight < iodepth && cur_list->obj_pos < cur_list->obj_count)
|
||||||
|
{
|
||||||
|
osd_op_t *op = new osd_op_t();
|
||||||
|
op->op_type = OSD_OP_OUT;
|
||||||
|
op->peer_fd = cli->msgr.osd_peer_fds[cur_list->osd_num];
|
||||||
|
op->req = {
|
||||||
|
.rw = {
|
||||||
|
.header = {
|
||||||
|
.magic = SECONDARY_OSD_OP_MAGIC,
|
||||||
|
.id = cli->msgr.next_subop_id++,
|
||||||
|
.opcode = OSD_OP_DELETE,
|
||||||
|
},
|
||||||
|
.inode = cur_list->obj_list[cur_list->obj_pos].oid.inode,
|
||||||
|
.offset = (cur_list->obj_list[cur_list->obj_pos].oid.stripe & ~STRIPE_MASK),
|
||||||
|
.len = 0,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
op->callback = [this, cur_list](osd_op_t *op)
|
||||||
|
{
|
||||||
|
cur_list->in_flight--;
|
||||||
|
if (op->reply.hdr.retval < 0)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Failed to remove object from PG %u (OSD %lu) (retval=%ld)\n",
|
||||||
|
cur_list->pg_num, cur_list->osd_num, op->reply.hdr.retval);
|
||||||
|
}
|
||||||
|
delete op;
|
||||||
|
cur_list->obj_done++;
|
||||||
|
total_done++;
|
||||||
|
continue_delete();
|
||||||
|
};
|
||||||
|
cli->msgr.outbox_push(op);
|
||||||
|
cur_list->obj_pos++;
|
||||||
|
cur_list->in_flight++;
|
||||||
|
}
|
||||||
|
if (!cur_list->in_flight && cur_list->obj_pos >= cur_list->obj_count)
|
||||||
|
{
|
||||||
|
free(cur_list->obj_list);
|
||||||
|
cur_list->obj_list = NULL;
|
||||||
|
cur_list->obj_count = 0;
|
||||||
|
cur_list->obj_done = cur_list->obj_prev_done = cur_list->obj_pos = 0;
|
||||||
|
cur_list->state = RM_END;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void continue_delete()
|
||||||
|
{
|
||||||
|
int par_osd = 0;
|
||||||
|
osd_num_t max_seen_osd = 0;
|
||||||
|
for (int i = 0; i < lists.size(); i++)
|
||||||
|
{
|
||||||
|
if (lists[i]->state == RM_END)
|
||||||
|
{
|
||||||
|
delete lists[i];
|
||||||
|
lists.erase(lists.begin()+i, lists.begin()+i+1);
|
||||||
|
i--;
|
||||||
|
}
|
||||||
|
else if (lists[i]->osd_num > max_seen_osd)
|
||||||
|
{
|
||||||
|
if (lists[i]->state == RM_NO_LIST)
|
||||||
|
{
|
||||||
|
send_list(lists[i]);
|
||||||
|
}
|
||||||
|
else if (lists[i]->state == RM_REMOVING)
|
||||||
|
{
|
||||||
|
send_ops(lists[i]);
|
||||||
|
}
|
||||||
|
par_osd++;
|
||||||
|
max_seen_osd = lists[i]->osd_num;
|
||||||
|
if (par_osd >= parallel_osds)
|
||||||
|
{
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (progress && total_count > 0 && total_done*1000/total_count != total_prev_pct)
|
||||||
|
{
|
||||||
|
printf("\rRemoved %lu/%lu objects, %lu more PGs to list...", total_done, total_count, pgs_to_list);
|
||||||
|
total_prev_pct = total_done*1000/total_count;
|
||||||
|
}
|
||||||
|
if (!lists.size())
|
||||||
|
{
|
||||||
|
printf("Done, inode %lu in pool %lu removed\n", (inode & ((1l << (64-POOL_ID_BITS)) - 1)), pool_id);
|
||||||
|
exit(0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
int main(int narg, const char *args[])
|
||||||
|
{
|
||||||
|
setvbuf(stdout, NULL, _IONBF, 0);
|
||||||
|
setvbuf(stderr, NULL, _IONBF, 0);
|
||||||
|
exe_name = args[0];
|
||||||
|
rm_inode_t *p = new rm_inode_t();
|
||||||
|
p->run(rm_inode_t::parse_args(narg, args));
|
||||||
|
return 0;
|
||||||
|
}
|
Loading…
Reference in New Issue