Print journal flusher diagnostics on slow ops

nbd-vmsplice
Vitaliy Filippov 2021-07-17 16:13:41 +03:00
parent 0e0509e3da
commit e74af9745e
9 changed files with 106 additions and 17 deletions

View File

@ -48,6 +48,11 @@ std::map<uint64_t, uint64_t> & blockstore_t::get_inode_space_stats()
return impl->inode_space_stats;
}
void blockstore_t::dump_diagnostics()
{
return impl->dump_diagnostics();
}
uint32_t blockstore_t::get_block_size()
{
return impl->get_block_size();

View File

@ -186,6 +186,9 @@ public:
// Get per-inode space usage statistics
std::map<uint64_t, uint64_t> & get_inode_space_stats();
// Print diagnostics to stdout
void dump_diagnostics();
// FIXME rename to object_size
uint32_t get_block_size();
uint64_t get_block_count();

View File

@ -182,6 +182,67 @@ void journal_flusher_t::release_trim()
trim_wanted--;
}
void journal_flusher_t::dump_diagnostics()
{
obj_ver_id flushable = { 0 };
// Try to find out if there is a flushable object for information
for (object_id cur_oid: flush_queue)
{
obj_ver_id cur = { .oid = cur_oid, .version = flush_versions[cur_oid] };
auto dirty_end = bs->dirty_db.find(cur);
if (dirty_end == bs->dirty_db.end())
{
// Already flushed
continue;
}
auto repeat_it = sync_to_repeat.find(cur.oid);
if (repeat_it != sync_to_repeat.end())
{
// Someone is already flushing it
continue;
}
if (dirty_end->second.journal_sector >= bs->journal.dirty_start &&
(bs->journal.dirty_start >= bs->journal.used_start ||
dirty_end->second.journal_sector < bs->journal.used_start))
{
// Object is more recent than possible to flush
bool found = try_find_older(dirty_end, cur);
if (!found)
continue;
}
flushable = cur;
break;
}
printf(
"Flusher: queued=%ld first=%lx:%lx trim_wanted=%d dequeuing=%d trimming=%d cur=%d target=%d active=%d syncing=%d\n",
flush_queue.size(), flushable.oid.inode, flushable.oid.stripe,
trim_wanted, dequeuing, trimming, cur_flusher_count, target_flusher_count,
active_flushers, syncing_flushers
);
}
bool journal_flusher_t::try_find_older(std::map<obj_ver_id, dirty_entry>::iterator & dirty_end, obj_ver_id & cur)
{
bool found = false;
while (dirty_end != bs->dirty_db.begin())
{
dirty_end--;
if (dirty_end->first.oid != cur.oid)
{
break;
}
if (!(dirty_end->second.journal_sector >= bs->journal.dirty_start &&
(bs->journal.dirty_start >= bs->journal.used_start ||
dirty_end->second.journal_sector < bs->journal.used_start)))
{
found = true;
cur.version = dirty_end->first.version;
break;
}
}
return found;
}
#define await_sqe(label) \
resume_##label:\
sqe = bs->get_sqe();\
@ -286,23 +347,7 @@ stop_flusher:
// And it may even block writes if we don't flush the older version
// (if it's in the beginning of the journal)...
// So first try to find an older version of the same object to flush.
bool found = false;
while (dirty_end != bs->dirty_db.begin())
{
dirty_end--;
if (dirty_end->first.oid != cur.oid)
{
break;
}
if (!(dirty_end->second.journal_sector >= bs->journal.dirty_start &&
(bs->journal.dirty_start >= bs->journal.used_start ||
dirty_end->second.journal_sector < bs->journal.used_start)))
{
found = true;
cur.version = dirty_end->first.version;
break;
}
}
bool found = flusher->try_find_older(dirty_end, cur);
if (!found)
{
// Try other objects

View File

@ -97,6 +97,9 @@ class journal_flusher_t
std::map<uint64_t, meta_sector_t> meta_sectors;
std::deque<object_id> flush_queue;
std::map<object_id, uint64_t> flush_versions;
bool try_find_older(std::map<obj_ver_id, dirty_entry>::iterator & dirty_end, obj_ver_id & cur);
public:
journal_flusher_t(blockstore_impl_t *bs);
~journal_flusher_t();
@ -108,4 +111,5 @@ public:
void enqueue_flush(obj_ver_id oid);
void unshift_flush(obj_ver_id oid, bool force);
void remove_flush(object_id oid);
void dump_diagnostics();
};

View File

@ -595,3 +595,9 @@ void blockstore_impl_t::process_list(blockstore_op_t *op)
op->buf = stable;
FINISH_OP(op);
}
void blockstore_impl_t::dump_diagnostics()
{
journal.dump_diagnostics();
flusher->dump_diagnostics();
}

View File

@ -361,6 +361,9 @@ public:
// Space usage statistics
std::map<uint64_t, uint64_t> inode_space_stats;
// Print diagnostics to stdout
void dump_diagnostics();
inline uint32_t get_block_size() { return block_size; }
inline uint64_t get_block_count() { return block_count; }
inline uint64_t get_free_block_count() { return data_alloc->get_free_count(); }

View File

@ -218,3 +218,19 @@ uint64_t journal_t::get_trim_pos()
// Can't trim journal
return used_start;
}
void journal_t::dump_diagnostics()
{
auto journal_used_it = used_sectors.lower_bound(used_start);
if (journal_used_it == used_sectors.end())
{
// Journal is cleared to its end, restart from the beginning
journal_used_it = used_sectors.begin();
}
printf(
"Journal: used_start=%08lx next_free=%08lx dirty_start=%08lx trim_to=%08lx trim_to_refs=%ld\n",
used_start, next_free, dirty_start,
journal_used_it == used_sectors.end() ? 0 : journal_used_it->first,
journal_used_it == used_sectors.end() ? 0 : journal_used_it->second
);
}

View File

@ -180,6 +180,7 @@ struct journal_t
~journal_t();
bool trim();
uint64_t get_trim_pos();
void dump_diagnostics();
inline bool entry_fits(int size)
{
return !(block_size - in_sector_pos < size ||

View File

@ -367,6 +367,7 @@ void osd_t::print_stats()
void osd_t::print_slow()
{
bool has_slow = false;
char alloc[1024];
timespec now;
clock_gettime(CLOCK_REALTIME, &now);
@ -448,7 +449,12 @@ void osd_t::print_slow()
}
#undef bufprintf
printf("%s\n", alloc);
has_slow = true;
}
}
}
if (has_slow)
{
bs->dump_diagnostics();
}
}