Dead OSDs are marked "out", so only OSDs from the same hosts work to resilver

Branch: master
Author: Vitaliy Filippov, 2021-01-15 18:20:52 +03:00
Parent: a02828214d
Commit: 62fe6bf681
2 changed files with 27 additions and 21 deletions

afr.js

@@ -42,57 +42,63 @@ function failure_rate_fullmesh(n, a, f)
 /******** PGS: EACH OSD ONLY COMMUNICATES WITH <pgs> OTHER OSDs ********/
 // <n> hosts of <m> drives of <capacity> GB, each able to backfill at <speed> GB/s,
-// <k> replicas, <pgs> unique peer PGs per OSD
+// <k> replicas, <pgs> unique peer PGs per OSD (~50 for 100 PG-per-OSD in a big cluster)
 //
 // For each of n*m drives: P(drive fails in a year) * P(any of its peers fail in <l*365> next days).
 // More peers per OSD increase rebalance speed (more drives work together to resilver) if you
-// let them finish rebalance BEFORE replacing the failed drive.
+// let them finish rebalance BEFORE replacing the failed drive (degraded_replacement=false).
 // At the same time, more peers per OSD increase probability of any of them to fail!
+// osd_rm=true means that failed OSDs' data is rebalanced over all other hosts,
+// not over the same host as it's in Ceph by default (dead OSDs are marked 'out').
 //
 // Probability of all except one drives in a replica group to fail is (AFR^(k-1)).
 // So with <x> PGs it becomes ~ (x * (AFR*L/365)^(k-1)). Interesting but reasonable consequence
 // is that, with k=2, total failure rate doesn't depend on number of peers per OSD,
 // because it gets increased linearly by increased number of peers to fail
 // and decreased linearly by reduced rebalance time.
-function cluster_afr_pgs({ n_hosts, n_drives, afr_drive, capacity, speed, replicas, pgs = 1, degraded_replacement })
+function cluster_afr_pgs({ n_hosts, n_drives, afr_drive, capacity, speed, replicas, pgs = 1, osd_rm, degraded_replacement })
 {
     pgs = Math.min(pgs, (n_hosts-1)*n_drives/(replicas-1));
-    const l = capacity/(degraded_replacement ? 1 : pgs)/speed/86400/365;
-    return 1 - (1 - afr_drive * (1-(1-(afr_drive*l)**(replicas-1))**pgs)) ** (n_hosts*n_drives);
+    const resilver_disk = n_drives == 1 || osd_rm ? pgs : (n_drives-1);
+    const disk_heal_time = capacity/(degraded_replacement ? 1 : resilver_disk)/speed/86400/365;
+    return 1 - (1 - afr_drive * (1-(1-(afr_drive*disk_heal_time)**(replicas-1))**pgs)) ** (n_hosts*n_drives);
 }
-function cluster_afr_pgs_ec({ n_hosts, n_drives, afr_drive, capacity, speed, ec: [ ec_data, ec_parity ], pgs = 1, degraded_replacement })
+function cluster_afr_pgs_ec({ n_hosts, n_drives, afr_drive, capacity, speed, ec: [ ec_data, ec_parity ], pgs = 1, osd_rm, degraded_replacement })
 {
     const ec_total = ec_data+ec_parity;
     pgs = Math.min(pgs, (n_hosts-1)*n_drives/(ec_total-1));
-    const l = capacity/(degraded_replacement ? 1 : pgs)/speed/86400/365;
-    return 1 - (1 - afr_drive * (1-(1-failure_rate_fullmesh(ec_total-1, afr_drive*l, ec_parity))**pgs)) ** (n_hosts*n_drives);
+    const resilver_disk = n_drives == 1 || osd_rm ? pgs : (n_drives-1);
+    const disk_heal_time = capacity/(degraded_replacement ? 1 : resilver_disk)/speed/86400/365;
+    return 1 - (1 - afr_drive * (1-(1-failure_rate_fullmesh(ec_total-1, afr_drive*disk_heal_time, ec_parity))**pgs)) ** (n_hosts*n_drives);
 }
 // Same as above, but also take server failures into account
-function cluster_afr_pgs_hosts({ n_hosts, n_drives, afr_drive, afr_host, capacity, speed, replicas, pgs = 1, degraded_replacement })
+function cluster_afr_pgs_hosts({ n_hosts, n_drives, afr_drive, afr_host, capacity, speed, replicas, pgs = 1, osd_rm, degraded_replacement })
 {
-    let otherhosts = Math.min(pgs, (n_hosts-1)/(replicas-1));
+    const otherhosts = Math.min(pgs, (n_hosts-1)/(replicas-1));
     pgs = Math.min(pgs, (n_hosts-1)*n_drives/(replicas-1));
-    let pgh = Math.min(pgs*n_drives, (n_hosts-1)*n_drives/(replicas-1));
-    const ld = capacity/(degraded_replacement ? 1 : pgs)/speed/86400/365;
-    const lh = n_drives*capacity/pgs/speed/86400/365;
-    const p1 = ((afr_drive+afr_host*pgs/otherhosts)*lh);
-    const p2 = ((afr_drive+afr_host*pgs/otherhosts)*ld);
+    const resilver_disk = n_drives == 1 || osd_rm ? pgs : (n_drives-1);
+    const pgh = Math.min(pgs*n_drives, (n_hosts-1)*n_drives/(replicas-1));
+    const disk_heal_time = capacity/(degraded_replacement ? 1 : resilver_disk)/speed/86400/365;
+    const host_heal_time = n_drives*capacity/pgs/speed/86400/365;
+    const p1 = ((afr_drive+afr_host*pgs/otherhosts)*host_heal_time);
+    const p2 = ((afr_drive+afr_host*pgs/otherhosts)*disk_heal_time);
     return 1 - ((1 - afr_host * (1-(1-p1**(replicas-1))**pgh)) ** n_hosts) *
         ((1 - afr_drive * (1-(1-p2**(replicas-1))**pgs)) ** (n_hosts*n_drives));
 }
-function cluster_afr_pgs_ec_hosts({ n_hosts, n_drives, afr_drive, afr_host, capacity, speed, ec: [ ec_data, ec_parity ], pgs = 1, degraded_replacement })
+function cluster_afr_pgs_ec_hosts({ n_hosts, n_drives, afr_drive, afr_host, capacity, speed, ec: [ ec_data, ec_parity ], pgs = 1, osd_rm, degraded_replacement })
 {
     const ec_total = ec_data+ec_parity;
     const otherhosts = Math.min(pgs, (n_hosts-1)/(ec_total-1));
     pgs = Math.min(pgs, (n_hosts-1)*n_drives/(ec_total-1));
+    const resilver_disk = n_drives == 1 || osd_rm ? pgs : (n_drives-1);
     const pgh = Math.min(pgs*n_drives, (n_hosts-1)*n_drives/(ec_total-1));
-    const ld = capacity/(degraded_replacement ? 1 : pgs)/speed/86400/365;
-    const lh = n_drives*capacity/pgs/speed/86400/365;
-    const p1 = ((afr_drive+afr_host*pgs/otherhosts)*lh);
-    const p2 = ((afr_drive+afr_host*pgs/otherhosts)*ld);
+    const disk_heal_time = capacity/(degraded_replacement ? 1 : resilver_disk)/speed/86400/365;
+    const host_heal_time = n_drives*capacity/pgs/speed/86400/365;
+    const p1 = ((afr_drive+afr_host*pgs/otherhosts)*host_heal_time);
+    const p2 = ((afr_drive+afr_host*pgs/otherhosts)*disk_heal_time);
     return 1 - ((1 - afr_host * (1-(1-failure_rate_fullmesh(ec_total-1, p1, ec_parity))**pgh)) ** n_hosts) *
         ((1 - afr_drive * (1-(1-failure_rate_fullmesh(ec_total-1, p2, ec_parity))**pgs)) ** (n_hosts*n_drives));
 }
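
For illustration only (not part of the commit): a minimal sketch of how the updated cluster_afr_pgs behaves with the new osd_rm flag. The function body is copied from the new version in the hunk above; the cluster parameters (10 hosts of 10 drives, 3% drive AFR, 8000 GB drives, 0.02 GB/s backfill per drive, 50 peers) are arbitrary sample values, not taken from the repository.

// Illustrative sketch, runnable with node; parameter values below are assumptions.
function cluster_afr_pgs({ n_hosts, n_drives, afr_drive, capacity, speed, replicas, pgs = 1, osd_rm, degraded_replacement })
{
    pgs = Math.min(pgs, (n_hosts-1)*n_drives/(replicas-1));
    // osd_rm=true (or a single-drive host): all <pgs> peers resilver the failed OSD;
    // otherwise only the other (n_drives-1) OSDs of the same host do, so healing is slower.
    const resilver_disk = n_drives == 1 || osd_rm ? pgs : (n_drives-1);
    const disk_heal_time = capacity/(degraded_replacement ? 1 : resilver_disk)/speed/86400/365;
    return 1 - (1 - afr_drive * (1-(1-(afr_drive*disk_heal_time)**(replicas-1))**pgs)) ** (n_hosts*n_drives);
}

const base = { n_hosts: 10, n_drives: 10, afr_drive: 0.03, capacity: 8000, speed: 0.02, replicas: 3, pgs: 50 };

// Ceph-like default: the dead OSD is only marked 'out', so same-host OSDs resilver -> longer heal time, higher AFR.
console.log('osd_rm=false:', cluster_afr_pgs({ ...base, osd_rm: false }));
// Failed OSD removed from the distribution: all peers resilver -> shorter heal time, lower AFR.
console.log('osd_rm=true: ', cluster_afr_pgs({ ...base, osd_rm: true }));

// The comment block above notes that with 2 replicas the result barely depends on <pgs>:
// more peers raise the chance that some peer dies, but shrink the heal time by the same factor.
// That cancellation requires the heal time to scale with <pgs>, i.e. osd_rm=true in the new model.
for (const pgs of [25, 50, 100])
    console.log(`replicas=2, pgs=${pgs}:`, cluster_afr_pgs({ ...base, replicas: 2, pgs, osd_rm: true }));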


@@ -31,7 +31,7 @@ class Calc extends preact.Component
             speed: st.speed/1000,
             ec: st.ec ? [ st.ec_data, st.ec_parity ] : null,
             replicas: st.replicas,
-            pgs: 100,
+            pgs: 50,
             degraded_replacement: st.eager,
         });
         this.setState(st);
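
A rough worked example of what the new defaults mean for heal times (illustrative only: drive size, drives per host and backfill speed are sample assumptions; the formulas are the ones from afr.js above, and the speed/1000 conversion is the one visible in this hunk):

// Heal-time arithmetic under the new pgs = 50 default; values below are assumptions.
const capacity = 8000;     // GB per drive (assumption)
const speed = 20 / 1000;   // 20 MB/s backfill per drive, converted to GB/s as the UI does (speed/1000)
const n_drives = 10;       // drives per host (assumption)
const pgs = 50;            // new default number of peer PGs per OSD set by this commit

// Per the formulas in afr.js (expressed here in days, i.e. without the /365):
const same_host_heal = capacity / (n_drives - 1) / speed / 86400; // osd_rm=false: only same-host OSDs resilver
const all_peers_heal = capacity / pgs / speed / 86400;            // osd_rm=true: all peer OSDs resilver
const host_heal      = n_drives * capacity / pgs / speed / 86400; // rebalancing a whole failed host
console.log({ same_host_heal, all_peers_heal, host_heal });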