Add an accurate, but brute-force based model for replicated setups
parent 6f01329374
commit 486d6774bd
afr.js +122 -0
@@ -8,6 +8,7 @@ module.exports = {
    cluster_afr_fullmesh,
    failure_rate_fullmesh,
    cluster_afr,
    cluster_afr_bruteforce,
    c_n_k,
};

@@ -91,6 +92,116 @@ function cluster_afr({ n_hosts, n_drives, afr_drive, afr_host, capacity, speed,
        * ((1 - afr_host * (1-(1-host_pg_fail)**host_peers)) ** n_hosts);
}

// Accurate brute-force based calculation, but only for replicated setups
function cluster_afr_bruteforce({ n_hosts, n_drives, afr_drive, disk_heal_hours, replicas, pgs = 1 })
{
    // N-wise replication:
    // - Generate random pgs
    // - For each of them:
    //   - Drive #1 dies within a year
    //     - Drive #2 dies within +- recovery time around #1's death time
    //       - afr*2*recovery_time/year probability
    //       - Drive #3 dies within +- recovery time around #1 and #2
    //         - afr*1.5*recovery_time/year probability
    //         - Drive #4 dies within +- recovery time around #1, #2 and #3
    //         - etc...
    //         - the coefficient is the integral of max(t-|x-y|, 0), then of
    //           max(min(t-|x-y|, t-|x-z|, t-|y-z|), 0), and so on:
    //           AFRCoeff(DriveNum >= 3) = 2*(0.5 + PyramidVolume/NCubeVolume)
    //           PyramidVolume(N) = 1/N * (BaseSquare=2^(N-1)) * (Height=1)
    //           which simplifies to AFRCoeff(DriveNum >= 3) = 1 + 1/(DriveNum-1),
    //           so AFRCoeff for drives 2 3 4 5 6 ... = 2 1.5 1.33 1.25 1.2 ...
    //       - Drive #3 dies, but not within recovery time
    //       - Drive #3 does not die
    //     - Drive #2 dies, but not within recovery time
    //     - Drive #2 does not die
    //   - Drive #1 does not die within a year
    // - After each step we know we have accounted for ALL of drive #1's death probability,
    //   AND for the portion of drive #2's death probability in which #2 dies together
    //   with #1 (leaving a (1-AFR) portion of it),
    //   AND for the portion of drive #3's death probability in which #3 dies together
    //   with #1 and #2 (leaving a (1-AFR^2) portion of it),
    //   AND so on
    let pg_set = [];
    let per_osd = {};
    /*
    // Method 1: generate PGs until each drive participates in at least <pgs> of them (2 replicas only)
    for (let i = 1; i <= n_hosts*n_drives; i++)
    {
        while (!per_osd[i] || per_osd[i] < pgs)
        {
            const host1 = Math.floor((i-1) / n_drives);
            let host2 = Math.floor(Math.random()*(n_hosts-1));
            if (host2 >= host1)
                host2++;
            const osd2 = 1 + host2*n_drives + Math.floor(Math.random()*n_drives);
            pg_set.push([ i, osd2 ]);
            per_osd[i] = (per_osd[i] || 0) + 1;
            per_osd[osd2] = (per_osd[osd2] || 0) + 1;
        }
    }
    */
    // Method 2: generate N_OSD*PG_PER_OSD/REPLICAS PGs, each over <replicas> distinct hosts
    for (let i = 0; i < n_hosts*n_drives*pgs/replicas; i++)
    {
        const pg_hosts = [];
        while (pg_hosts.length < replicas)
        {
            // pick a host distinct from the already chosen ones: draw from the
            // reduced range, then shift past each (sorted) taken host index
            let host = Math.floor(Math.random()*(n_hosts-pg_hosts.length));
            for (let j = 0; j < pg_hosts.length; j++)
            {
                if (host < pg_hosts[j])
                    break;
                host++;
            }
            pg_hosts.push(host);
            pg_hosts.sort((a, b) => a - b); // default sort() would compare numbers as strings
        }
        // place each replica on a random drive of its host
        const pg_osds = pg_hosts.map(host => 1 + host*n_drives + Math.floor(Math.random()*n_drives));
        pg_set.push(pg_osds);
        pg_osds.forEach(osd => per_osd[osd] = (per_osd[osd] || 0) + 1);
    }
    let result = 1;
    let maydie = {};
    for (let i = 1; i <= n_hosts*n_drives; i++)
    {
        maydie[i] = afr_drive; // death probability of each drive not yet accounted for
    }
    for (let pg of pg_set)
    {
        // skip PGs in which some drive's death probability is already fully accounted for
        let i;
        for (i = 0; i < pg.length && maydie[pg[i]] > 0; i++) {}
        if (i < pg.length)
            continue;
        // probability of all drives of the PG dying within one recovery window
        let pg_death = maydie[pg[0]] * disk_heal_hours/365/24 * 2 * maydie[pg[1]];
        for (i = 2; i < pg.length; i++)
        {
            pg_death *= disk_heal_hours/365/24 * (1 + 1/i) * maydie[pg[i]];
        }
        result *= (1 - pg_death);
        let cur = maydie[pg[0]];
        for (i = 1; i < pg.length; i++)
        {
            // the portion of drive #i's death probability equal to the product of
            // all previous drives' death probabilities is already accounted for
            const next = cur*maydie[pg[i]];
            maydie[pg[i]] *= (1 - cur);
            cur = next;
        }
        maydie[pg[0]] = 0; // all of drive #1's death probability is now accounted for
    }
    /*
    // replicas = 2
    for (let pg of pg_set)
    {
        if (maydie[pg[0]] > 0 && maydie[pg[1]] > 0)
        {
            result *= (1 - disk_heal_hours/365/24 * 2 * maydie[pg[0]] * maydie[pg[1]]);
            maydie[pg[1]] *= (1-maydie[pg[0]]); // drive #1 is not dead
            maydie[pg[0]] = 0; // drive #1 death probability is already accounted for
        }
    }
    */
    return 1-result;
}
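
// A quick sanity check of the AFRCoeff(N) = 1 + 1/(N-1) claim above - a sketch for
// illustration, not part of this commit: afr_coeff_mc is a hypothetical helper that
// estimates the coefficient by rejection sampling with recovery time t = 1. Deaths of
// drives #2..#N-1 are drawn uniformly within +-t of drive #1 and kept only when all
// deaths are pairwise within t; the average length of the window still open for
// drive #N is then AFRCoeff(N)*t.
function afr_coeff_mc(drive_num, samples = 1000000)
{
    let sum = 0, accepted = 0;
    for (let s = 0; s < samples; s++)
    {
        const y = [ 0 ]; // drive #1 dies at relative time 0; t = 1
        for (let k = 2; k < drive_num; k++)
            y.push(Math.random()*2 - 1); // drives #2..#N-1, within +-t of drive #1
        const range = Math.max(...y) - Math.min(...y);
        if (range >= 1)
            continue; // some pair of deaths is more than one recovery time apart
        accepted++;
        sum += 2 - range; // length of the window [max-t, min+t] still open for drive #N
    }
    return sum / accepted;
}
// afr_coeff_mc(2) ≈ 2, afr_coeff_mc(3) ≈ 1.5, afr_coeff_mc(4) ≈ 1.333, afr_coeff_mc(5) ≈ 1.25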

/******** UTILITY ********/

// Combination count

@@ -109,3 +220,14 @@ function avg_distinct(n, k)
{
    return n * (1 - (1 - 1/n)**k);
}
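// (each of the n values is drawn at least once with probability 1-(1-1/n)^k, so by
// linearity of expectation the expected number of distinct values after k uniform
// draws is n*(1-(1-1/n)^k), as returned above)
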
/*

Examples:

console.log(100*cluster_afr({ n_hosts: 4, n_drives: 4, afr_drive: 0.05, afr_host: 0, capacity: 4, disk_heal_hours: 24, replicas: 2, pgs: 10 }), '%');
console.log(100*cluster_afr_bruteforce({ n_hosts: 4, n_drives: 4, afr_drive: 0.05, afr_host: 0, capacity: 4, disk_heal_hours: 24, replicas: 2, pgs: 10 }), '%');
console.log(100*cluster_afr({ n_hosts: 2500, n_drives: 80, afr_drive: 0.006, afr_host: 0, capacity: 10, disk_heal_hours: 18, replicas: 2, pgs: 10 }), '%');
console.log(100*cluster_afr_bruteforce({ n_hosts: 2500, n_drives: 80, afr_drive: 0.006, afr_host: 0, capacity: 10, disk_heal_hours: 18, replicas: 2, pgs: 10 }), '%');

*/
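
// The brute-force model samples a random PG layout on every call, so its result
// varies from run to run. A usage sketch (cluster_afr_bruteforce_avg is a
// hypothetical helper, not part of this commit) that averages several runs for a
// more stable estimate:
/*
function cluster_afr_bruteforce_avg(params, runs = 10)
{
    let sum = 0;
    for (let i = 0; i < runs; i++)
        sum += cluster_afr_bruteforce(params);
    return sum / runs;
}

console.log(100*cluster_afr_bruteforce_avg({ n_hosts: 4, n_drives: 4, afr_drive: 0.05, disk_heal_hours: 24, replicas: 2, pgs: 10 }), '%');
*/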