// File: vitastor/lp/lp-optimizer.js (JavaScript, 522 lines, 16 KiB)

// Data distribution optimizer using linear programming (lp_solve)
const child_process = require('child_process');
// Placeholder OSD name. all_combinations() pads the list of failure domains
// with it when there are fewer than 3 of them
const NO_OSD = 'Z';
// Solve a linear programming problem using the external `lp_solve` program.
//
// text: the problem in the lp_solve LP format; it's written to the solver's stdin
//
// Returns a promise which resolves to:
// - null if the solver prints nothing or reports the problem as infeasible or unbounded
// - otherwise { score, vars }, where score is the objective function value as printed
//   by the solver (a string, or the number 0 if it wasn't printed) and vars maps
//   variable names to their values (strings, as printed). Only the rows of the
//   "Actual values of the variables" section are returned: section headers and
//   constraint rows are not variables.
//
// The promise is rejected if the solver process can't be started.
async function lp_solve(text)
{
    const cp = child_process.spawn('lp_solve');
    let stdout = '';
    cp.stdout.on('data', buf => stdout += buf.toString());
    // stderr isn't used, but it has to be consumed so the child can't block on a full pipe
    cp.stderr.resume();
    // The solver may exit without reading all of its input. The resulting stdin
    // error must not crash the process - the outcome is determined from stdout
    cp.stdin.on('error', () => {});
    const finished = new Promise((ok, fail) =>
    {
        // 'error' is emitted when the process can't be spawned (for example, lp_solve isn't installed)
        cp.on('error', fail);
        // Unlike 'exit', 'close' is only emitted after the stdio streams are closed,
        // so the whole output has been received by that moment
        cp.on('close', ok);
    });
    cp.stdin.write(text);
    cp.stdin.end();
    await finished;
    if (!stdout.trim())
    {
        return null;
    }
    let score = 0;
    const vars = {};
    let in_vars = false;
    for (const line of stdout.split(/\n/))
    {
        if (/This problem is (infeasible|unbounded)/.test(line))
        {
            return null;
        }
        // The objective value may be negative or printed in the exponential form
        const m = /^Value of objective function: (-?[\d.]+(?:[eE][+-]?\d+)?)\s*$/.exec(line);
        if (m)
        {
            score = m[1];
            continue;
        }
        if (/^Actual values of the variables:\s*$/.test(line))
        {
            in_vars = true;
            continue;
        }
        if (/^(Actual values of the constraints:|Dual value)/.test(line))
        {
            in_vars = false;
            continue;
        }
        if (!in_vars)
        {
            continue;
        }
        const [ k, v ] = line.trim().split(/\s+/, 2);
        if (v)
        {
            vars[k] = v;
        }
    }
    return { score, vars };
}
// Calculate the initial PG distribution by solving an LP problem.
//
// osd_tree: { failure_domain1: { osd1: size1, ... }, ... }
// pg_count: number of PGs to distribute
// max_combinations: limit on the number of candidate PGs (10000 by default).
//   If all_combinations() returns more candidates, each of them is kept with
//   the probability max_combinations/total, so about max_combinations remain
//
// Returns a promise resolving to { score, weights, int_pgs, space, total_space }:
// - score: objective function value reported by the solver
// - weights: { pg_<osd>_<osd>_<osd>: value } for the PG variables of the solution
// - int_pgs: list of PGs (arrays of OSD names) built by make_int_pgs()
// - space: pg_list_space_efficiency() of int_pgs multiplied by the PG size
// - total_space: sum of all OSD sizes
//
// Throws if the solver returns no solution.
async function optimize_initial(osd_tree, pg_count, max_combinations)
{
    max_combinations = max_combinations || 10000;
    const all_weights = Object.assign({}, ...Object.values(osd_tree));
    const total_weight = Object.values(all_weights).reduce((a, c) => Number(a) + Number(c), 0);
    let all_pgs = all_combinations(osd_tree, null, true);
    if (all_pgs.length > max_combinations)
    {
        const prob = max_combinations/all_pgs.length;
        all_pgs = all_pgs.filter(() => Math.random() < prob);
    }
    const all_pg_names = all_pgs.map(pg => 'pg_'+pg.join('_'));
    const pg_per_osd = {};
    all_pgs.forEach((pg, i) =>
    {
        for (const osd of pg)
        {
            pg_per_osd[osd] = pg_per_osd[osd] || [];
            pg_per_osd[osd].push(all_pg_names[i]);
        }
    });
    const pg_size = Math.min(Object.keys(osd_tree).length, 3);
    // Maximize the total PG weight...
    let lp = '';
    lp += 'max: '+all_pg_names.join(' + ')+';\n';
    // ...while each OSD gets no more PGs than its share of the total size allows
    for (const osd in pg_per_osd)
    {
        if (osd !== NO_OSD)
        {
            const osd_pg_count = all_weights[osd]/total_weight*pg_size*pg_count;
            lp += pg_per_osd[osd].join(' + ')+' <= '+osd_pg_count+';\n';
        }
    }
    for (const pg_name of all_pg_names)
    {
        lp += pg_name+' >= 0;\n';
    }
    lp += 'sec '+all_pg_names.join(', ')+';\n';
    const lp_result = await lp_solve(lp);
    if (!lp_result)
    {
        throw new Error('Problem is infeasible or unbounded - is it a bug?');
    }
    // Only take the variables declared above: any other row of the solver output
    // (a section header, a constraint) isn't a PG weight and would break make_int_pgs()
    const known_names = new Set(all_pg_names);
    const weights = {};
    for (const name in lp_result.vars)
    {
        if (known_names.has(name))
        {
            weights[name] = lp_result.vars[name];
        }
    }
    const int_pgs = make_int_pgs(weights, pg_count);
    const eff = pg_list_space_efficiency(int_pgs, all_weights);
    return { score: lp_result.score, weights, int_pgs, space: eff*pg_size, total_space: total_weight };
}
// Convert fractional PG weights into a list of PGs.
//
// weights: { pg_<osd>_<osd>_<osd>: weight }, weights are numbers or numeric strings
// pg_count: the number of PGs to share between the weights
//
// Each name gets a number of copies proportional to its share of the weight that
// is still left, rounded to the nearest integer. The PGs and the weight already
// handed out are subtracted before the next name is processed.
// Returns an array of PGs, each PG being the array of OSD names parsed from its name.
function make_int_pgs(weights, pg_count)
{
    let weight_left = Object.values(weights).reduce((sum, w) => Number(sum) + Number(w), 0);
    let pg_left = pg_count;
    const result = [];
    for (const [ name, weight ] of Object.entries(weights))
    {
        const copies = Math.round(weight / weight_left * pg_left);
        for (let c = copies; c > 0; c--)
        {
            // Every copy is a separate array
            result.push(name.slice(3).split('_'));
        }
        weight_left -= weight;
        pg_left -= copies;
    }
    return result;
}
// Calculate a new PG distribution for a changed OSD tree, trying to minimize data movement.
//
// prev_int_pgs: the current distribution - an array of PGs, each PG is an array of 3 OSD names
// osd_tree: { failure_domain1: { osd1: size1, ... }, ... } - the new OSD tree
// max_combinations: limit on the number of candidate PGs (10000 by default).
//   When all_combinations() returns more candidates, a random subset is taken,
//   and candidates which are present in prev_int_pgs are preferred
//
// Returns a promise resolving to
// { prev_pgs, score, weights, int_pgs, differs, osd_differs, space, total_space }:
// - prev_pgs: prev_int_pgs as passed
// - score: objective function value reported by the solver
// - weights: { pg_<osd>_<osd>_<osd>: number of such PGs } for the new distribution
// - int_pgs: new PGs aligned by align_pgs() to the positions of the previous ones
// - differs: number of PGs which differ from the previous PG at the same position
// - osd_differs: number of differing (PG, position inside the PG) pairs
// - space: pg_list_space_efficiency() of int_pgs multiplied by the PG size
// - total_space: sum of all OSD sizes
//
// Throws if the solver returns no solution (the LP problem is printed to the console first).
async function optimize_change(prev_int_pgs, osd_tree, max_combinations)
{
    max_combinations = max_combinations || 10000;
    const pg_size = Math.min(Object.keys(osd_tree).length, 3);
    const pg_count = prev_int_pgs.length;
    const prev_weights = {};
    const prev_pg_per_osd = {};
    for (const pg of prev_int_pgs)
    {
        const pg_name = 'pg_'+pg.join('_');
        prev_weights[pg_name] = (prev_weights[pg_name]||0) + 1;
        for (const osd of pg)
        {
            prev_pg_per_osd[osd] = prev_pg_per_osd[osd] || [];
            prev_pg_per_osd[osd].push(pg_name);
        }
    }
    // Get all combinations
    let all_pgs = all_combinations(osd_tree, null, true);
    if (all_pgs.length > max_combinations)
    {
        const intersecting = all_pgs.filter(pg => prev_weights['pg_'+pg.join('_')]);
        if (intersecting.length > max_combinations)
        {
            const prob = max_combinations/intersecting.length;
            all_pgs = intersecting.filter(() => Math.random() < prob);
        }
        else
        {
            const prob = (max_combinations-intersecting.length)/all_pgs.length;
            all_pgs = all_pgs.filter(pg => Math.random() < prob || prev_weights['pg_'+pg.join('_')]);
        }
    }
    const all_pg_names = all_pgs.map(pg => 'pg_'+pg.join('_'));
    const pg_per_osd = {};
    all_pgs.forEach((pg, i) =>
    {
        for (const osd of pg)
        {
            pg_per_osd[osd] = pg_per_osd[osd] || [];
            pg_per_osd[osd].push(all_pg_names[i]);
        }
    });
    // Penalize PGs based on their similarity to old PGs:
    // 1 = shares 2 positions with some old PG, 2 = shares 1 position, 3 = shares nothing
    const intersect = {};
    for (const pg_name in prev_weights)
    {
        const pg = pg_name.slice(3).split(/_/);
        intersect[pg[0]+'::'] = intersect[':'+pg[1]+':'] = intersect['::'+pg[2]] = 2;
        intersect[pg[0]+'::'+pg[2]] = intersect[':'+pg[1]+':'+pg[2]] = intersect[pg[0]+':'+pg[1]+':'] = 1;
    }
    const move_weights = {};
    all_pgs.forEach((pg, i) =>
    {
        move_weights[all_pg_names[i]] =
            intersect[pg[0]+'::'+pg[2]] || intersect[':'+pg[1]+':'+pg[2]] || intersect[pg[0]+':'+pg[1]+':'] ||
            intersect[pg[0]+'::'] || intersect[':'+pg[1]+':'] || intersect['::'+pg[2]] ||
            3;
    });
    // Calculate total weight - old PG weights
    const all_weights = Object.assign({}, ...Object.values(osd_tree));
    const total_weight = Object.values(all_weights).reduce((a, c) => Number(a) + Number(c), 0);
    // Generate the LP problem
    // PGs which already exist get a pair of variables (add_/del_), new ones get a single variable
    let lp = '';
    lp += 'max: '+all_pg_names.map(pg_name => (
        prev_weights[pg_name] ? `${4-move_weights[pg_name]}*add_${pg_name} - 4*del_${pg_name}` : `${4-move_weights[pg_name]}*${pg_name}`
    )).join(' + ')+';\n';
    for (const osd in pg_per_osd)
    {
        if (osd !== NO_OSD)
        {
            const osd_sum = (pg_per_osd[osd]||[]).map(pg_name => prev_weights[pg_name] ? `add_${pg_name} - del_${pg_name}` : pg_name).join(' + ');
            // Old PGs of this OSD which are still possible already occupy a part of its capacity
            const rm_osd_pg_count = (prev_pg_per_osd[osd]||[]).filter(old_pg_name => move_weights[old_pg_name]).length;
            // The capacity is a share of pg_size*pg_count PG placements, just like in optimize_initial().
            // Using a constant 3 here makes the limit too loose with less than 3 failure domains
            const osd_pg_count = all_weights[osd]*pg_size/total_weight*pg_count - rm_osd_pg_count;
            lp += osd_sum + ' <= ' + osd_pg_count + ';\n';
        }
    }
    const pg_vars = [];
    for (const pg_name of all_pg_names)
    {
        if (prev_weights[pg_name])
        {
            pg_vars.push(`add_${pg_name}`, `del_${pg_name}`);
            // Can't add or remove less than zero
            lp += `add_${pg_name} >= 0;\n`;
            lp += `del_${pg_name} >= 0;\n`;
            // Can't remove more than the PG already has
            lp += `add_${pg_name} - del_${pg_name} >= -${prev_weights[pg_name]};\n`;
        }
        else
        {
            pg_vars.push(pg_name);
            lp += `${pg_name} >= 0;\n`;
        }
    }
    lp += 'sec '+pg_vars.join(', ')+';\n';
    // Solve it
    const lp_result = await lp_solve(lp);
    if (!lp_result)
    {
        console.log(lp);
        throw new Error('Problem is infeasible or unbounded - is it a bug?');
    }
    // Generate the new distribution
    const weights = { ...prev_weights };
    for (const k in prev_weights)
    {
        // Old PGs which are not possible anymore are dropped
        if (!move_weights[k])
        {
            delete weights[k];
        }
    }
    for (const k in lp_result.vars)
    {
        const value = Number(lp_result.vars[k]);
        if (k.startsWith('add_'))
        {
            weights[k.slice(4)] = (weights[k.slice(4)] || 0) + value;
        }
        else if (k.startsWith('del_'))
        {
            weights[k.slice(4)] = (weights[k.slice(4)] || 0) - value;
        }
        else if (k.startsWith('pg_'))
        {
            weights[k] = value;
        }
        // Anything else (a section header, a constraint row) isn't a variable of this problem
    }
    for (const k in weights)
    {
        if (!weights[k])
        {
            delete weights[k];
        }
    }
    const int_pgs = make_int_pgs(weights, pg_count);
    // Align them with most similar previous PGs
    const new_pgs = align_pgs(prev_int_pgs, int_pgs);
    let differs = 0, osd_differs = 0;
    for (let i = 0; i < pg_count; i++)
    {
        if (new_pgs[i].join('_') != prev_int_pgs[i].join('_'))
        {
            differs++;
        }
        for (let j = 0; j < 3; j++)
        {
            if (new_pgs[i][j] != prev_int_pgs[i][j])
            {
                osd_differs++;
            }
        }
    }
    return {
        prev_pgs: prev_int_pgs,
        score: lp_result.score,
        weights,
        int_pgs: new_pgs,
        differs,
        osd_differs,
        space: pg_size * pg_list_space_efficiency(new_pgs, all_weights),
        total_space: total_weight,
    };
}
// Print statistics of an optimization result to the console.
//
// retval: result object with int_pgs, space and total_space and, optionally,
//   prev_pgs, differs and osd_differs (the fields returned by optimize_change())
// detailed: if true and prev_pgs is set, every changed PG is also printed
function print_change_stats(retval, detailed)
{
    const { int_pgs: new_pgs, prev_pgs: prev_int_pgs } = retval;
    // Ratio => percents with 2 decimal places
    const percent = (ratio) => Math.round(ratio * 10000)/100;
    if (prev_int_pgs)
    {
        if (detailed)
        {
            for (const [ i, new_pg ] of new_pgs.entries())
            {
                const prev_pg = prev_int_pgs[i];
                if (new_pg.join('_') !== prev_pg.join('_'))
                {
                    console.log(`pg ${i}: ${prev_pg.join(' ')} -> ${new_pg.join(' ')}`);
                }
            }
        }
        const moved = percent(retval.osd_differs / prev_int_pgs.length / 3);
        console.log(`Data movement: ${retval.differs} pgs, ${retval.osd_differs} pg*osds = ${moved} %`);
    }
    const raw_space = Math.round(retval.space*100)/100;
    const efficiency = percent(retval.space/(retval.total_space||1));
    console.log(`Total space (raw): ${raw_space} TB, space efficiency: ${efficiency} %`);
}
// Put new PGs to the positions of the most similar previous PGs.
//
// prev_int_pgs: previous PGs (arrays of 3 OSD names)
// int_pgs: new PGs; PGs are removed from this array as they get placed
//
// PGs are matched in 3 passes: by all 3 OSDs, then by any 2 OSDs at the same
// positions, then by any single OSD at the same position. The remaining PGs
// fill the remaining positions in order.
// Returns the array of new PGs indexed like prev_int_pgs.
// Throws if there are more PGs left than free positions.
function align_pgs(prev_int_pgs, int_pgs)
{
    const aligned_pgs = [];
    const passes = [
        (pg) => [ pg.join(':') ],
        (pg) => [ pg[0]+'::'+pg[2], ':'+pg[1]+':'+pg[2], pg[0]+':'+pg[1]+':' ],
        (pg) => [ pg[0]+'::', ':'+pg[1]+':', '::'+pg[2] ],
    ];
    for (const keygen of passes)
    {
        put_aligned_pgs(aligned_pgs, int_pgs, prev_int_pgs, keygen);
    }
    const free_slots = prev_int_pgs.reduce((slots, pg, slot) =>
    {
        if (!aligned_pgs[slot])
        {
            slots.push(slot);
        }
        return slots;
    }, []);
    let next_free = 0;
    for (const pg of int_pgs)
    {
        if (next_free >= free_slots.length)
        {
            throw new Error("Can't place unaligned PG");
        }
        aligned_pgs[free_slots[next_free++]] = pg;
    }
    return aligned_pgs;
}
// One pass of PG alignment: place new PGs to free positions of previous PGs
// which produce the same key.
//
// aligned_pgs: result array indexed like prev_int_pgs, only empty positions are filled
// int_pgs: new PGs, processed from the last to the first; placed PGs are removed from it
// prev_int_pgs: previous PGs
// keygen: function returning an array of string keys for a PG
function put_aligned_pgs(aligned_pgs, int_pgs, prev_int_pgs, keygen)
{
    // key => queue of positions of previous PGs producing this key, in ascending order
    const slots_by_key = {};
    for (const [ slot, prev_pg ] of prev_int_pgs.entries())
    {
        for (const key of keygen(prev_pg))
        {
            slots_by_key[key] = slots_by_key[key] || [];
            slots_by_key[key].push(slot);
        }
    }
    // Takes positions from the queue of <key> until a vacant one is found.
    // Returns -1 when the queue is exhausted. Occupied positions are dropped from the queue
    const take_vacant_slot = (key) =>
    {
        const queue = slots_by_key[key];
        while (queue && queue.length)
        {
            const slot = queue.shift();
            if (!aligned_pgs[slot])
            {
                return slot;
            }
        }
        return -1;
    };
    // Iterate backwards so that removing the current element doesn't shift the unvisited ones
    for (let pos = int_pgs.length-1; pos >= 0; pos--)
    {
        const pg = int_pgs[pos];
        for (const key of keygen(pg))
        {
            const slot = take_vacant_slot(key);
            if (slot >= 0)
            {
                aligned_pgs[slot] = pg;
                int_pgs.splice(pos, 1);
                break;
            }
        }
    }
}
// Convert a multi-level OSD tree to a two-level one suitable for all_combinations().
//
// osd_tree: { level: number|string, id?: string, size?: number, children?: osd_tree }[]
// levels: { string: number } - maps level names to numbers
// failure_domain_level, osd_level: level names (looked up in levels) or numbers
// domains, i: accumulator object and domain counter shared by the recursive calls
//
// Nodes with a level number less than failure_domain_level are descended into.
// Every other node becomes a separate failure domain named dom<N>, filled by extract_osds().
// Returns domains.
function flatten_tree(osd_tree, levels, failure_domain_level, osd_level, domains = {}, i = { i: 1 })
{
    const osd_rank = levels[osd_level] || osd_level;
    const domain_rank = levels[failure_domain_level] || failure_domain_level;
    for (const node of osd_tree)
    {
        const node_rank = levels[node.level] || node.level;
        if (node_rank < domain_rank)
        {
            flatten_tree(node.children||[], levels, domain_rank, osd_rank, domains, i);
            continue;
        }
        domains[`dom${i.i++}`] = extract_osds([ node ], levels, osd_rank);
    }
    return domains;
}
// Collect { id: size } of all nodes of a multi-level tree which are at the OSD level.
//
// osd_tree: array of tree nodes ({ level, id, size, children })
// levels: { string: number } - maps level names to numbers
// osd_level: number of the OSD level; nodes with this or a greater level number
//   are taken as OSDs, other nodes are descended into
// osds: accumulator object, it's also the return value
function extract_osds(osd_tree, levels, osd_level, osds = {})
{
    for (const { level, id, size, children } of osd_tree)
    {
        const rank = levels[level] || level;
        if (rank >= osd_level)
        {
            osds[id] = size;
            continue;
        }
        extract_osds(children||[], levels, osd_level, osds);
    }
    return osds;
}
// FIXME: support different pg_sizes, not just 3
// Enumerate PGs: triples of OSDs where each OSD is taken from a different failure domain.
//
// osd_tree: { failure_domain1: { osd1: size1, ... }, ... }
// count: maximum number of PGs to return; no limit if it's 0, null or negative
// ordered: if true, the failure domains of each PG go in the ascending (sorted) order,
//   so every set of 3 domains is used once. Otherwise all orderings are generated
//
// Failure domains and OSDs inside them are processed in the sorted order, and the
// OSD at the last position changes first. If there are fewer than 3 failure
// domains, the missing ones are replaced by a domain with the single OSD NO_OSD.
// Returns an array of PGs, each PG is an array of 3 OSD names.
function all_combinations(osd_tree, count, ordered)
{
    const hosts = Object.keys(osd_tree).sort();
    const osds = {};
    for (const host of Object.keys(osd_tree))
    {
        osds[host] = Object.keys(osd_tree[host]).sort();
    }
    while (hosts.length < 3)
    {
        osds[NO_OSD] = [ NO_OSD ];
        hosts.push(NO_OSD);
    }
    const r = [];
    const want_more = () => !count || count < 0 || r.length < count;
    const host_count = hosts.length;
    for (let h0 = 0; h0 < host_count; h0++)
    {
        for (let h1 = ordered ? h0+1 : 0; h1 < host_count; h1++)
        {
            for (let h2 = ordered ? h1+1 : 0; h2 < host_count; h2++)
            {
                if (h0 === h1 || h0 === h2 || h1 === h2)
                {
                    continue;
                }
                const [ first, second, third ] = [ h0, h1, h2 ].map(h => osds[hosts[h]]);
                // A failure domain without OSDs still takes part with a single
                // (undefined) entry - hence "|| 1" in the loop bounds
                for (let a = 0; a < (first.length || 1); a++)
                {
                    for (let b = 0; b < (second.length || 1); b++)
                    {
                        for (let c = 0; c < (third.length || 1); c++)
                        {
                            if (!want_more())
                            {
                                return r;
                            }
                            r.push([ first[a], second[b], third[c] ]);
                        }
                    }
                }
            }
        }
    }
    return r;
}
// Estimate the usable space for a distribution given as PG weights.
//
// weights: { pg_<osd>_<osd>_<osd>: weight }, weights are numbers or numeric strings
//   (lp_solve() returns the values of variables as strings)
// pg_count: total number of PGs
// osd_sizes: { osd: size }
//
// Sums up the weights of PGs per each OSD and returns the result of
// pg_per_osd_space_efficiency() for these sums.
function pg_weights_space_efficiency(weights, pg_count, osd_sizes)
{
    const per_osd = {};
    for (const pg_name in weights)
    {
        // Convert explicitly: "+" with a string operand would concatenate instead of adding
        const weight = Number(weights[pg_name]);
        for (const osd of pg_name.slice(3).split(/_/))
        {
            per_osd[osd] = (per_osd[osd]||0) + weight;
        }
    }
    return pg_per_osd_space_efficiency(per_osd, pg_count, osd_sizes);
}
// Estimate the usable space for a distribution given as a list of PGs.
//
// pgs: array of PGs, each PG is an array of OSD names
// osd_sizes: { osd: size }
//
// Counts the PGs per each OSD and returns the result of
// pg_per_osd_space_efficiency() for these counts and pgs.length PGs in total.
function pg_list_space_efficiency(pgs, osd_sizes)
{
    const per_osd = {};
    const count_osd = (osd) =>
    {
        per_osd[osd] = 1 + (per_osd[osd] || 0);
    };
    for (const pg of pgs)
    {
        for (const osd of pg)
        {
            count_osd(osd);
        }
    }
    return pg_per_osd_space_efficiency(per_osd, pgs.length, osd_sizes);
}
// Estimate the total usable space from the number of PGs placed on each OSD.
//
// per_osd: { osd: number (or total weight) of PGs on this OSD }
// pg_count: total number of PGs
// osd_sizes: { osd: size }; OSDs missing from it are ignored
//
// Returns the minimum of size * pg_count / per_osd over the OSDs, or 0 if there are none.
function pg_per_osd_space_efficiency(per_osd, pg_count, osd_sizes)
{
    // Each PG is selected in 1/N of cases and an OSD has x PGs
    // => the OSD is selected in x/N of cases
    // => total space * x/N <= OSD size
    // => total space <= OSD size * N/x
    // The OSD with the smallest estimate limits the whole cluster
    let lowest = null;
    for (const osd of Object.keys(per_osd))
    {
        if (!(osd in osd_sizes))
        {
            continue;
        }
        const estimate = osd_sizes[osd] * pg_count / per_osd[osd];
        if (lowest == null || estimate < lowest)
        {
            lowest = estimate;
        }
    }
    return lowest == null ? 0 : lowest;
}
// Exported names: the NO_OSD constant and the optimizer functions defined above
module.exports = {
NO_OSD,
optimize_initial,
optimize_change,
print_change_stats,
pg_weights_space_efficiency,
pg_list_space_efficiency,
pg_per_osd_space_efficiency,
flatten_tree,
lp_solve,
make_int_pgs,
align_pgs,
all_combinations,
};