Compare commits

...

7 Commits

Author SHA1 Message Date
Vitaliy Filippov ec316a747d Move NBD/VDUSE map/unmap functions to a separate file 2024-04-04 02:48:37 +03:00
Vitaliy Filippov a0c2926c5e Use murmur3 in the old PG combinator too ?
Test / test_interrupted_rebalance_ec_imm (push) Successful in 2m4s Details
Test / test_move_reappear (push) Successful in 22s Details
Test / test_snapshot_down (push) Successful in 31s Details
Test / test_snapshot_down_ec (push) Successful in 29s Details
Test / test_splitbrain (push) Successful in 23s Details
Test / test_snapshot_chain (push) Successful in 2m39s Details
Test / test_snapshot_chain_ec (push) Successful in 3m13s Details
Test / test_rebalance_verify_imm (push) Successful in 4m25s Details
Test / test_rebalance_verify (push) Successful in 5m12s Details
Test / test_switch_primary (push) Successful in 33s Details
Test / test_write (push) Successful in 54s Details
Test / test_write_xor (push) Successful in 54s Details
Test / test_write_no_same (push) Successful in 13s Details
Test / test_rebalance_verify_ec_imm (push) Successful in 3m43s Details
Test / test_rebalance_verify_ec (push) Successful in 6m42s Details
Test / test_heal_pg_size_2 (push) Successful in 4m48s Details
Test / test_heal_csum_32k_dmj (push) Successful in 4m41s Details
Test / test_heal_ec (push) Successful in 5m42s Details
Test / test_heal_csum_32k_dj (push) Successful in 6m38s Details
Test / test_heal_csum_4k_dmj (push) Successful in 6m36s Details
Test / test_heal_csum_32k (push) Successful in 6m38s Details
Test / test_heal_csum_4k_dj (push) Successful in 6m28s Details
Test / test_scrub_zero_osd_2 (push) Successful in 59s Details
Test / test_scrub (push) Successful in 1m4s Details
Test / test_scrub_xor (push) Successful in 51s Details
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 1m16s Details
Test / test_scrub_ec (push) Successful in 48s Details
Test / test_scrub_pg_size_3 (push) Successful in 1m36s Details
Test / test_nfs (push) Successful in 26s Details
Test / test_heal_csum_4k (push) Successful in 5m58s Details
2024-03-31 12:23:10 +03:00
Vitaliy Filippov 416924f6a8 Try new PG combinator in tests
Test / test_rm (push) Successful in 13s Details
Test / test_snapshot_down (push) Successful in 30s Details
Test / test_snapshot_down_ec (push) Successful in 29s Details
Test / test_splitbrain (push) Successful in 28s Details
Test / test_snapshot_chain (push) Successful in 2m26s Details
Test / test_snapshot_chain_ec (push) Successful in 3m3s Details
Test / test_rebalance_verify_imm (push) Successful in 3m13s Details
Test / test_rebalance_verify (push) Successful in 3m53s Details
Test / test_switch_primary (push) Successful in 41s Details
Test / test_write (push) Successful in 42s Details
Test / test_write_no_same (push) Successful in 18s Details
Test / test_write_xor (push) Successful in 1m2s Details
Test / test_rebalance_verify_ec (push) Successful in 4m51s Details
Test / test_rebalance_verify_ec_imm (push) Successful in 4m21s Details
Test / test_heal_pg_size_2 (push) Successful in 3m37s Details
Test / test_heal_ec (push) Successful in 4m8s Details
Test / test_heal_csum_32k_dmj (push) Successful in 5m47s Details
Test / test_heal_csum_32k_dj (push) Successful in 5m54s Details
Test / test_heal_csum_32k (push) Successful in 6m55s Details
Test / test_heal_csum_4k_dmj (push) Successful in 6m57s Details
Test / test_scrub (push) Successful in 1m31s Details
Test / test_scrub_zero_osd_2 (push) Successful in 1m18s Details
Test / test_heal_csum_4k_dj (push) Successful in 6m26s Details
Test / test_heal_csum_4k (push) Successful in 6m11s Details
Test / test_scrub_pg_size_3 (push) Successful in 1m30s Details
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 32s Details
Test / test_scrub_ec (push) Successful in 26s Details
Test / test_nfs (push) Successful in 12s Details
Test / test_change_pg_count_ec (push) Successful in 30s Details
Test / test_scrub_xor (push) Successful in 18s Details
2024-03-31 12:23:10 +03:00
Vitaliy Filippov c57ea9be61 Plug the new PG combinator into monitor 2024-03-31 12:22:58 +03:00
Vitaliy Filippov 7eef87aaa1 Implement new DSL/rule-based PG generation algorithm 2024-03-31 12:20:49 +03:00
Vitaliy Filippov ca01931e72 Extract PG combinator into a separate module 2024-03-31 12:20:49 +03:00
Vitaliy Filippov 57eb811bb1 Do not die on invalid pool configurations 2024-03-31 12:20:49 +03:00
13 changed files with 1273 additions and 605 deletions

View File

@ -5,14 +5,9 @@ package vitastor
import (
"context"
"errors"
"encoding/json"
"fmt"
"os"
"os/exec"
"path/filepath"
"strconv"
"strings"
"syscall"
"time"
@ -81,53 +76,6 @@ func NewNodeServer(driver *Driver) *NodeServer
return ns
}
func checkVduseSupport() bool
{
// Check VDUSE support (vdpa, vduse, virtio-vdpa kernel modules)
vduse := true
for _, mod := range []string{"vdpa", "vduse", "virtio-vdpa"}
{
_, err := os.Stat("/sys/module/"+mod)
if (err != nil)
{
if (!errors.Is(err, os.ErrNotExist))
{
klog.Errorf("failed to check /sys/module/%s: %v", mod, err)
}
c := exec.Command("/sbin/modprobe", mod)
c.Stdout = os.Stderr
c.Stderr = os.Stderr
err := c.Run()
if (err != nil)
{
klog.Errorf("/sbin/modprobe %s failed: %v", mod, err)
vduse = false
break
}
}
}
// Check that vdpa tool functions
if (vduse)
{
c := exec.Command("/sbin/vdpa", "-j", "dev")
c.Stderr = os.Stderr
err := c.Run()
if (err != nil)
{
klog.Errorf("/sbin/vdpa -j dev failed: %v", err)
vduse = false
}
}
if (!vduse)
{
klog.Errorf(
"Your host apparently has no VDUSE support. VDUSE support disabled, NBD will be used to map devices."+
" For VDUSE you need at least Linux 5.15 and the following kernel modules: vdpa, virtio-vdpa, vduse.",
)
}
return vduse
}
// NodeStageVolume mounts the volume to a staging path on the node.
func (ns *NodeServer) NodeStageVolume(ctx context.Context, req *csi.NodeStageVolumeRequest) (*csi.NodeStageVolumeResponse, error)
{
@ -140,242 +88,6 @@ func (ns *NodeServer) NodeUnstageVolume(ctx context.Context, req *csi.NodeUnstag
return &csi.NodeUnstageVolumeResponse{}, nil
}
func Contains(list []string, s string) bool
{
for i := 0; i < len(list); i++
{
if (list[i] == s)
{
return true
}
}
return false
}
func (ns *NodeServer) mapNbd(volName string, ctxVars map[string]string, readonly bool) (string, error)
{
// Map NBD device
// FIXME: Check if already mapped
args := []string{
"map", "--image", volName,
}
if (ctxVars["configPath"] != "")
{
args = append(args, "--config_path", ctxVars["configPath"])
}
if (readonly)
{
args = append(args, "--readonly", "1")
}
stdout, stderr, err := system("/usr/bin/vitastor-nbd", args...)
dev := strings.TrimSpace(string(stdout))
if (dev == "")
{
return "", fmt.Errorf("vitastor-nbd did not return the name of NBD device. output: %s", stderr)
}
return dev, err
}
func (ns *NodeServer) unmapNbd(devicePath string)
{
// unmap NBD device
unmapOut, unmapErr := exec.Command("/usr/bin/vitastor-nbd", "unmap", devicePath).CombinedOutput()
if (unmapErr != nil)
{
klog.Errorf("failed to unmap NBD device %s: %s, error: %v", devicePath, unmapOut, unmapErr)
}
}
func findByPidFile(pidFile string) (*os.Process, error)
{
pidBuf, err := os.ReadFile(pidFile)
if (err != nil)
{
return nil, err
}
pid, err := strconv.ParseInt(strings.TrimSpace(string(pidBuf)), 0, 64)
if (err != nil)
{
return nil, err
}
proc, err := os.FindProcess(int(pid))
if (err != nil)
{
return nil, err
}
return proc, nil
}
func killByPidFile(pidFile string) error
{
klog.Infof("killing process with PID from file %s", pidFile)
proc, err := findByPidFile(pidFile)
if (err != nil)
{
return err
}
return proc.Signal(syscall.SIGTERM)
}
func startStorageDaemon(vdpaId, volName, pidFile, configPath string, readonly bool) error
{
// Start qemu-storage-daemon
blockSpec := map[string]interface{}{
"node-name": "disk1",
"driver": "vitastor",
"image": volName,
"cache": map[string]bool{
"direct": true,
"no-flush": false,
},
"discard": "unmap",
}
if (configPath != "")
{
blockSpec["config-path"] = configPath
}
blockSpecJson, _ := json.Marshal(blockSpec)
writable := "true"
if (readonly)
{
writable = "false"
}
_, _, err := system(
"/usr/bin/qemu-storage-daemon", "--daemonize", "--pidfile", pidFile, "--blockdev", string(blockSpecJson),
"--export", "vduse-blk,id="+vdpaId+",node-name=disk1,name="+vdpaId+",num-queues=16,queue-size=128,writable="+writable,
)
return err
}
func (ns *NodeServer) mapVduse(volName string, ctxVars map[string]string, readonly bool) (string, string, error)
{
// Generate state file
stateFd, err := os.CreateTemp(ns.stateDir, "vitastor-vduse-*.json")
if (err != nil)
{
return "", "", err
}
stateFile := stateFd.Name()
stateFd.Close()
vdpaId := filepath.Base(stateFile)
vdpaId = vdpaId[0:len(vdpaId)-5] // remove ".json"
pidFile := ns.stateDir + vdpaId + ".pid"
// Map VDUSE device via qemu-storage-daemon
err = startStorageDaemon(vdpaId, volName, pidFile, ctxVars["configPath"], readonly)
if (err == nil)
{
// Add device to VDPA bus
_, _, err = system("/sbin/vdpa", "-j", "dev", "add", "name", vdpaId, "mgmtdev", "vduse")
if (err == nil)
{
// Find block device name
var matches []string
matches, err = filepath.Glob("/sys/bus/vdpa/devices/"+vdpaId+"/virtio*/block/*")
if (err == nil && len(matches) == 0)
{
err = errors.New("/sys/bus/vdpa/devices/"+vdpaId+"/virtio*/block/* is not found")
}
if (err == nil)
{
blockdev := "/dev/"+filepath.Base(matches[0])
_, err = os.Stat(blockdev)
if (err == nil)
{
// Generate state file
stateJSON, _ := json.Marshal(&DeviceState{
ConfigPath: ctxVars["configPath"],
VdpaId: vdpaId,
Image: volName,
Blockdev: blockdev,
Readonly: readonly,
PidFile: pidFile,
})
err = os.WriteFile(stateFile, stateJSON, 0600)
if (err == nil)
{
return blockdev, vdpaId, nil
}
}
}
}
killErr := killByPidFile(pidFile)
if (killErr != nil)
{
klog.Errorf("Failed to kill started qemu-storage-daemon: %v", killErr)
}
os.Remove(stateFile)
os.Remove(pidFile)
}
return "", "", err
}
func (ns *NodeServer) unmapVduse(devicePath string)
{
if (len(devicePath) < 6 || devicePath[0:6] != "/dev/v")
{
klog.Errorf("%s does not start with /dev/v", devicePath)
return
}
vduseDev, err := os.Readlink("/sys/block/"+devicePath[5:])
if (err != nil)
{
klog.Errorf("%s is not a symbolic link to VDUSE device (../devices/virtual/vduse/xxx): %v", devicePath, err)
return
}
vdpaId := ""
p := strings.Index(vduseDev, "/vduse/")
if (p >= 0)
{
vduseDev = vduseDev[p+7:]
p = strings.Index(vduseDev, "/")
if (p >= 0)
{
vdpaId = vduseDev[0:p]
}
}
if (vdpaId == "")
{
klog.Errorf("%s is not a symbolic link to VDUSE device (../devices/virtual/vduse/xxx), but is %v", devicePath, vduseDev)
return
}
ns.unmapVduseById(vdpaId)
}
func (ns *NodeServer) unmapVduseById(vdpaId string)
{
_, err := os.Stat("/sys/bus/vdpa/devices/"+vdpaId)
if (err != nil)
{
klog.Errorf("failed to stat /sys/bus/vdpa/devices/"+vdpaId+": %v", err)
}
else
{
_, _, _ = system("/sbin/vdpa", "-j", "dev", "del", vdpaId)
}
stateFile := ns.stateDir + vdpaId + ".json"
os.Remove(stateFile)
pidFile := ns.stateDir + vdpaId + ".pid"
_, err = os.Stat(pidFile)
if (os.IsNotExist(err))
{
// ok, already killed
}
else if (err != nil)
{
klog.Errorf("Failed to stat %v: %v", pidFile, err)
return
}
else
{
err = killByPidFile(pidFile)
if (err != nil)
{
klog.Errorf("Failed to kill started qemu-storage-daemon: %v", err)
}
os.Remove(pidFile)
}
}
func (ns *NodeServer) restarter()
{
// Restart dead VDUSE daemons at regular intervals
@ -459,7 +171,7 @@ func (ns *NodeServer) restoreVduseDaemons()
else
{
// Unused, clean it up
ns.unmapVduseById(vdpaId)
unmapVduseById(ns.stateDir, vdpaId)
}
}
}
@ -526,11 +238,11 @@ func (ns *NodeServer) NodePublishVolume(ctx context.Context, req *csi.NodePublis
var devicePath, vdpaId string
if (!ns.useVduse)
{
devicePath, err = ns.mapNbd(volName, ctxVars, req.GetReadonly())
devicePath, err = mapNbd(volName, ctxVars, req.GetReadonly())
}
else
{
devicePath, vdpaId, err = ns.mapVduse(volName, ctxVars, req.GetReadonly())
devicePath, vdpaId, err = mapVduse(ns.stateDir, volName, ctxVars, req.GetReadonly())
}
if (err != nil)
{
@ -619,11 +331,11 @@ func (ns *NodeServer) NodePublishVolume(ctx context.Context, req *csi.NodePublis
unmap:
if (!ns.useVduse || len(devicePath) >= 8 && devicePath[0:8] == "/dev/nbd")
{
ns.unmapNbd(devicePath)
unmapNbd(devicePath)
}
else
{
ns.unmapVduseById(vdpaId)
unmapVduseById(ns.stateDir, vdpaId)
}
return nil, err
}
@ -660,11 +372,11 @@ func (ns *NodeServer) NodeUnpublishVolume(ctx context.Context, req *csi.NodeUnpu
{
if (!ns.useVduse)
{
ns.unmapNbd(devicePath)
unmapNbd(devicePath)
}
else
{
ns.unmapVduse(devicePath)
unmapVduse(ns.stateDir, devicePath)
}
}
return &csi.NodeUnpublishVolumeResponse{}, nil

301
csi/src/utils.go Normal file
View File

@ -0,0 +1,301 @@
// Copyright (c) Vitaliy Filippov, 2019+
// License: VNPL-1.1 or GNU GPL-2.0+ (see README.md for details)
package vitastor
import (
"errors"
"encoding/json"
"fmt"
"os"
"os/exec"
"path/filepath"
"strconv"
"strings"
"syscall"
"k8s.io/klog"
)
func Contains(list []string, s string) bool
{
for i := 0; i < len(list); i++
{
if (list[i] == s)
{
return true
}
}
return false
}
func checkVduseSupport() bool
{
// Check VDUSE support (vdpa, vduse, virtio-vdpa kernel modules)
vduse := true
for _, mod := range []string{"vdpa", "vduse", "virtio-vdpa"}
{
_, err := os.Stat("/sys/module/"+mod)
if (err != nil)
{
if (!errors.Is(err, os.ErrNotExist))
{
klog.Errorf("failed to check /sys/module/%s: %v", mod, err)
}
c := exec.Command("/sbin/modprobe", mod)
c.Stdout = os.Stderr
c.Stderr = os.Stderr
err := c.Run()
if (err != nil)
{
klog.Errorf("/sbin/modprobe %s failed: %v", mod, err)
vduse = false
break
}
}
}
// Check that vdpa tool functions
if (vduse)
{
c := exec.Command("/sbin/vdpa", "-j", "dev")
c.Stderr = os.Stderr
err := c.Run()
if (err != nil)
{
klog.Errorf("/sbin/vdpa -j dev failed: %v", err)
vduse = false
}
}
if (!vduse)
{
klog.Errorf(
"Your host apparently has no VDUSE support. VDUSE support disabled, NBD will be used to map devices."+
" For VDUSE you need at least Linux 5.15 and the following kernel modules: vdpa, virtio-vdpa, vduse.",
)
}
return vduse
}
func mapNbd(volName string, ctxVars map[string]string, readonly bool) (string, error)
{
// Map NBD device
// FIXME: Check if already mapped
args := []string{
"map", "--image", volName,
}
if (ctxVars["configPath"] != "")
{
args = append(args, "--config_path", ctxVars["configPath"])
}
if (readonly)
{
args = append(args, "--readonly", "1")
}
stdout, stderr, err := system("/usr/bin/vitastor-nbd", args...)
dev := strings.TrimSpace(string(stdout))
if (dev == "")
{
return "", fmt.Errorf("vitastor-nbd did not return the name of NBD device. output: %s", stderr)
}
return dev, err
}
func unmapNbd(devicePath string)
{
// unmap NBD device
unmapOut, unmapErr := exec.Command("/usr/bin/vitastor-nbd", "unmap", devicePath).CombinedOutput()
if (unmapErr != nil)
{
klog.Errorf("failed to unmap NBD device %s: %s, error: %v", devicePath, unmapOut, unmapErr)
}
}
func findByPidFile(pidFile string) (*os.Process, error)
{
pidBuf, err := os.ReadFile(pidFile)
if (err != nil)
{
return nil, err
}
pid, err := strconv.ParseInt(strings.TrimSpace(string(pidBuf)), 0, 64)
if (err != nil)
{
return nil, err
}
proc, err := os.FindProcess(int(pid))
if (err != nil)
{
return nil, err
}
return proc, nil
}
func killByPidFile(pidFile string) error
{
klog.Infof("killing process with PID from file %s", pidFile)
proc, err := findByPidFile(pidFile)
if (err != nil)
{
return err
}
return proc.Signal(syscall.SIGTERM)
}
func startStorageDaemon(vdpaId, volName, pidFile, configPath string, readonly bool) error
{
// Start qemu-storage-daemon
blockSpec := map[string]interface{}{
"node-name": "disk1",
"driver": "vitastor",
"image": volName,
"cache": map[string]bool{
"direct": true,
"no-flush": false,
},
"discard": "unmap",
}
if (configPath != "")
{
blockSpec["config-path"] = configPath
}
blockSpecJson, _ := json.Marshal(blockSpec)
writable := "true"
if (readonly)
{
writable = "false"
}
_, _, err := system(
"/usr/bin/qemu-storage-daemon", "--daemonize", "--pidfile", pidFile, "--blockdev", string(blockSpecJson),
"--export", "vduse-blk,id="+vdpaId+",node-name=disk1,name="+vdpaId+",num-queues=16,queue-size=128,writable="+writable,
)
return err
}
func mapVduse(stateDir string, volName string, ctxVars map[string]string, readonly bool) (string, string, error)
{
// Generate state file
stateFd, err := os.CreateTemp(stateDir, "vitastor-vduse-*.json")
if (err != nil)
{
return "", "", err
}
stateFile := stateFd.Name()
stateFd.Close()
vdpaId := filepath.Base(stateFile)
vdpaId = vdpaId[0:len(vdpaId)-5] // remove ".json"
pidFile := stateDir + vdpaId + ".pid"
// Map VDUSE device via qemu-storage-daemon
err = startStorageDaemon(vdpaId, volName, pidFile, ctxVars["configPath"], readonly)
if (err == nil)
{
// Add device to VDPA bus
_, _, err = system("/sbin/vdpa", "-j", "dev", "add", "name", vdpaId, "mgmtdev", "vduse")
if (err == nil)
{
// Find block device name
var matches []string
matches, err = filepath.Glob("/sys/bus/vdpa/devices/"+vdpaId+"/virtio*/block/*")
if (err == nil && len(matches) == 0)
{
err = errors.New("/sys/bus/vdpa/devices/"+vdpaId+"/virtio*/block/* is not found")
}
if (err == nil)
{
blockdev := "/dev/"+filepath.Base(matches[0])
_, err = os.Stat(blockdev)
if (err == nil)
{
// Generate state file
stateJSON, _ := json.Marshal(&DeviceState{
ConfigPath: ctxVars["configPath"],
VdpaId: vdpaId,
Image: volName,
Blockdev: blockdev,
Readonly: readonly,
PidFile: pidFile,
})
err = os.WriteFile(stateFile, stateJSON, 0600)
if (err == nil)
{
return blockdev, vdpaId, nil
}
}
}
}
killErr := killByPidFile(pidFile)
if (killErr != nil)
{
klog.Errorf("Failed to kill started qemu-storage-daemon: %v", killErr)
}
os.Remove(stateFile)
os.Remove(pidFile)
}
return "", "", err
}
func unmapVduse(stateDir, devicePath string)
{
if (len(devicePath) < 6 || devicePath[0:6] != "/dev/v")
{
klog.Errorf("%s does not start with /dev/v", devicePath)
return
}
vduseDev, err := os.Readlink("/sys/block/"+devicePath[5:])
if (err != nil)
{
klog.Errorf("%s is not a symbolic link to VDUSE device (../devices/virtual/vduse/xxx): %v", devicePath, err)
return
}
vdpaId := ""
p := strings.Index(vduseDev, "/vduse/")
if (p >= 0)
{
vduseDev = vduseDev[p+7:]
p = strings.Index(vduseDev, "/")
if (p >= 0)
{
vdpaId = vduseDev[0:p]
}
}
if (vdpaId == "")
{
klog.Errorf("%s is not a symbolic link to VDUSE device (../devices/virtual/vduse/xxx), but is %v", devicePath, vduseDev)
return
}
unmapVduseById(stateDir, vdpaId)
}
func unmapVduseById(stateDir, vdpaId string)
{
_, err := os.Stat("/sys/bus/vdpa/devices/"+vdpaId)
if (err != nil)
{
klog.Errorf("failed to stat /sys/bus/vdpa/devices/"+vdpaId+": %v", err)
}
else
{
_, _, _ = system("/sbin/vdpa", "-j", "dev", "del", vdpaId)
}
stateFile := stateDir + vdpaId + ".json"
os.Remove(stateFile)
pidFile := stateDir + vdpaId + ".pid"
_, err = os.Stat(pidFile)
if (os.IsNotExist(err))
{
// ok, already killed
}
else if (err != nil)
{
klog.Errorf("Failed to stat %v: %v", pidFile, err)
return
}
else
{
err = killByPidFile(pidFile)
if (err != nil)
{
klog.Errorf("Failed to kill started qemu-storage-daemon: %v", err)
}
os.Remove(pidFile)
}
}

408
mon/dsl_pgs.js Normal file
View File

@ -0,0 +1,408 @@
const { select_murmur3 } = require('./murmur3.js');
const NO_OSD = 'Z';
class RuleCombinator
{
constructor(osd_tree, rules, max_combinations, ordered)
{
this.osd_tree = index_tree(Object.values(osd_tree).filter(o => o.id));
this.rules = rules;
this.max_combinations = max_combinations;
this.ordered = ordered;
}
random_combinations()
{
return random_custom_combinations(this.osd_tree, this.rules, this.max_combinations, this.ordered);
}
check_combinations(pgs)
{
return check_custom_combinations(this.osd_tree, this.rules, pgs);
}
}
// Convert alternative "level-index" format to rules
// level_index = { [level: string]: string | string[] }
// level_sequence = optional, levels from upper to lower, i.e. [ 'dc', 'host' ]
// Example: level_index = { dc: "112233", host: "ABCDEF" }
function parse_level_indexes(level_index, level_sequence)
{
const rules = [];
const lvl_first = {};
for (const level in level_index)
{
const idx = level_index[level];
while (rules.length < idx.length)
{
rules.push([]);
}
const seen = {};
for (let i = 0; i < idx.length; i++)
{
if (!seen[idx[i]])
{
const other = Object.values(seen);
if (other.length)
{
rules[i].push([ level, '!=', other ]);
}
seen[idx[i]] = i+1;
}
else
{
rules[i].push([ level, '=', seen[idx[i]] ]);
}
}
lvl_first[level] = seen;
}
if (level_sequence)
{
// Prune useless rules for the sake of prettiness
// For simplicity, call "upper" level DC and "lower" level host
const level_prio = Object.keys(level_sequence).reduce((a, c) => { a[level_sequence[c]] = c; return a; }, {});
for (let upper_i = 0; upper_i < level_sequence.length-1; upper_i++)
{
const upper_level = level_sequence[upper_i];
for (let i = 0; i < rules.length; i++)
{
const noteq = {};
for (let k = 0; k < level_index[upper_level].length; k++)
{
// If upper_level[x] is different from upper_level[y]
// then lower_level[x] is also different from lower_level[y]
if (level_index[upper_level][k] != level_index[upper_level][i])
{
noteq[k+1] = true;
}
}
for (let j = 0; j < rules[i].length; j++)
{
if (level_prio[rules[i][j][0]] != null && level_prio[rules[i][j][0]] > upper_i && rules[i][j][1] == '!=')
{
rules[i][j][2] = rules[i][j][2].filter(other_host => !noteq[other_host]);
if (!rules[i][j][2].length)
{
rules[i].splice(j--, 1);
}
}
}
}
}
}
return rules;
}
// Parse rules in DSL format
// dsl := item | item ("\n" | ",") items
// item := "any" | rules
// rules := rule | rule rules
// rule := level operator arg
// level := /\w+/
// operator := "!=" | "=" | ">" | "?="
// arg := value | "(" values ")"
// values := value | value "," values
// value := item_ref | constant_id
// item_ref := /\d+/
// constant_id := /"([^"]+)"/
//
// Output: [ level, operator, value ][][]
function parse_pg_dsl(text)
{
const tokens = [ ...text.matchAll(/\w+|!=|\?=|[>=\(\),\n]|"([^\"]+)"/g) ].map(t => [ t[0], t.index ]);
let positions = [ [] ];
let rules = positions[0];
for (let i = 0; i < tokens.length; )
{
if (tokens[i][0] === '\n' || tokens[i][0] === ',')
{
rules = [];
positions.push(rules);
i++;
}
else if (!rules.length && tokens[i][0] === 'any' && (i == tokens.length-1 || tokens[i+1][0] === ',' || tokens[i+1][0] === '\n'))
{
i++;
}
else
{
if (!/^\w/.exec(tokens[i][0]))
{
throw new Error('Unexpected '+tokens[i][0]+' at '+tokens[i][1]+' (level name expected)');
}
if (i > tokens.length-3)
{
throw new Error('Unexpected EOF (operator and value expected)');
}
if (/^\w/.exec(tokens[i+1][0]) || tokens[i+1][0] === ',' || tokens[i+1][0] === '\n')
{
throw new Error('Unexpected '+tokens[i+1][0]+' at '+tokens[i+1][1]+' (operator expected)');
}
if (!/^[\w"(]/.exec(tokens[i+2][0])) // "
{
throw new Error('Unexpected '+tokens[i+2][0]+' at '+tokens[i+2][1]+' (id, round brace, number or node ID expected)');
}
let rule = [ tokens[i][0], tokens[i+1][0], tokens[i+2][0] ];
i += 3;
if (rule[2][0] == '"')
{
rule[2] = { id: rule[2].substr(1, rule[2].length-2) };
}
else if (rule[2] === '(')
{
rule[2] = [];
while (true)
{
if (i > tokens.length-1)
{
throw new Error('Unexpected EOF (expected list and a closing round brace)');
}
if (tokens[i][0] === ',')
{
i++;
}
else if (tokens[i][0] === ')')
{
i++;
break;
}
else if (tokens[i][0][0] === '"')
{
rule[2].push({ id: tokens[i][0].substr(1, tokens[i][0].length-2) });
i++;
}
else if (/^\d+$/.exec(tokens[i][0]))
{
const n = 0|tokens[i][0];
if (!n)
{
throw new Error('Level reference cannot be 0 (refs count from 1) at '+tokens[i][1]);
}
else if (n > positions.length)
{
throw new Error('Forward references are forbidden at '+tokens[i][1]);
}
rule[2].push(n);
i++;
}
else if (!/^\w/.exec(tokens[i][0]))
{
throw new Error('Unexpected '+tokens[i][0]+' at '+tokens[i][1]+' (number or node ID expected)');
}
else
{
rule[2].push({ id: tokens[i][0] });
i++;
}
}
}
else if (!/^\d+$/.exec(rule[2]))
{
rule[2] = { id: rule[2] };
}
else
{
rule[2] = 0|rule[2];
if (!rule[2])
{
throw new Error('Level reference cannot be 0 (refs count from 1) at '+tokens[i-1][1]);
}
else if (rule[2] > positions.length)
{
throw new Error('Forward references are forbidden at '+tokens[i-1][1]);
}
}
rules.push(rule);
}
}
return positions;
}
// osd_tree = index_tree() output
// levels = { string: number }
// rules = [ level, operator, value ][][]
// level = string
// operator = '=' | '!=' | '>' | '?='
// value = number|number[] | { id: string|string[] }
// examples:
// 1) simple 3 replicas with failure_domain=host:
// [ [], [ [ 'host', '!=', 1 ] ], [ [ 'host', '!=', [ 1, 2 ] ] ] ]
// in DSL form: any, host!=1, host!=(1,2)
// 2) EC 4+2 in 3 DC:
// [ [], [ [ 'dc', '=', 1 ], [ 'host', '!=', 1 ] ],
// [ 'dc', '!=', 1 ], [ [ 'dc', '=', 3 ], [ 'host', '!=', 3 ] ],
// [ 'dc', '!=', [ 1, 3 ] ], [ [ 'dc', '=', 5 ], [ 'host', '!=', 5 ] ] ]
// in DSL form: any, dc=1 host!=1, dc!=1, dc=3 host!=3, dc!=(1,3), dc=5 host!=5
// 3) 1 replica in fixed DC + 2 in random DCs:
// [ [ [ 'dc', '=', { id: 'meow' } ] ], [ [ 'dc', '!=', 1 ] ], [ [ 'dc', '!=', [ 1, 2 ] ] ] ]
// in DSL form: dc=meow, dc!=1, dc!=(1,2)
// 4) 2 replicas in each DC (almost the same as (2)):
// DSL: any, dc=1 host!=1, dc!=1, dc=3 host!=3
// Alternative simpler way to specify rules would be: [ DC: 112233 HOST: 123456 ]
function random_custom_combinations(osd_tree, rules, count, ordered)
{
const r = {};
const first = filter_tree_by_rules(osd_tree, rules[0], []);
let max_size = 0;
// All combinations for the first item (usually "any") to try to include each OSD at least once
for (const f of first)
{
const selected = [ f ];
for (let i = 1; i < rules.length; i++)
{
const filtered = filter_tree_by_rules(osd_tree, rules[i], selected);
const idx = select_murmur3(filtered.length, i => 'p:'+f.id+':'+filtered[i].id);
selected.push(idx == null ? { levels: {}, id: null } : filtered[idx]);
}
const size = selected.filter(s => s.id !== null).length;
max_size = max_size < size ? size : max_size;
const pg = selected.map(s => s.id === null ? NO_OSD : s.id);
if (!ordered)
pg.sort();
r['pg_'+pg.join('_')] = pg;
}
// Pseudo-random selection
for (let n = 0; n < count; n++)
{
const selected = [];
for (const item_rules of rules)
{
const filtered = selected.length ? filter_tree_by_rules(osd_tree, item_rules, selected) : first;
const idx = select_murmur3(filtered.length, i => n+':'+filtered[i].id);
selected.push(idx == null ? { levels: {}, id: null } : filtered[idx]);
}
const size = selected.filter(s => s.id !== null).length;
max_size = max_size < size ? size : max_size;
const pg = selected.map(s => s.id === null ? NO_OSD : s.id);
if (!ordered)
pg.sort();
r['pg_'+pg.join('_')] = pg;
}
// Exclude PGs with less successful selections than maximum
for (const k in r)
{
if (r[k].filter(s => s !== NO_OSD).length < max_size)
{
delete r[k];
}
}
return r;
}
function filter_tree_by_rules(osd_tree, rules, selected)
{
let cur = osd_tree[''].children;
for (const rule of rules)
{
const val = (rule[2] instanceof Array ? rule[2] : [ rule[2] ])
.map(v => v instanceof Object ? v.id : selected[v-1].levels[rule[0]]);
let preferred = [], other = [];
for (let i = 0; i < cur.length; i++)
{
const item = cur[i];
const level_id = item.levels[rule[0]];
if (level_id)
{
if (rule[1] == '>' && val.filter(v => level_id <= v).length == 0 ||
(rule[1] == '=' || rule[1] == '?=') && val.filter(v => level_id != v).length == 0 ||
rule[1] == '!=' && val.filter(v => level_id == v).length == 0)
{
// Include
preferred.push(item);
}
else if (rule[1] == '?=' && val.filter(v => level_id != v).length > 0)
{
// Non-preferred
other.push(item);
}
}
else if (item.children)
{
// Descend
cur.splice(i+1, 0, ...item.children);
}
}
cur = preferred.length ? preferred : other;
}
// Get leaf items
for (let i = 0; i < cur.length; i++)
{
if (cur[i].children)
{
// Descend
cur.splice(i, 1, ...cur[i].children);
i--;
}
}
return cur;
}
// Convert from
// node_list = { id: string|number, level: string, size?: number, parent?: string|number }[]
// to
// node_tree = { [node_id]: { id, level, size?, parent?, children?: child_node_id[], levels: { [level]: id, ... } } }
function index_tree(node_list)
{
const tree = { '': { children: [], levels: {} } };
for (const node of node_list)
{
tree[node.id] = { ...node, levels: {} };
delete tree[node.id].children;
}
for (const node of node_list)
{
const parent_id = node.parent && tree[node.parent] ? node.parent : '';
tree[parent_id].children = tree[parent_id].children || [];
tree[parent_id].children.push(tree[node.id]);
}
const cur = tree[''].children;
for (let i = 0; i < cur.length; i++)
{
cur[i].levels[cur[i].level] = cur[i].id;
if (cur[i].children)
{
for (const child of cur[i].children)
{
child.levels = { ...cur[i].levels, ...child.levels };
}
cur.splice(i, 1, ...cur[i].children);
i--;
}
}
return tree;
}
// selection = id[]
// osd_tree = index_tree output
// rules = parse_pg_dsl output
function check_custom_combinations(osd_tree, rules, pgs)
{
const res = [];
skip_pg: for (const pg of pgs)
{
let selected = pg.map(id => osd_tree[id] || null);
for (let i = 0; i < rules.length; i++)
{
const filtered = filter_tree_by_rules(osd_tree, rules[i], selected);
if (selected[i] === null && filtered.length ||
!filtered.filter(ok => selected[i].id === ok.id).length)
{
continue skip_pg;
}
}
res.push(pg);
}
return res;
}
module.exports = {
RuleCombinator,
NO_OSD,
index_tree,
parse_level_indexes,
parse_pg_dsl,
random_custom_combinations,
check_custom_combinations,
};

View File

@ -50,15 +50,15 @@ async function lp_solve(text)
return { score, vars };
}
async function optimize_initial({ osd_tree, pg_count, pg_size = 3, pg_minsize = 2, max_combinations = 10000, parity_space = 1, ordered = false })
// osd_weights = { [id]: weight }
async function optimize_initial({ osd_weights, combinator, pg_count, pg_size = 3, pg_minsize = 2, parity_space = 1, ordered = false })
{
if (!pg_count || !osd_tree)
if (!pg_count || !osd_weights)
{
return null;
}
const all_weights = Object.assign({}, ...Object.values(osd_tree));
const total_weight = Object.values(all_weights).reduce((a, c) => Number(a) + Number(c), 0);
const all_pgs = Object.values(random_combinations(osd_tree, pg_size, max_combinations, parity_space > 1));
const total_weight = Object.values(osd_weights).reduce((a, c) => Number(a) + Number(c), 0);
const all_pgs = Object.values(make_cyclic(combinator.random_combinations(), parity_space));
const pg_per_osd = {};
for (const pg of all_pgs)
{
@ -69,15 +69,15 @@ async function optimize_initial({ osd_tree, pg_count, pg_size = 3, pg_minsize =
pg_per_osd[osd].push((i >= pg_minsize ? parity_space+'*' : '')+"pg_"+pg.join("_"));
}
}
const pg_effsize = Math.min(pg_minsize, Object.keys(osd_tree).length)
+ Math.max(0, Math.min(pg_size, Object.keys(osd_tree).length) - pg_minsize) * parity_space;
let pg_effsize = all_pgs.reduce((a, c) => Math.max(a, c.filter(e => e != NO_OSD).length), 0);
pg_effsize = Math.min(pg_minsize, pg_effsize) + Math.max(0, Math.min(pg_size, pg_effsize) - pg_minsize) * parity_space;
let lp = '';
lp += "max: "+all_pgs.map(pg => 'pg_'+pg.join('_')).join(' + ')+";\n";
for (const osd in pg_per_osd)
{
if (osd !== NO_OSD)
{
let osd_pg_count = all_weights[osd]/total_weight*pg_effsize*pg_count;
let osd_pg_count = osd_weights[osd]/total_weight*pg_effsize*pg_count;
lp += pg_per_osd[osd].join(' + ')+' <= '+osd_pg_count+';\n';
}
}
@ -93,7 +93,7 @@ async function optimize_initial({ osd_tree, pg_count, pg_size = 3, pg_minsize =
throw new Error('Problem is infeasible or unbounded - is it a bug?');
}
const int_pgs = make_int_pgs(lp_result.vars, pg_count, ordered);
const eff = pg_list_space_efficiency(int_pgs, all_weights, pg_minsize, parity_space);
const eff = pg_list_space_efficiency(int_pgs, osd_weights, pg_minsize, parity_space);
const res = {
score: lp_result.score,
weights: lp_result.vars,
@ -104,6 +104,22 @@ async function optimize_initial({ osd_tree, pg_count, pg_size = 3, pg_minsize =
return res;
}
function make_cyclic(pgs, parity_space)
{
if (parity_space > 1)
{
for (const pg in pgs)
{
for (let i = 1; i < pg.size; i++)
{
const cyclic = [ ...pg.slice(i), ...pg.slice(0, i) ];
pgs['pg_'+cyclic.join('_')] = cyclic;
}
}
}
return pgs;
}
function shuffle(array)
{
for (let i = array.length - 1, j, x; i > 0; i--)
@ -216,47 +232,17 @@ function calc_intersect_weights(old_pg_size, pg_size, pg_count, prev_weights, al
return move_weights;
}
function add_valid_previous(osd_tree, prev_weights, all_pgs)
{
// Add previous combinations that are still valid
const hosts = Object.keys(osd_tree).sort();
const host_per_osd = {};
for (const host in osd_tree)
{
for (const osd in osd_tree[host])
{
host_per_osd[osd] = host;
}
}
skip_pg: for (const pg_name in prev_weights)
{
const seen_hosts = {};
const pg = pg_name.substr(3).split(/_/);
for (const osd of pg)
{
if (!host_per_osd[osd] || seen_hosts[host_per_osd[osd]])
{
continue skip_pg;
}
seen_hosts[host_per_osd[osd]] = true;
}
if (!all_pgs[pg_name])
{
all_pgs[pg_name] = pg;
}
}
}
// Try to minimize data movement
async function optimize_change({ prev_pgs: prev_int_pgs, osd_tree, pg_size = 3, pg_minsize = 2, max_combinations = 10000, parity_space = 1, ordered = false })
async function optimize_change({ prev_pgs: prev_int_pgs, osd_weights, combinator, pg_size = 3, pg_minsize = 2, parity_space = 1, ordered = false })
{
if (!osd_tree)
if (!osd_weights)
{
return null;
}
// FIXME: use parity_chunks with parity_space instead of pg_minsize
const pg_effsize = Math.min(pg_minsize, Object.keys(osd_tree).length)
+ Math.max(0, Math.min(pg_size, Object.keys(osd_tree).length) - pg_minsize) * parity_space;
let all_pgs = make_cyclic(combinator.random_combinations(), parity_space);
let pg_effsize = Object.values(all_pgs).reduce((a, c) => Math.max(a, c.filter(e => e != NO_OSD).length), 0);
pg_effsize = Math.min(pg_minsize, pg_effsize) + Math.max(0, Math.min(pg_size, pg_effsize) - pg_minsize) * parity_space;
const pg_count = prev_int_pgs.length;
const prev_weights = {};
const prev_pg_per_osd = {};
@ -273,10 +259,13 @@ async function optimize_change({ prev_pgs: prev_int_pgs, osd_tree, pg_size = 3,
}
const old_pg_size = prev_int_pgs[0].length;
// Get all combinations
let all_pgs = random_combinations(osd_tree, pg_size, max_combinations, parity_space > 1);
if (old_pg_size == pg_size)
{
add_valid_previous(osd_tree, prev_weights, all_pgs);
const still_valid = combinator.check_combinations(Object.keys(prev_weights).map(pg_name => pg_name.substr(3).split('_')));
for (const pg of still_valid)
{
all_pgs['pg_'+pg.join('_')] = pg;
}
}
all_pgs = Object.values(all_pgs);
const pg_per_osd = {};
@ -295,8 +284,7 @@ async function optimize_change({ prev_pgs: prev_int_pgs, osd_tree, pg_size = 3,
// Calculate total weight - old PG weights
const all_pg_names = all_pgs.map(pg => 'pg_'+pg.join('_'));
const all_pgs_hash = all_pg_names.reduce((a, c) => { a[c] = true; return a; }, {});
const all_weights = Object.assign({}, ...Object.values(osd_tree));
const total_weight = Object.values(all_weights).reduce((a, c) => Number(a) + Number(c), 0);
const total_weight = Object.values(osd_weights).reduce((a, c) => Number(a) + Number(c), 0);
// Generate the LP problem
let lp = '';
lp += 'max: '+all_pg_names.map(pg_name => (
@ -311,7 +299,7 @@ async function optimize_change({ prev_pgs: prev_int_pgs, osd_tree, pg_size = 3,
)).join(' + ');
const rm_osd_pg_count = (prev_pg_per_osd[osd]||[])
.reduce((a, [ old_pg_name, space ]) => (a + (all_pgs_hash[old_pg_name] ? space : 0)), 0);
const osd_pg_count = all_weights[osd]*pg_effsize/total_weight*pg_count - rm_osd_pg_count;
const osd_pg_count = osd_weights[osd]*pg_effsize/total_weight*pg_count - rm_osd_pg_count;
lp += osd_sum + ' <= ' + osd_pg_count + ';\n';
}
}
@ -421,7 +409,7 @@ async function optimize_change({ prev_pgs: prev_int_pgs, osd_tree, pg_size = 3,
int_pgs: new_pgs,
differs,
osd_differs,
space: pg_effsize * pg_list_space_efficiency(new_pgs, all_weights, pg_minsize, parity_space),
space: pg_effsize * pg_list_space_efficiency(new_pgs, osd_weights, pg_minsize, parity_space),
total_space: total_weight,
};
}
@ -502,198 +490,6 @@ function put_aligned_pgs(aligned_pgs, int_pgs, prev_int_pgs, keygen)
}
}
// Convert multi-level osd_tree = { level: number|string, id?: string, size?: number, children?: osd_tree }[]
// levels = { string: number }
// to a two-level osd_tree suitable for all_combinations()
function flatten_tree(osd_tree, levels, failure_domain_level, osd_level, domains = {}, i = { i: 1 })
{
osd_level = levels[osd_level] || osd_level;
failure_domain_level = levels[failure_domain_level] || failure_domain_level;
for (const node of osd_tree)
{
if ((levels[node.level] || node.level) < failure_domain_level)
{
flatten_tree(node.children||[], levels, failure_domain_level, osd_level, domains, i);
}
else
{
domains['dom'+(i.i++)] = extract_osds([ node ], levels, osd_level);
}
}
return domains;
}
function extract_osds(osd_tree, levels, osd_level, osds = {})
{
for (const node of osd_tree)
{
if ((levels[node.level] || node.level) >= osd_level)
{
osds[node.id] = node.size;
}
else
{
extract_osds(node.children||[], levels, osd_level, osds);
}
}
return osds;
}
// ordered = don't treat (x,y) and (y,x) as equal
function random_combinations(osd_tree, pg_size, count, ordered)
{
let seed = 0x5f020e43;
let rng = () =>
{
seed ^= seed << 13;
seed ^= seed >> 17;
seed ^= seed << 5;
return seed + 2147483648;
};
const osds = Object.keys(osd_tree).reduce((a, c) => { a[c] = Object.keys(osd_tree[c]).sort(); return a; }, {});
const hosts = Object.keys(osd_tree).sort().filter(h => osds[h].length > 0);
const r = {};
// Generate random combinations including each OSD at least once
for (let h = 0; h < hosts.length; h++)
{
for (let o = 0; o < osds[hosts[h]].length; o++)
{
const pg = [ osds[hosts[h]][o] ];
const cur_hosts = [ ...hosts ];
cur_hosts.splice(h, 1);
for (let i = 1; i < pg_size && i < hosts.length; i++)
{
const next_host = rng() % cur_hosts.length;
const next_osd = rng() % osds[cur_hosts[next_host]].length;
pg.push(osds[cur_hosts[next_host]][next_osd]);
cur_hosts.splice(next_host, 1);
}
const cyclic_pgs = [ pg ];
if (ordered)
{
for (let i = 1; i < pg.size; i++)
{
cyclic_pgs.push([ ...pg.slice(i), ...pg.slice(0, i) ]);
}
}
for (const pg of cyclic_pgs)
{
while (pg.length < pg_size)
{
pg.push(NO_OSD);
}
r['pg_'+pg.join('_')] = pg;
}
}
}
// Generate purely random combinations
while (count > 0)
{
let host_idx = [];
const cur_hosts = [ ...hosts.map((h, i) => i) ];
const max_hosts = pg_size < hosts.length ? pg_size : hosts.length;
if (ordered)
{
for (let i = 0; i < max_hosts; i++)
{
const r = rng() % cur_hosts.length;
host_idx[i] = cur_hosts[r];
cur_hosts.splice(r, 1);
}
}
else
{
for (let i = 0; i < max_hosts; i++)
{
const r = rng() % (cur_hosts.length - (max_hosts - i - 1));
host_idx[i] = cur_hosts[r];
cur_hosts.splice(0, r+1);
}
}
let pg = host_idx.map(h => osds[hosts[h]][rng() % osds[hosts[h]].length]);
while (pg.length < pg_size)
{
pg.push(NO_OSD);
}
r['pg_'+pg.join('_')] = pg;
count--;
}
return r;
}
// Super-stupid algorithm. Given the current OSD tree, generate all possible OSD combinations
// osd_tree = { failure_domain1: { osd1: size1, ... }, ... }
// ordered = return combinations without duplicates having different order
function all_combinations(osd_tree, pg_size, ordered, count)
{
const hosts = Object.keys(osd_tree).sort();
const osds = Object.keys(osd_tree).reduce((a, c) => { a[c] = Object.keys(osd_tree[c]).sort(); return a; }, {});
while (hosts.length < pg_size)
{
osds[NO_OSD] = [ NO_OSD ];
hosts.push(NO_OSD);
}
let host_idx = [];
let osd_idx = [];
for (let i = 0; i < pg_size; i++)
{
host_idx.push(i);
osd_idx.push(0);
}
const r = [];
while (!count || count < 0 || r.length < count)
{
r.push(host_idx.map((hi, i) => osds[hosts[hi]][osd_idx[i]]));
let inc = pg_size-1;
while (inc >= 0)
{
osd_idx[inc]++;
if (osd_idx[inc] >= osds[hosts[host_idx[inc]]].length)
{
osd_idx[inc] = 0;
inc--;
}
else
{
break;
}
}
if (inc < 0)
{
// no osds left in the current host combination, select the next one
inc = pg_size-1;
same_again: while (inc >= 0)
{
host_idx[inc]++;
for (let prev_host = 0; prev_host < inc; prev_host++)
{
if (host_idx[prev_host] == host_idx[inc])
{
continue same_again;
}
}
if (host_idx[inc] < (ordered ? hosts.length-(pg_size-1-inc) : hosts.length))
{
while ((++inc) < pg_size)
{
host_idx[inc] = (ordered ? host_idx[inc-1]+1 : 0);
}
break;
}
else
{
inc--;
}
}
if (inc < 0)
{
break;
}
}
}
return r;
}
function pg_weights_space_efficiency(weights, pg_count, osd_sizes)
{
const per_osd = {};
@ -752,11 +548,8 @@ module.exports = {
pg_weights_space_efficiency,
pg_list_space_efficiency,
pg_per_osd_space_efficiency,
flatten_tree,
lp_solve,
make_int_pgs,
align_pgs,
random_combinations,
all_combinations,
};

View File

@ -6,6 +6,8 @@ const http = require('http');
const crypto = require('crypto');
const os = require('os');
const WebSocket = require('ws');
const { RuleCombinator, parse_level_indexes, parse_pg_dsl } = require('./dsl_pgs.js');
const { SimpleCombinator } = require('./simple_pgs.js');
const LPOptimizer = require('./lp-optimizer.js');
const stableStringify = require('./stable-stringify.js');
const PGUtil = require('./PGUtil.js');
@ -63,6 +65,7 @@ const etcd_tree = {
mon_stats_timeout: 1000, // ms. min: 100
osd_out_time: 600, // seconds. min: 0
placement_levels: { datacenter: 1, rack: 2, host: 3, osd: 4, ... },
force_new_pg_combinator: false,
// client and osd
tcp_header_buffer_size: 65536,
use_sync_send_recv: false,
@ -185,7 +188,10 @@ const etcd_tree = {
// number of parity chunks, required for EC
parity_chunks?: 1,
pg_count: 100,
failure_domain: 'host',
// default is failure_domain=host
failure_domain?: 'host',
level_rules?: { host: '123' },
pg_rules?: 'any, dc=1 host!=1, dc=1 host!=(1,2)',
max_osd_combinations: 10000,
// block_size, bitmap_granularity, immediate_commit must match all OSDs used in that pool
block_size: 131072,
@ -930,7 +936,6 @@ class Mon
// Parent's level must be less than child's; OSDs must be leaves
const parent = parent_level && parent_level < node_level ? node_cfg.parent : '';
tree[parent].children.push(tree[node_id]);
delete node_cfg.parent;
}
return { up_osds, levels, osd_tree: tree };
}
@ -1096,7 +1101,6 @@ class Mon
pool_cfg.pg_minsize = Math.floor(pool_cfg.pg_minsize);
pool_cfg.parity_chunks = Math.floor(pool_cfg.parity_chunks) || undefined;
pool_cfg.pg_count = Math.floor(pool_cfg.pg_count);
pool_cfg.failure_domain = pool_cfg.failure_domain || 'host';
pool_cfg.max_osd_combinations = Math.floor(pool_cfg.max_osd_combinations) || 10000;
if (!/^[1-9]\d*$/.exec(''+pool_id))
{
@ -1176,10 +1180,32 @@ class Mon
console.log('Pool '+pool_id+' has invalid primary_affinity_tags (must be a string or array of strings)');
return false;
}
if (!this.get_pg_rules(pool_id, pool_cfg, true))
{
return false;
}
return true;
}
filter_osds_by_tags(orig_tree, flat_tree, tags)
filter_osds_by_root_node(pool_tree, root_node)
{
if (!root_node)
{
return;
}
pool_tree = pool_tree[pool_cfg.root_node];
const cur = [ ...(pool_tree||{}).children||[] ];
for (let i = 0; i < cur.length; i++)
{
if (cur.children)
{
cur.splice(i+1, 1, ...cur.children);
}
}
return cur;
}
filter_osds_by_tags(orig_tree, tags)
{
if (!tags)
{
@ -1187,30 +1213,22 @@ class Mon
}
for (const tag of (tags instanceof Array ? tags : [ tags ]))
{
for (const host in flat_tree)
for (const osd in orig_tree)
{
let found = 0;
for (const osd in flat_tree[host])
if (orig_tree[osd].level === 'osd' &&
(!orig_tree[osd].tags || !orig_tree[osd].tags[tag]))
{
if (!orig_tree[osd].tags || !orig_tree[osd].tags[tag])
delete flat_tree[host][osd];
else
found++;
}
if (!found)
{
delete flat_tree[host];
delete orig_tree[osd];
}
}
}
}
filter_osds_by_block_layout(flat_tree, block_size, bitmap_granularity, immediate_commit)
filter_osds_by_block_layout(orig_tree, block_size, bitmap_granularity, immediate_commit)
{
for (const host in flat_tree)
for (const osd in orig_tree)
{
let found = 0;
for (const osd in flat_tree[host])
if (orig_tree[osd].level === 'osd')
{
const osd_stat = this.state.osd.stats[osd];
if (osd_stat && (osd_stat.bs_block_size && osd_stat.bs_block_size != block_size ||
@ -1218,16 +1236,8 @@ class Mon
osd_stat.immediate_commit == 'small' && immediate_commit == 'all' ||
osd_stat.immediate_commit == 'none' && immediate_commit != 'none'))
{
delete flat_tree[host][osd];
delete orig_tree[host][osd];
}
else
{
found++;
}
}
if (!found)
{
delete flat_tree[host];
}
}
}
@ -1237,12 +1247,60 @@ class Mon
let aff_osds = up_osds;
if (pool_cfg.primary_affinity_tags)
{
aff_osds = { ...up_osds };
this.filter_osds_by_tags(osd_tree, { x: aff_osds }, pool_cfg.primary_affinity_tags);
aff_osds = Object.keys(up_osds).reduce((a, c) => { a[c] = osd_tree[c]; return a; }, {});
this.filter_osds_by_tags(aff_osds, pool_cfg.primary_affinity_tags);
for (const osd in aff_osds)
{
aff_osds[osd] = true;
}
}
return aff_osds;
}
get_pg_rules(pool_id, pool_cfg, warn)
{
if (pool_cfg.level_rules instanceof Object)
{
const levels = this.config.placement_levels||{};
levels.host = levels.host || 100;
levels.osd = levels.osd || 101;
for (const k in pool_cfg.level_rules)
{
if (!levels[k] || typeof pool_cfg.level_rules[k] !== 'string' &&
(!pool_cfg.level_rules[k] instanceof Array || pool_cfg.level_rules[k].filter(s => typeof s !== 'string').length > 0))
{
if (warn)
console.log('Pool '+pool_id+' configuration is invalid: level_rules should be { [level]: string or array of strings }');
return null;
}
}
return parse_level_indexes(pool_cfg.level_rules);
}
else if (typeof pool_cfg.pg_rules === 'string')
{
try
{
return parse_pg_dsl(pool_cfg.pg_rules);
}
catch (e)
{
if (warn)
console.log('Pool '+pool_id+' configuration is invalid: invalid pg_rules: '+e.message);
}
}
else
{
let rules = [ [] ];
let prev = [ 1 ];
for (let i = 1; i < pool_cfg.pg_size; i++)
{
rules.push([ [ pool_cfg.failure_domain||'host', '!=', prev ] ]);
prev = [ ...prev, i+1 ];
}
return rules;
}
}
async generate_pool_pgs(pool_id, osd_tree, levels)
{
const pool_cfg = this.state.config.pools[pool_id];
@ -1250,10 +1308,9 @@ class Mon
{
return null;
}
let pool_tree = osd_tree[pool_cfg.root_node || ''];
pool_tree = pool_tree ? pool_tree.children : [];
pool_tree = LPOptimizer.flatten_tree(pool_tree, levels, pool_cfg.failure_domain, 'osd');
this.filter_osds_by_tags(osd_tree, pool_tree, pool_cfg.osd_tags);
let pool_tree = osd_tree;
this.filter_osds_by_root_node(pool_tree, pool_cfg.root_node);
this.filter_osds_by_tags(pool_tree, pool_cfg.osd_tags);
this.filter_osds_by_block_layout(
pool_tree,
pool_cfg.block_size || this.config.block_size || 131072,
@ -1276,11 +1333,15 @@ class Mon
}
const old_pg_count = prev_pgs.length;
const optimize_cfg = {
osd_tree: pool_tree,
osd_weights: Object.values(pool_tree).filter(item => item.level === 'osd').reduce((a, c) => { a[c.id] = c.size; return a; }, {}),
combinator: 1 || this.config.force_new_pg_combinator || !pool_cfg.level_rules && !pool_cfg.pg_rules
// new algorithm:
? new RuleCombinator(osd_tree, this.get_pg_rules(pool_id, pool_cfg), pool_cfg.max_osd_combinations)
// old algorithm:
: new SimpleCombinator(flatten_tree(osd_tree, levels, pool_cfg.failure_domain, 'osd'), pool_cfg.pg_size, pool_cfg.max_osd_combinations),
pg_count: pool_cfg.pg_count,
pg_size: pool_cfg.pg_size,
pg_minsize: pool_cfg.pg_minsize,
max_combinations: pool_cfg.max_osd_combinations,
ordered: pool_cfg.scheme != 'replicated',
};
let optimize_result;
@ -1349,8 +1410,8 @@ class Mon
// Something has changed
console.log('Pool configuration or OSD tree changed, re-optimizing');
// First re-optimize PGs, but don't look at history yet
const optimize_results = await Promise.all(Object.keys(this.state.config.pools)
.map(pool_id => this.generate_pool_pgs(pool_id, osd_tree, levels)));
const optimize_results = (await Promise.all(Object.keys(this.state.config.pools)
.map(pool_id => this.generate_pool_pgs(pool_id, osd_tree, levels)))).filter(r => r);
// Then apply the modification in the form of an optimistic transaction,
// each time considering new pg/history modifications (OSDs modify it during rebalance)
while (!await this.apply_pool_pgs(optimize_results, up_osds, osd_tree, tree_hash))
@ -1436,6 +1497,10 @@ class Mon
async apply_pool_pgs(results, up_osds, osd_tree, tree_hash)
{
if (!results.length)
{
return true;
}
for (const pool_id in (this.state.config.pgs||{}).items||{})
{
// We should stop all PGs when deleting a pool or changing its PG count

38
mon/murmur3.js Normal file
View File

@ -0,0 +1,38 @@
function select_murmur3(count, cb)
{
if (!count)
{
return null;
}
else
{
let i = 0, maxh = -1;
for (let j = 0; j < count; j++)
{
const h = murmur3(cb(j));
if (h > maxh)
{
i = j;
maxh = h;
}
}
return i;
}
}
function murmur3(s)
{
let hash = 0x12345678;
for (let i = 0; i < s.length; i++)
{
hash ^= s.charCodeAt(i);
hash = (hash*0x5bd1e995) & 0xFFFFFFFF;
hash ^= (hash >> 15);
}
return hash;
}
module.exports = {
murmur3,
select_murmur3,
};

241
mon/simple_pgs.js Normal file
View File

@ -0,0 +1,241 @@
const { select_murmur3 } = require('./murmur3.js');
const NO_OSD = 'Z';
class SimpleCombinator
{
constructor(flat_tree, pg_size, max_combinations, ordered)
{
this.osd_tree = flat_tree;
this.pg_size = pg_size;
this.max_combinations = max_combinations;
this.ordered = ordered;
}
random_combinations()
{
return random_combinations(this.osd_tree, this.pg_size, this.max_combinations, this.ordered);
}
check_combinations(pgs)
{
return check_combinations(this.osd_tree, pgs);
}
}
// Convert multi-level osd_tree = { level: number|string, id?: string, size?: number, children?: osd_tree }[]
// levels = { string: number }
// to a two-level osd_tree suitable for all_combinations()
function flatten_tree(osd_tree, levels, failure_domain_level, osd_level, domains = {}, i = { i: 1 })
{
osd_level = levels[osd_level] || osd_level;
failure_domain_level = levels[failure_domain_level] || failure_domain_level;
for (const node of osd_tree)
{
if ((levels[node.level] || node.level) < failure_domain_level)
{
flatten_tree(node.children||[], levels, failure_domain_level, osd_level, domains, i);
}
else
{
domains['dom'+(i.i++)] = extract_osds([ node ], levels, osd_level);
}
}
return domains;
}
function extract_osds(osd_tree, levels, osd_level, osds = {})
{
for (const node of osd_tree)
{
if ((levels[node.level] || node.level) >= osd_level)
{
osds[node.id] = node.size;
}
else
{
extract_osds(node.children||[], levels, osd_level, osds);
}
}
return osds;
}
// ordered = don't treat (x,y) and (y,x) as equal
function random_combinations(osd_tree, pg_size, count, ordered)
{
const osds = Object.keys(osd_tree).reduce((a, c) => { a[c] = Object.keys(osd_tree[c]).sort(); return a; }, {});
const hosts = Object.keys(osd_tree).sort().filter(h => osds[h].length > 0);
const r = {};
// Generate random combinations including each OSD at least once
for (let h = 0; h < hosts.length; h++)
{
for (let o = 0; o < osds[hosts[h]].length; o++)
{
const pg = [ osds[hosts[h]][o] ];
const cur_hosts = [ ...hosts ];
cur_hosts.splice(h, 1);
for (let i = 1; i < pg_size && i < hosts.length; i++)
{
const next_host = select_murmur3(cur_hosts.length, i => pg[0]+':i:'+cur_hosts[i]);
const next_osd = select_murmur3(osds[cur_hosts[next_host]].length, i => pg[0]+':i:'+osds[cur_hosts[next_host]][i]);
pg.push(osds[cur_hosts[next_host]][next_osd]);
cur_hosts.splice(next_host, 1);
}
while (pg.length < pg_size)
{
pg.push(NO_OSD);
}
r['pg_'+pg.join('_')] = pg;
}
}
// Generate purely random combinations
while (count > 0)
{
let host_idx = [];
const cur_hosts = [ ...hosts.map((h, i) => i) ];
const max_hosts = pg_size < hosts.length ? pg_size : hosts.length;
if (ordered)
{
for (let i = 0; i < max_hosts; i++)
{
const r = select_murmur3(cur_hosts.length, i => count+':h:'+cur_hosts[i]);
host_idx[i] = cur_hosts[r];
cur_hosts.splice(r, 1);
}
}
else
{
for (let i = 0; i < max_hosts; i++)
{
const r = select_murmur3(cur_hosts.length - (max_hosts - i - 1), i => count+':h:'+cur_hosts[i]);
host_idx[i] = cur_hosts[r];
cur_hosts.splice(0, r+1);
}
}
let pg = host_idx.map(h => osds[hosts[h]][select_murmur3(osds[hosts[h]].length, i => count+':o:'+osds[hosts[h]][i])]);
while (pg.length < pg_size)
{
pg.push(NO_OSD);
}
r['pg_'+pg.join('_')] = pg;
count--;
}
return r;
}
// Super-stupid algorithm. Given the current OSD tree, generate all possible OSD combinations
// osd_tree = { failure_domain1: { osd1: size1, ... }, ... }
// ordered = return combinations without duplicates having different order
function all_combinations(osd_tree, pg_size, ordered, count)
{
const hosts = Object.keys(osd_tree).sort();
const osds = Object.keys(osd_tree).reduce((a, c) => { a[c] = Object.keys(osd_tree[c]).sort(); return a; }, {});
while (hosts.length < pg_size)
{
osds[NO_OSD] = [ NO_OSD ];
hosts.push(NO_OSD);
}
let host_idx = [];
let osd_idx = [];
for (let i = 0; i < pg_size; i++)
{
host_idx.push(i);
osd_idx.push(0);
}
const r = [];
while (!count || count < 0 || r.length < count)
{
r.push(host_idx.map((hi, i) => osds[hosts[hi]][osd_idx[i]]));
let inc = pg_size-1;
while (inc >= 0)
{
osd_idx[inc]++;
if (osd_idx[inc] >= osds[hosts[host_idx[inc]]].length)
{
osd_idx[inc] = 0;
inc--;
}
else
{
break;
}
}
if (inc < 0)
{
// no osds left in the current host combination, select the next one
inc = pg_size-1;
same_again: while (inc >= 0)
{
host_idx[inc]++;
for (let prev_host = 0; prev_host < inc; prev_host++)
{
if (host_idx[prev_host] == host_idx[inc])
{
continue same_again;
}
}
if (host_idx[inc] < (ordered ? hosts.length-(pg_size-1-inc) : hosts.length))
{
while ((++inc) < pg_size)
{
host_idx[inc] = (ordered ? host_idx[inc-1]+1 : 0);
}
break;
}
else
{
inc--;
}
}
if (inc < 0)
{
break;
}
}
}
return r;
}
function check_combinations(osd_tree, pgs)
{
const hosts = Object.keys(osd_tree).sort();
const host_per_osd = {};
for (const host in osd_tree)
{
for (const osd in osd_tree[host])
{
host_per_osd[osd] = host;
}
}
const res = [];
skip_pg: for (const pg of pgs)
{
const seen_hosts = {};
for (const osd of pg)
{
if (!host_per_osd[osd] || seen_hosts[host_per_osd[osd]])
{
continue skip_pg;
}
seen_hosts[host_per_osd[osd]] = true;
}
res.push(pg);
}
return res;
}
function compat(params)
{
return {
...params,
osd_weights: Object.assign({}, ...Object.values(params.osd_tree)),
combinator: new SimpleCombinator(params.osd_tree, params.pg_size, params.max_combinations||10000),
};
}
module.exports = {
flatten_tree,
SimpleCombinator,
compat,
NO_OSD,
};

View File

@ -7,6 +7,7 @@
// This leads to really uneven OSD fill ratio in Ceph even when PGs are perfectly balanced.
// But we support this case with the "parity_space" parameter in optimize_initial()/optimize_change().
const { SimpleCombinator } = require('./simple_pgs.js');
const LPOptimizer = require('./lp-optimizer.js');
const osd_tree = {
@ -114,16 +115,17 @@ Fine, let's try to optimize for it.
async function run()
{
const all_weights = Object.assign({}, ...Object.values(osd_tree));
const total_weight = Object.values(all_weights).reduce((a, c) => Number(a) + Number(c), 0);
const eff = LPOptimizer.pg_list_space_efficiency(prev_pgs, all_weights, 2, 2.26);
const osd_weights = Object.assign({}, ...Object.values(osd_tree));
const total_weight = Object.values(osd_weights).reduce((a, c) => Number(a) + Number(c), 0);
const eff = LPOptimizer.pg_list_space_efficiency(prev_pgs, osd_weights, 2, 2.26);
const orig = eff*4.26 / total_weight;
console.log('Original efficiency was: '+Math.round(orig*10000)/100+' %');
let prev = await LPOptimizer.optimize_initial({ osd_tree, pg_size: 3, pg_count: 256, parity_space: 2.26 });
const combinator = new SimpleCombinator(osd_tree, 3, 10000);
let prev = await LPOptimizer.optimize_initial({ osd_weights, combinator, pg_size: 3, pg_count: 256, parity_space: 2.26 });
LPOptimizer.print_change_stats(prev);
let next = await LPOptimizer.optimize_change({ prev_pgs, osd_tree, pg_size: 3, max_combinations: 10000, parity_space: 2.26 });
let next = await LPOptimizer.optimize_change({ prev_pgs, osd_weights, combinator, pg_size: 3, parity_space: 2.26 });
LPOptimizer.print_change_stats(next);
}

View File

@ -1,6 +1,7 @@
// Copyright (c) Vitaliy Filippov, 2019+
// License: VNPL-1.1 (see README.md for details)
const { compat } = require('./simple_pgs.js');
const LPOptimizer = require('./lp-optimizer.js');
async function run()
@ -14,26 +15,26 @@ async function run()
let res;
console.log('16 PGs, size=3');
res = await LPOptimizer.optimize_initial({ osd_tree, pg_size: 3, pg_count: 16, ordered: false });
res = await LPOptimizer.optimize_initial(compat({ osd_tree, pg_size: 3, pg_count: 16, ordered: false }));
LPOptimizer.print_change_stats(res, false);
assert(res.space == 3, 'Initial distribution');
console.log('\nChange size to 2');
res = await LPOptimizer.optimize_change({ prev_pgs: res.int_pgs, osd_tree, pg_size: 2, ordered: false });
res = await LPOptimizer.optimize_change(compat({ prev_pgs: res.int_pgs, osd_tree, pg_size: 2, ordered: false }));
LPOptimizer.print_change_stats(res, false);
assert(res.space >= 3*14/16 && res.osd_differs == 0, 'Redistribution');
console.log('\nRemove OSD 3');
const no3_tree = { ...osd_tree };
delete no3_tree['300'];
res = await LPOptimizer.optimize_change({ prev_pgs: res.int_pgs, osd_tree: no3_tree, pg_size: 2, ordered: false });
res = await LPOptimizer.optimize_change(compat({ prev_pgs: res.int_pgs, osd_tree: no3_tree, pg_size: 2, ordered: false }));
LPOptimizer.print_change_stats(res, false);
assert(res.space == 2, 'Redistribution after OSD removal');
console.log('\n16 PGs, size=3, ordered');
res = await LPOptimizer.optimize_initial({ osd_tree, pg_size: 3, pg_count: 16, ordered: true });
res = await LPOptimizer.optimize_initial(compat({ osd_tree, pg_size: 3, pg_count: 16, ordered: true }));
LPOptimizer.print_change_stats(res, false);
assert(res.space == 3, 'Initial distribution');
console.log('\nChange size to 2, ordered');
res = await LPOptimizer.optimize_change({ prev_pgs: res.int_pgs, osd_tree, pg_size: 2, ordered: true });
res = await LPOptimizer.optimize_change(compat({ prev_pgs: res.int_pgs, osd_tree, pg_size: 2, ordered: true }));
LPOptimizer.print_change_stats(res, false);
assert(res.space >= 3*14/16 && res.osd_differs < 8, 'Redistribution');
}

View File

@ -1,6 +1,7 @@
// Copyright (c) Vitaliy Filippov, 2019+
// License: VNPL-1.1 (see README.md for details)
const { compat, flatten_tree } = require('./simple_pgs.js');
const LPOptimizer = require('./lp-optimizer.js');
const crush_tree = [
@ -36,44 +37,44 @@ const crush_tree = [
] },
];
const osd_tree = LPOptimizer.flatten_tree(crush_tree, {}, 1, 3);
const osd_tree = flatten_tree(crush_tree, {}, 1, 3);
console.log(osd_tree);
async function run()
{
const cur_tree = {};
console.log('Empty tree:');
let res = await LPOptimizer.optimize_initial({ osd_tree: cur_tree, pg_size: 3, pg_count: 256 });
let res = await LPOptimizer.optimize_initial(compat({ osd_tree: cur_tree, pg_size: 3, pg_count: 256 }));
LPOptimizer.print_change_stats(res, false);
assert(res.space == 0);
console.log('\nAdding 1st failure domain:');
cur_tree['dom1'] = osd_tree['dom1'];
res = await LPOptimizer.optimize_change({ prev_pgs: res.int_pgs, osd_tree: cur_tree, pg_size: 3 });
res = await LPOptimizer.optimize_change(compat({ prev_pgs: res.int_pgs, osd_tree: cur_tree, pg_size: 3 }));
LPOptimizer.print_change_stats(res, false);
assert(res.space == 12 && res.total_space == 12);
console.log('\nAdding 2nd failure domain:');
cur_tree['dom2'] = osd_tree['dom2'];
res = await LPOptimizer.optimize_change({ prev_pgs: res.int_pgs, osd_tree: cur_tree, pg_size: 3 });
res = await LPOptimizer.optimize_change(compat({ prev_pgs: res.int_pgs, osd_tree: cur_tree, pg_size: 3 }));
LPOptimizer.print_change_stats(res, false);
assert(res.space == 24 && res.total_space == 24);
console.log('\nAdding 3rd failure domain:');
cur_tree['dom3'] = osd_tree['dom3'];
res = await LPOptimizer.optimize_change({ prev_pgs: res.int_pgs, osd_tree: cur_tree, pg_size: 3 });
res = await LPOptimizer.optimize_change(compat({ prev_pgs: res.int_pgs, osd_tree: cur_tree, pg_size: 3 }));
LPOptimizer.print_change_stats(res, false);
assert(res.space == 36 && res.total_space == 36);
console.log('\nRemoving 3rd failure domain:');
delete cur_tree['dom3'];
res = await LPOptimizer.optimize_change({ prev_pgs: res.int_pgs, osd_tree: cur_tree, pg_size: 3 });
res = await LPOptimizer.optimize_change(compat({ prev_pgs: res.int_pgs, osd_tree: cur_tree, pg_size: 3 }));
LPOptimizer.print_change_stats(res, false);
assert(res.space == 24 && res.total_space == 24);
console.log('\nRemoving 2nd failure domain:');
delete cur_tree['dom2'];
res = await LPOptimizer.optimize_change({ prev_pgs: res.int_pgs, osd_tree: cur_tree, pg_size: 3 });
res = await LPOptimizer.optimize_change(compat({ prev_pgs: res.int_pgs, osd_tree: cur_tree, pg_size: 3 }));
LPOptimizer.print_change_stats(res, false);
assert(res.space == 12 && res.total_space == 12);
console.log('\nRemoving 1st failure domain:');
delete cur_tree['dom1'];
res = await LPOptimizer.optimize_change({ prev_pgs: res.int_pgs, osd_tree: cur_tree, pg_size: 3 });
res = await LPOptimizer.optimize_change(compat({ prev_pgs: res.int_pgs, osd_tree: cur_tree, pg_size: 3 }));
LPOptimizer.print_change_stats(res, false);
assert(res.space == 0);
}

View File

@ -1,6 +1,7 @@
// Copyright (c) Vitaliy Filippov, 2019+
// License: VNPL-1.1 (see README.md for details)
const { compat } = require('./simple_pgs.js');
const LPOptimizer = require('./lp-optimizer.js');
const osd_tree = {
@ -20,13 +21,13 @@ async function run()
{
let res;
console.log('256 PGs, 3+3 OSDs, size=2');
res = await LPOptimizer.optimize_initial({ osd_tree, pg_size: 2, pg_count: 256 });
res = await LPOptimizer.optimize_initial(compat({ osd_tree, pg_size: 2, pg_count: 256 }));
LPOptimizer.print_change_stats(res, false);
// Should NOT fail with the "unfeasible or unbounded" exception
console.log('\nRemoving osd.2');
delete osd_tree[100][2];
res = await LPOptimizer.optimize_change({ prev_pgs: res.int_pgs, osd_tree, pg_size: 2 });
res = await LPOptimizer.optimize_change(compat({ prev_pgs: res.int_pgs, osd_tree, pg_size: 2 }));
LPOptimizer.print_change_stats(res, false);
}

View File

@ -1,6 +1,7 @@
// Copyright (c) Vitaliy Filippov, 2019+
// License: VNPL-1.1 (see README.md for details)
const { compat, flatten_tree } = require('./simple_pgs.js');
const LPOptimizer = require('./lp-optimizer.js');
const osd_tree = {
@ -84,31 +85,31 @@ async function run()
// Space efficiency is ~99% in all cases.
console.log('256 PGs, size=2');
res = await LPOptimizer.optimize_initial({ osd_tree, pg_size: 2, pg_count: 256 });
res = await LPOptimizer.optimize_initial(compat({ osd_tree, pg_size: 2, pg_count: 256 }));
LPOptimizer.print_change_stats(res, false);
console.log('\nAdding osd.8');
osd_tree[500][8] = 3.58589;
res = await LPOptimizer.optimize_change({ prev_pgs: res.int_pgs, osd_tree, pg_size: 2 });
res = await LPOptimizer.optimize_change(compat({ prev_pgs: res.int_pgs, osd_tree, pg_size: 2 }));
LPOptimizer.print_change_stats(res, false);
console.log('\nRemoving osd.8');
delete osd_tree[500][8];
res = await LPOptimizer.optimize_change({ prev_pgs: res.int_pgs, osd_tree, pg_size: 2 });
res = await LPOptimizer.optimize_change(compat({ prev_pgs: res.int_pgs, osd_tree, pg_size: 2 }));
LPOptimizer.print_change_stats(res, false);
console.log('\n256 PGs, size=3');
res = await LPOptimizer.optimize_initial({ osd_tree, pg_size: 3, pg_count: 256 });
res = await LPOptimizer.optimize_initial(compat({ osd_tree, pg_size: 3, pg_count: 256 }));
LPOptimizer.print_change_stats(res, false);
console.log('\nAdding osd.8');
osd_tree[500][8] = 3.58589;
res = await LPOptimizer.optimize_change({ prev_pgs: res.int_pgs, osd_tree, pg_size: 3 });
res = await LPOptimizer.optimize_change(compat({ prev_pgs: res.int_pgs, osd_tree, pg_size: 3 }));
LPOptimizer.print_change_stats(res, false);
console.log('\nRemoving osd.8');
delete osd_tree[500][8];
res = await LPOptimizer.optimize_change({ prev_pgs: res.int_pgs, osd_tree, pg_size: 3 });
res = await LPOptimizer.optimize_change(compat({ prev_pgs: res.int_pgs, osd_tree, pg_size: 3 }));
LPOptimizer.print_change_stats(res, false);
console.log('\n256 PGs, size=3, failure domain=rack');
res = await LPOptimizer.optimize_initial({ osd_tree: LPOptimizer.flatten_tree(crush_tree, {}, 1, 3), pg_size: 3, pg_count: 256 });
res = await LPOptimizer.optimize_initial(compat({ osd_tree: flatten_tree(crush_tree, {}, 1, 3), pg_size: 3, pg_count: 256 }));
LPOptimizer.print_change_stats(res, false);
}

104
mon/test-parse-dsl.js Normal file
View File

@ -0,0 +1,104 @@
const { random_custom_combinations, index_tree, parse_level_indexes, parse_pg_dsl } = require('./dsl_pgs.js');
function check(result, expected)
{
console.dir(result, { depth: null });
if (JSON.stringify(result) !== JSON.stringify(expected))
{
process.stderr.write('Unexpected value, expected: ');
console.dir(expected, { depth: null });
process.exit(1);
}
}
check(
parse_pg_dsl("any, dc=1 host!=1, dc!=1, dc=3 host!=3, dc!=(1,3), dc=5 host!=5"),
[
[],
[ [ 'dc', '=', 1 ], [ 'host', '!=', 1 ] ],
[ [ 'dc', '!=', 1 ] ],
[ [ 'dc', '=', 3 ], [ 'host', '!=', 3 ] ],
[ [ 'dc', '!=', [ 1, 3 ] ] ],
[ [ 'dc', '=', 5 ], [ 'host', '!=', 5 ] ],
]
);
check(
parse_pg_dsl("dc=meow, dc!=1, dc>2"),
[
[ [ 'dc', '=', { id: 'meow' } ] ],
[ [ 'dc', '!=', 1 ] ],
[ [ 'dc', '>', 2 ] ],
]
);
check(
parse_level_indexes({ dc: '112233', host: 'ABCDEF' }),
[
[],
[ [ 'dc', '=', 1 ], [ 'host', '!=', [ 1 ] ] ],
[ [ 'dc', '!=', [ 1 ] ], [ 'host', '!=', [ 1, 2 ] ] ],
[ [ 'dc', '=', 3 ], [ 'host', '!=', [ 1, 2, 3 ] ] ],
[ [ 'dc', '!=', [ 1, 3 ] ], [ 'host', '!=', [ 1, 2, 3, 4 ] ] ],
[ [ 'dc', '=', 5 ], [ 'host', '!=', [ 1, 2, 3, 4, 5 ] ] ],
]
);
check(
parse_level_indexes({ dc: '112233', host: 'ABCDEF' }, [ 'dc', 'host' ]),
[
[],
[ [ 'dc', '=', 1 ], [ 'host', '!=', [ 1 ] ] ],
[ [ 'dc', '!=', [ 1 ] ] ],
[ [ 'dc', '=', 3 ], [ 'host', '!=', [ 3 ] ] ],
[ [ 'dc', '!=', [ 1, 3 ] ] ],
[ [ 'dc', '=', 5 ], [ 'host', '!=', [ 5 ] ] ],
]
);
check(
parse_level_indexes({ dc: '112211223333', host: '123456789ABC' }),
[
[],
[ [ 'dc', '=', 1 ], [ 'host', '!=', [ 1 ] ] ],
[ [ 'dc', '!=', [ 1 ] ], [ 'host', '!=', [ 1, 2 ] ] ],
[ [ 'dc', '=', 3 ], [ 'host', '!=', [ 1, 2, 3 ] ] ],
[ [ 'dc', '=', 1 ], [ 'host', '!=', [ 1, 2, 3, 4 ] ] ],
[ [ 'dc', '=', 1 ], [ 'host', '!=', [ 1, 2, 3, 4, 5 ] ] ],
[ [ 'dc', '=', 3 ], [ 'host', '!=', [ 1, 2, 3, 4, 5, 6 ] ] ],
[ [ 'dc', '=', 3 ], [ 'host', '!=', [ 1, 2, 3, 4, 5, 6, 7 ] ] ],
[ [ 'dc', '!=', [ 1, 3 ] ], [ 'host', '!=', [ 1, 2, 3, 4, 5, 6, 7, 8 ] ] ],
[ [ 'dc', '=', 9 ], [ 'host', '!=', [ 1, 2, 3, 4, 5, 6, 7, 8, 9 ] ] ],
[ [ 'dc', '=', 9 ], [ 'host', '!=', [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 ] ] ],
[ [ 'dc', '=', 9 ], [ 'host', '!=', [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 ] ] ],
]
);
check(
parse_level_indexes({ dc: '112211223333', host: '123456789ABC' }, [ 'dc', 'host' ]),
[
[],
[ [ 'dc', '=', 1 ], [ 'host', '!=', [ 1 ] ] ],
[ [ 'dc', '!=', [ 1 ] ] ],
[ [ 'dc', '=', 3 ], [ 'host', '!=', [ 3 ] ] ],
[ [ 'dc', '=', 1 ], [ 'host', '!=', [ 1, 2 ] ] ],
[ [ 'dc', '=', 1 ], [ 'host', '!=', [ 1, 2, 5 ] ] ],
[ [ 'dc', '=', 3 ], [ 'host', '!=', [ 3, 4 ] ] ],
[ [ 'dc', '=', 3 ], [ 'host', '!=', [ 3, 4, 7 ] ] ],
[ [ 'dc', '!=', [ 1, 3 ] ] ],
[ [ 'dc', '=', 9 ], [ 'host', '!=', [ 9 ] ] ],
[ [ 'dc', '=', 9 ], [ 'host', '!=', [ 9, 10 ] ] ],
[ [ 'dc', '=', 9 ], [ 'host', '!=', [ 9, 10, 11 ] ] ]
]
);
check(
Object.keys(random_custom_combinations(index_tree([
{ id: '1', size: 1, level: 'osd' },
{ id: '2', size: 2, level: 'osd' },
{ id: '3', size: 3, level: 'osd' }
]), parse_level_indexes({ osd: '12' }), 10000)).sort(),
[ 'pg_1_2', 'pg_1_3', 'pg_2_3' ]
);
console.log('OK');