Compare commits

..

1 Commits

Author SHA1 Message Date
61ae4e903a WIP/experimental LRC matrix generator 2022-04-14 20:16:09 +03:00
22 changed files with 507 additions and 142 deletions

View File

@@ -1,13 +0,0 @@
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: test-vitastor-pvc-block
spec:
storageClassName: vitastor
volumeMode: Block
accessModes:
- ReadWriteMany
resources:
requests:
storage: 10Gi

View File

@@ -1,17 +0,0 @@
apiVersion: v1
kind: Pod
metadata:
name: vitastor-test-block-pvc
namespace: default
spec:
containers:
- name: vitastor-test-block-pvc
image: nginx
volumeDevices:
- name: data
devicePath: /dev/xvda
volumes:
- name: data
persistentVolumeClaim:
claimName: test-vitastor-pvc-block
readOnly: false

View File

@@ -1,17 +0,0 @@
apiVersion: v1
kind: Pod
metadata:
name: vitastor-test-nginx
namespace: default
spec:
containers:
- name: vitastor-test-nginx
image: nginx
volumeMounts:
- mountPath: /usr/share/nginx/html/s3
name: data
volumes:
- name: data
persistentVolumeClaim:
claimName: test-vitastor-pvc
readOnly: false

View File

@@ -10,6 +10,7 @@ import (
"bytes"
"strconv"
"time"
"fmt"
"os"
"os/exec"
"io/ioutil"
@@ -20,6 +21,8 @@ import (
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
"go.etcd.io/etcd/clientv3"
"github.com/container-storage-interface/spec/lib/go/csi"
)
@@ -111,34 +114,6 @@ func GetConnectionParams(params map[string]string) (map[string]string, []string,
return ctxVars, etcdUrl, etcdPrefix
}
func invokeCLI(ctxVars map[string]string, args []string) ([]byte, error)
{
if (ctxVars["etcdUrl"] != "")
{
args = append(args, "--etcd_address", ctxVars["etcdUrl"])
}
if (ctxVars["etcdPrefix"] != "")
{
args = append(args, "--etcd_prefix", ctxVars["etcdPrefix"])
}
if (ctxVars["configPath"] != "")
{
args = append(args, "--config_path", ctxVars["configPath"])
}
c := exec.Command("/usr/bin/vitastor-cli", args...)
var stdout, stderr bytes.Buffer
c.Stdout = &stdout
c.Stderr = &stderr
err := c.Run()
stderrStr := string(stderr.Bytes())
if (err != nil)
{
klog.Errorf("vitastor-cli %s failed: %s, status %s\n", strings.Join(args, " "), stderrStr, err)
return nil, status.Error(codes.Internal, stderrStr+" (status "+err.Error()+")")
}
return stdout.Bytes(), nil
}
// Create the volume
func (cs *ControllerServer) CreateVolume(ctx context.Context, req *csi.CreateVolumeRequest) (*csi.CreateVolumeResponse, error)
{
@@ -171,41 +146,128 @@ func (cs *ControllerServer) CreateVolume(ctx context.Context, req *csi.CreateVol
volSize = ((capRange.GetRequiredBytes() + MB - 1) / MB) * MB
}
ctxVars, etcdUrl, _ := GetConnectionParams(req.Parameters)
// FIXME: The following should PROBABLY be implemented externally in a management tool
ctxVars, etcdUrl, etcdPrefix := GetConnectionParams(req.Parameters)
if (len(etcdUrl) == 0)
{
return nil, status.Error(codes.InvalidArgument, "no etcdUrl in storage class configuration and no etcd_address in vitastor.conf")
}
// Create image using vitastor-cli
_, err := invokeCLI(ctxVars, []string{ "create", volName, "-s", string(volSize), "--pool", string(poolId) })
// Connect to etcd
cli, err := clientv3.New(clientv3.Config{
DialTimeout: ETCD_TIMEOUT,
Endpoints: etcdUrl,
})
if (err != nil)
{
if (strings.Index(err.Error(), "already exists") > 0)
return nil, status.Error(codes.Internal, "failed to connect to etcd at "+strings.Join(etcdUrl, ",")+": "+err.Error())
}
defer cli.Close()
var imageId uint64 = 0
for
{
// Check if the image exists
ctx, cancel := context.WithTimeout(context.Background(), ETCD_TIMEOUT)
resp, err := cli.Get(ctx, etcdPrefix+"/index/image/"+volName)
cancel()
if (err != nil)
{
stat, err := invokeCLI(ctxVars, []string{ "ls", "--json", volName })
return nil, status.Error(codes.Internal, "failed to read key from etcd: "+err.Error())
}
if (len(resp.Kvs) > 0)
{
kv := resp.Kvs[0]
var v InodeIndex
err := json.Unmarshal(kv.Value, &v)
if (err != nil)
{
return nil, err
return nil, status.Error(codes.Internal, "invalid /index/image/"+volName+" key in etcd: "+err.Error())
}
var inodeCfg []InodeConfig
err = json.Unmarshal(stat, &inodeCfg)
poolId = v.PoolId
imageId = v.Id
inodeCfgKey := fmt.Sprintf("/config/inode/%d/%d", poolId, imageId)
ctx, cancel := context.WithTimeout(context.Background(), ETCD_TIMEOUT)
resp, err := cli.Get(ctx, etcdPrefix+inodeCfgKey)
cancel()
if (err != nil)
{
return nil, status.Error(codes.Internal, "Invalid JSON in vitastor-cli ls: "+err.Error())
return nil, status.Error(codes.Internal, "failed to read key from etcd: "+err.Error())
}
if (len(inodeCfg) == 0)
if (len(resp.Kvs) == 0)
{
return nil, status.Error(codes.Internal, "vitastor-cli create said that image already exists, but ls can't find it")
return nil, status.Error(codes.Internal, "missing "+inodeCfgKey+" key in etcd")
}
if (inodeCfg[0].Size < uint64(volSize))
var inodeCfg InodeConfig
err = json.Unmarshal(resp.Kvs[0].Value, &inodeCfg)
if (err != nil)
{
return nil, status.Error(codes.Internal, "invalid "+inodeCfgKey+" key in etcd: "+err.Error())
}
if (inodeCfg.Size < uint64(volSize))
{
return nil, status.Error(codes.Internal, "image "+volName+" is already created, but size is less than expected")
}
}
else
{
return nil, err
// Find a free ID
// Create image metadata in a transaction verifying that the image doesn't exist yet AND ID is still free
maxIdKey := fmt.Sprintf("%s/index/maxid/%d", etcdPrefix, poolId)
ctx, cancel := context.WithTimeout(context.Background(), ETCD_TIMEOUT)
resp, err := cli.Get(ctx, maxIdKey)
cancel()
if (err != nil)
{
return nil, status.Error(codes.Internal, "failed to read key from etcd: "+err.Error())
}
var modRev int64
var nextId uint64
if (len(resp.Kvs) > 0)
{
var err error
nextId, err = strconv.ParseUint(string(resp.Kvs[0].Value), 10, 64)
if (err != nil)
{
return nil, status.Error(codes.Internal, maxIdKey+" contains invalid ID")
}
modRev = resp.Kvs[0].ModRevision
nextId++
}
else
{
nextId = 1
}
inodeIdxJson, _ := json.Marshal(InodeIndex{
Id: nextId,
PoolId: poolId,
})
inodeCfgJson, _ := json.Marshal(InodeConfig{
Name: volName,
Size: uint64(volSize),
})
ctx, cancel = context.WithTimeout(context.Background(), ETCD_TIMEOUT)
txnResp, err := cli.Txn(ctx).If(
clientv3.Compare(clientv3.ModRevision(fmt.Sprintf("%s/index/maxid/%d", etcdPrefix, poolId)), "=", modRev),
clientv3.Compare(clientv3.CreateRevision(fmt.Sprintf("%s/index/image/%s", etcdPrefix, volName)), "=", 0),
clientv3.Compare(clientv3.CreateRevision(fmt.Sprintf("%s/config/inode/%d/%d", etcdPrefix, poolId, nextId)), "=", 0),
).Then(
clientv3.OpPut(fmt.Sprintf("%s/index/maxid/%d", etcdPrefix, poolId), fmt.Sprintf("%d", nextId)),
clientv3.OpPut(fmt.Sprintf("%s/index/image/%s", etcdPrefix, volName), string(inodeIdxJson)),
clientv3.OpPut(fmt.Sprintf("%s/config/inode/%d/%d", etcdPrefix, poolId, nextId), string(inodeCfgJson)),
).Commit()
cancel()
if (err != nil)
{
return nil, status.Error(codes.Internal, "failed to commit transaction in etcd: "+err.Error())
}
if (txnResp.Succeeded)
{
imageId = nextId
break
}
// Start over if the transaction fails
}
}
@@ -237,12 +299,97 @@ func (cs *ControllerServer) DeleteVolume(ctx context.Context, req *csi.DeleteVol
}
volName := ctxVars["name"]
ctxVars, _, _ = GetConnectionParams(ctxVars)
_, etcdUrl, etcdPrefix := GetConnectionParams(ctxVars)
if (len(etcdUrl) == 0)
{
return nil, status.Error(codes.InvalidArgument, "no etcdUrl in storage class configuration and no etcd_address in vitastor.conf")
}
_, err = invokeCLI(ctxVars, []string{ "rm", volName })
cli, err := clientv3.New(clientv3.Config{
DialTimeout: ETCD_TIMEOUT,
Endpoints: etcdUrl,
})
if (err != nil)
{
return nil, err
return nil, status.Error(codes.Internal, "failed to connect to etcd at "+strings.Join(etcdUrl, ",")+": "+err.Error())
}
defer cli.Close()
// Find inode by name
ctx, cancel := context.WithTimeout(context.Background(), ETCD_TIMEOUT)
resp, err := cli.Get(ctx, etcdPrefix+"/index/image/"+volName)
cancel()
if (err != nil)
{
return nil, status.Error(codes.Internal, "failed to read key from etcd: "+err.Error())
}
if (len(resp.Kvs) == 0)
{
return nil, status.Error(codes.NotFound, "volume "+volName+" does not exist")
}
var idx InodeIndex
err = json.Unmarshal(resp.Kvs[0].Value, &idx)
if (err != nil)
{
return nil, status.Error(codes.Internal, "invalid /index/image/"+volName+" key in etcd: "+err.Error())
}
// Get inode config
inodeCfgKey := fmt.Sprintf("%s/config/inode/%d/%d", etcdPrefix, idx.PoolId, idx.Id)
ctx, cancel = context.WithTimeout(context.Background(), ETCD_TIMEOUT)
resp, err = cli.Get(ctx, inodeCfgKey)
cancel()
if (err != nil)
{
return nil, status.Error(codes.Internal, "failed to read key from etcd: "+err.Error())
}
if (len(resp.Kvs) == 0)
{
return nil, status.Error(codes.NotFound, "volume "+volName+" does not exist")
}
var inodeCfg InodeConfig
err = json.Unmarshal(resp.Kvs[0].Value, &inodeCfg)
if (err != nil)
{
return nil, status.Error(codes.Internal, "invalid "+inodeCfgKey+" key in etcd: "+err.Error())
}
// Delete inode data by invoking vitastor-cli
args := []string{
"rm-data", "--etcd_address", strings.Join(etcdUrl, ","),
"--pool", fmt.Sprintf("%d", idx.PoolId),
"--inode", fmt.Sprintf("%d", idx.Id),
}
if (ctxVars["configPath"] != "")
{
args = append(args, "--config_path", ctxVars["configPath"])
}
c := exec.Command("/usr/bin/vitastor-cli", args...)
var stderr bytes.Buffer
c.Stdout = nil
c.Stderr = &stderr
err = c.Run()
stderrStr := string(stderr.Bytes())
if (err != nil)
{
klog.Errorf("vitastor-cli rm-data failed: %s, status %s\n", stderrStr, err)
return nil, status.Error(codes.Internal, stderrStr+" (status "+err.Error()+")")
}
// Delete inode config in etcd
ctx, cancel = context.WithTimeout(context.Background(), ETCD_TIMEOUT)
txnResp, err := cli.Txn(ctx).Then(
clientv3.OpDelete(fmt.Sprintf("%s/index/image/%s", etcdPrefix, volName)),
clientv3.OpDelete(fmt.Sprintf("%s/config/inode/%d/%d", etcdPrefix, idx.PoolId, idx.Id)),
).Commit()
cancel()
if (err != nil)
{
return nil, status.Error(codes.Internal, "failed to delete keys in etcd: "+err.Error())
}
if (!txnResp.Succeeded)
{
return nil, status.Error(codes.Internal, "failed to delete keys in etcd: transaction failed")
}
return &csi.DeleteVolumeResponse{}, nil

View File

@@ -67,44 +67,29 @@ func (ns *NodeServer) NodePublishVolume(ctx context.Context, req *csi.NodePublis
klog.Infof("received node publish volume request %+v", protosanitizer.StripSecrets(req))
targetPath := req.GetTargetPath()
isBlock := req.GetVolumeCapability().GetBlock() != nil
// Check that it's not already mounted
_, error := mount.IsNotMountPoint(ns.mounter, targetPath)
free, error := mount.IsNotMountPoint(ns.mounter, targetPath)
if (error != nil)
{
if (os.IsNotExist(error))
{
if (isBlock)
error := os.MkdirAll(targetPath, 0777)
if (error != nil)
{
pathFile, err := os.OpenFile(targetPath, os.O_CREATE|os.O_RDWR, 0o600)
if (err != nil)
{
klog.Errorf("failed to create block device mount target %s with error: %v", targetPath, err)
return nil, status.Error(codes.Internal, err.Error())
}
err = pathFile.Close()
if (err != nil)
{
klog.Errorf("failed to close %s with error: %v", targetPath, err)
return nil, status.Error(codes.Internal, err.Error())
}
}
else
{
err := os.MkdirAll(targetPath, 0777)
if (err != nil)
{
klog.Errorf("failed to create fs mount target %s with error: %v", targetPath, err)
return nil, status.Error(codes.Internal, err.Error())
}
return nil, status.Error(codes.Internal, error.Error())
}
free = true
}
else
{
return nil, status.Error(codes.Internal, error.Error())
}
}
if (!free)
{
return &csi.NodePublishVolumeResponse{}, nil
}
ctxVars := make(map[string]string)
err := json.Unmarshal([]byte(req.VolumeId), &ctxVars)
@@ -164,6 +149,7 @@ func (ns *NodeServer) NodePublishVolume(ctx context.Context, req *csi.NodePublis
// Format the device (ext4 or xfs)
fsType := req.GetVolumeCapability().GetMount().GetFsType()
isBlock := req.GetVolumeCapability().GetBlock() != nil
opt := req.GetVolumeCapability().GetMount().GetMountFlags()
opt = append(opt, "_netdev")
if ((req.VolumeCapability.AccessMode.Mode == csi.VolumeCapability_AccessMode_MULTI_NODE_READER_ONLY ||

View File

@@ -25,7 +25,7 @@ allocator::allocator(uint64_t blocks)
size = free = blocks;
last_one_mask = (blocks % 64) == 0
? UINT64_MAX
: (((uint64_t)1 << (blocks % 64)) - 1);
: ((1l << (blocks % 64)) - 1);
for (uint64_t i = 0; i < total; i++)
{
mask[i] = 0;
@@ -79,7 +79,7 @@ void allocator::set(uint64_t addr, bool value)
}
if (value)
{
mask[last] = mask[last] | ((uint64_t)1 << bit);
mask[last] = mask[last] | (1l << bit);
if (mask[last] != (!is_last || cur_addr/64 < size/64
? UINT64_MAX : last_one_mask))
{
@@ -88,7 +88,7 @@ void allocator::set(uint64_t addr, bool value)
}
else
{
mask[last] = mask[last] & ~((uint64_t)1 << bit);
mask[last] = mask[last] & ~(1l << bit);
}
is_last = false;
if (p2 > 1)

View File

@@ -457,13 +457,13 @@ uint64_t parse_size(std::string size_str)
if (type_char == 'k' || type_char == 'm' || type_char == 'g' || type_char == 't')
{
if (type_char == 'k')
mul = (uint64_t)1<<10;
mul = 1l<<10;
else if (type_char == 'm')
mul = (uint64_t)1<<20;
mul = 1l<<20;
else if (type_char == 'g')
mul = (uint64_t)1<<30;
mul = 1l<<30;
else /*if (type_char == 't')*/
mul = (uint64_t)1<<40;
mul = 1l<<40;
size_str = size_str.substr(0, size_str.length()-1);
}
uint64_t size = json11::Json(size_str).uint64_value() * mul;

View File

@@ -124,8 +124,8 @@ resume_1:
{ "scheme_name", pool_cfg.scheme == POOL_SCHEME_REPLICATED
? std::to_string(pool_cfg.pg_size)+"/"+std::to_string(pool_cfg.pg_minsize)
: "EC "+std::to_string(pool_cfg.pg_size-pool_cfg.parity_chunks)+"+"+std::to_string(pool_cfg.parity_chunks) },
{ "used_raw", (uint64_t)(pool_stats[pool_cfg.id]["used_raw_tb"].number_value() * ((uint64_t)1<<40)) },
{ "total_raw", (uint64_t)(pool_stats[pool_cfg.id]["total_raw_tb"].number_value() * ((uint64_t)1<<40)) },
{ "used_raw", (uint64_t)(pool_stats[pool_cfg.id]["used_raw_tb"].number_value() * (1l<<40)) },
{ "total_raw", (uint64_t)(pool_stats[pool_cfg.id]["total_raw_tb"].number_value() * (1l<<40)) },
{ "max_available", pool_avail },
{ "raw_to_usable", pool_stats[pool_cfg.id]["raw_to_usable"].number_value() },
{ "space_efficiency", pool_stats[pool_cfg.id]["space_efficiency"].number_value() },

View File

@@ -436,8 +436,8 @@ std::string print_table(json11::Json items, json11::Json header, bool use_esc)
return str;
}
static uint64_t size_thresh[] = { (uint64_t)1024*1024*1024*1024, (uint64_t)1024*1024*1024, (uint64_t)1024*1024, 1024, 0 };
static uint64_t size_thresh_d[] = { (uint64_t)1000000000000, (uint64_t)1000000000, (uint64_t)1000000, (uint64_t)1000, 0 };
static uint64_t size_thresh[] = { 1024l*1024*1024*1024, 1024l*1024*1024, 1024l*1024, 1024, 0 };
static uint64_t size_thresh_d[] = { 1000000000000l, 1000000000l, 1000000l, 1000l, 0 };
static const int size_thresh_n = sizeof(size_thresh)/sizeof(size_thresh[0]);
static const char *size_unit = "TGMKB";

View File

@@ -193,7 +193,7 @@ std::function<bool(void)> cli_tool_t::start_rm(json11::Json cfg)
remover->pool_id = cfg["pool"].uint64_value();
if (remover->pool_id)
{
remover->inode = (remover->inode & (((uint64_t)1 << (64-POOL_ID_BITS)) - 1)) | (((uint64_t)remover->pool_id) << (64-POOL_ID_BITS));
remover->inode = (remover->inode & ((1l << (64-POOL_ID_BITS)) - 1)) | (((uint64_t)remover->pool_id) << (64-POOL_ID_BITS));
}
remover->pool_id = INODE_POOL(remover->inode);
if (!remover->pool_id)

View File

@@ -217,7 +217,7 @@ resume_2:
// JSON output
printf("%s\n", json11::Json(json11::Json::object {
{ "etcd_alive", etcd_alive },
{ "etcd_count", (uint64_t)etcd_states.size() },
{ "etcd_count", etcd_states.size() },
{ "etcd_db_size", etcd_db_size },
{ "mon_count", mon_count },
{ "mon_master", mon_master },

View File

@@ -968,7 +968,7 @@ void etcd_state_client_t::parse_state(const etcd_kv_t & kv)
{
fprintf(
stderr, "Inode %lu/%lu parent_pool value is invalid, ignoring parent setting\n",
inode_num >> (64-POOL_ID_BITS), inode_num & (((uint64_t)1 << (64-POOL_ID_BITS)) - 1)
inode_num >> (64-POOL_ID_BITS), inode_num & ((1l << (64-POOL_ID_BITS)) - 1)
);
parent_inode_num = 0;
}

View File

@@ -214,14 +214,14 @@ static int sec_setup(struct thread_data *td)
if (!o->image)
{
if (!(o->inode & (((uint64_t)1 << (64-POOL_ID_BITS)) - 1)))
if (!(o->inode & ((1l << (64-POOL_ID_BITS)) - 1)))
{
td_verror(td, EINVAL, "inode number is missing");
return 1;
}
if (o->pool)
{
o->inode = (o->inode & (((uint64_t)1 << (64-POOL_ID_BITS)) - 1)) | (o->pool << (64-POOL_ID_BITS));
o->inode = (o->inode & ((1l << (64-POOL_ID_BITS)) - 1)) | (o->pool << (64-POOL_ID_BITS));
}
if (!(o->inode >> (64-POOL_ID_BITS)))
{

2
src/lrc/Makefile Normal file
View File

@@ -0,0 +1,2 @@
mat: mat.c
gcc -O3 -I/usr/include/jerasure -o mat mat.c -lJerasure

277
src/lrc/mat.c Normal file
View File

@@ -0,0 +1,277 @@
#include <jerasure/reed_sol.h>
#include <jerasure.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Generate LRC matrix: (groups*local + global) code rows with (data_drives) columns
// w should be >= log2(data_drives + groups*local + global), but not necessary 8/16/32
int* reed_sol_vandermonde_lrc_matrix(int data_drives, int groups, int local, int global, int w)
{
if (w < 0 || w > 32 || data_drives + groups*local + global > (1<<w))
{
return NULL;
}
int *lrc_matrix = (int*)malloc(sizeof(int) * (local*groups+global));
int *matrix = reed_sol_vandermonde_coding_matrix(data_drives, local+global, w);
for (int gr = 0; gr < groups; gr++)
{
for (int l = 0; l < local; l++)
{
for (int j = 0; j < data_drives; j++)
{
lrc_matrix[(gr*local+l)*data_drives + j] = (j / (data_drives/groups)) == gr ? matrix[l*data_drives + j] : 0;
}
}
}
for (int i = 0; i < global; i++)
{
for (int j = 0; j < data_drives; j++)
{
lrc_matrix[(groups*local+i)*data_drives + j] = matrix[(local+i)*data_drives + j];
}
}
free(matrix);
return lrc_matrix;
}
// Check if the generated LRC with given parameters is Maximally Reconstructible (MR-LRC)
// Example of a MR-LRC: (8, 2, 1, 2, 6, 8)
void check_mr_lrc(int data_drives, int groups, int local, int global, int matrix_w, int w, int print)
{
}
int main()
{
int W = 8, MATRIX_W = 6;
int n = 8, groups = 2, local = 1, global = 2;
//n = 4, groups = 2, local = 1, global = 1;
int total_rows = n+groups*local+global;
int *matrix = reed_sol_vandermonde_lrc_matrix(n, groups, local, global, MATRIX_W);
int *lrc_matrix = (int*)malloc(sizeof(int) * total_rows*n);
// Fill identity+LRC matrix
for (int i = 0; i < n; i++)
for (int j = 0; j < n; j++)
lrc_matrix[i*n + j] = j == i ? 1 : 0;
memcpy(lrc_matrix + n*n, matrix, (total_rows-n)*n*sizeof(int));
free(matrix);
matrix = NULL;
// Print LRC matrix
for (int i = 0; i < total_rows; i++)
{
for (int j = 0; j < n; j++)
{
printf("%d ", lrc_matrix[i*n+j]);
}
printf("\n");
}
int impossible = 0, success = 0, failures = 0;
int *lost_per_group = (int*)malloc(sizeof(int) * groups);
for (int lost = local+global+1; lost <= groups*local+global; lost++)
//int lost = groups*local+global;
{
int *erased_matrix = (int*)malloc(sizeof(int) * (total_rows-lost)*n);
int *inverted_matrix = (int*)malloc(sizeof(int) * (total_rows-lost)*n);
int *p = (int*)malloc(sizeof(int) * (total_rows-lost));
for (int i = 0; i < n; i++)
p[i] = i;
int *p2 = (int*)malloc(sizeof(int) * n);
if (total_rows-lost > n)
{
p[n-1] = n; // skip combinations with all N data disks (0..n-1)
for (int i = n; i < total_rows-lost; i++)
p[i] = i+1;
p[total_rows-lost-1]--; // will be incremented on the first step
}
int inc = total_rows-lost-1;
while (1)
{
p[inc]++;
if (p[inc] >= n+groups*local+global)
{
if (inc == 0)
break;
inc--;
}
else if (inc+1 < total_rows-lost)
{
p[inc+1] = p[inc];
inc++;
}
else
{
// Check if it should be recoverable
for (int gr = 0; gr < groups; gr++)
{
lost_per_group[gr] = ((gr+1)*(n/groups) > n ? (n - gr*(n/groups)) : n/groups);
}
// Calculate count of data chunks lost in each group
for (int j = 0; j < total_rows-lost; j++)
{
if (j < n)
{
lost_per_group[(p[j] / (n/groups))]--;
}
}
// Every local parity chunk is supposed to restore 1 missing chunk inside its group
// So, subtract local parity chunk counts from each group lost chunk count
for (int j = 0; j < total_rows-lost; j++)
{
if (p[j] >= n && p[j] < n+groups*local && lost_per_group[(p[j]-n)/local] > 0)
{
lost_per_group[(p[j]-n)/local]--;
}
}
// Every global parity chunk is supposed to restore 1 chunk of all that are still missing
int still_missing = 0;
for (int gr = 0; gr < groups; gr++)
{
still_missing += lost_per_group[gr];
}
for (int j = 0; j < total_rows-lost; j++)
{
if (p[j] >= n+groups*local && still_missing > 0)
{
still_missing--;
}
}
if (still_missing <= 0)
{
// We hope it can be recoverable. Try to invert it
int invert_ok = -1;
if (total_rows-lost == n)
{
for (int i = 0; i < n; i++)
for (int j = 0; j < n; j++)
erased_matrix[i*n+j] = lrc_matrix[p[i]*n+j];
invert_ok = jerasure_invert_matrix(erased_matrix, inverted_matrix, n, W);
}
else
{
// Check submatrices
for (int i = 0; i < n; i++)
p2[i] = i;
p2[n-1]--;
int inc2 = n-1;
while (1)
{
p2[inc2]++;
if (p2[inc2] >= total_rows-lost)
{
if (inc2 == 0)
break;
inc2--;
}
else if (inc2+1 < n)
{
p2[inc2+1] = p2[inc2];
inc2++;
}
else
{
for (int i = 0; i < n; i++)
for (int j = 0; j < n; j++)
erased_matrix[i*n+j] = lrc_matrix[p[p2[i]]*n+j];
invert_ok = jerasure_invert_matrix(erased_matrix, inverted_matrix, n, W);
if (invert_ok == 0)
break;
}
}
}
if (invert_ok < 0)
{
failures++;
printf("\nFAIL: ");
for (int i = 0; i < total_rows-lost; i++)
{
printf("%d ", p[i]);
}
printf("\nDIRECT:\n");
for (int i = 0; i < total_rows-lost; i++)
{
for (int j = 0; j < n; j++)
printf("%d ", lrc_matrix[p[i]*n+j]);
printf("\n");
}
printf("INVERSE:\n");
for (int i = 0; i < total_rows-lost; i++)
{
for (int j = 0; j < n; j++)
printf("%d ", inverted_matrix[i*n+j]);
printf("\n");
}
}
else
{
success++;
printf("OK: ");
for (int i = 0; i < total_rows-lost; i++)
{
printf("%d ", p[i]);
}
printf("\n");
}
}
else
{
impossible++;
printf("IMPOSSIBLE: ");
for (int i = 0; i < total_rows-lost; i++)
{
printf("%d ", p[i]);
}
printf("\n");
}
}
}
free(p2);
free(p);
free(inverted_matrix);
free(erased_matrix);
}
free(lost_per_group);
printf("\n%d recovered, %d impossible, %d failures\n", success, impossible, failures);
return 0;
}
// 1 1 1 1 0 0 0 0
// 0 0 0 0 1 1 1 1
// 1 55 39 73 84 181 225 217
// 1 172 70 235 143 34 200 101
//
// Can't recover
// 1 2 4 5 8 9 10 11 -1
// 2 3 4 6 8 9 10 11 -1
// FULL:
// 1 0 0 0 0 0 0 0
// 0 1 0 0 0 0 0 0
// 0 0 1 0 0 0 0 0
// 0 0 0 1 0 0 0 0
// 0 0 0 0 1 0 0 0
// 0 0 0 0 0 1 0 0
// 0 0 0 0 0 0 1 0
// 0 0 0 0 0 0 0 1
// 1 1 1 1 0 0 0 0
// 0 0 0 0 1 1 1 1
// 1 55 39 73 84 181 225 217
// 1 172 70 235 143 34 200 101
// FIRST UNRECOVERABLE:
// 0 1 0 0 0 0 0 0
// 0 0 1 0 0 0 0 0
// 0 0 0 0 1 0 0 0
// 0 0 0 0 0 1 0 0
// 1 1 1 1 0 0 0 0
// 0 0 0 0 1 1 1 1
// 1 55 39 73 84 181 225 217
// 1 172 70 235 143 34 200 101
// SECOND UNRECOVERABLE:
// 0 0 1 0 0 0 0 0
// 0 0 0 1 0 0 0 0
// 0 0 0 0 1 0 0 0
// 0 0 0 0 0 0 1 0
// 1 1 1 1 0 0 0 0
// 0 0 0 0 1 1 1 1
// 1 55 39 73 84 181 225 217
// 1 172 70 235 143 34 200 101
// Ho ho ho

View File

@@ -189,7 +189,7 @@ public:
uint64_t pool = cfg["pool"].uint64_value();
if (pool)
{
inode = (inode & (((uint64_t)1 << (64-POOL_ID_BITS)) - 1)) | (pool << (64-POOL_ID_BITS));
inode = (inode & ((1l << (64-POOL_ID_BITS)) - 1)) | (pool << (64-POOL_ID_BITS));
}
if (!(inode >> (64-POOL_ID_BITS)))
{

View File

@@ -189,7 +189,7 @@ void osd_t::report_statistics()
for (auto kv: bs->get_inode_space_stats())
{
pool_id_t pool_id = INODE_POOL(kv.first);
uint64_t only_inode_num = (kv.first & (((uint64_t)1 << (64-POOL_ID_BITS)) - 1));
uint64_t only_inode_num = (kv.first & ((1l << (64-POOL_ID_BITS)) - 1));
if (!last_pool || pool_id != last_pool)
{
if (last_pool)
@@ -207,7 +207,7 @@ void osd_t::report_statistics()
for (auto kv: inode_stats)
{
pool_id_t pool_id = INODE_POOL(kv.first);
uint64_t only_inode_num = (kv.first & (((uint64_t)1 << (64-POOL_ID_BITS)) - 1));
uint64_t only_inode_num = (kv.first & ((1l << (64-POOL_ID_BITS)) - 1));
if (!last_pool || pool_id != last_pool)
{
if (last_pool)

View File

@@ -9,7 +9,7 @@
#define POOL_ID_MAX 0x10000
#define POOL_ID_BITS 16
#define INODE_POOL(inode) (pool_id_t)((inode) >> (64 - POOL_ID_BITS))
#define INODE_NO_POOL(inode) (inode_t)(inode & (((uint64_t)1 << (64-POOL_ID_BITS)) - 1))
#define INODE_NO_POOL(inode) (inode_t)(inode & ((1l << (64-POOL_ID_BITS)) - 1))
#define INODE_WITH_POOL(pool_id, inode) (((inode_t)(pool_id) << (64-POOL_ID_BITS)) | INODE_NO_POOL(inode))
// Pool ID is 16 bits long

View File

@@ -437,7 +437,7 @@ void pg_t::calc_object_states(int log_level)
st.walk();
if (this->state & (PG_DEGRADED|PG_LEFT_ON_DEAD))
{
assert(epoch != (((uint64_t)1 << PG_EPOCH_BITS)-1));
assert(epoch != ((1ul << PG_EPOCH_BITS)-1));
epoch++;
}
}

View File

@@ -144,9 +144,9 @@ resume_3:
}
else
{
if ((op_data->fact_ver & ((uint64_t)1 << (64-PG_EPOCH_BITS) - 1)) == ((uint64_t)1 << (64-PG_EPOCH_BITS) - 1))
if ((op_data->fact_ver & (1ul<<(64-PG_EPOCH_BITS) - 1)) == (1ul<<(64-PG_EPOCH_BITS) - 1))
{
assert(pg.epoch != (((uint64_t)1 << PG_EPOCH_BITS)-1));
assert(pg.epoch != ((1ul << PG_EPOCH_BITS)-1));
pg.epoch++;
}
op_data->target_ver = op_data->fact_ver + 1;

View File

@@ -262,7 +262,7 @@ static int vitastor_file_open(BlockDriverState *bs, QDict *options, int flags, E
client->pool = qdict_get_try_int(options, "pool", 0);
if (client->pool)
{
client->inode = (client->inode & (((uint64_t)1 << (64-POOL_ID_BITS)) - 1)) | (client->pool << (64-POOL_ID_BITS));
client->inode = (client->inode & ((1l << (64-POOL_ID_BITS)) - 1)) | (client->pool << (64-POOL_ID_BITS));
}
client->size = qdict_get_try_int(options, "size", 0);
}

View File

@@ -406,7 +406,7 @@ uint64_t crush(uint64_t key, int count, uint64_t *weights)
seed = (key + 0xc6a4a7935bd1e995 + (seed << 6) + (seed >> 2));
seed ^= (j + 0xc6a4a7935bd1e995 + (seed << 6) + (seed >> 2));
seed = 2862933555777941757ull*seed + 3037000493ull; // LCPRNG
seed = -log(((double)seed) / ((uint64_t)1 << 32) / ((uint64_t)1 << 32)) * weights[j];
seed = -log(((double)seed) / (1ul << 32) / (1ul << 32)) * weights[j];
if (seed > max)
{
max = seed;
@@ -439,8 +439,8 @@ void crush3(uint64_t key, int count, uint64_t *weights, uint64_t *r, uint64_t to
seed ^= (k2 + 0xc6a4a7935bd1e995 + (seed << 6) + (seed >> 2));
seed ^= (k3 + 0xc6a4a7935bd1e995 + (seed << 6) + (seed >> 2));
seed = 2862933555777941757ull*seed + 3037000493ull; // LCPRNG
//seed = ((double)seed) / ((uint64_t)1 << 32) / ((uint64_t)1 << 32) * (weights[k1] + weights[k2] + weights[k3]);
seed = ((double)seed) / ((uint64_t)1 << 32) / ((uint64_t)1 << 32) * (1 -
//seed = ((double)seed) / (1ul << 32) / (1ul << 32) * (weights[k1] + weights[k2] + weights[k3]);
seed = ((double)seed) / (1ul << 32) / (1ul << 32) * (1 -
(1 - 1.0*weights[k1]/total_weight)*
(1 - 1.0*weights[k2]/total_weight)*
(1 - 1.0*weights[k3]/total_weight)