Add support for PAR1 (#55)
PAR1 is a file format which uses a Reed-Solomon code similar to the current one, except it uses a different (flawed) coding matrix. Add support for it via a WithPAR1Matrix option, so that this code can be used to encode/decode PAR1 files. Also add the option to existing tests, and add a test demonstrating the flaw in PAR1's coding matrix. Also fix a mistakenly inverted test in testOpts(). Incidentally, PAR1 is obsoleted by PAR2, which uses GF(2^16) and tries to fix the flaw in the coding matrix; however, PAR2's coding matrix is still flawed! The real solution is to build the coding matrix like in this repository. PAR1 spec: http://parchive.sourceforge.net/docs/specifications/parity-volume-spec-1.0/article-spec.html Paper describing the (flawed) Reed-Solomon code used by PAR1: http://web.eecs.utk.edu/~plank/plank/papers/CS-96-332.html
master
parent
87c4e5ae75
commit
18d548df63
13
options.go
13
options.go
|
@ -13,6 +13,7 @@ type options struct {
|
|||
maxGoroutines int
|
||||
minSplitSize int
|
||||
useAVX2, useSSSE3 bool
|
||||
usePAR1Matrix bool
|
||||
}
|
||||
|
||||
var defaultOptions = options{
|
||||
|
@ -43,7 +44,7 @@ func WithMaxGoroutines(n int) Option {
|
|||
}
|
||||
}
|
||||
|
||||
// MinSplitSize Is the minimum encoding size in bytes per goroutine.
|
||||
// WithMinSplitSize is the minimum encoding size in bytes per goroutine.
|
||||
// See WithMaxGoroutines on how jobs are split.
|
||||
// If n <= 0, it is ignored.
|
||||
func WithMinSplitSize(n int) Option {
|
||||
|
@ -65,3 +66,13 @@ func withAVX2(enabled bool) Option {
|
|||
o.useAVX2 = enabled
|
||||
}
|
||||
}
|
||||
|
||||
// WithPAR1Matrix causes the encoder to build the matrix how PARv1
|
||||
// does. Note that the method they use is buggy, and may lead to cases
|
||||
// where recovery is impossible, even if there are enough parity
|
||||
// shards.
|
||||
func WithPAR1Matrix() Option {
|
||||
return func(o *options) {
|
||||
o.usePAR1Matrix = true
|
||||
}
|
||||
}
|
||||
|
|
|
@ -94,6 +94,68 @@ var ErrInvShardNum = errors.New("cannot create Encoder with zero or less data/pa
|
|||
// GF(2^8).
|
||||
var ErrMaxShardNum = errors.New("cannot create Encoder with more than 256 data+parity shards")
|
||||
|
||||
// buildMatrix creates the matrix to use for encoding, given the
|
||||
// number of data shards and the number of total shards.
|
||||
//
|
||||
// The top square of the matrix is guaranteed to be an identity
|
||||
// matrix, which means that the data shards are unchanged after
|
||||
// encoding.
|
||||
func buildMatrix(dataShards, totalShards int) (matrix, error) {
|
||||
// Start with a Vandermonde matrix. This matrix would work,
|
||||
// in theory, but doesn't have the property that the data
|
||||
// shards are unchanged after encoding.
|
||||
vm, err := vandermonde(totalShards, dataShards)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Multiply by the inverse of the top square of the matrix.
|
||||
// This will make the top square be the identity matrix, but
|
||||
// preserve the property that any square subset of rows is
|
||||
// invertible.
|
||||
top, err := vm.SubMatrix(0, 0, dataShards, dataShards)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
topInv, err := top.Invert()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return vm.Multiply(topInv)
|
||||
}
|
||||
|
||||
// buildMatrixPAR1 creates the matrix to use for encoding according to
|
||||
// the PARv1 spec, given the number of data shards and the number of
|
||||
// total shards. Note that the method they use is buggy, and may lead
|
||||
// to cases where recovery is impossible, even if there are enough
|
||||
// parity shards.
|
||||
//
|
||||
// The top square of the matrix is guaranteed to be an identity
|
||||
// matrix, which means that the data shards are unchanged after
|
||||
// encoding.
|
||||
func buildMatrixPAR1(dataShards, totalShards int) (matrix, error) {
|
||||
result, err := newMatrix(totalShards, dataShards)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for r, row := range result {
|
||||
// The top portion of the matrix is the identity
|
||||
// matrix, and the bottom is a transposed Vandermonde
|
||||
// matrix starting at 1 instead of 0.
|
||||
if r < dataShards {
|
||||
result[r][r] = 1
|
||||
} else {
|
||||
for c := range row {
|
||||
result[r][c] = galExp(byte(c+1), r-dataShards)
|
||||
}
|
||||
}
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// New creates a new encoder and initializes it to
|
||||
// the number of data shards and parity shards that
|
||||
// you want to use. You can reuse this encoder.
|
||||
|
@ -118,22 +180,16 @@ func New(dataShards, parityShards int, opts ...Option) (Encoder, error) {
|
|||
return nil, ErrMaxShardNum
|
||||
}
|
||||
|
||||
// Start with a Vandermonde matrix. This matrix would work,
|
||||
// in theory, but doesn't have the property that the data
|
||||
// shards are unchanged after encoding.
|
||||
vm, err := vandermonde(r.Shards, dataShards)
|
||||
var err error
|
||||
if r.o.usePAR1Matrix {
|
||||
r.m, err = buildMatrixPAR1(dataShards, r.Shards)
|
||||
} else {
|
||||
r.m, err = buildMatrix(dataShards, r.Shards)
|
||||
}
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Multiply by the inverse of the top square of the matrix.
|
||||
// This will make the top square be the identity matrix, but
|
||||
// preserve the property that any square subset of rows is
|
||||
// invertible.
|
||||
top, _ := vm.SubMatrix(0, 0, dataShards, dataShards)
|
||||
top, _ = top.Invert()
|
||||
r.m, _ = vm.Multiply(top)
|
||||
|
||||
// Inverted matrices are cached in a tree keyed by the indices
|
||||
// of the invalid rows of the data to reconstruct.
|
||||
// The inversion root node will have the identity matrix as
|
||||
|
|
|
@ -14,11 +14,99 @@ import (
|
|||
"testing"
|
||||
)
|
||||
|
||||
// isIncreasingAndContainsDataRow reports whether indices is strictly
// increasing and its first (hence smallest) entry is below
// len(indices), i.e. refers to a row in the upper square portion of
// the matrix, where the data rows live.
//
// indices must not be empty.
func isIncreasingAndContainsDataRow(indices []int) bool {
	n := len(indices)
	for i := 1; i < n; i++ {
		if indices[i-1] >= indices[i] {
			return false
		}
	}
	// Since the sequence is increasing, checking the first entry is
	// enough to know whether any data row is selected.
	return indices[0] < n
}
|
||||
|
||||
// incrementIndices advances indices in place like an odometer whose
// digits each run from 0 to indexBound-1, with the last element being
// the least significant digit.
//
// It returns false when the most significant digit reaches
// indexBound, meaning every combination has been visited; otherwise
// it returns true.
func incrementIndices(indices []int, indexBound int) (valid bool) {
	pos := len(indices) - 1
	for pos >= 0 {
		indices[pos]++
		if indices[pos] < indexBound {
			// No carry needed; the new combination is ready.
			return true
		}
		if pos == 0 {
			// The most significant digit overflowed.
			return false
		}
		// Reset this digit and carry into the next one.
		indices[pos] = 0
		pos--
	}
	return true
}
|
||||
|
||||
func incrementIndicesUntilIncreasingAndContainsDataRow(
|
||||
indices []int, maxIndex int) bool {
|
||||
for {
|
||||
valid := incrementIndices(indices, maxIndex)
|
||||
if !valid {
|
||||
return false
|
||||
}
|
||||
|
||||
if isIncreasingAndContainsDataRow(indices) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func findSingularSubMatrix(m matrix) (matrix, error) {
|
||||
rows := len(m)
|
||||
cols := len(m[0])
|
||||
rowIndices := make([]int, cols)
|
||||
for incrementIndicesUntilIncreasingAndContainsDataRow(rowIndices, rows) {
|
||||
subMatrix, _ := newMatrix(cols, cols)
|
||||
for i, r := range rowIndices {
|
||||
for c := 0; c < cols; c++ {
|
||||
subMatrix[i][c] = m[r][c]
|
||||
}
|
||||
}
|
||||
|
||||
_, err := subMatrix.Invert()
|
||||
if err == errSingular {
|
||||
return subMatrix, nil
|
||||
} else if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func TestBuildMatrixPAR1Singular(t *testing.T) {
|
||||
totalShards := 8
|
||||
dataShards := 4
|
||||
m, err := buildMatrixPAR1(dataShards, totalShards)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
singularSubMatrix, err := findSingularSubMatrix(m)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if singularSubMatrix == nil {
|
||||
t.Fatal("No singular sub-matrix found")
|
||||
}
|
||||
|
||||
t.Logf("matrix %s has singular sub-matrix %s", m, singularSubMatrix)
|
||||
}
|
||||
|
||||
func testOpts() [][]Option {
|
||||
if !testing.Short() {
|
||||
return [][]Option{}
|
||||
if testing.Short() {
|
||||
return [][]Option{
|
||||
{WithPAR1Matrix()},
|
||||
}
|
||||
}
|
||||
opts := [][]Option{
|
||||
{WithPAR1Matrix()},
|
||||
{WithMaxGoroutines(1), WithMinSplitSize(500), withSSE3(false), withAVX2(false)},
|
||||
{WithMaxGoroutines(5000), WithMinSplitSize(50), withSSE3(false), withAVX2(false)},
|
||||
{WithMaxGoroutines(5000), WithMinSplitSize(500000), withSSE3(false), withAVX2(false)},
|
||||
|
@ -162,6 +250,43 @@ func testReconstruct(t *testing.T, o ...Option) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestReconstructPAR1Singular(t *testing.T) {
|
||||
perShard := 50
|
||||
r, err := New(4, 4, WithPAR1Matrix())
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
shards := make([][]byte, 8)
|
||||
for s := range shards {
|
||||
shards[s] = make([]byte, perShard)
|
||||
}
|
||||
|
||||
rand.Seed(0)
|
||||
for s := 0; s < 8; s++ {
|
||||
fillRandom(shards[s])
|
||||
}
|
||||
|
||||
err = r.Encode(shards)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// Reconstruct with only the last data shard present, and the
|
||||
// first, second, and fourth parity shard present (based on
|
||||
// the result of TestBuildMatrixPAR1Singular). This should
|
||||
// fail.
|
||||
shards[0] = nil
|
||||
shards[1] = nil
|
||||
shards[2] = nil
|
||||
shards[6] = nil
|
||||
|
||||
err = r.Reconstruct(shards)
|
||||
if err != errSingular {
|
||||
t.Fatal(err)
|
||||
t.Errorf("expected %v, got %v", errSingular, err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestVerify(t *testing.T) {
|
||||
testVerify(t)
|
||||
for _, o := range testOpts() {
|
||||
|
|
Loading…
Reference in New Issue