Add support for PAR1 (#55)

PAR1 is a file format which uses a Reed-Solomon code similar
to the current one, except it uses a different (flawed) coding
matrix.

Add support for it via a WithPAR1Matrix option, so that this code
can be used to encode/decode PAR1 files. Also add the option to
existing tests, and add a test demonstrating the flaw in PAR1's
coding matrix.

Also fix a mistakenly inverted test in testOpts().

Incidentally, PAR1 is obsoleted by PAR2, which uses GF(2^16)
and tries to fix the flaw in the coding matrix; however, PAR2's
coding matrix is still flawed! The real solution is to build the
coding matrix like in this repository.

PAR1 spec:
http://parchive.sourceforge.net/docs/specifications/parity-volume-spec-1.0/article-spec.html

Paper describing the (flawed) Reed-Solomon code used by PAR1:
http://web.eecs.utk.edu/~plank/plank/papers/CS-96-332.html
master
Fred Akalin 2017-06-20 11:24:57 -07:00 committed by Klaus Post
parent 87c4e5ae75
commit 18d548df63
3 changed files with 207 additions and 15 deletions

View File

@ -13,6 +13,7 @@ type options struct {
maxGoroutines int
minSplitSize int
useAVX2, useSSSE3 bool
usePAR1Matrix bool
}
var defaultOptions = options{
@ -43,7 +44,7 @@ func WithMaxGoroutines(n int) Option {
}
}
// MinSplitSize Is the minimum encoding size in bytes per goroutine.
// WithMinSplitSize is the minimum encoding size in bytes per goroutine.
// See WithMaxGoroutines on how jobs are split.
// If n <= 0, it is ignored.
func WithMinSplitSize(n int) Option {
@ -65,3 +66,13 @@ func withAVX2(enabled bool) Option {
o.useAVX2 = enabled
}
}
// WithPAR1Matrix returns an Option that makes the encoder construct
// its coding matrix the way PARv1 does. Be aware that the PARv1
// construction is buggy: for some combinations of missing shards
// recovery is impossible even though enough parity shards remain.
func WithPAR1Matrix() Option {
	enablePAR1 := func(opts *options) {
		opts.usePAR1Matrix = true
	}
	return enablePAR1
}

View File

@ -94,6 +94,68 @@ var ErrInvShardNum = errors.New("cannot create Encoder with zero or less data/pa
// GF(2^8).
var ErrMaxShardNum = errors.New("cannot create Encoder with more than 256 data+parity shards")
// buildMatrix returns the encoding matrix for the given number of
// data shards and total shards.
//
// The square block at the top of the result is an identity matrix,
// so encoding leaves the data shards untouched.
func buildMatrix(dataShards, totalShards int) (matrix, error) {
	// A plain Vandermonde matrix is the starting point. In theory it
	// could be used as-is, but encoding with it would not keep the
	// data shards unchanged.
	base, err := vandermonde(totalShards, dataShards)
	if err != nil {
		return nil, err
	}

	// Take the square block at the top and invert it.
	square, err := base.SubMatrix(0, 0, dataShards, dataShards)
	if err != nil {
		return nil, err
	}
	inverse, err := square.Invert()
	if err != nil {
		return nil, err
	}

	// Multiplying by that inverse turns the top block into the
	// identity matrix while preserving the property that any square
	// subset of rows is invertible.
	return base.Multiply(inverse)
}
// buildMatrixPAR1 returns the encoding matrix prescribed by the PARv1
// spec for the given number of data shards and total shards. The
// PARv1 construction is buggy: it can produce matrices for which
// recovery is impossible even though enough parity shards exist.
//
// The square block at the top of the result is an identity matrix,
// so encoding leaves the data shards untouched.
func buildMatrixPAR1(dataShards, totalShards int) (matrix, error) {
	m, err := newMatrix(totalShards, dataShards)
	if err != nil {
		return nil, err
	}
	for r := range m {
		if r < dataShards {
			// Data rows: a single 1 on the diagonal (identity).
			m[r][r] = 1
			continue
		}
		// Parity rows: a transposed Vandermonde matrix whose base
		// values start at 1 instead of 0; the exponent is the index
		// of the parity row.
		exponent := r - dataShards
		for c := range m[r] {
			m[r][c] = galExp(byte(c+1), exponent)
		}
	}
	return m, nil
}
// New creates a new encoder and initializes it to
// the number of data shards and parity shards that
// you want to use. You can reuse this encoder.
@ -118,22 +180,16 @@ func New(dataShards, parityShards int, opts ...Option) (Encoder, error) {
return nil, ErrMaxShardNum
}
// Start with a Vandermonde matrix. This matrix would work,
// in theory, but doesn't have the property that the data
// shards are unchanged after encoding.
vm, err := vandermonde(r.Shards, dataShards)
var err error
if r.o.usePAR1Matrix {
r.m, err = buildMatrixPAR1(dataShards, r.Shards)
} else {
r.m, err = buildMatrix(dataShards, r.Shards)
}
if err != nil {
return nil, err
}
// Multiply by the inverse of the top square of the matrix.
// This will make the top square be the identity matrix, but
// preserve the property that any square subset of rows is
// invertible.
top, _ := vm.SubMatrix(0, 0, dataShards, dataShards)
top, _ = top.Invert()
r.m, _ = vm.Multiply(top)
// Inverted matrices are cached in a tree keyed by the indices
// of the invalid rows of the data to reconstruct.
// The inversion root node will have the identity matrix as

View File

@ -14,11 +14,99 @@ import (
"testing"
)
// isIncreasingAndContainsDataRow reports whether indices is strictly
// increasing and its first (smallest) entry is below len(indices),
// i.e. refers to a row in the upper square portion of the matrix,
// where the data rows live.
func isIncreasingAndContainsDataRow(indices []int) bool {
	n := len(indices)
	for i := 1; i < n; i++ {
		if indices[i] <= indices[i-1] {
			return false
		}
	}
	// The sequence is increasing, so it contains a data row exactly
	// when its first entry is one.
	return indices[0] < n
}
// incrementIndices advances indices in place like an odometer whose
// digits each run from 0 to indexBound-1, with the last element being
// the fastest-changing digit. It returns false once the first element
// has overflowed past indexBound-1, and true otherwise.
func incrementIndices(indices []int, indexBound int) (valid bool) {
	pos := len(indices) - 1
	for pos >= 0 {
		indices[pos]++
		if indices[pos] < indexBound {
			// No carry needed; done.
			return true
		}
		if pos == 0 {
			// The most significant digit overflowed.
			return false
		}
		// Wrap this digit and carry into the next one to the left.
		indices[pos] = 0
		pos--
	}
	return true
}
// incrementIndicesUntilIncreasingAndContainsDataRow keeps advancing
// indices (each entry bounded by maxIndex, exclusive) until they form
// a selection accepted by isIncreasingAndContainsDataRow. It returns
// false when the indices are exhausted before such a selection is
// reached.
func incrementIndicesUntilIncreasingAndContainsDataRow(
	indices []int, maxIndex int) bool {
	for incrementIndices(indices, maxIndex) {
		if isIncreasingAndContainsDataRow(indices) {
			return true
		}
	}
	return false
}
// findSingularSubMatrix searches m for a square sub-matrix, built from
// len(m[0]) of m's rows, that cannot be inverted. Only row selections
// accepted by incrementIndicesUntilIncreasingAndContainsDataRow are
// tried: strictly increasing row indices whose first index lies in
// the upper square portion of m.
//
// It returns the first singular sub-matrix found, or (nil, nil) when
// there is none (including when m is empty). Any error other than
// errSingular is returned unchanged.
func findSingularSubMatrix(m matrix) (matrix, error) {
	if len(m) == 0 || len(m[0]) == 0 {
		// Nothing to select rows from.
		return nil, nil
	}
	rows := len(m)
	cols := len(m[0])
	rowIndices := make([]int, cols)
	for incrementIndicesUntilIncreasingAndContainsDataRow(rowIndices, rows) {
		subMatrix, err := newMatrix(cols, cols)
		if err != nil {
			return nil, err
		}
		// Assemble the candidate from the currently selected rows.
		for i, r := range rowIndices {
			copy(subMatrix[i], m[r])
		}
		if _, err := subMatrix.Invert(); err == errSingular {
			return subMatrix, nil
		} else if err != nil {
			return nil, err
		}
	}
	return nil, nil
}
// TestBuildMatrixPAR1Singular demonstrates the flaw in the PAR1 coding
// matrix: for 4 data shards and 8 total shards it must contain a
// singular square sub-matrix.
func TestBuildMatrixPAR1Singular(t *testing.T) {
	const (
		dataShards  = 4
		totalShards = 8
	)
	par1, err := buildMatrixPAR1(dataShards, totalShards)
	if err != nil {
		t.Fatal(err)
	}

	singular, err := findSingularSubMatrix(par1)
	switch {
	case err != nil:
		t.Fatal(err)
	case singular == nil:
		t.Fatal("No singular sub-matrix found")
	}

	t.Logf("matrix %s has singular sub-matrix %s", par1, singular)
}
func testOpts() [][]Option {
if !testing.Short() {
return [][]Option{}
if testing.Short() {
return [][]Option{
{WithPAR1Matrix()},
}
}
opts := [][]Option{
{WithPAR1Matrix()},
{WithMaxGoroutines(1), WithMinSplitSize(500), withSSE3(false), withAVX2(false)},
{WithMaxGoroutines(5000), WithMinSplitSize(50), withSSE3(false), withAVX2(false)},
{WithMaxGoroutines(5000), WithMinSplitSize(500000), withSSE3(false), withAVX2(false)},
@ -162,6 +250,43 @@ func testReconstruct(t *testing.T, o ...Option) {
}
}
// TestReconstructPAR1Singular checks that reconstruction with the PAR1
// matrix fails with errSingular for a particular set of missing
// shards, even though as many shards remain as there are data shards.
func TestReconstructPAR1Singular(t *testing.T) {
	perShard := 50
	r, err := New(4, 4, WithPAR1Matrix())
	if err != nil {
		t.Fatal(err)
	}
	shards := make([][]byte, 8)
	for s := range shards {
		shards[s] = make([]byte, perShard)
	}

	// Fixed seed so the shard contents are the same on every run.
	rand.Seed(0)
	for s := 0; s < 8; s++ {
		fillRandom(shards[s])
	}

	err = r.Encode(shards)
	if err != nil {
		t.Fatal(err)
	}

	// Reconstruct with only the last data shard present, and the
	// first, second, and fourth parity shard present (based on
	// the result of TestBuildMatrixPAR1Singular). This should
	// fail.
	shards[0] = nil
	shards[1] = nil
	shards[2] = nil
	shards[6] = nil

	err = r.Reconstruct(shards)
	if err != errSingular {
		// A single Fatalf reports both the expected and the actual
		// error; this also gives a useful message when err is nil.
		t.Fatalf("expected %v, got %v", errSingular, err)
	}
}
func TestVerify(t *testing.T) {
testVerify(t)
for _, o := range testOpts() {