Shorten the variable names and make an encoder interface, so it isn't possible to create it without calling New.

master
Klaus Post 2015-06-19 18:54:58 +02:00
parent 67f8d8b8c7
commit e3aca6cd9d
2 changed files with 85 additions and 53 deletions

View File

@ -13,28 +13,60 @@ import (
"sync"
)
type ReedSolomon struct {
DataShardCount int
ParityShardCount int
TotalShardCount int
m matrix
parity [][]byte
type Encoder interface {
// Encodes parity for a set of data shards.
// An array 'shards' containing data shards followed by parity shards.
// The number of shards must match the number given to New.
// Each shard is a byte array, and they must all be the same size.
// The parity shards will always be overwritten and the data shards
// will remain the same.
Encode(shards [][]byte) error
// Verify returns true if the parity shards contain the right data.
// The data is the same format as Encode. No data is modified.
Verify(shards [][]byte) (bool, error)
// Reconstruct will recreate the missing shards, if possible.
//
// Given a list of shards, some of which contain data, fills in the
// ones that don't have data.
//
// The length of the array must be equal to Shards.
// You indicate that a shard is missing by setting it to nil.
//
// If there are too few shards to reconstruct the missing
// ones, ErrTooFewShards will be returned.
//
// The reconstructed shard set is complete, but integrity is not verified.
// Use the Verify function to check if data set is ok.
Reconstruct(shards [][]byte) error
}
// reedSolomon contains a matrix for a specific
// distribution of datashards and parity shards.
// Construct if using New()
type reedSolomon struct {
DataShards int // Number of data shards, should not be modified.
ParityShards int // Number of parity shards, should not be modified.
Shards int // Total number of shards. Calculated, and should not be modified.
m matrix
parity [][]byte
}
// New creates a new encoder and initializes it to
// the number of data shards and parity shards that
// you want to use. You can reuse this encoder.
func New(dataShardCount, parityShardCount int) (*ReedSolomon, error) {
r := ReedSolomon{
DataShardCount: dataShardCount,
ParityShardCount: parityShardCount,
TotalShardCount: dataShardCount + parityShardCount,
func New(dataShards, parityShards int) (Encoder, error) {
r := reedSolomon{
DataShards: dataShards,
ParityShards: parityShards,
Shards: dataShards + parityShards,
}
// Start with a Vandermonde matrix. This matrix would work,
// in theory, but doesn't have the property that the data
// shards are unchanged after encoding.
vm, err := vandermonde(r.TotalShardCount, dataShardCount)
vm, err := vandermonde(r.Shards, dataShards)
if err != nil {
return nil, err
}
@ -43,13 +75,13 @@ func New(dataShardCount, parityShardCount int) (*ReedSolomon, error) {
// This will make the top square be the identity matrix, but
// preserve the property that any square subset of rows is
// invertible.
top, _ := vm.SubMatrix(0, 0, dataShardCount, dataShardCount)
top, _ := vm.SubMatrix(0, 0, dataShards, dataShards)
top, _ = top.Invert()
r.m, err = vm.Multiply(top)
r.parity = make([][]byte, parityShardCount)
r.parity = make([][]byte, parityShards)
for i := range r.parity {
r.parity[i] = r.m[dataShardCount+i]
r.parity[i] = r.m[dataShards+i]
}
return &r, err
@ -63,8 +95,8 @@ var ErrTooFewShards = errors.New("too few shards given to encode")
// Each shard is a byte array, and they must all be the same size.
// The parity shards will always be overwritten and the data shards
// will remain the same.
func (r ReedSolomon) Encode(shards [][]byte) error {
if len(shards) != r.TotalShardCount {
func (r reedSolomon) Encode(shards [][]byte) error {
if len(shards) != r.Shards {
return ErrTooFewShards
}
@ -74,17 +106,17 @@ func (r ReedSolomon) Encode(shards [][]byte) error {
}
// Get the slice of output buffers.
output := shards[r.DataShardCount:]
output := shards[r.DataShards:]
// Do the coding.
r.codeSomeShards(r.parity, shards[0:r.DataShardCount], output, r.ParityShardCount, len(shards[0]))
r.codeSomeShards(r.parity, shards[0:r.DataShards], output, r.ParityShards, len(shards[0]))
return nil
}
// Verify returns true if the parity shards contain the right data.
// The data is the same format as Encode. No data is modified.
func (r ReedSolomon) Verify(shards [][]byte) (bool, error) {
if len(shards) != r.TotalShardCount {
func (r reedSolomon) Verify(shards [][]byte) (bool, error) {
if len(shards) != r.Shards {
return false, ErrTooFewShards
}
err := checkShards(shards, false)
@ -93,10 +125,10 @@ func (r ReedSolomon) Verify(shards [][]byte) (bool, error) {
}
// Slice of buffers being checked.
toCheck := shards[r.DataShardCount:]
toCheck := shards[r.DataShards:]
// Do the checking.
return r.checkSomeShards(r.parity, shards[0:r.DataShardCount], toCheck, r.ParityShardCount, len(shards[0])), nil
return r.checkSomeShards(r.parity, shards[0:r.DataShards], toCheck, r.ParityShards, len(shards[0])), nil
}
// Multiplies a subset of rows from a coding matrix by a full set of
@ -108,7 +140,7 @@ func (r ReedSolomon) Verify(shards [][]byte) (bool, error) {
// The number of outputs computed, and the
// number of matrix rows used, is determined by
// outputCount, which is the number of outputs to compute.
func (r ReedSolomon) codeSomeShards(matrixRows, inputs, outputs [][]byte, outputCount, byteCount int) {
func (r reedSolomon) codeSomeShards(matrixRows, inputs, outputs [][]byte, outputCount, byteCount int) {
if runtime.GOMAXPROCS(0) > 1 {
r.codeSomeShardsP(matrixRows, inputs, outputs, outputCount, byteCount)
return
@ -117,7 +149,7 @@ func (r ReedSolomon) codeSomeShards(matrixRows, inputs, outputs [][]byte, output
for iRow := 0; iRow < outputCount; iRow++ {
matrixRow := matrixRows[iRow]
var value byte
for c := 0; c < r.DataShardCount; c++ {
for c := 0; c < r.DataShards; c++ {
// note: manual inlining is slower
value ^= galMultiply(matrixRow[c], inputs[c][iByte])
}
@ -131,7 +163,7 @@ const splitSize = 256
// Perform the same as codeSomeShards, but split the workload into
// several goroutines.
func (r ReedSolomon) codeSomeShardsP(matrixRows, inputs, outputs [][]byte, outputCount, byteCount int) {
func (r reedSolomon) codeSomeShardsP(matrixRows, inputs, outputs [][]byte, outputCount, byteCount int) {
var wg sync.WaitGroup
left := byteCount
start := 0
@ -150,7 +182,7 @@ func (r ReedSolomon) codeSomeShardsP(matrixRows, inputs, outputs [][]byte, outpu
for iRow := 0; iRow < outputCount; iRow++ {
matrixRow := matrixRows[iRow]
var value byte
for c := 0; c < r.DataShardCount; c++ {
for c := 0; c < r.DataShards; c++ {
// note: manual inlining is slower
value ^= galMultiply(matrixRow[c], inputs[c][iByte])
}
@ -167,7 +199,7 @@ func (r ReedSolomon) codeSomeShardsP(matrixRows, inputs, outputs [][]byte, outpu
// checkSomeShards is nostly the same as codeSomeShards,
// except this will check values and return
// as soon as a difference is found.
func (r ReedSolomon) checkSomeShards(matrixRows, inputs, toCheck [][]byte, outputCount, byteCount int) bool {
func (r reedSolomon) checkSomeShards(matrixRows, inputs, toCheck [][]byte, outputCount, byteCount int) bool {
if runtime.GOMAXPROCS(0) > 1 {
return r.checkSomeShardsP(matrixRows, inputs, toCheck, outputCount, byteCount)
}
@ -175,7 +207,7 @@ func (r ReedSolomon) checkSomeShards(matrixRows, inputs, toCheck [][]byte, outpu
for iRow := 0; iRow < outputCount; iRow++ {
matrixRow := matrixRows[iRow]
var value byte
for c := 0; c < r.DataShardCount; c++ {
for c := 0; c < r.DataShards; c++ {
// note: manual inlining is slower
value ^= galMultiply(matrixRow[c], inputs[c][iByte])
}
@ -188,7 +220,7 @@ func (r ReedSolomon) checkSomeShards(matrixRows, inputs, toCheck [][]byte, outpu
}
// Parallel version of checkSomeShards
func (r ReedSolomon) checkSomeShardsP(matrixRows, inputs, toCheck [][]byte, outputCount, byteCount int) bool {
func (r reedSolomon) checkSomeShardsP(matrixRows, inputs, toCheck [][]byte, outputCount, byteCount int) bool {
var wg sync.WaitGroup
left := byteCount
start := 0
@ -209,7 +241,7 @@ func (r ReedSolomon) checkSomeShardsP(matrixRows, inputs, toCheck [][]byte, outp
for iRow := 0; iRow < outputCount; iRow++ {
matrixRow := matrixRows[iRow]
var value byte
for c := 0; c < r.DataShardCount; c++ {
for c := 0; c < r.DataShards; c++ {
// note: manual inlining is slower
value ^= galMultiply(matrixRow[c], inputs[c][iByte])
}
@ -272,7 +304,7 @@ func shardSize(shards [][]byte) int {
// Given a list of shards, some of which contain data, fills in the
// ones that don't have data.
//
// The length of the array must be equal to TotalShardCount.
// The length of the array must be equal to Shards.
// You indicate that a shard is missing by setting it to nil.
//
// If there are too few shards to reconstruct the missing
@ -280,8 +312,8 @@ func shardSize(shards [][]byte) int {
//
// The reconstructed shard set is complete, but integrity is not verified.
// Use the Verify function to check if data set is ok.
func (r ReedSolomon) Reconstruct(shards [][]byte) error {
if len(shards) != r.TotalShardCount {
func (r reedSolomon) Reconstruct(shards [][]byte) error {
if len(shards) != r.Shards {
return ErrTooFewShards
}
// Check arguments.
@ -295,19 +327,19 @@ func (r ReedSolomon) Reconstruct(shards [][]byte) error {
// Quick check: are all of the shards present? If so, there's
// nothing to do.
numberPresent := 0
for i := 0; i < r.TotalShardCount; i++ {
for i := 0; i < r.Shards; i++ {
if len(shards[i]) != 0 {
numberPresent += 1
}
}
if numberPresent == r.TotalShardCount {
if numberPresent == r.Shards {
// Cool. All of the shards data data. We don't
// need to do anything.
return nil
}
// More complete sanity check
if numberPresent < r.DataShardCount {
if numberPresent < r.DataShards {
return ErrTooFewShards
}
@ -320,15 +352,15 @@ func (r ReedSolomon) Reconstruct(shards [][]byte) error {
// correspond to the rows of the submatrix. These shards
// will be the input to the decoding process that re-creates
// the missing data shards.
subMatrix, err := newMatrix(r.DataShardCount, r.DataShardCount)
subMatrix, err := newMatrix(r.DataShards, r.DataShards)
if err != nil {
return err
}
subShards := make([][]byte, r.DataShardCount)
subShards := make([][]byte, r.DataShards)
subMatrixRow := 0
for matrixRow := 0; matrixRow < r.TotalShardCount && subMatrixRow < r.DataShardCount; matrixRow++ {
for matrixRow := 0; matrixRow < r.Shards && subMatrixRow < r.DataShards; matrixRow++ {
if len(shards[matrixRow]) != 0 {
for c := 0; c < r.DataShardCount; c++ {
for c := 0; c < r.DataShards; c++ {
subMatrix[subMatrixRow][c] = r.m[matrixRow][c]
}
subShards[subMatrixRow] = shards[matrixRow]
@ -352,11 +384,11 @@ func (r ReedSolomon) Reconstruct(shards [][]byte) error {
// have, and the output is the missing data shards. The computation
// is done using the special decode matrix we just built.
outputs := make([][]byte, r.ParityShardCount)
matrixRows := make([][]byte, r.ParityShardCount)
outputs := make([][]byte, r.ParityShards)
matrixRows := make([][]byte, r.ParityShards)
outputCount := 0
for iShard := 0; iShard < r.DataShardCount; iShard++ {
for iShard := 0; iShard < r.DataShards; iShard++ {
if len(shards[iShard]) == 0 {
shards[iShard] = make([]byte, shardSize)
outputs[outputCount] = shards[iShard]
@ -373,14 +405,14 @@ func (r ReedSolomon) Reconstruct(shards [][]byte) error {
// any that we just calculated. The output is whichever of the
// data shards were missing.
outputCount = 0
for iShard := r.DataShardCount; iShard < r.TotalShardCount; iShard++ {
for iShard := r.DataShards; iShard < r.Shards; iShard++ {
if len(shards[iShard]) == 0 {
shards[iShard] = make([]byte, shardSize)
outputs[outputCount] = shards[iShard]
matrixRows[outputCount] = r.parity[iShard-r.DataShardCount]
matrixRows[outputCount] = r.parity[iShard-r.DataShards]
outputCount++
}
}
r.codeSomeShards(matrixRows, shards[:r.DataShardCount], outputs[:outputCount], outputCount, shardSize)
r.codeSomeShards(matrixRows, shards[:r.DataShards], outputs[:outputCount], outputCount, shardSize)
return nil
}

View File

@ -24,7 +24,7 @@ func TestEncoding(t *testing.T) {
}
rand.Seed(0)
for s := 0; s < r.DataShardCount; s++ {
for s := 0; s < 13; s++ {
fillRandom(shards[s])
}
@ -53,7 +53,7 @@ func TestReconstruct(t *testing.T) {
}
rand.Seed(0)
for s := 0; s < r.DataShardCount; s++ {
for s := 0; s < 13; s++ {
fillRandom(shards[s])
}
@ -142,13 +142,13 @@ func benchmarkEncode(b *testing.B, dataShards, parityShards, shardSize int) {
if err != nil {
b.Fatal(err)
}
shards := make([][]byte, r.TotalShardCount)
shards := make([][]byte, dataShards+parityShards)
for s := range shards {
shards[s] = make([]byte, shardSize)
}
rand.Seed(0)
for s := 0; s < r.DataShardCount; s++ {
for s := 0; s < dataShards; s++ {
fillRandom(shards[s])
}
@ -184,13 +184,13 @@ func benchmarkVerify(b *testing.B, dataShards, parityShards, shardSize int) {
if err != nil {
b.Fatal(err)
}
shards := make([][]byte, r.TotalShardCount)
shards := make([][]byte, parityShards+dataShards)
for s := range shards {
shards[s] = make([]byte, shardSize)
}
rand.Seed(0)
for s := 0; s < r.DataShardCount; s++ {
for s := 0; s < dataShards; s++ {
fillRandom(shards[s])
}
err = r.Encode(shards)