Add progressive erasure shard encoding (#170)

* Add progressive erasure shard encoding

Allow progressively building erasure shards.
master
Klaus Post 2021-10-26 05:39:44 -07:00 committed by GitHub
parent 7761c8f7cd
commit 0eef97bb02
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 216 additions and 1 deletions

View File

@ -58,6 +58,63 @@ func ExampleEncoder() {
// ok
}
// Simple example of how to use all functions of the EncoderIdx.
// Note that all error checks have been removed to keep it short.
func ExampleEncoder_EncodeIdx() {
const dataShards = 7
const erasureShards = 3
// Create some sample data
var data = make([]byte, 250000)
fillRandom(data)
// Create an encoder with 17 data and 3 parity slices.
enc, _ := reedsolomon.New(dataShards, erasureShards)
// Split the data into shards
shards, _ := enc.Split(data)
// Zero erasure shards.
for i := 0; i < erasureShards; i++ {
clear := shards[dataShards+i]
for j := range clear {
clear[j] = 0
}
}
for i := 0; i < dataShards; i++ {
// Encode one shard at the time.
// Note how this gives linear access.
// There is however no requirement on shards being delivered in order.
// All parity shards will be updated on each run.
_ = enc.EncodeIdx(shards[i], i, shards[dataShards:])
}
// Verify the parity set
ok, err := enc.Verify(shards)
if ok {
fmt.Println("ok")
} else {
fmt.Println(err)
}
// Delete two shards
shards[dataShards-2], shards[dataShards-2] = nil, nil
// Reconstruct the shards
_ = enc.Reconstruct(shards)
// Verify the data set
ok, err = enc.Verify(shards)
if ok {
fmt.Println("ok")
} else {
fmt.Println(err)
}
// Output: ok
// ok
}
// This demonstrates that shards can be arbitrary sliced and
// merged and still remain valid.
func ExampleEncoder_slicing() {

View File

@ -32,6 +32,12 @@ type Encoder interface {
// data shards while this is running.
Encode(shards [][]byte) error
// EncodeIdx will add parity for a single data shard.
// Parity shards should start out as 0. The caller must zero them.
// Data shards must be delivered exactly once. There is no check for this.
// The parity shards will always be updated and the data shards will remain the same.
EncodeIdx(dataShard []byte, idx int, parity [][]byte) error
// Verify returns true if the parity shards contain correct data.
// The data is the same format as Encode. No data is modified, so
// you are allowed to read from data while this is running.
@ -396,6 +402,48 @@ func (r *reedSolomon) Encode(shards [][]byte) error {
return nil
}
// EncodeIdx will add parity for a single data shard.
// Parity shards should start out zeroed. The caller must zero them before first call.
// Data shards should only be delivered once. There is no check for this.
// The parity shards will always be updated and the data shards will remain the unchanged.
func (r *reedSolomon) EncodeIdx(dataShard []byte, idx int, parity [][]byte) error {
if len(parity) != r.ParityShards {
return ErrTooFewShards
}
if len(parity) == 0 {
return nil
}
if idx < 0 || idx >= r.DataShards {
return ErrInvShardNum
}
err := checkShards(parity, false)
if err != nil {
return err
}
if len(parity[0]) != len(dataShard) {
return ErrShardSize
}
// Process using no goroutines for now.
start, end := 0, r.o.perRound
if end > len(dataShard) {
end = len(dataShard)
}
for start < len(dataShard) {
in := dataShard[start:end]
for iRow := 0; iRow < r.ParityShards; iRow++ {
galMulSliceXor(r.parity[iRow][idx], in, parity[iRow][start:end], &r.o)
}
start = end
end += r.o.perRound
if end > len(dataShard) {
end = len(dataShard)
}
}
return nil
}
// ErrInvalidInput is returned if invalid input parameter of Update.
var ErrInvalidInput = errors.New("invalid input")

View File

@ -171,10 +171,18 @@ func TestEncoding(t *testing.T) {
t.Run("default", func(t *testing.T) {
testEncoding(t, testOptions()...)
})
t.Run("default-dx", func(t *testing.T) {
testEncodingIdx(t, testOptions()...)
})
for i, o := range testOpts() {
t.Run(fmt.Sprintf("opt-%d", i), func(t *testing.T) {
testEncoding(t, o...)
})
if !testing.Short() {
t.Run(fmt.Sprintf("idx-opt-%d", i), func(t *testing.T) {
testEncodingIdx(t, o...)
})
}
}
}
@ -207,7 +215,7 @@ func testEncoding(t *testing.T, o ...Option) {
shards[s] = make([]byte, perShard)
}
for s := 0; s < data; s++ {
for s := 0; s < len(shards); s++ {
rng.Read(shards[s])
}
@ -281,6 +289,108 @@ func testEncoding(t *testing.T, o ...Option) {
}
}
func testEncodingIdx(t *testing.T, o ...Option) {
for _, size := range testSizes {
data, parity := size[0], size[1]
rng := rand.New(rand.NewSource(0xabadc0cac01a))
t.Run(fmt.Sprintf("%dx%d", data, parity), func(t *testing.T) {
sz := testDataSizes
if testing.Short() {
sz = testDataSizesShort
}
for _, perShard := range sz {
t.Run(fmt.Sprint(perShard), func(t *testing.T) {
r, err := New(data, parity, testOptions(o...)...)
if err != nil {
t.Fatal(err)
}
shards := make([][]byte, data+parity)
for s := range shards {
shards[s] = make([]byte, perShard)
}
shuffle := make([]int, data)
for i := range shuffle {
shuffle[i] = i
}
rng.Shuffle(len(shuffle), func(i, j int) { shuffle[i], shuffle[j] = shuffle[j], shuffle[i] })
// Send shards in random order.
for s := 0; s < data; s++ {
s := shuffle[s]
rng.Read(shards[s])
err = r.EncodeIdx(shards[s], s, shards[data:])
if err != nil {
t.Fatal(err)
}
}
ok, err := r.Verify(shards)
if err != nil {
t.Fatal(err)
}
if !ok {
t.Fatal("Verification failed")
}
if parity == 0 {
// Check that Reconstruct and ReconstructData do nothing
err = r.ReconstructData(shards)
if err != nil {
t.Fatal(err)
}
err = r.Reconstruct(shards)
if err != nil {
t.Fatal(err)
}
// Skip integrity checks
return
}
// Delete one in data
idx := rng.Intn(data)
want := shards[idx]
shards[idx] = nil
err = r.ReconstructData(shards)
if err != nil {
t.Fatal(err)
}
if !bytes.Equal(shards[idx], want) {
t.Fatal("did not ReconstructData correctly")
}
// Delete one randomly
idx = rng.Intn(data + parity)
want = shards[idx]
shards[idx] = nil
err = r.Reconstruct(shards)
if err != nil {
t.Fatal(err)
}
if !bytes.Equal(shards[idx], want) {
t.Fatal("did not Reconstruct correctly")
}
err = r.Encode(make([][]byte, 1))
if err != ErrTooFewShards {
t.Errorf("expected %v, got %v", ErrTooFewShards, err)
}
// Make one too short.
shards[idx] = shards[idx][:perShard-1]
err = r.Encode(shards)
if err != ErrShardSize {
t.Errorf("expected %v, got %v", ErrShardSize, err)
}
})
}
})
}
}
func TestUpdate(t *testing.T) {
for i, o := range testOpts() {
t.Run(fmt.Sprintf("options %d", i), func(t *testing.T) {