reedsolomon-go/reedsolomon_test.go

1868 lines
44 KiB
Go
Raw Permalink Normal View History

2015-06-19 17:31:24 +03:00
/**
* Unit tests for ReedSolomon
*
* Copyright 2015, Klaus Post
* Copyright 2015, Backblaze, Inc. All rights reserved.
*/
package reedsolomon
import (
2015-08-07 06:37:51 +03:00
"bytes"
"flag"
"fmt"
2015-06-19 17:31:24 +03:00
"math/rand"
"os"
2015-08-07 07:21:24 +03:00
"runtime"
"strconv"
2015-06-19 17:31:24 +03:00
"testing"
)
// Command-line switches that let a test run turn individual SIMD code paths
// off. Each flag defaults to the negation of the corresponding field in
// defaultOptions.
var (
	noSSE2   = flag.Bool("no-sse2", !defaultOptions.useSSE2, "Disable SSE2")
	noSSSE3  = flag.Bool("no-ssse3", !defaultOptions.useSSSE3, "Disable SSSE3")
	noAVX2   = flag.Bool("no-avx2", !defaultOptions.useAVX2, "Disable AVX2")
	noAVX512 = flag.Bool("no-avx512", !defaultOptions.useAVX512, "Disable AVX512")
)
// TestMain parses the command-line flags before the tests run and exits the
// process with the status reported by the test runner.
func TestMain(m *testing.M) {
	flag.Parse()
	status := m.Run()
	os.Exit(status)
}
// testOptions returns o extended with WithFastOneParityMatrix and, for every
// "no-*" command-line flag that is set, the option that disables the matching
// instruction set. Options are appended in the order SSSE3, SSE2, AVX2, AVX512.
func testOptions(o ...Option) []Option {
	o = append(o, WithFastOneParityMatrix())

	toggles := []struct {
		off  *bool
		with func(bool) Option
	}{
		{noSSSE3, WithSSSE3},
		{noSSE2, WithSSE2},
		{noAVX2, WithAVX2},
		{noAVX512, WithAVX512},
	}
	for _, tg := range toggles {
		if *tg.off {
			o = append(o, tg.with(false))
		}
	}
	return o
}
// isIncreasingAndContainsDataRow reports whether indices is strictly
// increasing and its first element is smaller than len(indices).
//
// indices must be non-empty.
func isIncreasingAndContainsDataRow(indices []int) bool {
	width := len(indices)
	for pos := 1; pos < width; pos++ {
		if indices[pos-1] >= indices[pos] {
			return false
		}
	}
	// Data rows are in the upper square portion of the matrix, so a
	// selection touches one exactly when its smallest index is below width.
	return indices[0] < width
}
// incrementIndices advances indices in place as if it were a counter whose
// digits each run from 0 to indexBound-1, with the last element being the
// least significant digit. It returns false when the most significant digit
// reaches indexBound, i.e. the counter has run out of values.
func incrementIndices(indices []int, indexBound int) (valid bool) {
	for pos := len(indices) - 1; pos >= 0; pos-- {
		indices[pos]++
		switch {
		case indices[pos] < indexBound:
			// No carry needed; the new value is valid.
			return true
		case pos == 0:
			// Carry out of the most significant digit.
			return false
		}
		// Wrap this digit and carry into the next more significant one.
		indices[pos] = 0
	}
	return true
}
// incrementIndicesUntilIncreasingAndContainsDataRow keeps advancing indices
// with incrementIndices until isIncreasingAndContainsDataRow accepts them.
// It returns false once incrementIndices reports that no values are left.
func incrementIndicesUntilIncreasingAndContainsDataRow(
	indices []int, maxIndex int) bool {
	for incrementIndices(indices, maxIndex) {
		if isIncreasingAndContainsDataRow(indices) {
			return true
		}
	}
	return false
}
// findSingularSubMatrix searches m for a square sub-matrix, assembled from
// len(m[0]) of its rows, that cannot be inverted.
//
// Row selections are enumerated with
// incrementIndicesUntilIncreasingAndContainsDataRow, so only strictly
// increasing selections whose first row index is below the column count are
// tried.
//
// It returns the first sub-matrix for which Invert reports errSingular. It
// returns (nil, nil) when no tried selection is singular or when m has no rows
// or no columns, and (nil, err) when allocating or inverting a sub-matrix
// fails for any other reason.
func findSingularSubMatrix(m matrix) (matrix, error) {
	if len(m) == 0 || len(m[0]) == 0 {
		// Nothing to select from; avoid indexing into an empty matrix.
		return nil, nil
	}
	rows := len(m)
	cols := len(m[0])
	rowIndices := make([]int, cols)
	for incrementIndicesUntilIncreasingAndContainsDataRow(rowIndices, rows) {
		// A fresh matrix is needed per attempt because the singular one is
		// handed back to the caller.
		subMatrix, allocErr := newMatrix(cols, cols)
		if allocErr != nil {
			return nil, allocErr
		}
		for i, r := range rowIndices {
			for c := 0; c < cols; c++ {
				subMatrix[i][c] = m[r][c]
			}
		}
		_, err := subMatrix.Invert()
		switch {
		case err == errSingular:
			return subMatrix, nil
		case err != nil:
			return nil, err
		}
	}
	return nil, nil
}
// TestBuildMatrixJerasure builds the 8+4 matrix with buildMatrixJerasure and
// checks that its top 8 rows form the identity and that the 4 rows below
// match the reference coding matrix.
func TestBuildMatrixJerasure(t *testing.T) {
	const (
		dataShards  = 8
		totalShards = 12
	)
	m, err := buildMatrixJerasure(dataShards, totalShards)
	if err != nil {
		t.Fatal(err)
	}

	// Expected coding rows for EC 8+4.
	refMatrix := matrix{
		{1, 1, 1, 1, 1, 1, 1, 1},
		{1, 55, 39, 73, 84, 181, 225, 217},
		{1, 39, 217, 161, 92, 60, 172, 90},
		{1, 172, 70, 235, 143, 34, 200, 101},
	}

	for row := 0; row < dataShards; row++ {
		for col := 0; col < dataShards; col++ {
			onDiagonal := row == col
			if (onDiagonal && m[row][col] != 1) || (!onDiagonal && m[row][col] != 0) {
				t.Fatal("Top part of the matrix is not identity")
			}
		}
	}

	for row := 0; row < totalShards-dataShards; row++ {
		for col := 0; col < dataShards; col++ {
			if m[dataShards+row][col] != refMatrix[row][col] {
				t.Fatal("Coding matrix for EC 8+4 differs from Jerasure")
			}
		}
	}
}
// TestBuildMatrixPAR1Singular builds the 4+4 matrix with buildMatrixPAR1 and
// fails unless findSingularSubMatrix finds a singular sub-matrix in it, which
// is then logged.
func TestBuildMatrixPAR1Singular(t *testing.T) {
	const (
		dataShards  = 4
		totalShards = 8
	)
	m, err := buildMatrixPAR1(dataShards, totalShards)
	if err != nil {
		t.Fatal(err)
	}

	singular, err := findSingularSubMatrix(m)
	switch {
	case err != nil:
		t.Fatal(err)
	case singular == nil:
		t.Fatal("No singular sub-matrix found")
	}
	t.Logf("matrix %s has singular sub-matrix %s", m, singular)
}
func testOpts() [][]Option {
if testing.Short() {
return [][]Option{
{WithPAR1Matrix()}, {WithCauchyMatrix()},
}
}
opts := [][]Option{
{WithPAR1Matrix()}, {WithCauchyMatrix()},
{WithFastOneParityMatrix()}, {WithPAR1Matrix(), WithFastOneParityMatrix()}, {WithCauchyMatrix(), WithFastOneParityMatrix()},
{WithMaxGoroutines(1), WithMinSplitSize(500), WithSSSE3(false), WithAVX2(false), WithAVX512(false)},
{WithMaxGoroutines(5000), WithMinSplitSize(50), WithSSSE3(false), WithAVX2(false), WithAVX512(false)},
{WithMaxGoroutines(5000), WithMinSplitSize(500000), WithSSSE3(false), WithAVX2(false), WithAVX512(false)},
{WithMaxGoroutines(1), WithMinSplitSize(500000), WithSSSE3(false), WithAVX2(false), WithAVX512(false)},
Split blocks into size divisible by 16 Older systems (typically without AVX2) are more sensitive to misaligned load+stores. Add parameter to automatically set the number of goroutines. name old time/op new time/op delta Encode10x2x10000-8 18.4µs ± 1% 16.1µs ± 1% -12.43% (p=0.000 n=9+9) Encode100x20x10000-8 692µs ± 1% 608µs ± 1% -12.10% (p=0.000 n=10+10) Encode17x3x1M-8 1.78ms ± 5% 1.49ms ± 1% -16.63% (p=0.000 n=10+10) Encode10x4x16M-8 21.5ms ± 5% 19.6ms ± 4% -8.74% (p=0.000 n=10+9) Encode5x2x1M-8 343µs ± 2% 267µs ± 2% -22.22% (p=0.000 n=9+10) Encode10x2x1M-8 858µs ± 5% 701µs ± 5% -18.34% (p=0.000 n=10+10) Encode10x4x1M-8 1.34ms ± 1% 1.16ms ± 1% -13.19% (p=0.000 n=9+9) Encode50x20x1M-8 30.3ms ± 4% 25.0ms ± 2% -17.51% (p=0.000 n=10+8) Encode17x3x16M-8 26.9ms ± 1% 24.5ms ± 4% -9.13% (p=0.000 n=8+10) name old speed new speed delta Encode10x2x10000-8 5.45GB/s ± 1% 6.22GB/s ± 1% +14.20% (p=0.000 n=9+9) Encode100x20x10000-8 1.44GB/s ± 1% 1.64GB/s ± 1% +13.77% (p=0.000 n=10+10) Encode17x3x1M-8 10.0GB/s ± 5% 12.0GB/s ± 1% +19.88% (p=0.000 n=10+10) Encode10x4x16M-8 7.81GB/s ± 5% 8.56GB/s ± 5% +9.58% (p=0.000 n=10+9) Encode5x2x1M-8 15.3GB/s ± 2% 19.6GB/s ± 2% +28.57% (p=0.000 n=9+10) Encode10x2x1M-8 12.2GB/s ± 5% 15.0GB/s ± 5% +22.45% (p=0.000 n=10+10) Encode10x4x1M-8 7.84GB/s ± 1% 9.03GB/s ± 1% +15.19% (p=0.000 n=9+9) Encode50x20x1M-8 1.73GB/s ± 4% 2.09GB/s ± 4% +20.59% (p=0.000 n=10+9) Encode17x3x16M-8 10.6GB/s ± 1% 11.7GB/s ± 4% +10.12% (p=0.000 n=8+10)
2017-11-18 19:37:40 +03:00
{WithAutoGoroutines(50000), WithMinSplitSize(500)},
{WithInversionCache(false)},
}
for _, o := range opts[:] {
if defaultOptions.useSSSE3 {
n := make([]Option, len(o), len(o)+1)
copy(n, o)
n = append(n, WithSSSE3(true))
opts = append(opts, n)
}
if defaultOptions.useAVX2 {
n := make([]Option, len(o), len(o)+1)
copy(n, o)
n = append(n, WithAVX2(true))
opts = append(opts, n)
}
if defaultOptions.useAVX512 {
n := make([]Option, len(o), len(o)+1)
copy(n, o)
n = append(n, WithAVX512(true))
opts = append(opts, n)
}
}
return opts
}
2015-06-19 17:31:24 +03:00
// TestEncoding runs testEncoding and testEncodingIdx with the default test
// options, then runs testEncoding once per option set from testOpts. The
// per-option testEncodingIdx runs are left out in short mode.
func TestEncoding(t *testing.T) {
	t.Run("default", func(t *testing.T) {
		testEncoding(t, testOptions()...)
	})
	t.Run("default-dx", func(t *testing.T) {
		testEncodingIdx(t, testOptions()...)
	})

	for i, o := range testOpts() {
		encName := fmt.Sprintf("opt-%d", i)
		t.Run(encName, func(t *testing.T) {
			testEncoding(t, o...)
		})
		if testing.Short() {
			continue
		}
		idxName := fmt.Sprintf("idx-opt-%d", i)
		t.Run(idxName, func(t *testing.T) {
			testEncodingIdx(t, o...)
		})
	}
}
// Matrix sizes to test.
// Note that the PAR1 matrix will fail on some combinations.
2021-03-08 18:13:24 +03:00
var (
	// testSizes lists the shard layouts to exercise; each entry is
	// {data shards, parity shards}.
	testSizes = [][2]int{
		{1, 0}, {3, 0}, {5, 0}, {8, 0}, {10, 0}, {12, 0}, {14, 0}, {41, 0}, {49, 0},
		{1, 1}, {1, 2}, {3, 3}, {3, 1}, {5, 3}, {8, 4}, {10, 30}, {12, 10}, {14, 7}, {41, 17}, {49, 1}, {5, 20},
		{256, 20}, {500, 300}, {2945, 129},
	}

	// testDataSizes holds the per-shard byte counts used by full test runs.
	testDataSizes = []int{10, 100, 1000, 10001, 100003, 1000055}

	// testDataSizesShort is the reduced list of per-shard byte counts.
	testDataSizesShort = []int{10, 10001, 100003}
)
2015-08-07 06:01:54 +03:00
func testEncoding(t *testing.T, o ...Option) {
for _, size := range testSizes {
data, parity := size[0], size[1]
rng := rand.New(rand.NewSource(0xabadc0cac01a))
t.Run(fmt.Sprintf("%dx%d", data, parity), func(t *testing.T) {
sz := testDataSizes
if testing.Short() || data+parity > 256 {
sz = testDataSizesShort
}
for _, perShard := range sz {
if data+parity > 256 {
if perShard > 1000 {
t.Skip("long tests not needed. Not length sensitive")
}
// Round up to 64 bytes.
perShard = (perShard + 63) &^ 63
}
t.Run(fmt.Sprint(perShard), func(t *testing.T) {
r, err := New(data, parity, testOptions(o...)...)
if err != nil {
t.Fatal(err)
}
shards := make([][]byte, data+parity)
for s := range shards {
shards[s] = make([]byte, perShard)
}
for s := 0; s < len(shards); s++ {
rng.Read(shards[s])
}
err = r.Encode(shards)
if err != nil {
t.Fatal(err)
}
ok, err := r.Verify(shards)
if err != nil {
t.Fatal(err)
}
if !ok {
t.Fatal("Verification failed")
}
2021-03-08 18:13:24 +03:00
if parity == 0 {
// Check that Reconstruct and ReconstructData do nothing
err = r.ReconstructData(shards)
if err != nil {
t.Fatal(err)
}
err = r.Reconstruct(shards)
if err != nil {
t.Fatal(err)
}
// Skip integrity checks
return
}
// Delete one in data
idx := rng.Intn(data)
want := shards[idx]
shards[idx] = nil
err = r.ReconstructData(shards)
if err != nil {
t.Fatal(err)
}
if !bytes.Equal(shards[idx], want) {
t.Fatal("did not ReconstructData correctly")
}
// Delete one randomly
idx = rng.Intn(data + parity)
want = shards[idx]
shards[idx] = nil
err = r.Reconstruct(shards)
if err != nil {
t.Fatal(err)
}
if !bytes.Equal(shards[idx], want) {
t.Fatal("did not Reconstruct correctly")
}
err = r.Encode(make([][]byte, 1))
if err != ErrTooFewShards {
t.Errorf("expected %v, got %v", ErrTooFewShards, err)
}
// Make one too short.
shards[idx] = shards[idx][:perShard-1]
err = r.Encode(shards)
if err != ErrShardSize {
t.Errorf("expected %v, got %v", ErrShardSize, err)
}
})
}
})
}
}
func testEncodingIdx(t *testing.T, o ...Option) {
for _, size := range testSizes {
data, parity := size[0], size[1]
rng := rand.New(rand.NewSource(0xabadc0cac01a))
t.Run(fmt.Sprintf("%dx%d", data, parity), func(t *testing.T) {
if data+parity > 256 {
t.Skip("EncodingIdx not supported for total shards > 256")
}
sz := testDataSizes
if testing.Short() {
sz = testDataSizesShort
}
for _, perShard := range sz {
t.Run(fmt.Sprint(perShard), func(t *testing.T) {
r, err := New(data, parity, testOptions(o...)...)
if err != nil {
t.Fatal(err)
}
shards := make([][]byte, data+parity)
for s := range shards {
shards[s] = make([]byte, perShard)
}
shuffle := make([]int, data)
for i := range shuffle {
shuffle[i] = i
}
rng.Shuffle(len(shuffle), func(i, j int) { shuffle[i], shuffle[j] = shuffle[j], shuffle[i] })
// Send shards in random order.
for s := 0; s < data; s++ {
s := shuffle[s]
rng.Read(shards[s])
err = r.EncodeIdx(shards[s], s, shards[data:])
if err != nil {
t.Fatal(err)
}
}
ok, err := r.Verify(shards)
if err != nil {
t.Fatal(err)
}
if !ok {
t.Fatal("Verification failed")
}
if parity == 0 {
// Check that Reconstruct and ReconstructData do nothing
err = r.ReconstructData(shards)
if err != nil {
t.Fatal(err)
}
err = r.Reconstruct(shards)
if err != nil {
2021-03-08 18:13:24 +03:00
t.Fatal(err)
}
// Skip integrity checks
return
}
// Delete one in data
idx := rng.Intn(data)
want := shards[idx]
shards[idx] = nil
err = r.ReconstructData(shards)
if err != nil {
t.Fatal(err)
}
if !bytes.Equal(shards[idx], want) {
t.Fatal("did not ReconstructData correctly")
}
// Delete one randomly
idx = rng.Intn(data + parity)
want = shards[idx]
shards[idx] = nil
err = r.Reconstruct(shards)
if err != nil {
t.Fatal(err)
}
if !bytes.Equal(shards[idx], want) {
t.Fatal("did not Reconstruct correctly")
}
err = r.Encode(make([][]byte, 1))
if err != ErrTooFewShards {
t.Errorf("expected %v, got %v", ErrTooFewShards, err)
}
// Make one too short.
shards[idx] = shards[idx][:perShard-1]
err = r.Encode(shards)
if err != ErrShardSize {
t.Errorf("expected %v, got %v", ErrShardSize, err)
}
})
}
})
2015-08-07 06:01:54 +03:00
}
2015-06-19 17:31:24 +03:00
}
// TestUpdate runs testUpdate once for each option set returned by testOpts.
func TestUpdate(t *testing.T) {
	for idx, optSet := range testOpts() {
		name := fmt.Sprintf("options %d", idx)
		t.Run(name, func(t *testing.T) {
			testUpdate(t, optSet...)
		})
	}
}
// testUpdate checks Update for the 10+3 and 17+2 layouts: after encoding
// random shards it replaces data shards one at a time, then in adjacent
// pairs, then in every contiguous window of every length, calling Verify
// after each Update.
//
// o is passed through testOptions before being handed to New.
func testUpdate(t *testing.T, o ...Option) {
	rand.Seed(0)
	layouts := [][2]int{{10, 3}, {17, 2}}
	for _, layout := range layouts {
		data, parity := layout[0], layout[1]
		t.Run(fmt.Sprintf("%dx%d", data, parity), func(t *testing.T) {
			sizes := testDataSizesShort
			if testing.Short() {
				sizes = []int{50000}
			}
			for _, perShard := range sizes {
				t.Run(fmt.Sprint(perShard), func(t *testing.T) {
					r, err := New(data, parity, testOptions(o...)...)
					if err != nil {
						t.Fatal(err)
					}

					shards := make([][]byte, data+parity)
					for i := range shards {
						shards[i] = make([]byte, perShard)
						fillRandom(shards[i])
					}
					if err = r.Encode(shards); err != nil {
						t.Fatal(err)
					}

					// mustVerify aborts the subtest unless shards verifies.
					mustVerify := func() {
						ok, err := r.Verify(shards)
						if err != nil {
							t.Fatal(err)
						}
						if !ok {
							t.Fatal("Verification failed")
						}
					}
					// mustUpdate aborts the subtest if Update fails.
					mustUpdate := func(replacement [][]byte) {
						if err := r.Update(shards, replacement); err != nil {
							t.Fatal(err)
						}
					}
					// freshShard returns a new randomly filled shard.
					freshShard := func() []byte {
						b := make([]byte, perShard)
						fillRandom(b)
						return b
					}

					mustVerify()

					// Replace a single data shard at a time.
					newdatashards := make([][]byte, data)
					for s := range newdatashards {
						newdatashards[s] = freshShard()
						mustUpdate(newdatashards)
						shards[s] = newdatashards[s]
						mustVerify()
						newdatashards[s] = nil
					}

					// Replace each pair of neighbouring data shards.
					for s := 0; s+1 < len(newdatashards); s++ {
						newdatashards[s] = freshShard()
						newdatashards[s+1] = freshShard()
						mustUpdate(newdatashards)
						shards[s] = newdatashards[s]
						shards[s+1] = newdatashards[s+1]
						mustVerify()
						newdatashards[s] = nil
						newdatashards[s+1] = nil
					}

					// Replace every contiguous window of every length.
					for width := 1; width <= data; width++ {
						for start := 0; start+width <= data; start++ {
							window := newdatashards[start : start+width]
							for i := range window {
								window[i] = freshShard()
							}
							mustUpdate(newdatashards)
							copy(shards[start:start+width], window)
							mustVerify()
							for i := range window {
								window[i] = nil
							}
						}
					}
				})
			}
		})
	}
}
2015-06-19 17:31:24 +03:00
// TestReconstruct runs testReconstruct with no extra options and then once
// for each option set returned by testOpts.
func TestReconstruct(t *testing.T) {
	testReconstruct(t)

	for idx, optSet := range testOpts() {
		name := fmt.Sprintf("options %d", idx)
		t.Run(name, func(t *testing.T) {
			testReconstruct(t, optSet...)
		})
	}
}
func testReconstruct(t *testing.T, o ...Option) {
2015-06-19 17:31:24 +03:00
perShard := 50000
r, err := New(10, 3, testOptions(o...)...)
2015-06-19 17:31:24 +03:00
if err != nil {
t.Fatal(err)
}
shards := make([][]byte, 13)
for s := range shards {
shards[s] = make([]byte, perShard)
}
rand.Seed(0)
for s := 0; s < 13; s++ {
2015-06-19 17:31:24 +03:00
fillRandom(shards[s])
}
err = r.Encode(shards)
if err != nil {
t.Fatal(err)
}
// Reconstruct with all shards present
err = r.Reconstruct(shards)
if err != nil {
t.Fatal(err)
}
// Reconstruct with 10 shards present. Use pre-allocated memory for one of them.
2015-06-19 17:31:24 +03:00
shards[0] = nil
shards[7] = nil
shard11 := shards[11]
shards[11] = shard11[:0]
fillRandom(shard11)
2015-06-19 17:31:24 +03:00
err = r.Reconstruct(shards)
if err != nil {
t.Fatal(err)
}
ok, err := r.Verify(shards)
if err != nil {
t.Fatal(err)
}
if !ok {
t.Fatal("Verification failed")
}
if &shard11[0] != &shards[11][0] {
t.Errorf("Shard was not reconstructed into pre-allocated memory")
}
// Reconstruct with 9 shards present (should fail)
shards[0] = nil
shards[4] = nil
shards[7] = nil
shards[11] = nil
err = r.Reconstruct(shards)
if err != ErrTooFewShards {
t.Errorf("expected %v, got %v", ErrTooFewShards, err)
}
err = r.Reconstruct(make([][]byte, 1))
if err != ErrTooFewShards {
t.Errorf("expected %v, got %v", ErrTooFewShards, err)
}
err = r.Reconstruct(make([][]byte, 13))
if err != ErrShardNoData {
t.Errorf("expected %v, got %v", ErrShardNoData, err)
}
2015-06-19 17:31:24 +03:00
}
// TestReconstructCustom encodes seven shards with a 4+3 encoder built from a
// custom coding matrix, then checks that Reconstruct restores the data when
// one and when three data shards are missing.
//
// Each reconstruction works on a shallow copy of the encoded shards. The
// rebuilt copy is verified and every rebuilt shard is compared with the
// original, so an incorrect reconstruction cannot pass unnoticed.
func TestReconstructCustom(t *testing.T) {
	perShard := 50000
	r, err := New(4, 3, WithCustomMatrix([][]byte{
		{1, 1, 0, 0},
		{0, 0, 1, 1},
		{1, 2, 3, 4},
	}))
	if err != nil {
		t.Fatal(err)
	}
	shards := make([][]byte, 7)
	for s := range shards {
		shards[s] = make([]byte, perShard)
	}

	rand.Seed(0)
	for s := 0; s < len(shards); s++ {
		fillRandom(shards[s])
	}

	err = r.Encode(shards)
	if err != nil {
		t.Fatal(err)
	}

	// checkRebuild drops the listed shards from a copy of shards,
	// reconstructs them and compares the result with the originals.
	checkRebuild := func(missing ...int) {
		shards1 := make([][]byte, len(shards))
		copy(shards1, shards)
		for _, idx := range missing {
			shards1[idx] = nil
		}

		if err := r.Reconstruct(shards1); err != nil {
			t.Fatal(err)
		}

		ok, err := r.Verify(shards1)
		if err != nil {
			t.Fatal(err)
		}
		if !ok {
			t.Fatal("Verification failed")
		}
		for _, idx := range missing {
			if !bytes.Equal(shards1[idx], shards[idx]) {
				t.Fatalf("shard %d was not reconstructed correctly", idx)
			}
		}
	}

	// Reconstruct with 1 shard absent.
	checkRebuild(0)

	// Reconstruct with 3 shards absent.
	checkRebuild(0, 1, 2)
}
// TestReconstructData runs testReconstructData with no extra options and then
// once for each option set returned by testOpts.
func TestReconstructData(t *testing.T) {
	testReconstructData(t)

	for idx, optSet := range testOpts() {
		name := fmt.Sprintf("options %d", idx)
		t.Run(name, func(t *testing.T) {
			testReconstructData(t, optSet...)
		})
	}
}
// testReconstructData checks ReconstructData and ReconstructSome on an 8+5
// layout: with all shards present, with a subset of the missing shards
// requested, with a pre-allocated zero-length shard, with parity shards
// missing, with too few shards, and with malformed shard lists.
//
// o is passed through testOptions before being handed to New.
func testReconstructData(t *testing.T, o ...Option) {
	perShard := 100000
	r, err := New(8, 5, testOptions(o...)...)
	if err != nil {
		t.Fatal(err)
	}
	shards := make([][]byte, 13)
	for s := range shards {
		shards[s] = make([]byte, perShard)
	}

	rand.Seed(0)
	for s := 0; s < 13; s++ {
		fillRandom(shards[s])
	}

	err = r.Encode(shards)
	if err != nil {
		t.Fatal(err)
	}

	// Reconstruct with all shards present
	err = r.ReconstructData(shards)
	if err != nil {
		t.Fatal(err)
	}

	// Drop data shards 2-6 but request only shards 3 and 4 back.
	shardsCopy := make([][]byte, 13)
	copy(shardsCopy, shards)
	shardsCopy[2] = nil
	shardsCopy[3] = nil
	shardsCopy[4] = nil
	shardsCopy[5] = nil
	shardsCopy[6] = nil

	shardsRequired := make([]bool, 8)
	shardsRequired[3] = true
	shardsRequired[4] = true
	err = r.ReconstructSome(shardsCopy, shardsRequired)
	if err != nil {
		t.Fatal(err)
	}

	if !bytes.Equal(shardsCopy[3], shards[3]) ||
		!bytes.Equal(shardsCopy[4], shards[4]) {
		t.Fatal("ReconstructSome did not reconstruct required shards correctly")
	}

	if shardsCopy[2] != nil || shardsCopy[5] != nil || shardsCopy[6] != nil {
		t.Fatal("ReconstructSome reconstructed extra shards")
	}

	// Reconstruct with 10 shards present. Use pre-allocated memory for one of them.
	shards[0] = nil
	shards[2] = nil
	shard4 := shards[4]
	shards[4] = shard4[:0]
	// Overwrite the old contents so a passing Verify proves the shard was
	// really rebuilt.
	fillRandom(shard4)

	err = r.ReconstructData(shards)
	if err != nil {
		t.Fatal(err)
	}

	// Since all parity shards are available, verification will succeed
	ok, err := r.Verify(shards)
	if err != nil {
		t.Fatal(err)
	}
	if !ok {
		t.Fatal("Verification failed")
	}

	if &shard4[0] != &shards[4][0] {
		t.Errorf("Shard was not reconstructed into pre-allocated memory")
	}

	// Reconstruct with 6 data and 4 parity shards
	shards[0] = nil
	shards[2] = nil
	shards[12] = nil

	err = r.ReconstructData(shards)
	if err != nil {
		t.Fatal(err)
	}

	// Verification will fail now due to absence of a parity block
	_, err = r.Verify(shards)
	if err != ErrShardSize {
		t.Errorf("expected %v, got %v", ErrShardSize, err)
	}

	// Reconstruct with 7 data and 1 parity shards
	shards[0] = nil
	shards[9] = nil
	shards[10] = nil
	shards[11] = nil
	shards[12] = nil

	err = r.ReconstructData(shards)
	if err != nil {
		t.Fatal(err)
	}

	_, err = r.Verify(shards)
	if err != ErrShardSize {
		t.Errorf("expected %v, got %v", ErrShardSize, err)
	}

	// Reconstruct with 6 data and 1 parity shards (should fail)
	shards[0] = nil
	shards[1] = nil
	shards[9] = nil
	shards[10] = nil
	shards[11] = nil
	shards[12] = nil

	err = r.ReconstructData(shards)
	if err != ErrTooFewShards {
		t.Errorf("expected %v, got %v", ErrTooFewShards, err)
	}

	err = r.ReconstructData(make([][]byte, 1))
	if err != ErrTooFewShards {
		t.Errorf("expected %v, got %v", ErrTooFewShards, err)
	}
	err = r.ReconstructData(make([][]byte, 13))
	if err != ErrShardNoData {
		t.Errorf("expected %v, got %v", ErrShardNoData, err)
	}
}
// TestReconstructPAR1Singular encodes a 4+4 layout with the PAR1 matrix,
// removes a set of shards that leaves a singular decoding matrix, and expects
// Reconstruct to return errSingular.
func TestReconstructPAR1Singular(t *testing.T) {
	perShard := 50
	r, err := New(4, 4, testOptions(WithPAR1Matrix())...)
	if err != nil {
		t.Fatal(err)
	}
	shards := make([][]byte, 8)
	for s := range shards {
		shards[s] = make([]byte, perShard)
	}

	rand.Seed(0)
	for s := 0; s < 8; s++ {
		fillRandom(shards[s])
	}

	err = r.Encode(shards)
	if err != nil {
		t.Fatal(err)
	}

	// Reconstruct with only the last data shard present, and the
	// first, second, and fourth parity shard present (based on
	// the result of TestBuildMatrixPAR1Singular). This should
	// fail.
	shards[0] = nil
	shards[1] = nil
	shards[2] = nil
	shards[6] = nil

	err = r.Reconstruct(shards)
	if err != errSingular {
		t.Fatalf("expected %v, got %v", errSingular, err)
	}
}
// TestVerify runs testVerify with no extra options and then once for each
// option set returned by testOpts.
func TestVerify(t *testing.T) {
	testVerify(t)

	for idx, optSet := range testOpts() {
		name := fmt.Sprintf("options %d", idx)
		t.Run(name, func(t *testing.T) {
			testVerify(t, optSet...)
		})
	}
}
func testVerify(t *testing.T, o ...Option) {
perShard := 33333
r, err := New(10, 4, testOptions(o...)...)
if err != nil {
t.Fatal(err)
}
shards := make([][]byte, 14)
for s := range shards {
shards[s] = make([]byte, perShard)
}
rand.Seed(0)
for s := 0; s < 10; s++ {
fillRandom(shards[s])
}
err = r.Encode(shards)
if err != nil {
t.Fatal(err)
}
ok, err := r.Verify(shards)
if err != nil {
t.Fatal(err)
}
if !ok {
Faster AVX2 encoding (#153) * Remove 50% of bounds checks when copying. * Use RIP only addressing, free one register. ``` benchmark old MB/s new MB/s speedup BenchmarkGalois128K-32 57663.49 58005.87 1.01x BenchmarkGalois1M-32 49479.31 49848.29 1.01x BenchmarkGaloisXor128K-32 46310.69 46501.88 1.00x BenchmarkGaloisXor1M-32 43804.86 43984.39 1.00x BenchmarkEncode10x2x10000-32 25926.93 27457.75 1.06x BenchmarkEncode100x20x10000-32 2635.82 2818.95 1.07x BenchmarkEncode17x3x1M-32 63215.11 61576.76 0.97x BenchmarkEncode10x4x16M-32 19551.54 19505.07 1.00x BenchmarkEncode5x2x1M-32 79612.06 81985.14 1.03x BenchmarkEncode10x2x1M-32 121478.29 127739.41 1.05x BenchmarkEncode10x4x1M-32 70757.61 74423.67 1.05x BenchmarkEncode50x20x1M-32 19811.96 20103.32 1.01x BenchmarkEncode17x3x16M-32 27202.10 27825.34 1.02x BenchmarkEncode_8x4x8M-32 19029.04 19701.31 1.04x BenchmarkEncode_12x4x12M-32 22449.87 22480.51 1.00x BenchmarkEncode_16x4x16M-32 24536.74 24672.24 1.01x BenchmarkEncode_16x4x32M-32 24381.34 24981.99 1.02x BenchmarkEncode_16x4x64M-32 24717.69 25086.94 1.01x BenchmarkEncode_8x5x8M-32 16763.51 17154.04 1.02x BenchmarkEncode_8x6x8M-32 15067.22 15205.87 1.01x BenchmarkEncode_8x7x8M-32 13156.38 13589.40 1.03x BenchmarkEncode_8x9x8M-32 11363.74 11523.70 1.01x BenchmarkEncode_8x10x8M-32 10359.37 10474.91 1.01x BenchmarkEncode_8x11x8M-32 9627.07 9463.24 0.98x BenchmarkEncode_8x8x05M-32 30104.80 32634.89 1.08x BenchmarkEncode_8x8x1M-32 36497.28 36425.88 1.00x BenchmarkEncode_8x8x8M-32 12186.19 11602.41 0.95x BenchmarkEncode_8x8x32M-32 11670.72 11413.71 0.98x BenchmarkEncode_24x8x24M-32 21709.83 21652.50 1.00x BenchmarkEncode_24x8x48M-32 22494.40 22280.59 0.99x BenchmarkVerify10x2x10000-32 10567.56 10483.91 0.99x BenchmarkVerify50x5x50000-32 28102.84 27923.63 0.99x BenchmarkVerify10x2x1M-32 30298.33 30106.18 0.99x BenchmarkVerify5x2x1M-32 16115.91 15847.03 0.98x BenchmarkVerify10x4x1M-32 15382.13 14852.68 0.97x BenchmarkVerify50x20x1M-32 8476.02 8466.24 1.00x 
BenchmarkVerify10x4x16M-32 15101.03 15434.71 1.02x BenchmarkReconstruct10x2x10000-32 26228.18 26960.19 1.03x BenchmarkReconstruct50x5x50000-32 31091.42 30975.82 1.00x BenchmarkReconstruct10x2x1M-32 58548.87 60281.92 1.03x BenchmarkReconstruct5x2x1M-32 39499.23 41791.80 1.06x BenchmarkReconstruct10x4x1M-32 41448.60 43053.15 1.04x BenchmarkReconstruct50x20x1M-32 17185.99 17354.67 1.01x BenchmarkReconstruct10x4x16M-32 18798.60 18847.43 1.00x BenchmarkReconstructData10x2x10000-32 27208.48 27538.38 1.01x BenchmarkReconstructData50x5x50000-32 32135.65 32078.91 1.00x BenchmarkReconstructData10x2x1M-32 63180.19 67332.17 1.07x BenchmarkReconstructData5x2x1M-32 47532.85 49932.17 1.05x BenchmarkReconstructData10x4x1M-32 50059.14 52323.15 1.05x BenchmarkReconstructData50x20x1M-32 26679.75 26714.11 1.00x BenchmarkReconstructData10x4x16M-32 24854.99 24527.23 0.99x BenchmarkReconstructP10x2x10000-32 115089.87 113229.75 0.98x BenchmarkReconstructP10x5x20000-32 129838.75 132871.10 1.02x BenchmarkParallel_8x8x64K-32 69951.43 69980.44 1.00x BenchmarkParallel_8x8x05M-32 11752.94 11724.35 1.00x BenchmarkParallel_20x10x05M-32 18553.93 18613.33 1.00x BenchmarkParallel_8x8x1M-32 11639.19 11746.86 1.01x BenchmarkParallel_8x8x8M-32 11799.36 11685.63 0.99x BenchmarkParallel_8x8x32M-32 11510.94 11791.72 1.02x BenchmarkParallel_8x3x1M-32 20268.92 20678.21 1.02x BenchmarkParallel_8x4x1M-32 17616.05 17856.17 1.01x BenchmarkParallel_8x5x1M-32 15590.87 15872.42 1.02x BenchmarkStreamEncode10x2x10000-32 14917.08 15408.39 1.03x BenchmarkStreamEncode100x20x10000-32 2014.81 2077.31 1.03x BenchmarkStreamEncode17x3x1M-32 11839.37 12434.80 1.05x BenchmarkStreamEncode10x4x16M-32 9151.14 9206.98 1.01x BenchmarkStreamEncode5x2x1M-32 13598.55 13663.56 1.00x BenchmarkStreamEncode10x2x1M-32 13192.91 13453.41 1.02x BenchmarkStreamEncode10x4x1M-32 12109.90 12050.68 1.00x BenchmarkStreamEncode50x20x1M-32 8640.73 8370.10 0.97x BenchmarkStreamEncode17x3x16M-32 10473.17 10527.04 1.01x 
BenchmarkStreamVerify10x2x10000-32 7032.23 7128.82 1.01x BenchmarkStreamVerify50x5x50000-32 13023.46 13109.31 1.01x BenchmarkStreamVerify10x2x1M-32 11941.63 11949.91 1.00x BenchmarkStreamVerify5x2x1M-32 8029.93 8263.39 1.03x BenchmarkStreamVerify10x4x1M-32 8137.82 8271.11 1.02x BenchmarkStreamVerify50x20x1M-32 7378.87 7708.81 1.04x BenchmarkStreamVerify10x4x16M-32 8973.18 8955.29 1.00x ```
2020-11-10 16:39:23 +03:00
t.Error("Verification failed")
return
}
2015-08-07 06:06:30 +03:00
// Put in random data. Verification should fail
fillRandom(shards[10])
ok, err = r.Verify(shards)
if err != nil {
t.Fatal(err)
}
if ok {
t.Fatal("Verification did not fail")
}
// Re-encode
err = r.Encode(shards)
if err != nil {
t.Fatal(err)
}
// Fill a data segment with random data
fillRandom(shards[0])
ok, err = r.Verify(shards)
if err != nil {
t.Fatal(err)
}
if ok {
t.Fatal("Verification did not fail")
}
2015-08-07 06:06:30 +03:00
_, err = r.Verify(make([][]byte, 1))
if err != ErrTooFewShards {
t.Errorf("expected %v, got %v", ErrTooFewShards, err)
}
_, err = r.Verify(make([][]byte, 14))
if err != ErrShardNoData {
t.Errorf("expected %v, got %v", ErrShardNoData, err)
}
}
2015-06-19 17:31:24 +03:00
func TestOneEncode(t *testing.T) {
codec, err := New(5, 5, testOptions()...)
2015-06-19 17:31:24 +03:00
if err != nil {
t.Fatal(err)
}
2015-08-07 05:56:32 +03:00
shards := [][]byte{
{0, 1},
{4, 5},
{2, 3},
{6, 7},
{8, 9},
{0, 0},
{0, 0},
{0, 0},
{0, 0},
{0, 0},
}
2015-06-19 17:31:24 +03:00
codec.Encode(shards)
if shards[5][0] != 12 || shards[5][1] != 13 {
t.Fatal("shard 5 mismatch")
}
if shards[6][0] != 10 || shards[6][1] != 11 {
t.Fatal("shard 6 mismatch")
}
if shards[7][0] != 14 || shards[7][1] != 15 {
t.Fatal("shard 7 mismatch")
}
if shards[8][0] != 90 || shards[8][1] != 91 {
t.Fatal("shard 8 mismatch")
}
if shards[9][0] != 94 || shards[9][1] != 95 {
t.Fatal("shard 9 mismatch")
}
ok, err := codec.Verify(shards)
if err != nil {
t.Fatal(err)
}
if !ok {
t.Fatal("did not verify")
}
2015-06-20 11:11:33 +03:00
shards[8][0]++
2015-06-19 17:31:24 +03:00
ok, err = codec.Verify(shards)
if err != nil {
t.Fatal(err)
}
if ok {
t.Fatal("verify did not fail as expected")
}
}
// fillRandom overwrites every byte of p with pseudo-random data drawn from
// the package-level math/rand source. A nil or empty p is left untouched.
func fillRandom(p []byte) {
	// Each Int63 call supplies 63 random bits, enough for seven whole bytes.
	for i := 0; i < len(p); i += 7 {
		val := rand.Int63()
		for j := 0; i+j < len(p) && j < 7; j++ {
			p[i+j] = byte(val)
			val >>= 8
		}
	}
}
func benchmarkEncode(b *testing.B, dataShards, parityShards, shardSize int) {
r, err := New(dataShards, parityShards, testOptions(WithAutoGoroutines(shardSize))...)
2015-06-19 17:31:24 +03:00
if err != nil {
b.Fatal(err)
}
shards := make([][]byte, dataShards+parityShards)
2015-06-19 17:31:24 +03:00
for s := range shards {
shards[s] = make([]byte, shardSize)
}
rand.Seed(0)
for s := 0; s < dataShards; s++ {
2015-06-19 17:31:24 +03:00
fillRandom(shards[s])
}
b.SetBytes(int64(shardSize * (dataShards + parityShards)))
2015-06-19 17:31:24 +03:00
b.ResetTimer()
avx2: Improve speed when > 10 input or output shards. (#174) Speeds are including a limiting the number of goroutines with all AVX2 paths, Before/after ``` benchmark old ns/op new ns/op delta BenchmarkGalois128K-32 2240 2240 +0.00% BenchmarkGalois1M-32 19578 18891 -3.51% BenchmarkGaloisXor128K-32 2798 2852 +1.93% BenchmarkGaloisXor1M-32 23334 23345 +0.05% BenchmarkEncode2x1x1M-32 34357 34370 +0.04% BenchmarkEncode10x2x10000-32 3210 3093 -3.64% BenchmarkEncode100x20x10000-32 362925 148214 -59.16% BenchmarkEncode17x3x1M-32 323767 224157 -30.77% BenchmarkEncode10x4x16M-32 8376895 8376737 -0.00% BenchmarkEncode5x2x1M-32 68365 66861 -2.20% BenchmarkEncode10x2x1M-32 101407 93023 -8.27% BenchmarkEncode10x4x1M-32 171880 155477 -9.54% BenchmarkEncode50x20x1M-32 3704691 3015047 -18.62% BenchmarkEncode17x3x16M-32 10279233 10106658 -1.68% BenchmarkEncode_8x4x8M-32 3438245 3326479 -3.25% BenchmarkEncode_12x4x12M-32 6632257 6581637 -0.76% BenchmarkEncode_16x4x16M-32 10815755 10788377 -0.25% BenchmarkEncode_16x4x32M-32 21029061 21507995 +2.28% BenchmarkEncode_16x4x64M-32 42145450 43876850 +4.11% BenchmarkEncode_8x5x8M-32 4543208 3846378 -15.34% BenchmarkEncode_8x6x8M-32 5065494 4397218 -13.19% BenchmarkEncode_8x7x8M-32 5818995 4962884 -14.71% BenchmarkEncode_8x9x8M-32 6215449 6114898 -1.62% BenchmarkEncode_8x10x8M-32 6923415 6610501 -4.52% BenchmarkEncode_8x11x8M-32 7365988 7010473 -4.83% BenchmarkEncode_8x8x05M-32 150857 136820 -9.30% BenchmarkEncode_8x8x1M-32 256722 254854 -0.73% BenchmarkEncode_8x8x8M-32 5547790 5422048 -2.27% BenchmarkEncode_8x8x32M-32 23038643 22705859 -1.44% BenchmarkEncode_24x8x24M-32 27729259 30332216 +9.39% BenchmarkEncode_24x8x48M-32 53865705 61187658 +13.59% BenchmarkVerify10x2x10000-32 8769 8154 -7.01% BenchmarkVerify10x2x1M-32 516149 476180 -7.74% BenchmarkVerify5x2x1M-32 443888 419541 -5.48% BenchmarkVerify10x4x1M-32 1030299 948021 -7.99% BenchmarkVerify50x20x1M-32 7209689 6186891 -14.19% BenchmarkVerify10x4x16M-32 17774456 17681879 -0.52% 
BenchmarkReconstruct10x2x10000-32 3352 3256 -2.86% BenchmarkReconstruct50x5x50000-32 166417 140900 -15.33% BenchmarkReconstruct10x2x1M-32 189711 174615 -7.96% BenchmarkReconstruct5x2x1M-32 128080 126520 -1.22% BenchmarkReconstruct10x4x1M-32 273312 254017 -7.06% BenchmarkReconstruct50x20x1M-32 3628812 3192474 -12.02% BenchmarkReconstruct10x4x16M-32 8562186 8781479 +2.56% BenchmarkReconstructData10x2x10000-32 3241 3116 -3.86% BenchmarkReconstructData50x5x50000-32 162520 134794 -17.06% BenchmarkReconstructData10x2x1M-32 171253 161955 -5.43% BenchmarkReconstructData5x2x1M-32 102215 106942 +4.62% BenchmarkReconstructData10x4x1M-32 225593 219969 -2.49% BenchmarkReconstructData50x20x1M-32 2515311 2129721 -15.33% BenchmarkReconstructData10x4x16M-32 6980308 6698111 -4.04% BenchmarkReconstructP10x2x10000-32 924 937 +1.35% BenchmarkReconstructP10x5x20000-32 1639 1703 +3.90% BenchmarkSplit10x4x160M-32 4984993 4898045 -1.74% BenchmarkSplit5x2x5M-32 380415 221446 -41.79% BenchmarkSplit10x2x1M-32 58761 53335 -9.23% BenchmarkSplit10x4x10M-32 643188 410959 -36.11% BenchmarkSplit50x20x50M-32 1843879 1647205 -10.67% BenchmarkSplit17x3x272M-32 3684920 3613951 -1.93% BenchmarkParallel_8x8x64K-32 7022 6630 -5.58% BenchmarkParallel_8x8x05M-32 348308 348369 +0.02% BenchmarkParallel_20x10x05M-32 575672 581028 +0.93% BenchmarkParallel_8x8x1M-32 716033 697167 -2.63% BenchmarkParallel_8x8x8M-32 5716048 5616437 -1.74% BenchmarkParallel_8x8x32M-32 22650878 22098667 -2.44% BenchmarkParallel_8x3x1M-32 406839 399125 -1.90% BenchmarkParallel_8x4x1M-32 459107 463890 +1.04% BenchmarkParallel_8x5x1M-32 527488 520334 -1.36% BenchmarkStreamEncode10x2x10000-32 6013 5878 -2.25% BenchmarkStreamEncode100x20x10000-32 503124 267894 -46.75% BenchmarkStreamEncode17x3x1M-32 1561838 1376618 -11.86% BenchmarkStreamEncode10x4x16M-32 19124427 17762582 -7.12% BenchmarkStreamEncode5x2x1M-32 429701 384666 -10.48% BenchmarkStreamEncode10x2x1M-32 801257 763637 -4.70% BenchmarkStreamEncode10x4x1M-32 876065 820744 -6.31% 
BenchmarkStreamEncode50x20x1M-32 7205112 6081398 -15.60% BenchmarkStreamEncode17x3x16M-32 27182786 26117143 -3.92% BenchmarkStreamVerify10x2x10000-32 13767 14026 +1.88% BenchmarkStreamVerify50x5x50000-32 826983 690453 -16.51% BenchmarkStreamVerify10x2x1M-32 1238566 1182591 -4.52% BenchmarkStreamVerify5x2x1M-32 892661 806301 -9.67% BenchmarkStreamVerify10x4x1M-32 1676394 1631495 -2.68% BenchmarkStreamVerify50x20x1M-32 10877875 10037678 -7.72% BenchmarkStreamVerify10x4x16M-32 27599576 30435400 +10.27% benchmark old MB/s new MB/s speedup BenchmarkGalois128K-32 58518.53 58510.17 1.00x BenchmarkGalois1M-32 53558.10 55507.44 1.04x BenchmarkGaloisXor128K-32 46839.74 45961.09 0.98x BenchmarkGaloisXor1M-32 44936.98 44917.46 1.00x BenchmarkEncode2x1x1M-32 91561.27 91524.11 1.00x BenchmarkEncode10x2x10000-32 37385.54 38792.54 1.04x BenchmarkEncode100x20x10000-32 3306.47 8096.40 2.45x BenchmarkEncode17x3x1M-32 64773.49 93557.14 1.44x BenchmarkEncode10x4x16M-32 28039.15 28039.68 1.00x BenchmarkEncode5x2x1M-32 107365.88 109781.16 1.02x BenchmarkEncode10x2x1M-32 124083.62 135266.27 1.09x BenchmarkEncode10x4x1M-32 85408.99 94419.71 1.11x BenchmarkEncode50x20x1M-32 19812.81 24344.67 1.23x BenchmarkEncode17x3x16M-32 32642.93 33200.32 1.02x BenchmarkEncode_8x4x8M-32 29277.52 30261.21 1.03x BenchmarkEncode_12x4x12M-32 30355.67 30589.14 1.01x BenchmarkEncode_16x4x16M-32 31023.66 31102.39 1.00x BenchmarkEncode_16x4x32M-32 31912.44 31201.82 0.98x BenchmarkEncode_16x4x64M-32 31846.32 30589.65 0.96x BenchmarkEncode_8x5x8M-32 24003.28 28351.84 1.18x BenchmarkEncode_8x6x8M-32 23184.41 26707.91 1.15x BenchmarkEncode_8x7x8M-32 21623.86 25354.03 1.17x BenchmarkEncode_8x9x8M-32 22943.85 23321.13 1.02x BenchmarkEncode_8x10x8M-32 21809.31 22841.68 1.05x BenchmarkEncode_8x11x8M-32 21637.77 22735.06 1.05x BenchmarkEncode_8x8x05M-32 55606.22 61311.47 1.10x BenchmarkEncode_8x8x1M-32 65351.80 65830.73 1.01x BenchmarkEncode_8x8x8M-32 24193.01 24754.07 1.02x BenchmarkEncode_8x8x32M-32 23303.06 23644.60 
1.01x BenchmarkEncode_24x8x24M-32 29041.76 26549.54 0.91x BenchmarkEncode_24x8x48M-32 29900.52 26322.51 0.88x BenchmarkVerify10x2x10000-32 13685.12 14717.10 1.08x BenchmarkVerify10x2x1M-32 24378.43 26424.72 1.08x BenchmarkVerify5x2x1M-32 16535.79 17495.41 1.06x BenchmarkVerify10x4x1M-32 14248.35 15484.96 1.09x BenchmarkVerify50x20x1M-32 10180.79 11863.85 1.17x BenchmarkVerify10x4x16M-32 13214.53 13283.71 1.01x BenchmarkReconstruct10x2x10000-32 35799.16 36854.89 1.03x BenchmarkReconstruct50x5x50000-32 33049.47 39034.89 1.18x BenchmarkReconstruct10x2x1M-32 66326.88 72061.06 1.09x BenchmarkReconstruct5x2x1M-32 57308.21 58014.92 1.01x BenchmarkReconstruct10x4x1M-32 53711.74 57791.66 1.08x BenchmarkReconstruct50x20x1M-32 20227.09 22991.67 1.14x BenchmarkReconstruct10x4x16M-32 27432.37 26747.32 0.98x BenchmarkReconstructData10x2x10000-32 37030.86 38511.87 1.04x BenchmarkReconstructData50x5x50000-32 33842.07 40802.85 1.21x BenchmarkReconstructData10x2x1M-32 73475.57 77693.87 1.06x BenchmarkReconstructData5x2x1M-32 71809.58 68635.57 0.96x BenchmarkReconstructData10x4x1M-32 65073.27 66736.88 1.03x BenchmarkReconstructData50x20x1M-32 29181.41 34464.76 1.18x BenchmarkReconstructData10x4x16M-32 33649.09 35066.75 1.04x BenchmarkReconstructP10x2x10000-32 129819.98 128086.76 0.99x BenchmarkReconstructP10x5x20000-32 183073.89 176202.21 0.96x BenchmarkParallel_8x8x64K-32 149327.33 158153.67 1.06x BenchmarkParallel_8x8x05M-32 24083.89 24079.69 1.00x BenchmarkParallel_20x10x05M-32 27322.20 27070.35 0.99x BenchmarkParallel_8x8x1M-32 23430.78 24064.83 1.03x BenchmarkParallel_8x8x8M-32 23480.86 23897.31 1.02x BenchmarkParallel_8x8x32M-32 23701.99 24294.27 1.02x BenchmarkParallel_8x3x1M-32 28351.11 28899.03 1.02x BenchmarkParallel_8x4x1M-32 27407.34 27124.76 0.99x BenchmarkParallel_8x5x1M-32 25842.27 26197.58 1.01x BenchmarkStreamEncode10x2x10000-32 16629.76 17012.26 1.02x BenchmarkStreamEncode100x20x10000-32 1987.58 3732.83 1.88x BenchmarkStreamEncode17x3x1M-32 11413.34 12948.97 1.13x 
BenchmarkStreamEncode10x4x16M-32 8772.66 9445.26 1.08x BenchmarkStreamEncode5x2x1M-32 12201.21 13629.70 1.12x BenchmarkStreamEncode10x2x1M-32 13086.64 13731.34 1.05x BenchmarkStreamEncode10x4x1M-32 11969.16 12775.92 1.07x BenchmarkStreamEncode50x20x1M-32 7276.61 8621.18 1.18x BenchmarkStreamEncode17x3x16M-32 10492.40 10920.52 1.04x BenchmarkStreamVerify10x2x10000-32 7264.00 7129.49 0.98x BenchmarkStreamVerify50x5x50000-32 6046.07 7241.62 1.20x BenchmarkStreamVerify10x2x1M-32 8466.05 8866.77 1.05x BenchmarkStreamVerify5x2x1M-32 5873.31 6502.39 1.11x BenchmarkStreamVerify10x4x1M-32 6254.95 6427.09 1.03x BenchmarkStreamVerify50x20x1M-32 4819.76 5223.20 1.08x BenchmarkStreamVerify10x4x16M-32 6078.79 5512.40 0.91x ```
2021-12-09 14:28:44 +03:00
b.ReportAllocs()
2015-06-19 17:31:24 +03:00
for i := 0; i < b.N; i++ {
err = r.Encode(shards)
if err != nil {
b.Fatal(err)
}
}
}
Wider AVX2 loops and less usage. (#162) * Experiment with 64 bytes/loop AVX2 * Only reduce when doing 64. * Use no more than 8 goroutines for avx2 codegen. ``` name old speed new speed delta Encode10x2x10000-32 33.3GB/s ± 0% 37.5GB/s ± 1% +12.49% (p=0.000 n=9+10) Encode100x20x10000-32 3.79GB/s ± 5% 3.77GB/s ± 5% ~ (p=0.853 n=10+10) Encode17x3x1M-32 78.2GB/s ± 1% 76.0GB/s ± 6% ~ (p=0.123 n=10+10) Encode10x4x16M-32 28.3GB/s ± 0% 27.7GB/s ± 2% -2.32% (p=0.000 n=8+10) Encode5x2x1M-32 112GB/s ± 1% 113GB/s ± 1% ~ (p=0.796 n=10+10) Encode10x2x1M-32 149GB/s ± 1% 129GB/s ± 3% -13.24% (p=0.000 n=9+10) Encode10x4x1M-32 99.1GB/s ± 1% 91.5GB/s ± 3% -7.74% (p=0.000 n=10+10) Encode50x20x1M-32 19.7GB/s ± 1% 19.8GB/s ± 1% ~ (p=0.447 n=9+10) Encode17x3x16M-32 33.4GB/s ± 0% 33.3GB/s ± 1% -0.46% (p=0.043 n=10+9) Encode_8x4x8M-32 30.1GB/s ± 1% 29.4GB/s ± 3% -2.31% (p=0.000 n=10+10) Encode_12x4x12M-32 30.6GB/s ± 0% 30.5GB/s ± 0% ~ (p=0.720 n=10+9) Encode_16x4x16M-32 31.5GB/s ± 0% 31.5GB/s ± 0% ~ (p=0.497 n=10+9) Encode_16x4x32M-32 31.9GB/s ± 0% 31.5GB/s ± 4% ~ (p=0.165 n=10+10) Encode_16x4x64M-32 32.4GB/s ± 0% 32.3GB/s ± 0% ~ (p=0.321 n=9+8) Encode_8x5x8M-32 28.4GB/s ± 0% 28.4GB/s ± 1% ~ (p=0.237 n=10+8) Encode_8x6x8M-32 27.0GB/s ± 0% 27.2GB/s ± 2% ~ (p=0.075 n=10+10) Encode_8x7x8M-32 26.0GB/s ± 1% 25.8GB/s ± 1% -0.53% (p=0.003 n=9+10) Encode_8x9x8M-32 24.6GB/s ± 1% 24.4GB/s ± 1% -0.63% (p=0.000 n=10+10) Encode_8x10x8M-32 23.7GB/s ± 1% 23.7GB/s ± 0% +0.32% (p=0.035 n=10+9) Encode_8x11x8M-32 23.0GB/s ± 1% 22.8GB/s ± 0% -0.59% (p=0.000 n=9+8) Encode_8x8x05M-32 66.4GB/s ± 1% 64.2GB/s ± 1% -3.32% (p=0.000 n=10+10) Encode_8x8x1M-32 56.7GB/s ± 0% 75.7GB/s ± 2% +33.55% (p=0.000 n=9+9) Encode_8x8x8M-32 24.9GB/s ± 0% 24.9GB/s ± 1% ~ (p=0.146 n=8+10) Encode_8x8x32M-32 23.8GB/s ± 0% 23.4GB/s ± 0% -1.42% (p=0.000 n=9+10) Encode_24x8x24M-32 29.9GB/s ± 0% 29.9GB/s ± 0% ~ (p=0.278 n=10+9) Encode_24x8x48M-32 30.7GB/s ± 1% 30.7GB/s ± 0% ~ (p=0.351 n=9+7) StreamEncode10x2x10000-32 15.5GB/s ± 1% 16.5GB/s 
± 0% +6.53% (p=0.000 n=10+9) StreamEncode100x20x10000-32 2.09GB/s ± 1% 2.06GB/s ± 2% -1.78% (p=0.000 n=10+10) StreamEncode17x3x1M-32 12.2GB/s ± 2% 12.3GB/s ± 1% +1.19% (p=0.008 n=10+9) StreamEncode10x4x16M-32 8.68GB/s ± 0% 9.47GB/s ± 1% +9.05% (p=0.000 n=8+10) StreamEncode5x2x1M-32 12.3GB/s ± 1% 13.2GB/s ± 1% +7.61% (p=0.000 n=10+10) StreamEncode10x2x1M-32 11.5GB/s ± 4% 13.3GB/s ± 2% +15.15% (p=0.000 n=10+7) ```
2021-06-21 16:15:23 +03:00
// BenchmarkEncode2x1x1M runs the encode benchmark with 2 data shards and
// 1 parity shard of 1 MiB (1024*1024 bytes) each.
func BenchmarkEncode2x1x1M(b *testing.B) {
	const shardSize = 1 << 20
	benchmarkEncode(b, 2, 1, shardSize)
}
// BenchmarkEncode800x200 runs the encode benchmark with 800 data shards and
// 200 parity shards as one sub-benchmark per shard size. Sizes start at
// 64 bytes and grow by a factor of 4 up to and including 1 MiB; each
// sub-benchmark is named after its shard size in bytes.
func BenchmarkEncode800x200(b *testing.B) {
	// 1<<6 == 64 and 1<<20 == 1 MiB; stepping the exponent by 2 multiplies by 4.
	for shift := 6; shift <= 20; shift += 2 {
		size := 1 << shift
		b.Run(fmt.Sprint(size), func(b *testing.B) {
			benchmarkEncode(b, 800, 200, size)
		})
	}
}
// BenchmarkEncodeLeopard runs the encode benchmark with 800 data shards and
// 200 parity shards. The shard size is 64 MiB divided across the 800 data
// shards, rounded down to a multiple of 64 bytes; the sub-benchmark is named
// after that size.
func BenchmarkEncodeLeopard(b *testing.B) {
	const (
		totalBytes = 64 << 20
		dataShards = 800
		alignment  = 64
	)
	// Integer division first truncates, so "/ alignment * alignment" rounds down.
	size := totalBytes / dataShards / alignment * alignment
	b.Run(strconv.Itoa(size), func(b *testing.B) {
		benchmarkEncode(b, dataShards, 200, size)
	})
}
2015-06-20 11:11:33 +03:00
func BenchmarkEncode10x2x10000(b *testing.B) {
2015-06-19 17:31:24 +03:00
benchmarkEncode(b, 10, 2, 10000)
}
2015-06-20 11:11:33 +03:00
func BenchmarkEncode100x20x10000(b *testing.B) {
2015-06-19 17:31:24 +03:00
benchmarkEncode(b, 100, 20, 10000)
}
2015-06-20 11:11:33 +03:00
func BenchmarkEncode17x3x1M(b *testing.B) {
2015-06-19 19:25:48 +03:00
benchmarkEncode(b, 17, 3, 1024*1024)
}
2015-06-19 17:31:24 +03:00
// Benchmark 10 data shards and 4 parity shards with 16MB each.
2015-06-20 11:11:33 +03:00
func BenchmarkEncode10x4x16M(b *testing.B) {
2015-06-19 17:31:24 +03:00
benchmarkEncode(b, 10, 4, 16*1024*1024)
}
// Benchmark 5 data shards and 2 parity shards with 1MB each.
2015-06-20 21:51:26 +03:00
func BenchmarkEncode5x2x1M(b *testing.B) {
benchmarkEncode(b, 5, 2, 1024*1024)
}
// BenchmarkEncode10x2x1M runs the encode benchmark with 10 data shards and
// 2 parity shards of 1 MiB (1024*1024 bytes) each.
func BenchmarkEncode10x2x1M(b *testing.B) {
	benchmarkEncode(b, 10, 2, 1024*1024)
}
// Benchmark 10 data shards and 4 parity shards with 1MB each.
2015-06-20 21:51:26 +03:00
func BenchmarkEncode10x4x1M(b *testing.B) {
benchmarkEncode(b, 10, 4, 1024*1024)
}
avx2: Improve speed when > 10 input or output shards. (#174) Speeds are including a limiting the number of goroutines with all AVX2 paths, Before/after ``` benchmark old ns/op new ns/op delta BenchmarkGalois128K-32 2240 2240 +0.00% BenchmarkGalois1M-32 19578 18891 -3.51% BenchmarkGaloisXor128K-32 2798 2852 +1.93% BenchmarkGaloisXor1M-32 23334 23345 +0.05% BenchmarkEncode2x1x1M-32 34357 34370 +0.04% BenchmarkEncode10x2x10000-32 3210 3093 -3.64% BenchmarkEncode100x20x10000-32 362925 148214 -59.16% BenchmarkEncode17x3x1M-32 323767 224157 -30.77% BenchmarkEncode10x4x16M-32 8376895 8376737 -0.00% BenchmarkEncode5x2x1M-32 68365 66861 -2.20% BenchmarkEncode10x2x1M-32 101407 93023 -8.27% BenchmarkEncode10x4x1M-32 171880 155477 -9.54% BenchmarkEncode50x20x1M-32 3704691 3015047 -18.62% BenchmarkEncode17x3x16M-32 10279233 10106658 -1.68% BenchmarkEncode_8x4x8M-32 3438245 3326479 -3.25% BenchmarkEncode_12x4x12M-32 6632257 6581637 -0.76% BenchmarkEncode_16x4x16M-32 10815755 10788377 -0.25% BenchmarkEncode_16x4x32M-32 21029061 21507995 +2.28% BenchmarkEncode_16x4x64M-32 42145450 43876850 +4.11% BenchmarkEncode_8x5x8M-32 4543208 3846378 -15.34% BenchmarkEncode_8x6x8M-32 5065494 4397218 -13.19% BenchmarkEncode_8x7x8M-32 5818995 4962884 -14.71% BenchmarkEncode_8x9x8M-32 6215449 6114898 -1.62% BenchmarkEncode_8x10x8M-32 6923415 6610501 -4.52% BenchmarkEncode_8x11x8M-32 7365988 7010473 -4.83% BenchmarkEncode_8x8x05M-32 150857 136820 -9.30% BenchmarkEncode_8x8x1M-32 256722 254854 -0.73% BenchmarkEncode_8x8x8M-32 5547790 5422048 -2.27% BenchmarkEncode_8x8x32M-32 23038643 22705859 -1.44% BenchmarkEncode_24x8x24M-32 27729259 30332216 +9.39% BenchmarkEncode_24x8x48M-32 53865705 61187658 +13.59% BenchmarkVerify10x2x10000-32 8769 8154 -7.01% BenchmarkVerify10x2x1M-32 516149 476180 -7.74% BenchmarkVerify5x2x1M-32 443888 419541 -5.48% BenchmarkVerify10x4x1M-32 1030299 948021 -7.99% BenchmarkVerify50x20x1M-32 7209689 6186891 -14.19% BenchmarkVerify10x4x16M-32 17774456 17681879 -0.52% 
BenchmarkReconstruct10x2x10000-32 3352 3256 -2.86% BenchmarkReconstruct50x5x50000-32 166417 140900 -15.33% BenchmarkReconstruct10x2x1M-32 189711 174615 -7.96% BenchmarkReconstruct5x2x1M-32 128080 126520 -1.22% BenchmarkReconstruct10x4x1M-32 273312 254017 -7.06% BenchmarkReconstruct50x20x1M-32 3628812 3192474 -12.02% BenchmarkReconstruct10x4x16M-32 8562186 8781479 +2.56% BenchmarkReconstructData10x2x10000-32 3241 3116 -3.86% BenchmarkReconstructData50x5x50000-32 162520 134794 -17.06% BenchmarkReconstructData10x2x1M-32 171253 161955 -5.43% BenchmarkReconstructData5x2x1M-32 102215 106942 +4.62% BenchmarkReconstructData10x4x1M-32 225593 219969 -2.49% BenchmarkReconstructData50x20x1M-32 2515311 2129721 -15.33% BenchmarkReconstructData10x4x16M-32 6980308 6698111 -4.04% BenchmarkReconstructP10x2x10000-32 924 937 +1.35% BenchmarkReconstructP10x5x20000-32 1639 1703 +3.90% BenchmarkSplit10x4x160M-32 4984993 4898045 -1.74% BenchmarkSplit5x2x5M-32 380415 221446 -41.79% BenchmarkSplit10x2x1M-32 58761 53335 -9.23% BenchmarkSplit10x4x10M-32 643188 410959 -36.11% BenchmarkSplit50x20x50M-32 1843879 1647205 -10.67% BenchmarkSplit17x3x272M-32 3684920 3613951 -1.93% BenchmarkParallel_8x8x64K-32 7022 6630 -5.58% BenchmarkParallel_8x8x05M-32 348308 348369 +0.02% BenchmarkParallel_20x10x05M-32 575672 581028 +0.93% BenchmarkParallel_8x8x1M-32 716033 697167 -2.63% BenchmarkParallel_8x8x8M-32 5716048 5616437 -1.74% BenchmarkParallel_8x8x32M-32 22650878 22098667 -2.44% BenchmarkParallel_8x3x1M-32 406839 399125 -1.90% BenchmarkParallel_8x4x1M-32 459107 463890 +1.04% BenchmarkParallel_8x5x1M-32 527488 520334 -1.36% BenchmarkStreamEncode10x2x10000-32 6013 5878 -2.25% BenchmarkStreamEncode100x20x10000-32 503124 267894 -46.75% BenchmarkStreamEncode17x3x1M-32 1561838 1376618 -11.86% BenchmarkStreamEncode10x4x16M-32 19124427 17762582 -7.12% BenchmarkStreamEncode5x2x1M-32 429701 384666 -10.48% BenchmarkStreamEncode10x2x1M-32 801257 763637 -4.70% BenchmarkStreamEncode10x4x1M-32 876065 820744 -6.31% 
BenchmarkStreamEncode50x20x1M-32 7205112 6081398 -15.60% BenchmarkStreamEncode17x3x16M-32 27182786 26117143 -3.92% BenchmarkStreamVerify10x2x10000-32 13767 14026 +1.88% BenchmarkStreamVerify50x5x50000-32 826983 690453 -16.51% BenchmarkStreamVerify10x2x1M-32 1238566 1182591 -4.52% BenchmarkStreamVerify5x2x1M-32 892661 806301 -9.67% BenchmarkStreamVerify10x4x1M-32 1676394 1631495 -2.68% BenchmarkStreamVerify50x20x1M-32 10877875 10037678 -7.72% BenchmarkStreamVerify10x4x16M-32 27599576 30435400 +10.27% benchmark old MB/s new MB/s speedup BenchmarkGalois128K-32 58518.53 58510.17 1.00x BenchmarkGalois1M-32 53558.10 55507.44 1.04x BenchmarkGaloisXor128K-32 46839.74 45961.09 0.98x BenchmarkGaloisXor1M-32 44936.98 44917.46 1.00x BenchmarkEncode2x1x1M-32 91561.27 91524.11 1.00x BenchmarkEncode10x2x10000-32 37385.54 38792.54 1.04x BenchmarkEncode100x20x10000-32 3306.47 8096.40 2.45x BenchmarkEncode17x3x1M-32 64773.49 93557.14 1.44x BenchmarkEncode10x4x16M-32 28039.15 28039.68 1.00x BenchmarkEncode5x2x1M-32 107365.88 109781.16 1.02x BenchmarkEncode10x2x1M-32 124083.62 135266.27 1.09x BenchmarkEncode10x4x1M-32 85408.99 94419.71 1.11x BenchmarkEncode50x20x1M-32 19812.81 24344.67 1.23x BenchmarkEncode17x3x16M-32 32642.93 33200.32 1.02x BenchmarkEncode_8x4x8M-32 29277.52 30261.21 1.03x BenchmarkEncode_12x4x12M-32 30355.67 30589.14 1.01x BenchmarkEncode_16x4x16M-32 31023.66 31102.39 1.00x BenchmarkEncode_16x4x32M-32 31912.44 31201.82 0.98x BenchmarkEncode_16x4x64M-32 31846.32 30589.65 0.96x BenchmarkEncode_8x5x8M-32 24003.28 28351.84 1.18x BenchmarkEncode_8x6x8M-32 23184.41 26707.91 1.15x BenchmarkEncode_8x7x8M-32 21623.86 25354.03 1.17x BenchmarkEncode_8x9x8M-32 22943.85 23321.13 1.02x BenchmarkEncode_8x10x8M-32 21809.31 22841.68 1.05x BenchmarkEncode_8x11x8M-32 21637.77 22735.06 1.05x BenchmarkEncode_8x8x05M-32 55606.22 61311.47 1.10x BenchmarkEncode_8x8x1M-32 65351.80 65830.73 1.01x BenchmarkEncode_8x8x8M-32 24193.01 24754.07 1.02x BenchmarkEncode_8x8x32M-32 23303.06 23644.60 
1.01x BenchmarkEncode_24x8x24M-32 29041.76 26549.54 0.91x BenchmarkEncode_24x8x48M-32 29900.52 26322.51 0.88x BenchmarkVerify10x2x10000-32 13685.12 14717.10 1.08x BenchmarkVerify10x2x1M-32 24378.43 26424.72 1.08x BenchmarkVerify5x2x1M-32 16535.79 17495.41 1.06x BenchmarkVerify10x4x1M-32 14248.35 15484.96 1.09x BenchmarkVerify50x20x1M-32 10180.79 11863.85 1.17x BenchmarkVerify10x4x16M-32 13214.53 13283.71 1.01x BenchmarkReconstruct10x2x10000-32 35799.16 36854.89 1.03x BenchmarkReconstruct50x5x50000-32 33049.47 39034.89 1.18x BenchmarkReconstruct10x2x1M-32 66326.88 72061.06 1.09x BenchmarkReconstruct5x2x1M-32 57308.21 58014.92 1.01x BenchmarkReconstruct10x4x1M-32 53711.74 57791.66 1.08x BenchmarkReconstruct50x20x1M-32 20227.09 22991.67 1.14x BenchmarkReconstruct10x4x16M-32 27432.37 26747.32 0.98x BenchmarkReconstructData10x2x10000-32 37030.86 38511.87 1.04x BenchmarkReconstructData50x5x50000-32 33842.07 40802.85 1.21x BenchmarkReconstructData10x2x1M-32 73475.57 77693.87 1.06x BenchmarkReconstructData5x2x1M-32 71809.58 68635.57 0.96x BenchmarkReconstructData10x4x1M-32 65073.27 66736.88 1.03x BenchmarkReconstructData50x20x1M-32 29181.41 34464.76 1.18x BenchmarkReconstructData10x4x16M-32 33649.09 35066.75 1.04x BenchmarkReconstructP10x2x10000-32 129819.98 128086.76 0.99x BenchmarkReconstructP10x5x20000-32 183073.89 176202.21 0.96x BenchmarkParallel_8x8x64K-32 149327.33 158153.67 1.06x BenchmarkParallel_8x8x05M-32 24083.89 24079.69 1.00x BenchmarkParallel_20x10x05M-32 27322.20 27070.35 0.99x BenchmarkParallel_8x8x1M-32 23430.78 24064.83 1.03x BenchmarkParallel_8x8x8M-32 23480.86 23897.31 1.02x BenchmarkParallel_8x8x32M-32 23701.99 24294.27 1.02x BenchmarkParallel_8x3x1M-32 28351.11 28899.03 1.02x BenchmarkParallel_8x4x1M-32 27407.34 27124.76 0.99x BenchmarkParallel_8x5x1M-32 25842.27 26197.58 1.01x BenchmarkStreamEncode10x2x10000-32 16629.76 17012.26 1.02x BenchmarkStreamEncode100x20x10000-32 1987.58 3732.83 1.88x BenchmarkStreamEncode17x3x1M-32 11413.34 12948.97 1.13x 
BenchmarkStreamEncode10x4x16M-32 8772.66 9445.26 1.08x BenchmarkStreamEncode5x2x1M-32 12201.21 13629.70 1.12x BenchmarkStreamEncode10x2x1M-32 13086.64 13731.34 1.05x BenchmarkStreamEncode10x4x1M-32 11969.16 12775.92 1.07x BenchmarkStreamEncode50x20x1M-32 7276.61 8621.18 1.18x BenchmarkStreamEncode17x3x16M-32 10492.40 10920.52 1.04x BenchmarkStreamVerify10x2x10000-32 7264.00 7129.49 0.98x BenchmarkStreamVerify50x5x50000-32 6046.07 7241.62 1.20x BenchmarkStreamVerify10x2x1M-32 8466.05 8866.77 1.05x BenchmarkStreamVerify5x2x1M-32 5873.31 6502.39 1.11x BenchmarkStreamVerify10x4x1M-32 6254.95 6427.09 1.03x BenchmarkStreamVerify50x20x1M-32 4819.76 5223.20 1.08x BenchmarkStreamVerify10x4x16M-32 6078.79 5512.40 0.91x ```
2021-12-09 14:28:44 +03:00
// Benchmark 50 data shards and 20 parity shards with 1M each.
2015-06-20 21:51:26 +03:00
func BenchmarkEncode50x20x1M(b *testing.B) {
benchmarkEncode(b, 50, 20, 1024*1024)
}
// BenchmarkEncode17x3x16M runs the encode benchmark with 17 data shards and
// 3 parity shards of 16 MiB each.
func BenchmarkEncode17x3x16M(b *testing.B) {
	const shardSize = 16 << 20
	benchmarkEncode(b, 17, 3, shardSize)
}
// The BenchmarkEncode_DxPxS benchmarks below all forward to benchmarkEncode
// with D data shards, P parity shards and a per-shard size S, where the
// suffix "M" denotes MiB and "05M" denotes half a MiB.

// BenchmarkEncode_8x4x8M uses 8 data shards, 4 parity shards, 8 MiB per shard.
func BenchmarkEncode_8x4x8M(b *testing.B) {
	benchmarkEncode(b, 8, 4, 8<<20)
}

// BenchmarkEncode_12x4x12M uses 12 data shards, 4 parity shards, 12 MiB per shard.
func BenchmarkEncode_12x4x12M(b *testing.B) {
	benchmarkEncode(b, 12, 4, 12<<20)
}

// BenchmarkEncode_16x4x16M uses 16 data shards, 4 parity shards, 16 MiB per shard.
func BenchmarkEncode_16x4x16M(b *testing.B) {
	benchmarkEncode(b, 16, 4, 16<<20)
}

// BenchmarkEncode_16x4x32M uses 16 data shards, 4 parity shards, 32 MiB per shard.
func BenchmarkEncode_16x4x32M(b *testing.B) {
	benchmarkEncode(b, 16, 4, 32<<20)
}

// BenchmarkEncode_16x4x64M uses 16 data shards, 4 parity shards, 64 MiB per shard.
func BenchmarkEncode_16x4x64M(b *testing.B) {
	benchmarkEncode(b, 16, 4, 64<<20)
}

// BenchmarkEncode_8x5x8M uses 8 data shards, 5 parity shards, 8 MiB per shard.
func BenchmarkEncode_8x5x8M(b *testing.B) {
	benchmarkEncode(b, 8, 5, 8<<20)
}

// BenchmarkEncode_8x6x8M uses 8 data shards, 6 parity shards, 8 MiB per shard.
func BenchmarkEncode_8x6x8M(b *testing.B) {
	benchmarkEncode(b, 8, 6, 8<<20)
}

// BenchmarkEncode_8x7x8M uses 8 data shards, 7 parity shards, 8 MiB per shard.
func BenchmarkEncode_8x7x8M(b *testing.B) {
	benchmarkEncode(b, 8, 7, 8<<20)
}

// BenchmarkEncode_8x9x8M uses 8 data shards, 9 parity shards, 8 MiB per shard.
func BenchmarkEncode_8x9x8M(b *testing.B) {
	benchmarkEncode(b, 8, 9, 8<<20)
}

// BenchmarkEncode_8x10x8M uses 8 data shards, 10 parity shards, 8 MiB per shard.
func BenchmarkEncode_8x10x8M(b *testing.B) {
	benchmarkEncode(b, 8, 10, 8<<20)
}

// BenchmarkEncode_8x11x8M uses 8 data shards, 11 parity shards, 8 MiB per shard.
func BenchmarkEncode_8x11x8M(b *testing.B) {
	benchmarkEncode(b, 8, 11, 8<<20)
}

// BenchmarkEncode_8x8x05M uses 8 data shards, 8 parity shards, 512 KiB per shard.
func BenchmarkEncode_8x8x05M(b *testing.B) {
	benchmarkEncode(b, 8, 8, 1<<19)
}

// BenchmarkEncode_8x8x1M uses 8 data shards, 8 parity shards, 1 MiB per shard.
func BenchmarkEncode_8x8x1M(b *testing.B) {
	benchmarkEncode(b, 8, 8, 1<<20)
}

// BenchmarkEncode_8x8x8M uses 8 data shards, 8 parity shards, 8 MiB per shard.
func BenchmarkEncode_8x8x8M(b *testing.B) {
	benchmarkEncode(b, 8, 8, 8<<20)
}

// BenchmarkEncode_8x8x32M uses 8 data shards, 8 parity shards, 32 MiB per shard.
func BenchmarkEncode_8x8x32M(b *testing.B) {
	benchmarkEncode(b, 8, 8, 32<<20)
}

// BenchmarkEncode_24x8x24M uses 24 data shards, 8 parity shards, 24 MiB per shard.
func BenchmarkEncode_24x8x24M(b *testing.B) {
	benchmarkEncode(b, 24, 8, 24<<20)
}

// BenchmarkEncode_24x8x48M uses 24 data shards, 8 parity shards, 48 MiB per shard.
func BenchmarkEncode_24x8x48M(b *testing.B) {
	benchmarkEncode(b, 24, 8, 48<<20)
}
2015-06-19 17:31:24 +03:00
func benchmarkVerify(b *testing.B, dataShards, parityShards, shardSize int) {
r, err := New(dataShards, parityShards, testOptions(WithAutoGoroutines(shardSize))...)
2015-06-19 17:31:24 +03:00
if err != nil {
b.Fatal(err)
}
shards := make([][]byte, parityShards+dataShards)
2015-06-19 17:31:24 +03:00
for s := range shards {
shards[s] = make([]byte, shardSize)
}
rand.Seed(0)
for s := 0; s < dataShards; s++ {
2015-06-19 17:31:24 +03:00
fillRandom(shards[s])
}
err = r.Encode(shards)
if err != nil {
b.Fatal(err)
}
b.SetBytes(int64(shardSize * (dataShards + parityShards)))
2015-06-19 17:31:24 +03:00
b.ResetTimer()
avx2: Improve speed when > 10 input or output shards. (#174) Speeds are including a limiting the number of goroutines with all AVX2 paths, Before/after ``` benchmark old ns/op new ns/op delta BenchmarkGalois128K-32 2240 2240 +0.00% BenchmarkGalois1M-32 19578 18891 -3.51% BenchmarkGaloisXor128K-32 2798 2852 +1.93% BenchmarkGaloisXor1M-32 23334 23345 +0.05% BenchmarkEncode2x1x1M-32 34357 34370 +0.04% BenchmarkEncode10x2x10000-32 3210 3093 -3.64% BenchmarkEncode100x20x10000-32 362925 148214 -59.16% BenchmarkEncode17x3x1M-32 323767 224157 -30.77% BenchmarkEncode10x4x16M-32 8376895 8376737 -0.00% BenchmarkEncode5x2x1M-32 68365 66861 -2.20% BenchmarkEncode10x2x1M-32 101407 93023 -8.27% BenchmarkEncode10x4x1M-32 171880 155477 -9.54% BenchmarkEncode50x20x1M-32 3704691 3015047 -18.62% BenchmarkEncode17x3x16M-32 10279233 10106658 -1.68% BenchmarkEncode_8x4x8M-32 3438245 3326479 -3.25% BenchmarkEncode_12x4x12M-32 6632257 6581637 -0.76% BenchmarkEncode_16x4x16M-32 10815755 10788377 -0.25% BenchmarkEncode_16x4x32M-32 21029061 21507995 +2.28% BenchmarkEncode_16x4x64M-32 42145450 43876850 +4.11% BenchmarkEncode_8x5x8M-32 4543208 3846378 -15.34% BenchmarkEncode_8x6x8M-32 5065494 4397218 -13.19% BenchmarkEncode_8x7x8M-32 5818995 4962884 -14.71% BenchmarkEncode_8x9x8M-32 6215449 6114898 -1.62% BenchmarkEncode_8x10x8M-32 6923415 6610501 -4.52% BenchmarkEncode_8x11x8M-32 7365988 7010473 -4.83% BenchmarkEncode_8x8x05M-32 150857 136820 -9.30% BenchmarkEncode_8x8x1M-32 256722 254854 -0.73% BenchmarkEncode_8x8x8M-32 5547790 5422048 -2.27% BenchmarkEncode_8x8x32M-32 23038643 22705859 -1.44% BenchmarkEncode_24x8x24M-32 27729259 30332216 +9.39% BenchmarkEncode_24x8x48M-32 53865705 61187658 +13.59% BenchmarkVerify10x2x10000-32 8769 8154 -7.01% BenchmarkVerify10x2x1M-32 516149 476180 -7.74% BenchmarkVerify5x2x1M-32 443888 419541 -5.48% BenchmarkVerify10x4x1M-32 1030299 948021 -7.99% BenchmarkVerify50x20x1M-32 7209689 6186891 -14.19% BenchmarkVerify10x4x16M-32 17774456 17681879 -0.52% 
BenchmarkReconstruct10x2x10000-32 3352 3256 -2.86% BenchmarkReconstruct50x5x50000-32 166417 140900 -15.33% BenchmarkReconstruct10x2x1M-32 189711 174615 -7.96% BenchmarkReconstruct5x2x1M-32 128080 126520 -1.22% BenchmarkReconstruct10x4x1M-32 273312 254017 -7.06% BenchmarkReconstruct50x20x1M-32 3628812 3192474 -12.02% BenchmarkReconstruct10x4x16M-32 8562186 8781479 +2.56% BenchmarkReconstructData10x2x10000-32 3241 3116 -3.86% BenchmarkReconstructData50x5x50000-32 162520 134794 -17.06% BenchmarkReconstructData10x2x1M-32 171253 161955 -5.43% BenchmarkReconstructData5x2x1M-32 102215 106942 +4.62% BenchmarkReconstructData10x4x1M-32 225593 219969 -2.49% BenchmarkReconstructData50x20x1M-32 2515311 2129721 -15.33% BenchmarkReconstructData10x4x16M-32 6980308 6698111 -4.04% BenchmarkReconstructP10x2x10000-32 924 937 +1.35% BenchmarkReconstructP10x5x20000-32 1639 1703 +3.90% BenchmarkSplit10x4x160M-32 4984993 4898045 -1.74% BenchmarkSplit5x2x5M-32 380415 221446 -41.79% BenchmarkSplit10x2x1M-32 58761 53335 -9.23% BenchmarkSplit10x4x10M-32 643188 410959 -36.11% BenchmarkSplit50x20x50M-32 1843879 1647205 -10.67% BenchmarkSplit17x3x272M-32 3684920 3613951 -1.93% BenchmarkParallel_8x8x64K-32 7022 6630 -5.58% BenchmarkParallel_8x8x05M-32 348308 348369 +0.02% BenchmarkParallel_20x10x05M-32 575672 581028 +0.93% BenchmarkParallel_8x8x1M-32 716033 697167 -2.63% BenchmarkParallel_8x8x8M-32 5716048 5616437 -1.74% BenchmarkParallel_8x8x32M-32 22650878 22098667 -2.44% BenchmarkParallel_8x3x1M-32 406839 399125 -1.90% BenchmarkParallel_8x4x1M-32 459107 463890 +1.04% BenchmarkParallel_8x5x1M-32 527488 520334 -1.36% BenchmarkStreamEncode10x2x10000-32 6013 5878 -2.25% BenchmarkStreamEncode100x20x10000-32 503124 267894 -46.75% BenchmarkStreamEncode17x3x1M-32 1561838 1376618 -11.86% BenchmarkStreamEncode10x4x16M-32 19124427 17762582 -7.12% BenchmarkStreamEncode5x2x1M-32 429701 384666 -10.48% BenchmarkStreamEncode10x2x1M-32 801257 763637 -4.70% BenchmarkStreamEncode10x4x1M-32 876065 820744 -6.31% 
BenchmarkStreamEncode50x20x1M-32 7205112 6081398 -15.60% BenchmarkStreamEncode17x3x16M-32 27182786 26117143 -3.92% BenchmarkStreamVerify10x2x10000-32 13767 14026 +1.88% BenchmarkStreamVerify50x5x50000-32 826983 690453 -16.51% BenchmarkStreamVerify10x2x1M-32 1238566 1182591 -4.52% BenchmarkStreamVerify5x2x1M-32 892661 806301 -9.67% BenchmarkStreamVerify10x4x1M-32 1676394 1631495 -2.68% BenchmarkStreamVerify50x20x1M-32 10877875 10037678 -7.72% BenchmarkStreamVerify10x4x16M-32 27599576 30435400 +10.27% benchmark old MB/s new MB/s speedup BenchmarkGalois128K-32 58518.53 58510.17 1.00x BenchmarkGalois1M-32 53558.10 55507.44 1.04x BenchmarkGaloisXor128K-32 46839.74 45961.09 0.98x BenchmarkGaloisXor1M-32 44936.98 44917.46 1.00x BenchmarkEncode2x1x1M-32 91561.27 91524.11 1.00x BenchmarkEncode10x2x10000-32 37385.54 38792.54 1.04x BenchmarkEncode100x20x10000-32 3306.47 8096.40 2.45x BenchmarkEncode17x3x1M-32 64773.49 93557.14 1.44x BenchmarkEncode10x4x16M-32 28039.15 28039.68 1.00x BenchmarkEncode5x2x1M-32 107365.88 109781.16 1.02x BenchmarkEncode10x2x1M-32 124083.62 135266.27 1.09x BenchmarkEncode10x4x1M-32 85408.99 94419.71 1.11x BenchmarkEncode50x20x1M-32 19812.81 24344.67 1.23x BenchmarkEncode17x3x16M-32 32642.93 33200.32 1.02x BenchmarkEncode_8x4x8M-32 29277.52 30261.21 1.03x BenchmarkEncode_12x4x12M-32 30355.67 30589.14 1.01x BenchmarkEncode_16x4x16M-32 31023.66 31102.39 1.00x BenchmarkEncode_16x4x32M-32 31912.44 31201.82 0.98x BenchmarkEncode_16x4x64M-32 31846.32 30589.65 0.96x BenchmarkEncode_8x5x8M-32 24003.28 28351.84 1.18x BenchmarkEncode_8x6x8M-32 23184.41 26707.91 1.15x BenchmarkEncode_8x7x8M-32 21623.86 25354.03 1.17x BenchmarkEncode_8x9x8M-32 22943.85 23321.13 1.02x BenchmarkEncode_8x10x8M-32 21809.31 22841.68 1.05x BenchmarkEncode_8x11x8M-32 21637.77 22735.06 1.05x BenchmarkEncode_8x8x05M-32 55606.22 61311.47 1.10x BenchmarkEncode_8x8x1M-32 65351.80 65830.73 1.01x BenchmarkEncode_8x8x8M-32 24193.01 24754.07 1.02x BenchmarkEncode_8x8x32M-32 23303.06 23644.60 
1.01x BenchmarkEncode_24x8x24M-32 29041.76 26549.54 0.91x BenchmarkEncode_24x8x48M-32 29900.52 26322.51 0.88x BenchmarkVerify10x2x10000-32 13685.12 14717.10 1.08x BenchmarkVerify10x2x1M-32 24378.43 26424.72 1.08x BenchmarkVerify5x2x1M-32 16535.79 17495.41 1.06x BenchmarkVerify10x4x1M-32 14248.35 15484.96 1.09x BenchmarkVerify50x20x1M-32 10180.79 11863.85 1.17x BenchmarkVerify10x4x16M-32 13214.53 13283.71 1.01x BenchmarkReconstruct10x2x10000-32 35799.16 36854.89 1.03x BenchmarkReconstruct50x5x50000-32 33049.47 39034.89 1.18x BenchmarkReconstruct10x2x1M-32 66326.88 72061.06 1.09x BenchmarkReconstruct5x2x1M-32 57308.21 58014.92 1.01x BenchmarkReconstruct10x4x1M-32 53711.74 57791.66 1.08x BenchmarkReconstruct50x20x1M-32 20227.09 22991.67 1.14x BenchmarkReconstruct10x4x16M-32 27432.37 26747.32 0.98x BenchmarkReconstructData10x2x10000-32 37030.86 38511.87 1.04x BenchmarkReconstructData50x5x50000-32 33842.07 40802.85 1.21x BenchmarkReconstructData10x2x1M-32 73475.57 77693.87 1.06x BenchmarkReconstructData5x2x1M-32 71809.58 68635.57 0.96x BenchmarkReconstructData10x4x1M-32 65073.27 66736.88 1.03x BenchmarkReconstructData50x20x1M-32 29181.41 34464.76 1.18x BenchmarkReconstructData10x4x16M-32 33649.09 35066.75 1.04x BenchmarkReconstructP10x2x10000-32 129819.98 128086.76 0.99x BenchmarkReconstructP10x5x20000-32 183073.89 176202.21 0.96x BenchmarkParallel_8x8x64K-32 149327.33 158153.67 1.06x BenchmarkParallel_8x8x05M-32 24083.89 24079.69 1.00x BenchmarkParallel_20x10x05M-32 27322.20 27070.35 0.99x BenchmarkParallel_8x8x1M-32 23430.78 24064.83 1.03x BenchmarkParallel_8x8x8M-32 23480.86 23897.31 1.02x BenchmarkParallel_8x8x32M-32 23701.99 24294.27 1.02x BenchmarkParallel_8x3x1M-32 28351.11 28899.03 1.02x BenchmarkParallel_8x4x1M-32 27407.34 27124.76 0.99x BenchmarkParallel_8x5x1M-32 25842.27 26197.58 1.01x BenchmarkStreamEncode10x2x10000-32 16629.76 17012.26 1.02x BenchmarkStreamEncode100x20x10000-32 1987.58 3732.83 1.88x BenchmarkStreamEncode17x3x1M-32 11413.34 12948.97 1.13x 
BenchmarkStreamEncode10x4x16M-32 8772.66 9445.26 1.08x BenchmarkStreamEncode5x2x1M-32 12201.21 13629.70 1.12x BenchmarkStreamEncode10x2x1M-32 13086.64 13731.34 1.05x BenchmarkStreamEncode10x4x1M-32 11969.16 12775.92 1.07x BenchmarkStreamEncode50x20x1M-32 7276.61 8621.18 1.18x BenchmarkStreamEncode17x3x16M-32 10492.40 10920.52 1.04x BenchmarkStreamVerify10x2x10000-32 7264.00 7129.49 0.98x BenchmarkStreamVerify50x5x50000-32 6046.07 7241.62 1.20x BenchmarkStreamVerify10x2x1M-32 8466.05 8866.77 1.05x BenchmarkStreamVerify5x2x1M-32 5873.31 6502.39 1.11x BenchmarkStreamVerify10x4x1M-32 6254.95 6427.09 1.03x BenchmarkStreamVerify50x20x1M-32 4819.76 5223.20 1.08x BenchmarkStreamVerify10x4x16M-32 6078.79 5512.40 0.91x ```
2021-12-09 14:28:44 +03:00
b.ReportAllocs()
2015-06-19 17:31:24 +03:00
for i := 0; i < b.N; i++ {
_, err = r.Verify(shards)
if err != nil {
b.Fatal(err)
}
}
}
// BenchmarkVerify800x200 runs the verify benchmark with 800 data shards and
// 200 parity shards as one sub-benchmark per shard size. Sizes start at
// 64 bytes and grow by a factor of 4 up to and including 1 MiB; each
// sub-benchmark is named after its shard size in bytes.
func BenchmarkVerify800x200(b *testing.B) {
	// 1<<6 == 64 and 1<<20 == 1 MiB; stepping the exponent by 2 multiplies by 4.
	for shift := 6; shift <= 20; shift += 2 {
		size := 1 << shift
		b.Run(fmt.Sprint(size), func(b *testing.B) {
			benchmarkVerify(b, 800, 200, size)
		})
	}
}
2015-06-19 17:31:24 +03:00
// Benchmark 10 data slices with 2 parity slices holding 10000 bytes each
2015-06-20 11:11:33 +03:00
func BenchmarkVerify10x2x10000(b *testing.B) {
2015-06-19 17:31:24 +03:00
benchmarkVerify(b, 10, 2, 10000)
}
// Benchmark 50 data slices with 5 parity slices holding 100000 bytes each
avx2: Improve speed when > 10 input or output shards. (#174) Speeds are including a limiting the number of goroutines with all AVX2 paths, Before/after ``` benchmark old ns/op new ns/op delta BenchmarkGalois128K-32 2240 2240 +0.00% BenchmarkGalois1M-32 19578 18891 -3.51% BenchmarkGaloisXor128K-32 2798 2852 +1.93% BenchmarkGaloisXor1M-32 23334 23345 +0.05% BenchmarkEncode2x1x1M-32 34357 34370 +0.04% BenchmarkEncode10x2x10000-32 3210 3093 -3.64% BenchmarkEncode100x20x10000-32 362925 148214 -59.16% BenchmarkEncode17x3x1M-32 323767 224157 -30.77% BenchmarkEncode10x4x16M-32 8376895 8376737 -0.00% BenchmarkEncode5x2x1M-32 68365 66861 -2.20% BenchmarkEncode10x2x1M-32 101407 93023 -8.27% BenchmarkEncode10x4x1M-32 171880 155477 -9.54% BenchmarkEncode50x20x1M-32 3704691 3015047 -18.62% BenchmarkEncode17x3x16M-32 10279233 10106658 -1.68% BenchmarkEncode_8x4x8M-32 3438245 3326479 -3.25% BenchmarkEncode_12x4x12M-32 6632257 6581637 -0.76% BenchmarkEncode_16x4x16M-32 10815755 10788377 -0.25% BenchmarkEncode_16x4x32M-32 21029061 21507995 +2.28% BenchmarkEncode_16x4x64M-32 42145450 43876850 +4.11% BenchmarkEncode_8x5x8M-32 4543208 3846378 -15.34% BenchmarkEncode_8x6x8M-32 5065494 4397218 -13.19% BenchmarkEncode_8x7x8M-32 5818995 4962884 -14.71% BenchmarkEncode_8x9x8M-32 6215449 6114898 -1.62% BenchmarkEncode_8x10x8M-32 6923415 6610501 -4.52% BenchmarkEncode_8x11x8M-32 7365988 7010473 -4.83% BenchmarkEncode_8x8x05M-32 150857 136820 -9.30% BenchmarkEncode_8x8x1M-32 256722 254854 -0.73% BenchmarkEncode_8x8x8M-32 5547790 5422048 -2.27% BenchmarkEncode_8x8x32M-32 23038643 22705859 -1.44% BenchmarkEncode_24x8x24M-32 27729259 30332216 +9.39% BenchmarkEncode_24x8x48M-32 53865705 61187658 +13.59% BenchmarkVerify10x2x10000-32 8769 8154 -7.01% BenchmarkVerify10x2x1M-32 516149 476180 -7.74% BenchmarkVerify5x2x1M-32 443888 419541 -5.48% BenchmarkVerify10x4x1M-32 1030299 948021 -7.99% BenchmarkVerify50x20x1M-32 7209689 6186891 -14.19% BenchmarkVerify10x4x16M-32 17774456 17681879 -0.52% 
BenchmarkReconstruct10x2x10000-32 3352 3256 -2.86% BenchmarkReconstruct50x5x50000-32 166417 140900 -15.33% BenchmarkReconstruct10x2x1M-32 189711 174615 -7.96% BenchmarkReconstruct5x2x1M-32 128080 126520 -1.22% BenchmarkReconstruct10x4x1M-32 273312 254017 -7.06% BenchmarkReconstruct50x20x1M-32 3628812 3192474 -12.02% BenchmarkReconstruct10x4x16M-32 8562186 8781479 +2.56% BenchmarkReconstructData10x2x10000-32 3241 3116 -3.86% BenchmarkReconstructData50x5x50000-32 162520 134794 -17.06% BenchmarkReconstructData10x2x1M-32 171253 161955 -5.43% BenchmarkReconstructData5x2x1M-32 102215 106942 +4.62% BenchmarkReconstructData10x4x1M-32 225593 219969 -2.49% BenchmarkReconstructData50x20x1M-32 2515311 2129721 -15.33% BenchmarkReconstructData10x4x16M-32 6980308 6698111 -4.04% BenchmarkReconstructP10x2x10000-32 924 937 +1.35% BenchmarkReconstructP10x5x20000-32 1639 1703 +3.90% BenchmarkSplit10x4x160M-32 4984993 4898045 -1.74% BenchmarkSplit5x2x5M-32 380415 221446 -41.79% BenchmarkSplit10x2x1M-32 58761 53335 -9.23% BenchmarkSplit10x4x10M-32 643188 410959 -36.11% BenchmarkSplit50x20x50M-32 1843879 1647205 -10.67% BenchmarkSplit17x3x272M-32 3684920 3613951 -1.93% BenchmarkParallel_8x8x64K-32 7022 6630 -5.58% BenchmarkParallel_8x8x05M-32 348308 348369 +0.02% BenchmarkParallel_20x10x05M-32 575672 581028 +0.93% BenchmarkParallel_8x8x1M-32 716033 697167 -2.63% BenchmarkParallel_8x8x8M-32 5716048 5616437 -1.74% BenchmarkParallel_8x8x32M-32 22650878 22098667 -2.44% BenchmarkParallel_8x3x1M-32 406839 399125 -1.90% BenchmarkParallel_8x4x1M-32 459107 463890 +1.04% BenchmarkParallel_8x5x1M-32 527488 520334 -1.36% BenchmarkStreamEncode10x2x10000-32 6013 5878 -2.25% BenchmarkStreamEncode100x20x10000-32 503124 267894 -46.75% BenchmarkStreamEncode17x3x1M-32 1561838 1376618 -11.86% BenchmarkStreamEncode10x4x16M-32 19124427 17762582 -7.12% BenchmarkStreamEncode5x2x1M-32 429701 384666 -10.48% BenchmarkStreamEncode10x2x1M-32 801257 763637 -4.70% BenchmarkStreamEncode10x4x1M-32 876065 820744 -6.31% 
BenchmarkStreamEncode50x20x1M-32 7205112 6081398 -15.60% BenchmarkStreamEncode17x3x16M-32 27182786 26117143 -3.92% BenchmarkStreamVerify10x2x10000-32 13767 14026 +1.88% BenchmarkStreamVerify50x5x50000-32 826983 690453 -16.51% BenchmarkStreamVerify10x2x1M-32 1238566 1182591 -4.52% BenchmarkStreamVerify5x2x1M-32 892661 806301 -9.67% BenchmarkStreamVerify10x4x1M-32 1676394 1631495 -2.68% BenchmarkStreamVerify50x20x1M-32 10877875 10037678 -7.72% BenchmarkStreamVerify10x4x16M-32 27599576 30435400 +10.27% benchmark old MB/s new MB/s speedup BenchmarkGalois128K-32 58518.53 58510.17 1.00x BenchmarkGalois1M-32 53558.10 55507.44 1.04x BenchmarkGaloisXor128K-32 46839.74 45961.09 0.98x BenchmarkGaloisXor1M-32 44936.98 44917.46 1.00x BenchmarkEncode2x1x1M-32 91561.27 91524.11 1.00x BenchmarkEncode10x2x10000-32 37385.54 38792.54 1.04x BenchmarkEncode100x20x10000-32 3306.47 8096.40 2.45x BenchmarkEncode17x3x1M-32 64773.49 93557.14 1.44x BenchmarkEncode10x4x16M-32 28039.15 28039.68 1.00x BenchmarkEncode5x2x1M-32 107365.88 109781.16 1.02x BenchmarkEncode10x2x1M-32 124083.62 135266.27 1.09x BenchmarkEncode10x4x1M-32 85408.99 94419.71 1.11x BenchmarkEncode50x20x1M-32 19812.81 24344.67 1.23x BenchmarkEncode17x3x16M-32 32642.93 33200.32 1.02x BenchmarkEncode_8x4x8M-32 29277.52 30261.21 1.03x BenchmarkEncode_12x4x12M-32 30355.67 30589.14 1.01x BenchmarkEncode_16x4x16M-32 31023.66 31102.39 1.00x BenchmarkEncode_16x4x32M-32 31912.44 31201.82 0.98x BenchmarkEncode_16x4x64M-32 31846.32 30589.65 0.96x BenchmarkEncode_8x5x8M-32 24003.28 28351.84 1.18x BenchmarkEncode_8x6x8M-32 23184.41 26707.91 1.15x BenchmarkEncode_8x7x8M-32 21623.86 25354.03 1.17x BenchmarkEncode_8x9x8M-32 22943.85 23321.13 1.02x BenchmarkEncode_8x10x8M-32 21809.31 22841.68 1.05x BenchmarkEncode_8x11x8M-32 21637.77 22735.06 1.05x BenchmarkEncode_8x8x05M-32 55606.22 61311.47 1.10x BenchmarkEncode_8x8x1M-32 65351.80 65830.73 1.01x BenchmarkEncode_8x8x8M-32 24193.01 24754.07 1.02x BenchmarkEncode_8x8x32M-32 23303.06 23644.60 
1.01x BenchmarkEncode_24x8x24M-32 29041.76 26549.54 0.91x BenchmarkEncode_24x8x48M-32 29900.52 26322.51 0.88x BenchmarkVerify10x2x10000-32 13685.12 14717.10 1.08x BenchmarkVerify10x2x1M-32 24378.43 26424.72 1.08x BenchmarkVerify5x2x1M-32 16535.79 17495.41 1.06x BenchmarkVerify10x4x1M-32 14248.35 15484.96 1.09x BenchmarkVerify50x20x1M-32 10180.79 11863.85 1.17x BenchmarkVerify10x4x16M-32 13214.53 13283.71 1.01x BenchmarkReconstruct10x2x10000-32 35799.16 36854.89 1.03x BenchmarkReconstruct50x5x50000-32 33049.47 39034.89 1.18x BenchmarkReconstruct10x2x1M-32 66326.88 72061.06 1.09x BenchmarkReconstruct5x2x1M-32 57308.21 58014.92 1.01x BenchmarkReconstruct10x4x1M-32 53711.74 57791.66 1.08x BenchmarkReconstruct50x20x1M-32 20227.09 22991.67 1.14x BenchmarkReconstruct10x4x16M-32 27432.37 26747.32 0.98x BenchmarkReconstructData10x2x10000-32 37030.86 38511.87 1.04x BenchmarkReconstructData50x5x50000-32 33842.07 40802.85 1.21x BenchmarkReconstructData10x2x1M-32 73475.57 77693.87 1.06x BenchmarkReconstructData5x2x1M-32 71809.58 68635.57 0.96x BenchmarkReconstructData10x4x1M-32 65073.27 66736.88 1.03x BenchmarkReconstructData50x20x1M-32 29181.41 34464.76 1.18x BenchmarkReconstructData10x4x16M-32 33649.09 35066.75 1.04x BenchmarkReconstructP10x2x10000-32 129819.98 128086.76 0.99x BenchmarkReconstructP10x5x20000-32 183073.89 176202.21 0.96x BenchmarkParallel_8x8x64K-32 149327.33 158153.67 1.06x BenchmarkParallel_8x8x05M-32 24083.89 24079.69 1.00x BenchmarkParallel_20x10x05M-32 27322.20 27070.35 0.99x BenchmarkParallel_8x8x1M-32 23430.78 24064.83 1.03x BenchmarkParallel_8x8x8M-32 23480.86 23897.31 1.02x BenchmarkParallel_8x8x32M-32 23701.99 24294.27 1.02x BenchmarkParallel_8x3x1M-32 28351.11 28899.03 1.02x BenchmarkParallel_8x4x1M-32 27407.34 27124.76 0.99x BenchmarkParallel_8x5x1M-32 25842.27 26197.58 1.01x BenchmarkStreamEncode10x2x10000-32 16629.76 17012.26 1.02x BenchmarkStreamEncode100x20x10000-32 1987.58 3732.83 1.88x BenchmarkStreamEncode17x3x1M-32 11413.34 12948.97 1.13x 
BenchmarkStreamEncode10x4x16M-32 8772.66 9445.26 1.08x BenchmarkStreamEncode5x2x1M-32 12201.21 13629.70 1.12x BenchmarkStreamEncode10x2x1M-32 13086.64 13731.34 1.05x BenchmarkStreamEncode10x4x1M-32 11969.16 12775.92 1.07x BenchmarkStreamEncode50x20x1M-32 7276.61 8621.18 1.18x BenchmarkStreamEncode17x3x16M-32 10492.40 10920.52 1.04x BenchmarkStreamVerify10x2x10000-32 7264.00 7129.49 0.98x BenchmarkStreamVerify50x5x50000-32 6046.07 7241.62 1.20x BenchmarkStreamVerify10x2x1M-32 8466.05 8866.77 1.05x BenchmarkStreamVerify5x2x1M-32 5873.31 6502.39 1.11x BenchmarkStreamVerify10x4x1M-32 6254.95 6427.09 1.03x BenchmarkStreamVerify50x20x1M-32 4819.76 5223.20 1.08x BenchmarkStreamVerify10x4x16M-32 6078.79 5512.40 0.91x ```
2021-12-09 14:28:44 +03:00
func BenchmarkVerify50x5x100000(b *testing.B) {
2015-06-19 17:31:24 +03:00
benchmarkVerify(b, 50, 5, 100000)
}
2015-06-20 14:10:51 +03:00
// BenchmarkVerify10x2x1M runs benchmarkVerify with 10 data shards and
// 2 parity shards of 1 MiB (1024*1024 bytes) each.
func BenchmarkVerify10x2x1M(b *testing.B) {
benchmarkVerify(b, 10, 2, 1024*1024)
}
// BenchmarkVerify5x2x1M runs benchmarkVerify with 5 data shards and
// 2 parity shards of 1 MiB (1024*1024 bytes) each.
func BenchmarkVerify5x2x1M(b *testing.B) {
benchmarkVerify(b, 5, 2, 1024*1024)
}
// BenchmarkVerify10x4x1M runs benchmarkVerify with 10 data shards and
// 4 parity shards of 1 MiB (1024*1024 bytes) each.
func BenchmarkVerify10x4x1M(b *testing.B) {
benchmarkVerify(b, 10, 4, 1024*1024)
}
// BenchmarkVerify50x20x1M runs benchmarkVerify with 50 data shards and
// 20 parity shards of 1 MiB (1024*1024 bytes) each.
func BenchmarkVerify50x20x1M(b *testing.B) {
benchmarkVerify(b, 50, 20, 1024*1024)
}
2015-06-19 17:31:24 +03:00
// Benchmark 10 data slices with 4 parity slices holding 16MB bytes each
2015-06-20 11:11:33 +03:00
func BenchmarkVerify10x4x16M(b *testing.B) {
2015-06-19 17:31:24 +03:00
benchmarkVerify(b, 10, 4, 16*1024*1024)
}
2015-06-20 12:29:26 +03:00
// corruptRandom truncates randomly chosen shards to zero length so that a
// following Reconstruct call has something to rebuild. Between 1 and
// parityShards picks are made among all dataShards+parityShards shards; the
// same shard may be picked more than once. Truncated shards keep their
// backing array (length 0, capacity unchanged).
func corruptRandom(shards [][]byte, dataShards, parityShards int) {
	total := dataShards + parityShards
	for remaining := rand.Intn(parityShards) + 1; remaining > 0; remaining-- {
		victim := rand.Intn(total)
		shards[victim] = shards[victim][:0]
	}
}
func benchmarkReconstruct(b *testing.B, dataShards, parityShards, shardSize int) {
r, err := New(dataShards, parityShards, testOptions(WithAutoGoroutines(shardSize))...)
if err != nil {
b.Fatal(err)
}
shards := make([][]byte, parityShards+dataShards)
for s := range shards {
shards[s] = make([]byte, shardSize)
}
rand.Seed(0)
for s := 0; s < dataShards; s++ {
fillRandom(shards[s])
}
err = r.Encode(shards)
if err != nil {
b.Fatal(err)
}
b.SetBytes(int64(shardSize * (dataShards + parityShards)))
b.ResetTimer()
2022-02-09 11:53:36 +03:00
b.ReportAllocs()
for i := 0; i < b.N; i++ {
corruptRandom(shards, dataShards, parityShards)
err = r.Reconstruct(shards)
if err != nil {
b.Fatal(err)
}
}
}
// BenchmarkReconstruct10x2x10000 runs benchmarkReconstruct with 10 data
// shards and 2 parity shards of 10000 bytes each.
func BenchmarkReconstruct10x2x10000(b *testing.B) {
benchmarkReconstruct(b, 10, 2, 10000)
}
// Benchmark 800 data slices with 200 parity slices
func BenchmarkReconstruct800x200(b *testing.B) {
for size := 64; size <= 1<<20; size *= 4 {
b.Run(fmt.Sprintf("%v", size), func(b *testing.B) {
benchmarkReconstruct(b, 800, 200, size)
})
}
}
// BenchmarkReconstruct50x5x50000 runs benchmarkReconstruct with 50 data
// shards and 5 parity shards of 100000 bytes each.
// NOTE(review): the function name says 50000 but the shard size passed is
// 100000 - confirm which one is intended.
func BenchmarkReconstruct50x5x50000(b *testing.B) {
benchmarkReconstruct(b, 50, 5, 100000)
}
// BenchmarkReconstruct10x2x1M runs benchmarkReconstruct with 10 data shards
// and 2 parity shards of 1 MiB (1024*1024 bytes) each.
func BenchmarkReconstruct10x2x1M(b *testing.B) {
benchmarkReconstruct(b, 10, 2, 1024*1024)
}
// BenchmarkReconstruct5x2x1M runs benchmarkReconstruct with 5 data shards
// and 2 parity shards of 1 MiB (1024*1024 bytes) each.
func BenchmarkReconstruct5x2x1M(b *testing.B) {
benchmarkReconstruct(b, 5, 2, 1024*1024)
}
// BenchmarkReconstruct10x4x1M runs benchmarkReconstruct with 10 data shards
// and 4 parity shards of 1 MiB (1024*1024 bytes) each.
func BenchmarkReconstruct10x4x1M(b *testing.B) {
benchmarkReconstruct(b, 10, 4, 1024*1024)
}
// BenchmarkReconstruct50x20x1M runs benchmarkReconstruct with 50 data shards
// and 20 parity shards of 1 MiB (1024*1024 bytes) each.
func BenchmarkReconstruct50x20x1M(b *testing.B) {
benchmarkReconstruct(b, 50, 20, 1024*1024)
}
// BenchmarkReconstruct10x4x16M runs benchmarkReconstruct with 10 data shards
// and 4 parity shards of 16 MiB (16*1024*1024 bytes) each.
func BenchmarkReconstruct10x4x16M(b *testing.B) {
benchmarkReconstruct(b, 10, 4, 16*1024*1024)
}
// corruptRandomData truncates randomly chosen data shards to zero length.
// Between 1 and parityShards picks are made among the first dataShards
// entries only, so parity shards are never touched; the same shard may be
// picked more than once. Truncated shards keep their backing array.
func corruptRandomData(shards [][]byte, dataShards, parityShards int) {
	for remaining := rand.Intn(parityShards) + 1; remaining > 0; remaining-- {
		victim := rand.Intn(dataShards)
		shards[victim] = shards[victim][:0]
	}
}
func benchmarkReconstructData(b *testing.B, dataShards, parityShards, shardSize int) {
r, err := New(dataShards, parityShards, testOptions(WithAutoGoroutines(shardSize))...)
if err != nil {
b.Fatal(err)
}
shards := make([][]byte, parityShards+dataShards)
for s := range shards {
shards[s] = make([]byte, shardSize)
}
rand.Seed(0)
for s := 0; s < dataShards; s++ {
fillRandom(shards[s])
}
err = r.Encode(shards)
if err != nil {
b.Fatal(err)
}
b.SetBytes(int64(shardSize * (dataShards + parityShards)))
b.ResetTimer()
2022-02-09 11:53:36 +03:00
b.ReportAllocs()
for i := 0; i < b.N; i++ {
corruptRandomData(shards, dataShards, parityShards)
err = r.ReconstructData(shards)
if err != nil {
b.Fatal(err)
}
}
}
// BenchmarkReconstructData10x2x10000 runs benchmarkReconstructData with
// 10 data shards and 2 parity shards of 10000 bytes each.
func BenchmarkReconstructData10x2x10000(b *testing.B) {
benchmarkReconstructData(b, 10, 2, 10000)
}
// Benchmark 800 data slices with 200 parity slices
func BenchmarkReconstructData800x200(b *testing.B) {
for size := 64; size <= 1<<20; size *= 4 {
b.Run(fmt.Sprintf("%v", size), func(b *testing.B) {
benchmarkReconstructData(b, 800, 200, size)
})
}
}
// BenchmarkReconstructData50x5x50000 runs benchmarkReconstructData with
// 50 data shards and 5 parity shards of 100000 bytes each.
// NOTE(review): the function name says 50000 but the shard size passed is
// 100000 - confirm which one is intended.
func BenchmarkReconstructData50x5x50000(b *testing.B) {
benchmarkReconstructData(b, 50, 5, 100000)
}
// BenchmarkReconstructData10x2x1M runs benchmarkReconstructData with
// 10 data shards and 2 parity shards of 1 MiB (1024*1024 bytes) each.
func BenchmarkReconstructData10x2x1M(b *testing.B) {
benchmarkReconstructData(b, 10, 2, 1024*1024)
}
// BenchmarkReconstructData5x2x1M runs benchmarkReconstructData with
// 5 data shards and 2 parity shards of 1 MiB (1024*1024 bytes) each.
func BenchmarkReconstructData5x2x1M(b *testing.B) {
benchmarkReconstructData(b, 5, 2, 1024*1024)
}
// BenchmarkReconstructData10x4x1M runs benchmarkReconstructData with
// 10 data shards and 4 parity shards of 1 MiB (1024*1024 bytes) each.
func BenchmarkReconstructData10x4x1M(b *testing.B) {
benchmarkReconstructData(b, 10, 4, 1024*1024)
}
// BenchmarkReconstructData50x20x1M runs benchmarkReconstructData with
// 50 data shards and 20 parity shards of 1 MiB (1024*1024 bytes) each.
func BenchmarkReconstructData50x20x1M(b *testing.B) {
benchmarkReconstructData(b, 50, 20, 1024*1024)
}
// BenchmarkReconstructData10x4x16M runs benchmarkReconstructData with
// 10 data shards and 4 parity shards of 16 MiB (16*1024*1024 bytes) each.
func BenchmarkReconstructData10x4x16M(b *testing.B) {
benchmarkReconstructData(b, 10, 4, 16*1024*1024)
}
func benchmarkReconstructP(b *testing.B, dataShards, parityShards, shardSize int) {
r, err := New(dataShards, parityShards, testOptions(WithMaxGoroutines(1))...)
if err != nil {
b.Fatal(err)
}
b.SetBytes(int64(shardSize * (dataShards + parityShards)))
b.ResetTimer()
2022-02-09 11:53:36 +03:00
b.ReportAllocs()
b.RunParallel(func(pb *testing.PB) {
shards := make([][]byte, parityShards+dataShards)
for s := range shards {
shards[s] = make([]byte, shardSize)
}
rand.Seed(0)
for s := 0; s < dataShards; s++ {
fillRandom(shards[s])
}
err = r.Encode(shards)
if err != nil {
b.Fatal(err)
}
b.ResetTimer()
for pb.Next() {
corruptRandom(shards, dataShards, parityShards)
err = r.Reconstruct(shards)
if err != nil {
b.Fatal(err)
}
}
})
}
// BenchmarkReconstructP10x2x10000 runs benchmarkReconstructP with 10 data
// shards and 2 parity shards of 10000 bytes each.
func BenchmarkReconstructP10x2x10000(b *testing.B) {
benchmarkReconstructP(b, 10, 2, 10000)
}
// BenchmarkReconstructP10x5x20000 runs benchmarkReconstructP with 10 data
// shards and 5 parity shards of 20000 bytes each.
func BenchmarkReconstructP10x5x20000(b *testing.B) {
benchmarkReconstructP(b, 10, 5, 20000)
}
// TestEncoderReconstruct runs testEncoderReconstruct once without extra
// options and then once for every option set returned by testOpts.
func TestEncoderReconstruct(t *testing.T) {
testEncoderReconstruct(t)
for _, o := range testOpts() {
testEncoderReconstruct(t, o...)
}
}
func testEncoderReconstruct(t *testing.T, o ...Option) {
// Create some sample data
var data = make([]byte, 250000)
fillRandom(data)
// Create 5 data slices of 50000 elements each
enc, err := New(5, 3, testOptions(o...)...)
if err != nil {
t.Fatal(err)
}
shards, err := enc.Split(data)
if err != nil {
t.Fatal(err)
}
err = enc.Encode(shards)
if err != nil {
t.Fatal(err)
}
// Check that it verifies
ok, err := enc.Verify(shards)
if !ok || err != nil {
t.Fatal("not ok:", ok, "err:", err)
}
// Delete a shard
shards[0] = nil
// Should reconstruct
err = enc.Reconstruct(shards)
if err != nil {
t.Fatal(err)
}
// Check that it verifies
ok, err = enc.Verify(shards)
if !ok || err != nil {
t.Fatal("not ok:", ok, "err:", err)
}
2015-08-07 06:37:51 +03:00
// Recover original bytes
buf := new(bytes.Buffer)
err = enc.Join(buf, shards, len(data))
if err != nil {
t.Fatal(err)
}
if !bytes.Equal(buf.Bytes(), data) {
t.Fatal("recovered bytes do not match")
}
// Corrupt a shard
shards[0] = nil
shards[1][0], shards[1][500] = 75, 75
2015-08-07 06:37:51 +03:00
// Should reconstruct (but with corrupted data)
err = enc.Reconstruct(shards)
if err != nil {
t.Fatal(err)
}
// Check that it verifies
ok, err = enc.Verify(shards)
if ok || err != nil {
t.Fatal("error or ok:", ok, "err:", err)
}
2015-08-07 06:37:51 +03:00
// Recovered data should not match original
buf.Reset()
err = enc.Join(buf, shards, len(data))
if err != nil {
t.Fatal(err)
}
if bytes.Equal(buf.Bytes(), data) {
t.Fatal("corrupted data matches original")
}
}
func TestSplitJoin(t *testing.T) {
var data = make([]byte, 250000)
2015-08-09 01:20:40 +03:00
rand.Seed(0)
2015-08-07 06:37:51 +03:00
fillRandom(data)
enc, _ := New(5, 3, testOptions()...)
2015-08-07 06:37:51 +03:00
shards, err := enc.Split(data)
if err != nil {
t.Fatal(err)
}
_, err = enc.Split([]byte{})
if err != ErrShortData {
t.Errorf("expected %v, got %v", ErrShortData, err)
}
buf := new(bytes.Buffer)
err = enc.Join(buf, shards, 50)
if err != nil {
t.Fatal(err)
}
if !bytes.Equal(buf.Bytes(), data[:50]) {
t.Fatal("recovered data does match original")
}
err = enc.Join(buf, [][]byte{}, 0)
if err != ErrTooFewShards {
t.Errorf("expected %v, got %v", ErrTooFewShards, err)
}
err = enc.Join(buf, shards, len(data)+1)
if err != ErrShortData {
t.Errorf("expected %v, got %v", ErrShortData, err)
}
shards[0] = nil
err = enc.Join(buf, shards, len(data))
if err != ErrReconstructRequired {
t.Errorf("expected %v, got %v", ErrReconstructRequired, err)
}
2015-08-07 07:21:24 +03:00
}
func TestCodeSomeShards(t *testing.T) {
var data = make([]byte, 250000)
fillRandom(data)
enc, _ := New(5, 3, testOptions()...)
2015-08-07 07:21:24 +03:00
r := enc.(*reedSolomon) // need to access private methods
shards, _ := enc.Split(data)
old := runtime.GOMAXPROCS(1)
avx2: Improve speed when > 10 input or output shards. (#174) Speeds are including a limiting the number of goroutines with all AVX2 paths, Before/after ``` benchmark old ns/op new ns/op delta BenchmarkGalois128K-32 2240 2240 +0.00% BenchmarkGalois1M-32 19578 18891 -3.51% BenchmarkGaloisXor128K-32 2798 2852 +1.93% BenchmarkGaloisXor1M-32 23334 23345 +0.05% BenchmarkEncode2x1x1M-32 34357 34370 +0.04% BenchmarkEncode10x2x10000-32 3210 3093 -3.64% BenchmarkEncode100x20x10000-32 362925 148214 -59.16% BenchmarkEncode17x3x1M-32 323767 224157 -30.77% BenchmarkEncode10x4x16M-32 8376895 8376737 -0.00% BenchmarkEncode5x2x1M-32 68365 66861 -2.20% BenchmarkEncode10x2x1M-32 101407 93023 -8.27% BenchmarkEncode10x4x1M-32 171880 155477 -9.54% BenchmarkEncode50x20x1M-32 3704691 3015047 -18.62% BenchmarkEncode17x3x16M-32 10279233 10106658 -1.68% BenchmarkEncode_8x4x8M-32 3438245 3326479 -3.25% BenchmarkEncode_12x4x12M-32 6632257 6581637 -0.76% BenchmarkEncode_16x4x16M-32 10815755 10788377 -0.25% BenchmarkEncode_16x4x32M-32 21029061 21507995 +2.28% BenchmarkEncode_16x4x64M-32 42145450 43876850 +4.11% BenchmarkEncode_8x5x8M-32 4543208 3846378 -15.34% BenchmarkEncode_8x6x8M-32 5065494 4397218 -13.19% BenchmarkEncode_8x7x8M-32 5818995 4962884 -14.71% BenchmarkEncode_8x9x8M-32 6215449 6114898 -1.62% BenchmarkEncode_8x10x8M-32 6923415 6610501 -4.52% BenchmarkEncode_8x11x8M-32 7365988 7010473 -4.83% BenchmarkEncode_8x8x05M-32 150857 136820 -9.30% BenchmarkEncode_8x8x1M-32 256722 254854 -0.73% BenchmarkEncode_8x8x8M-32 5547790 5422048 -2.27% BenchmarkEncode_8x8x32M-32 23038643 22705859 -1.44% BenchmarkEncode_24x8x24M-32 27729259 30332216 +9.39% BenchmarkEncode_24x8x48M-32 53865705 61187658 +13.59% BenchmarkVerify10x2x10000-32 8769 8154 -7.01% BenchmarkVerify10x2x1M-32 516149 476180 -7.74% BenchmarkVerify5x2x1M-32 443888 419541 -5.48% BenchmarkVerify10x4x1M-32 1030299 948021 -7.99% BenchmarkVerify50x20x1M-32 7209689 6186891 -14.19% BenchmarkVerify10x4x16M-32 17774456 17681879 -0.52% 
BenchmarkReconstruct10x2x10000-32 3352 3256 -2.86% BenchmarkReconstruct50x5x50000-32 166417 140900 -15.33% BenchmarkReconstruct10x2x1M-32 189711 174615 -7.96% BenchmarkReconstruct5x2x1M-32 128080 126520 -1.22% BenchmarkReconstruct10x4x1M-32 273312 254017 -7.06% BenchmarkReconstruct50x20x1M-32 3628812 3192474 -12.02% BenchmarkReconstruct10x4x16M-32 8562186 8781479 +2.56% BenchmarkReconstructData10x2x10000-32 3241 3116 -3.86% BenchmarkReconstructData50x5x50000-32 162520 134794 -17.06% BenchmarkReconstructData10x2x1M-32 171253 161955 -5.43% BenchmarkReconstructData5x2x1M-32 102215 106942 +4.62% BenchmarkReconstructData10x4x1M-32 225593 219969 -2.49% BenchmarkReconstructData50x20x1M-32 2515311 2129721 -15.33% BenchmarkReconstructData10x4x16M-32 6980308 6698111 -4.04% BenchmarkReconstructP10x2x10000-32 924 937 +1.35% BenchmarkReconstructP10x5x20000-32 1639 1703 +3.90% BenchmarkSplit10x4x160M-32 4984993 4898045 -1.74% BenchmarkSplit5x2x5M-32 380415 221446 -41.79% BenchmarkSplit10x2x1M-32 58761 53335 -9.23% BenchmarkSplit10x4x10M-32 643188 410959 -36.11% BenchmarkSplit50x20x50M-32 1843879 1647205 -10.67% BenchmarkSplit17x3x272M-32 3684920 3613951 -1.93% BenchmarkParallel_8x8x64K-32 7022 6630 -5.58% BenchmarkParallel_8x8x05M-32 348308 348369 +0.02% BenchmarkParallel_20x10x05M-32 575672 581028 +0.93% BenchmarkParallel_8x8x1M-32 716033 697167 -2.63% BenchmarkParallel_8x8x8M-32 5716048 5616437 -1.74% BenchmarkParallel_8x8x32M-32 22650878 22098667 -2.44% BenchmarkParallel_8x3x1M-32 406839 399125 -1.90% BenchmarkParallel_8x4x1M-32 459107 463890 +1.04% BenchmarkParallel_8x5x1M-32 527488 520334 -1.36% BenchmarkStreamEncode10x2x10000-32 6013 5878 -2.25% BenchmarkStreamEncode100x20x10000-32 503124 267894 -46.75% BenchmarkStreamEncode17x3x1M-32 1561838 1376618 -11.86% BenchmarkStreamEncode10x4x16M-32 19124427 17762582 -7.12% BenchmarkStreamEncode5x2x1M-32 429701 384666 -10.48% BenchmarkStreamEncode10x2x1M-32 801257 763637 -4.70% BenchmarkStreamEncode10x4x1M-32 876065 820744 -6.31% 
BenchmarkStreamEncode50x20x1M-32 7205112 6081398 -15.60% BenchmarkStreamEncode17x3x16M-32 27182786 26117143 -3.92% BenchmarkStreamVerify10x2x10000-32 13767 14026 +1.88% BenchmarkStreamVerify50x5x50000-32 826983 690453 -16.51% BenchmarkStreamVerify10x2x1M-32 1238566 1182591 -4.52% BenchmarkStreamVerify5x2x1M-32 892661 806301 -9.67% BenchmarkStreamVerify10x4x1M-32 1676394 1631495 -2.68% BenchmarkStreamVerify50x20x1M-32 10877875 10037678 -7.72% BenchmarkStreamVerify10x4x16M-32 27599576 30435400 +10.27% benchmark old MB/s new MB/s speedup BenchmarkGalois128K-32 58518.53 58510.17 1.00x BenchmarkGalois1M-32 53558.10 55507.44 1.04x BenchmarkGaloisXor128K-32 46839.74 45961.09 0.98x BenchmarkGaloisXor1M-32 44936.98 44917.46 1.00x BenchmarkEncode2x1x1M-32 91561.27 91524.11 1.00x BenchmarkEncode10x2x10000-32 37385.54 38792.54 1.04x BenchmarkEncode100x20x10000-32 3306.47 8096.40 2.45x BenchmarkEncode17x3x1M-32 64773.49 93557.14 1.44x BenchmarkEncode10x4x16M-32 28039.15 28039.68 1.00x BenchmarkEncode5x2x1M-32 107365.88 109781.16 1.02x BenchmarkEncode10x2x1M-32 124083.62 135266.27 1.09x BenchmarkEncode10x4x1M-32 85408.99 94419.71 1.11x BenchmarkEncode50x20x1M-32 19812.81 24344.67 1.23x BenchmarkEncode17x3x16M-32 32642.93 33200.32 1.02x BenchmarkEncode_8x4x8M-32 29277.52 30261.21 1.03x BenchmarkEncode_12x4x12M-32 30355.67 30589.14 1.01x BenchmarkEncode_16x4x16M-32 31023.66 31102.39 1.00x BenchmarkEncode_16x4x32M-32 31912.44 31201.82 0.98x BenchmarkEncode_16x4x64M-32 31846.32 30589.65 0.96x BenchmarkEncode_8x5x8M-32 24003.28 28351.84 1.18x BenchmarkEncode_8x6x8M-32 23184.41 26707.91 1.15x BenchmarkEncode_8x7x8M-32 21623.86 25354.03 1.17x BenchmarkEncode_8x9x8M-32 22943.85 23321.13 1.02x BenchmarkEncode_8x10x8M-32 21809.31 22841.68 1.05x BenchmarkEncode_8x11x8M-32 21637.77 22735.06 1.05x BenchmarkEncode_8x8x05M-32 55606.22 61311.47 1.10x BenchmarkEncode_8x8x1M-32 65351.80 65830.73 1.01x BenchmarkEncode_8x8x8M-32 24193.01 24754.07 1.02x BenchmarkEncode_8x8x32M-32 23303.06 23644.60 
1.01x BenchmarkEncode_24x8x24M-32 29041.76 26549.54 0.91x BenchmarkEncode_24x8x48M-32 29900.52 26322.51 0.88x BenchmarkVerify10x2x10000-32 13685.12 14717.10 1.08x BenchmarkVerify10x2x1M-32 24378.43 26424.72 1.08x BenchmarkVerify5x2x1M-32 16535.79 17495.41 1.06x BenchmarkVerify10x4x1M-32 14248.35 15484.96 1.09x BenchmarkVerify50x20x1M-32 10180.79 11863.85 1.17x BenchmarkVerify10x4x16M-32 13214.53 13283.71 1.01x BenchmarkReconstruct10x2x10000-32 35799.16 36854.89 1.03x BenchmarkReconstruct50x5x50000-32 33049.47 39034.89 1.18x BenchmarkReconstruct10x2x1M-32 66326.88 72061.06 1.09x BenchmarkReconstruct5x2x1M-32 57308.21 58014.92 1.01x BenchmarkReconstruct10x4x1M-32 53711.74 57791.66 1.08x BenchmarkReconstruct50x20x1M-32 20227.09 22991.67 1.14x BenchmarkReconstruct10x4x16M-32 27432.37 26747.32 0.98x BenchmarkReconstructData10x2x10000-32 37030.86 38511.87 1.04x BenchmarkReconstructData50x5x50000-32 33842.07 40802.85 1.21x BenchmarkReconstructData10x2x1M-32 73475.57 77693.87 1.06x BenchmarkReconstructData5x2x1M-32 71809.58 68635.57 0.96x BenchmarkReconstructData10x4x1M-32 65073.27 66736.88 1.03x BenchmarkReconstructData50x20x1M-32 29181.41 34464.76 1.18x BenchmarkReconstructData10x4x16M-32 33649.09 35066.75 1.04x BenchmarkReconstructP10x2x10000-32 129819.98 128086.76 0.99x BenchmarkReconstructP10x5x20000-32 183073.89 176202.21 0.96x BenchmarkParallel_8x8x64K-32 149327.33 158153.67 1.06x BenchmarkParallel_8x8x05M-32 24083.89 24079.69 1.00x BenchmarkParallel_20x10x05M-32 27322.20 27070.35 0.99x BenchmarkParallel_8x8x1M-32 23430.78 24064.83 1.03x BenchmarkParallel_8x8x8M-32 23480.86 23897.31 1.02x BenchmarkParallel_8x8x32M-32 23701.99 24294.27 1.02x BenchmarkParallel_8x3x1M-32 28351.11 28899.03 1.02x BenchmarkParallel_8x4x1M-32 27407.34 27124.76 0.99x BenchmarkParallel_8x5x1M-32 25842.27 26197.58 1.01x BenchmarkStreamEncode10x2x10000-32 16629.76 17012.26 1.02x BenchmarkStreamEncode100x20x10000-32 1987.58 3732.83 1.88x BenchmarkStreamEncode17x3x1M-32 11413.34 12948.97 1.13x 
BenchmarkStreamEncode10x4x16M-32 8772.66 9445.26 1.08x BenchmarkStreamEncode5x2x1M-32 12201.21 13629.70 1.12x BenchmarkStreamEncode10x2x1M-32 13086.64 13731.34 1.05x BenchmarkStreamEncode10x4x1M-32 11969.16 12775.92 1.07x BenchmarkStreamEncode50x20x1M-32 7276.61 8621.18 1.18x BenchmarkStreamEncode17x3x16M-32 10492.40 10920.52 1.04x BenchmarkStreamVerify10x2x10000-32 7264.00 7129.49 0.98x BenchmarkStreamVerify50x5x50000-32 6046.07 7241.62 1.20x BenchmarkStreamVerify10x2x1M-32 8466.05 8866.77 1.05x BenchmarkStreamVerify5x2x1M-32 5873.31 6502.39 1.11x BenchmarkStreamVerify10x4x1M-32 6254.95 6427.09 1.03x BenchmarkStreamVerify50x20x1M-32 4819.76 5223.20 1.08x BenchmarkStreamVerify10x4x16M-32 6078.79 5512.40 0.91x ```
2021-12-09 14:28:44 +03:00
r.codeSomeShards(r.parity, shards[:r.DataShards], shards[r.DataShards:r.DataShards+r.ParityShards], len(shards[0]))
2015-08-07 07:21:24 +03:00
// hopefully more than 1 CPU
runtime.GOMAXPROCS(runtime.NumCPU())
avx2: Improve speed when > 10 input or output shards. (#174) Speeds are including a limiting the number of goroutines with all AVX2 paths, Before/after ``` benchmark old ns/op new ns/op delta BenchmarkGalois128K-32 2240 2240 +0.00% BenchmarkGalois1M-32 19578 18891 -3.51% BenchmarkGaloisXor128K-32 2798 2852 +1.93% BenchmarkGaloisXor1M-32 23334 23345 +0.05% BenchmarkEncode2x1x1M-32 34357 34370 +0.04% BenchmarkEncode10x2x10000-32 3210 3093 -3.64% BenchmarkEncode100x20x10000-32 362925 148214 -59.16% BenchmarkEncode17x3x1M-32 323767 224157 -30.77% BenchmarkEncode10x4x16M-32 8376895 8376737 -0.00% BenchmarkEncode5x2x1M-32 68365 66861 -2.20% BenchmarkEncode10x2x1M-32 101407 93023 -8.27% BenchmarkEncode10x4x1M-32 171880 155477 -9.54% BenchmarkEncode50x20x1M-32 3704691 3015047 -18.62% BenchmarkEncode17x3x16M-32 10279233 10106658 -1.68% BenchmarkEncode_8x4x8M-32 3438245 3326479 -3.25% BenchmarkEncode_12x4x12M-32 6632257 6581637 -0.76% BenchmarkEncode_16x4x16M-32 10815755 10788377 -0.25% BenchmarkEncode_16x4x32M-32 21029061 21507995 +2.28% BenchmarkEncode_16x4x64M-32 42145450 43876850 +4.11% BenchmarkEncode_8x5x8M-32 4543208 3846378 -15.34% BenchmarkEncode_8x6x8M-32 5065494 4397218 -13.19% BenchmarkEncode_8x7x8M-32 5818995 4962884 -14.71% BenchmarkEncode_8x9x8M-32 6215449 6114898 -1.62% BenchmarkEncode_8x10x8M-32 6923415 6610501 -4.52% BenchmarkEncode_8x11x8M-32 7365988 7010473 -4.83% BenchmarkEncode_8x8x05M-32 150857 136820 -9.30% BenchmarkEncode_8x8x1M-32 256722 254854 -0.73% BenchmarkEncode_8x8x8M-32 5547790 5422048 -2.27% BenchmarkEncode_8x8x32M-32 23038643 22705859 -1.44% BenchmarkEncode_24x8x24M-32 27729259 30332216 +9.39% BenchmarkEncode_24x8x48M-32 53865705 61187658 +13.59% BenchmarkVerify10x2x10000-32 8769 8154 -7.01% BenchmarkVerify10x2x1M-32 516149 476180 -7.74% BenchmarkVerify5x2x1M-32 443888 419541 -5.48% BenchmarkVerify10x4x1M-32 1030299 948021 -7.99% BenchmarkVerify50x20x1M-32 7209689 6186891 -14.19% BenchmarkVerify10x4x16M-32 17774456 17681879 -0.52% 
BenchmarkReconstruct10x2x10000-32 3352 3256 -2.86% BenchmarkReconstruct50x5x50000-32 166417 140900 -15.33% BenchmarkReconstruct10x2x1M-32 189711 174615 -7.96% BenchmarkReconstruct5x2x1M-32 128080 126520 -1.22% BenchmarkReconstruct10x4x1M-32 273312 254017 -7.06% BenchmarkReconstruct50x20x1M-32 3628812 3192474 -12.02% BenchmarkReconstruct10x4x16M-32 8562186 8781479 +2.56% BenchmarkReconstructData10x2x10000-32 3241 3116 -3.86% BenchmarkReconstructData50x5x50000-32 162520 134794 -17.06% BenchmarkReconstructData10x2x1M-32 171253 161955 -5.43% BenchmarkReconstructData5x2x1M-32 102215 106942 +4.62% BenchmarkReconstructData10x4x1M-32 225593 219969 -2.49% BenchmarkReconstructData50x20x1M-32 2515311 2129721 -15.33% BenchmarkReconstructData10x4x16M-32 6980308 6698111 -4.04% BenchmarkReconstructP10x2x10000-32 924 937 +1.35% BenchmarkReconstructP10x5x20000-32 1639 1703 +3.90% BenchmarkSplit10x4x160M-32 4984993 4898045 -1.74% BenchmarkSplit5x2x5M-32 380415 221446 -41.79% BenchmarkSplit10x2x1M-32 58761 53335 -9.23% BenchmarkSplit10x4x10M-32 643188 410959 -36.11% BenchmarkSplit50x20x50M-32 1843879 1647205 -10.67% BenchmarkSplit17x3x272M-32 3684920 3613951 -1.93% BenchmarkParallel_8x8x64K-32 7022 6630 -5.58% BenchmarkParallel_8x8x05M-32 348308 348369 +0.02% BenchmarkParallel_20x10x05M-32 575672 581028 +0.93% BenchmarkParallel_8x8x1M-32 716033 697167 -2.63% BenchmarkParallel_8x8x8M-32 5716048 5616437 -1.74% BenchmarkParallel_8x8x32M-32 22650878 22098667 -2.44% BenchmarkParallel_8x3x1M-32 406839 399125 -1.90% BenchmarkParallel_8x4x1M-32 459107 463890 +1.04% BenchmarkParallel_8x5x1M-32 527488 520334 -1.36% BenchmarkStreamEncode10x2x10000-32 6013 5878 -2.25% BenchmarkStreamEncode100x20x10000-32 503124 267894 -46.75% BenchmarkStreamEncode17x3x1M-32 1561838 1376618 -11.86% BenchmarkStreamEncode10x4x16M-32 19124427 17762582 -7.12% BenchmarkStreamEncode5x2x1M-32 429701 384666 -10.48% BenchmarkStreamEncode10x2x1M-32 801257 763637 -4.70% BenchmarkStreamEncode10x4x1M-32 876065 820744 -6.31% 
BenchmarkStreamEncode50x20x1M-32 7205112 6081398 -15.60% BenchmarkStreamEncode17x3x16M-32 27182786 26117143 -3.92% BenchmarkStreamVerify10x2x10000-32 13767 14026 +1.88% BenchmarkStreamVerify50x5x50000-32 826983 690453 -16.51% BenchmarkStreamVerify10x2x1M-32 1238566 1182591 -4.52% BenchmarkStreamVerify5x2x1M-32 892661 806301 -9.67% BenchmarkStreamVerify10x4x1M-32 1676394 1631495 -2.68% BenchmarkStreamVerify50x20x1M-32 10877875 10037678 -7.72% BenchmarkStreamVerify10x4x16M-32 27599576 30435400 +10.27% benchmark old MB/s new MB/s speedup BenchmarkGalois128K-32 58518.53 58510.17 1.00x BenchmarkGalois1M-32 53558.10 55507.44 1.04x BenchmarkGaloisXor128K-32 46839.74 45961.09 0.98x BenchmarkGaloisXor1M-32 44936.98 44917.46 1.00x BenchmarkEncode2x1x1M-32 91561.27 91524.11 1.00x BenchmarkEncode10x2x10000-32 37385.54 38792.54 1.04x BenchmarkEncode100x20x10000-32 3306.47 8096.40 2.45x BenchmarkEncode17x3x1M-32 64773.49 93557.14 1.44x BenchmarkEncode10x4x16M-32 28039.15 28039.68 1.00x BenchmarkEncode5x2x1M-32 107365.88 109781.16 1.02x BenchmarkEncode10x2x1M-32 124083.62 135266.27 1.09x BenchmarkEncode10x4x1M-32 85408.99 94419.71 1.11x BenchmarkEncode50x20x1M-32 19812.81 24344.67 1.23x BenchmarkEncode17x3x16M-32 32642.93 33200.32 1.02x BenchmarkEncode_8x4x8M-32 29277.52 30261.21 1.03x BenchmarkEncode_12x4x12M-32 30355.67 30589.14 1.01x BenchmarkEncode_16x4x16M-32 31023.66 31102.39 1.00x BenchmarkEncode_16x4x32M-32 31912.44 31201.82 0.98x BenchmarkEncode_16x4x64M-32 31846.32 30589.65 0.96x BenchmarkEncode_8x5x8M-32 24003.28 28351.84 1.18x BenchmarkEncode_8x6x8M-32 23184.41 26707.91 1.15x BenchmarkEncode_8x7x8M-32 21623.86 25354.03 1.17x BenchmarkEncode_8x9x8M-32 22943.85 23321.13 1.02x BenchmarkEncode_8x10x8M-32 21809.31 22841.68 1.05x BenchmarkEncode_8x11x8M-32 21637.77 22735.06 1.05x BenchmarkEncode_8x8x05M-32 55606.22 61311.47 1.10x BenchmarkEncode_8x8x1M-32 65351.80 65830.73 1.01x BenchmarkEncode_8x8x8M-32 24193.01 24754.07 1.02x BenchmarkEncode_8x8x32M-32 23303.06 23644.60 
1.01x BenchmarkEncode_24x8x24M-32 29041.76 26549.54 0.91x BenchmarkEncode_24x8x48M-32 29900.52 26322.51 0.88x BenchmarkVerify10x2x10000-32 13685.12 14717.10 1.08x BenchmarkVerify10x2x1M-32 24378.43 26424.72 1.08x BenchmarkVerify5x2x1M-32 16535.79 17495.41 1.06x BenchmarkVerify10x4x1M-32 14248.35 15484.96 1.09x BenchmarkVerify50x20x1M-32 10180.79 11863.85 1.17x BenchmarkVerify10x4x16M-32 13214.53 13283.71 1.01x BenchmarkReconstruct10x2x10000-32 35799.16 36854.89 1.03x BenchmarkReconstruct50x5x50000-32 33049.47 39034.89 1.18x BenchmarkReconstruct10x2x1M-32 66326.88 72061.06 1.09x BenchmarkReconstruct5x2x1M-32 57308.21 58014.92 1.01x BenchmarkReconstruct10x4x1M-32 53711.74 57791.66 1.08x BenchmarkReconstruct50x20x1M-32 20227.09 22991.67 1.14x BenchmarkReconstruct10x4x16M-32 27432.37 26747.32 0.98x BenchmarkReconstructData10x2x10000-32 37030.86 38511.87 1.04x BenchmarkReconstructData50x5x50000-32 33842.07 40802.85 1.21x BenchmarkReconstructData10x2x1M-32 73475.57 77693.87 1.06x BenchmarkReconstructData5x2x1M-32 71809.58 68635.57 0.96x BenchmarkReconstructData10x4x1M-32 65073.27 66736.88 1.03x BenchmarkReconstructData50x20x1M-32 29181.41 34464.76 1.18x BenchmarkReconstructData10x4x16M-32 33649.09 35066.75 1.04x BenchmarkReconstructP10x2x10000-32 129819.98 128086.76 0.99x BenchmarkReconstructP10x5x20000-32 183073.89 176202.21 0.96x BenchmarkParallel_8x8x64K-32 149327.33 158153.67 1.06x BenchmarkParallel_8x8x05M-32 24083.89 24079.69 1.00x BenchmarkParallel_20x10x05M-32 27322.20 27070.35 0.99x BenchmarkParallel_8x8x1M-32 23430.78 24064.83 1.03x BenchmarkParallel_8x8x8M-32 23480.86 23897.31 1.02x BenchmarkParallel_8x8x32M-32 23701.99 24294.27 1.02x BenchmarkParallel_8x3x1M-32 28351.11 28899.03 1.02x BenchmarkParallel_8x4x1M-32 27407.34 27124.76 0.99x BenchmarkParallel_8x5x1M-32 25842.27 26197.58 1.01x BenchmarkStreamEncode10x2x10000-32 16629.76 17012.26 1.02x BenchmarkStreamEncode100x20x10000-32 1987.58 3732.83 1.88x BenchmarkStreamEncode17x3x1M-32 11413.34 12948.97 1.13x 
BenchmarkStreamEncode10x4x16M-32 8772.66 9445.26 1.08x BenchmarkStreamEncode5x2x1M-32 12201.21 13629.70 1.12x BenchmarkStreamEncode10x2x1M-32 13086.64 13731.34 1.05x BenchmarkStreamEncode10x4x1M-32 11969.16 12775.92 1.07x BenchmarkStreamEncode50x20x1M-32 7276.61 8621.18 1.18x BenchmarkStreamEncode17x3x16M-32 10492.40 10920.52 1.04x BenchmarkStreamVerify10x2x10000-32 7264.00 7129.49 0.98x BenchmarkStreamVerify50x5x50000-32 6046.07 7241.62 1.20x BenchmarkStreamVerify10x2x1M-32 8466.05 8866.77 1.05x BenchmarkStreamVerify5x2x1M-32 5873.31 6502.39 1.11x BenchmarkStreamVerify10x4x1M-32 6254.95 6427.09 1.03x BenchmarkStreamVerify50x20x1M-32 4819.76 5223.20 1.08x BenchmarkStreamVerify10x4x16M-32 6078.79 5512.40 0.91x ```
2021-12-09 14:28:44 +03:00
r.codeSomeShards(r.parity, shards[:r.DataShards], shards[r.DataShards:r.DataShards+r.ParityShards], len(shards[0]))
2015-08-07 06:37:51 +03:00
2015-08-07 07:21:24 +03:00
// reset MAXPROCS, otherwise testing complains
runtime.GOMAXPROCS(old)
}
// TestStandardMatrices exercises every (data, parity) shard count with
// data+parity <= 255 using the default matrix: encode one-byte shards,
// truncate randomly chosen shards, reconstruct, verify, and confirm the data
// shards still hold their original byte. It is skipped in short mode and
// when GOMAXPROCS is below 4.
func TestStandardMatrices(t *testing.T) {
	if testing.Short() || runtime.GOMAXPROCS(0) < 4 {
		// Runtime ~15s.
		t.Skip("Skipping slow matrix check")
	}
	for i := 1; i < 256; i++ {
		i := i
		t.Run(fmt.Sprintf("x%d", i), func(t *testing.T) {
			t.Parallel()
			// i is the number of data shards; every shard starts out as the
			// single byte byte(i).
			fill := byte(i)
			shards := make([][]byte, 255)
			for p := range shards {
				shards[p] = []byte{fill}
			}
			rng := rand.New(rand.NewSource(0))
			// j is the number of parity shards; stop once i+j exceeds 255.
			for j := 1; i+j <= 255; j++ {
				sh := shards[:i+j]
				enc, err := New(i, j, testOptions(WithFastOneParityMatrix())...)
				if err != nil {
					t.Fatal("creating matrix size", i, j, ":", err)
				}
				if err = enc.Encode(sh); err != nil {
					t.Fatal("encoding", i, j, ":", err)
				}
				// Truncate j randomly picked shards (picks may repeat).
				for k := 0; k < j; k++ {
					victim := int(rng.Int63n(int64(i + j)))
					sh[victim] = sh[victim][:0]
				}
				if err = enc.Reconstruct(sh); err != nil {
					t.Fatal("reconstructing", i, j, ":", err)
				}
				ok, err := enc.Verify(sh)
				if err != nil {
					t.Fatal("verifying", i, j, ":", err)
				}
				if !ok {
					t.Fatal(i, j, ok)
				}
				// Only the first i shards are data shards.
				for k := 0; k < i; k++ {
					if sh[k][0] != byte(i) {
						t.Fatal("does not match", i, j, k, sh[0], sh[k])
					}
				}
			}
		})
	}
}
// TestCauchyMatrices exercises every (data, parity) shard count with
// data+parity <= 255 using WithCauchyMatrix: encode one-byte shards,
// truncate randomly chosen shards, reconstruct, verify, and confirm the data
// shards still hold their original byte. It is skipped in short mode and
// when GOMAXPROCS is below 4.
func TestCauchyMatrices(t *testing.T) {
	if testing.Short() || runtime.GOMAXPROCS(0) < 4 {
		// Runtime ~15s.
		t.Skip("Skipping slow matrix check")
	}
	for i := 1; i < 256; i++ {
		i := i
		t.Run(fmt.Sprintf("x%d", i), func(t *testing.T) {
			t.Parallel()
			// i is the number of data shards; every shard starts out as the
			// single byte byte(i).
			fill := byte(i)
			shards := make([][]byte, 255)
			for p := range shards {
				shards[p] = []byte{fill}
			}
			rng := rand.New(rand.NewSource(0))
			// j is the number of parity shards; stop once i+j exceeds 255.
			for j := 1; i+j <= 255; j++ {
				sh := shards[:i+j]
				enc, err := New(i, j, testOptions(WithCauchyMatrix(), WithFastOneParityMatrix())...)
				if err != nil {
					t.Fatal("creating matrix size", i, j, ":", err)
				}
				if err = enc.Encode(sh); err != nil {
					t.Fatal("encoding", i, j, ":", err)
				}
				// Truncate j randomly picked shards (picks may repeat).
				for k := 0; k < j; k++ {
					victim := int(rng.Int63n(int64(i + j)))
					sh[victim] = sh[victim][:0]
				}
				if err = enc.Reconstruct(sh); err != nil {
					t.Fatal("reconstructing", i, j, ":", err)
				}
				ok, err := enc.Verify(sh)
				if err != nil {
					t.Fatal("verifying", i, j, ":", err)
				}
				if !ok {
					t.Fatal(i, j, ok)
				}
				// Only the first i shards are data shards.
				for k := 0; k < i; k++ {
					if sh[k][0] != byte(i) {
						t.Fatal("does not match", i, j, k, sh[0], sh[k])
					}
				}
			}
		})
	}
}
// TestPar1Matrices runs an encode / reconstruct / verify round trip for
// every data/parity shard combination whose total does not exceed 255,
// with encoders created using WithPAR1Matrix. A reconstruction that fails
// with errSingular is logged and that combination is skipped instead of
// failing the test.
// The test is skipped in -short mode or when GOMAXPROCS is below 4.
func TestPar1Matrices(t *testing.T) {
	// Runtime ~15s.
	if testing.Short() || runtime.GOMAXPROCS(0) < 4 {
		t.Skip("Skipping slow matrix check")
	}
	for i := 1; i < 256; i++ {
		// Per-iteration copy captured by the parallel subtest closure.
		dataCount := i
		t.Run(fmt.Sprintf("x%d", dataCount), func(t *testing.T) {
			t.Parallel()
			// Every shard is a single byte holding the data shard count.
			fill := byte(dataCount)
			all := make([][]byte, 255)
			for idx := range all {
				all[idx] = []byte{fill}
			}
			rng := rand.New(rand.NewSource(0))
			// parityCount is bounded so data+parity never exceeds 255.
			for parityCount := 1; dataCount+parityCount <= 255; parityCount++ {
				total := dataCount + parityCount
				sh := all[:total]
				enc, err := New(dataCount, parityCount, testOptions(WithPAR1Matrix())...)
				if err != nil {
					t.Fatal("creating matrix size", dataCount, parityCount, ":", err)
				}
				if err = enc.Encode(sh); err != nil {
					t.Fatal("encoding", dataCount, parityCount, ":", err)
				}
				// Blank out parityCount randomly picked shards (indices may repeat).
				for drops := 0; drops < parityCount; drops++ {
					victim := int(rng.Int63n(int64(total)))
					sh[victim] = sh[victim][:0]
				}
				if err = enc.Reconstruct(sh); err != nil {
					if err != errSingular {
						t.Fatal("reconstructing", dataCount, parityCount, ":", err)
					}
					t.Logf("Singular: %d (data), %d (parity)", dataCount, parityCount)
					// Refill the blanked shards so the next combination
					// starts from fully populated input again.
					for p := range sh {
						if len(sh[p]) == 0 {
							all[p] = []byte{fill}
						}
					}
					continue
				}
				ok, err := enc.Verify(sh)
				switch {
				case err != nil:
					t.Fatal("verifying", dataCount, parityCount, ":", err)
				case !ok:
					t.Fatal(dataCount, parityCount, ok)
				}
				// Only the data shards are compared against the fill value.
				for k, shard := range sh[:dataCount] {
					if shard[0] != fill {
						t.Fatal("does not match", dataCount, parityCount, k, sh[0], shard)
					}
				}
			}
		})
	}
}
2015-08-07 05:47:11 +03:00
func TestNew(t *testing.T) {
tests := []struct {
data, parity int
err error
}{
{127, 127, nil},
{128, 128, nil},
{255, 1, nil},
2021-03-08 18:13:24 +03:00
{255, 0, nil},
{1, 0, nil},
{65536, 65536, ErrMaxShardNum},
2015-08-07 05:47:11 +03:00
{0, 1, ErrInvShardNum},
2021-03-08 18:13:24 +03:00
{1, -1, ErrInvShardNum},
{65636, 1, ErrMaxShardNum},
2015-08-07 05:47:11 +03:00
// overflow causes r.Shards to be negative
{256, int(^uint(0) >> 1), errInvalidRowSize},
}
for _, test := range tests {
_, err := New(test.data, test.parity, testOptions()...)
2015-08-07 05:47:11 +03:00
if err != test.err {
t.Errorf("New(%v, %v): expected %v, got %v", test.data, test.parity, test.err, err)
}
}
}
// BenchmarkSplit10x4x160M benchmarks Split with 10 data shards,
// 4 parity shards and 160 MiB of input data.
func BenchmarkSplit10x4x160M(b *testing.B) {
benchmarkSplit(b, 10, 4, 160*1024*1024)
}
// BenchmarkSplit5x2x5M benchmarks Split with 5 data shards,
// 2 parity shards and 5 MiB of input data.
func BenchmarkSplit5x2x5M(b *testing.B) {
benchmarkSplit(b, 5, 2, 5*1024*1024)
}
// BenchmarkSplit10x2x1M benchmarks Split with 10 data shards,
// 2 parity shards and 1 MiB of input data.
func BenchmarkSplit10x2x1M(b *testing.B) {
benchmarkSplit(b, 10, 2, 1024*1024)
}
// BenchmarkSplit10x4x10M benchmarks Split with 10 data shards,
// 4 parity shards and 10 MiB of input data.
func BenchmarkSplit10x4x10M(b *testing.B) {
benchmarkSplit(b, 10, 4, 10*1024*1024)
}
// BenchmarkSplit50x20x50M benchmarks Split with 50 data shards,
// 20 parity shards and 50 MiB of input data.
func BenchmarkSplit50x20x50M(b *testing.B) {
benchmarkSplit(b, 50, 20, 50*1024*1024)
}
// BenchmarkSplit17x3x272M benchmarks Split with 17 data shards,
// 3 parity shards and 272 MiB of input data.
func BenchmarkSplit17x3x272M(b *testing.B) {
benchmarkSplit(b, 17, 3, 272*1024*1024)
}
func benchmarkSplit(b *testing.B, shards, parity, dataSize int) {
r, err := New(shards, parity, testOptions(WithAutoGoroutines(dataSize))...)
if err != nil {
b.Fatal(err)
}
data := make([]byte, dataSize)
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
_, err = r.Split(data)
if err != nil {
b.Fatal(err)
}
}
}
func benchmarkParallel(b *testing.B, dataShards, parityShards, shardSize int) {
// Run max 1 goroutine per operation.
r, err := New(dataShards, parityShards, testOptions(WithMaxGoroutines(1))...)
if err != nil {
b.Fatal(err)
}
c := runtime.GOMAXPROCS(0)
// Note that concurrency also affects total data size and will make caches less effective.
avx2: Improve speed when > 10 input or output shards. (#174) Speeds are including a limiting the number of goroutines with all AVX2 paths, Before/after ``` benchmark old ns/op new ns/op delta BenchmarkGalois128K-32 2240 2240 +0.00% BenchmarkGalois1M-32 19578 18891 -3.51% BenchmarkGaloisXor128K-32 2798 2852 +1.93% BenchmarkGaloisXor1M-32 23334 23345 +0.05% BenchmarkEncode2x1x1M-32 34357 34370 +0.04% BenchmarkEncode10x2x10000-32 3210 3093 -3.64% BenchmarkEncode100x20x10000-32 362925 148214 -59.16% BenchmarkEncode17x3x1M-32 323767 224157 -30.77% BenchmarkEncode10x4x16M-32 8376895 8376737 -0.00% BenchmarkEncode5x2x1M-32 68365 66861 -2.20% BenchmarkEncode10x2x1M-32 101407 93023 -8.27% BenchmarkEncode10x4x1M-32 171880 155477 -9.54% BenchmarkEncode50x20x1M-32 3704691 3015047 -18.62% BenchmarkEncode17x3x16M-32 10279233 10106658 -1.68% BenchmarkEncode_8x4x8M-32 3438245 3326479 -3.25% BenchmarkEncode_12x4x12M-32 6632257 6581637 -0.76% BenchmarkEncode_16x4x16M-32 10815755 10788377 -0.25% BenchmarkEncode_16x4x32M-32 21029061 21507995 +2.28% BenchmarkEncode_16x4x64M-32 42145450 43876850 +4.11% BenchmarkEncode_8x5x8M-32 4543208 3846378 -15.34% BenchmarkEncode_8x6x8M-32 5065494 4397218 -13.19% BenchmarkEncode_8x7x8M-32 5818995 4962884 -14.71% BenchmarkEncode_8x9x8M-32 6215449 6114898 -1.62% BenchmarkEncode_8x10x8M-32 6923415 6610501 -4.52% BenchmarkEncode_8x11x8M-32 7365988 7010473 -4.83% BenchmarkEncode_8x8x05M-32 150857 136820 -9.30% BenchmarkEncode_8x8x1M-32 256722 254854 -0.73% BenchmarkEncode_8x8x8M-32 5547790 5422048 -2.27% BenchmarkEncode_8x8x32M-32 23038643 22705859 -1.44% BenchmarkEncode_24x8x24M-32 27729259 30332216 +9.39% BenchmarkEncode_24x8x48M-32 53865705 61187658 +13.59% BenchmarkVerify10x2x10000-32 8769 8154 -7.01% BenchmarkVerify10x2x1M-32 516149 476180 -7.74% BenchmarkVerify5x2x1M-32 443888 419541 -5.48% BenchmarkVerify10x4x1M-32 1030299 948021 -7.99% BenchmarkVerify50x20x1M-32 7209689 6186891 -14.19% BenchmarkVerify10x4x16M-32 17774456 17681879 -0.52% 
BenchmarkReconstruct10x2x10000-32 3352 3256 -2.86% BenchmarkReconstruct50x5x50000-32 166417 140900 -15.33% BenchmarkReconstruct10x2x1M-32 189711 174615 -7.96% BenchmarkReconstruct5x2x1M-32 128080 126520 -1.22% BenchmarkReconstruct10x4x1M-32 273312 254017 -7.06% BenchmarkReconstruct50x20x1M-32 3628812 3192474 -12.02% BenchmarkReconstruct10x4x16M-32 8562186 8781479 +2.56% BenchmarkReconstructData10x2x10000-32 3241 3116 -3.86% BenchmarkReconstructData50x5x50000-32 162520 134794 -17.06% BenchmarkReconstructData10x2x1M-32 171253 161955 -5.43% BenchmarkReconstructData5x2x1M-32 102215 106942 +4.62% BenchmarkReconstructData10x4x1M-32 225593 219969 -2.49% BenchmarkReconstructData50x20x1M-32 2515311 2129721 -15.33% BenchmarkReconstructData10x4x16M-32 6980308 6698111 -4.04% BenchmarkReconstructP10x2x10000-32 924 937 +1.35% BenchmarkReconstructP10x5x20000-32 1639 1703 +3.90% BenchmarkSplit10x4x160M-32 4984993 4898045 -1.74% BenchmarkSplit5x2x5M-32 380415 221446 -41.79% BenchmarkSplit10x2x1M-32 58761 53335 -9.23% BenchmarkSplit10x4x10M-32 643188 410959 -36.11% BenchmarkSplit50x20x50M-32 1843879 1647205 -10.67% BenchmarkSplit17x3x272M-32 3684920 3613951 -1.93% BenchmarkParallel_8x8x64K-32 7022 6630 -5.58% BenchmarkParallel_8x8x05M-32 348308 348369 +0.02% BenchmarkParallel_20x10x05M-32 575672 581028 +0.93% BenchmarkParallel_8x8x1M-32 716033 697167 -2.63% BenchmarkParallel_8x8x8M-32 5716048 5616437 -1.74% BenchmarkParallel_8x8x32M-32 22650878 22098667 -2.44% BenchmarkParallel_8x3x1M-32 406839 399125 -1.90% BenchmarkParallel_8x4x1M-32 459107 463890 +1.04% BenchmarkParallel_8x5x1M-32 527488 520334 -1.36% BenchmarkStreamEncode10x2x10000-32 6013 5878 -2.25% BenchmarkStreamEncode100x20x10000-32 503124 267894 -46.75% BenchmarkStreamEncode17x3x1M-32 1561838 1376618 -11.86% BenchmarkStreamEncode10x4x16M-32 19124427 17762582 -7.12% BenchmarkStreamEncode5x2x1M-32 429701 384666 -10.48% BenchmarkStreamEncode10x2x1M-32 801257 763637 -4.70% BenchmarkStreamEncode10x4x1M-32 876065 820744 -6.31% 
BenchmarkStreamEncode50x20x1M-32 7205112 6081398 -15.60% BenchmarkStreamEncode17x3x16M-32 27182786 26117143 -3.92% BenchmarkStreamVerify10x2x10000-32 13767 14026 +1.88% BenchmarkStreamVerify50x5x50000-32 826983 690453 -16.51% BenchmarkStreamVerify10x2x1M-32 1238566 1182591 -4.52% BenchmarkStreamVerify5x2x1M-32 892661 806301 -9.67% BenchmarkStreamVerify10x4x1M-32 1676394 1631495 -2.68% BenchmarkStreamVerify50x20x1M-32 10877875 10037678 -7.72% BenchmarkStreamVerify10x4x16M-32 27599576 30435400 +10.27% benchmark old MB/s new MB/s speedup BenchmarkGalois128K-32 58518.53 58510.17 1.00x BenchmarkGalois1M-32 53558.10 55507.44 1.04x BenchmarkGaloisXor128K-32 46839.74 45961.09 0.98x BenchmarkGaloisXor1M-32 44936.98 44917.46 1.00x BenchmarkEncode2x1x1M-32 91561.27 91524.11 1.00x BenchmarkEncode10x2x10000-32 37385.54 38792.54 1.04x BenchmarkEncode100x20x10000-32 3306.47 8096.40 2.45x BenchmarkEncode17x3x1M-32 64773.49 93557.14 1.44x BenchmarkEncode10x4x16M-32 28039.15 28039.68 1.00x BenchmarkEncode5x2x1M-32 107365.88 109781.16 1.02x BenchmarkEncode10x2x1M-32 124083.62 135266.27 1.09x BenchmarkEncode10x4x1M-32 85408.99 94419.71 1.11x BenchmarkEncode50x20x1M-32 19812.81 24344.67 1.23x BenchmarkEncode17x3x16M-32 32642.93 33200.32 1.02x BenchmarkEncode_8x4x8M-32 29277.52 30261.21 1.03x BenchmarkEncode_12x4x12M-32 30355.67 30589.14 1.01x BenchmarkEncode_16x4x16M-32 31023.66 31102.39 1.00x BenchmarkEncode_16x4x32M-32 31912.44 31201.82 0.98x BenchmarkEncode_16x4x64M-32 31846.32 30589.65 0.96x BenchmarkEncode_8x5x8M-32 24003.28 28351.84 1.18x BenchmarkEncode_8x6x8M-32 23184.41 26707.91 1.15x BenchmarkEncode_8x7x8M-32 21623.86 25354.03 1.17x BenchmarkEncode_8x9x8M-32 22943.85 23321.13 1.02x BenchmarkEncode_8x10x8M-32 21809.31 22841.68 1.05x BenchmarkEncode_8x11x8M-32 21637.77 22735.06 1.05x BenchmarkEncode_8x8x05M-32 55606.22 61311.47 1.10x BenchmarkEncode_8x8x1M-32 65351.80 65830.73 1.01x BenchmarkEncode_8x8x8M-32 24193.01 24754.07 1.02x BenchmarkEncode_8x8x32M-32 23303.06 23644.60 
1.01x BenchmarkEncode_24x8x24M-32 29041.76 26549.54 0.91x BenchmarkEncode_24x8x48M-32 29900.52 26322.51 0.88x BenchmarkVerify10x2x10000-32 13685.12 14717.10 1.08x BenchmarkVerify10x2x1M-32 24378.43 26424.72 1.08x BenchmarkVerify5x2x1M-32 16535.79 17495.41 1.06x BenchmarkVerify10x4x1M-32 14248.35 15484.96 1.09x BenchmarkVerify50x20x1M-32 10180.79 11863.85 1.17x BenchmarkVerify10x4x16M-32 13214.53 13283.71 1.01x BenchmarkReconstruct10x2x10000-32 35799.16 36854.89 1.03x BenchmarkReconstruct50x5x50000-32 33049.47 39034.89 1.18x BenchmarkReconstruct10x2x1M-32 66326.88 72061.06 1.09x BenchmarkReconstruct5x2x1M-32 57308.21 58014.92 1.01x BenchmarkReconstruct10x4x1M-32 53711.74 57791.66 1.08x BenchmarkReconstruct50x20x1M-32 20227.09 22991.67 1.14x BenchmarkReconstruct10x4x16M-32 27432.37 26747.32 0.98x BenchmarkReconstructData10x2x10000-32 37030.86 38511.87 1.04x BenchmarkReconstructData50x5x50000-32 33842.07 40802.85 1.21x BenchmarkReconstructData10x2x1M-32 73475.57 77693.87 1.06x BenchmarkReconstructData5x2x1M-32 71809.58 68635.57 0.96x BenchmarkReconstructData10x4x1M-32 65073.27 66736.88 1.03x BenchmarkReconstructData50x20x1M-32 29181.41 34464.76 1.18x BenchmarkReconstructData10x4x16M-32 33649.09 35066.75 1.04x BenchmarkReconstructP10x2x10000-32 129819.98 128086.76 0.99x BenchmarkReconstructP10x5x20000-32 183073.89 176202.21 0.96x BenchmarkParallel_8x8x64K-32 149327.33 158153.67 1.06x BenchmarkParallel_8x8x05M-32 24083.89 24079.69 1.00x BenchmarkParallel_20x10x05M-32 27322.20 27070.35 0.99x BenchmarkParallel_8x8x1M-32 23430.78 24064.83 1.03x BenchmarkParallel_8x8x8M-32 23480.86 23897.31 1.02x BenchmarkParallel_8x8x32M-32 23701.99 24294.27 1.02x BenchmarkParallel_8x3x1M-32 28351.11 28899.03 1.02x BenchmarkParallel_8x4x1M-32 27407.34 27124.76 0.99x BenchmarkParallel_8x5x1M-32 25842.27 26197.58 1.01x BenchmarkStreamEncode10x2x10000-32 16629.76 17012.26 1.02x BenchmarkStreamEncode100x20x10000-32 1987.58 3732.83 1.88x BenchmarkStreamEncode17x3x1M-32 11413.34 12948.97 1.13x 
BenchmarkStreamEncode10x4x16M-32 8772.66 9445.26 1.08x BenchmarkStreamEncode5x2x1M-32 12201.21 13629.70 1.12x BenchmarkStreamEncode10x2x1M-32 13086.64 13731.34 1.05x BenchmarkStreamEncode10x4x1M-32 11969.16 12775.92 1.07x BenchmarkStreamEncode50x20x1M-32 7276.61 8621.18 1.18x BenchmarkStreamEncode17x3x16M-32 10492.40 10920.52 1.04x BenchmarkStreamVerify10x2x10000-32 7264.00 7129.49 0.98x BenchmarkStreamVerify50x5x50000-32 6046.07 7241.62 1.20x BenchmarkStreamVerify10x2x1M-32 8466.05 8866.77 1.05x BenchmarkStreamVerify5x2x1M-32 5873.31 6502.39 1.11x BenchmarkStreamVerify10x4x1M-32 6254.95 6427.09 1.03x BenchmarkStreamVerify50x20x1M-32 4819.76 5223.20 1.08x BenchmarkStreamVerify10x4x16M-32 6078.79 5512.40 0.91x ```
2021-12-09 14:28:44 +03:00
if testing.Verbose() {
b.Log("Total data:", (c*dataShards*shardSize)>>20, "MiB", "parity:", (c*parityShards*shardSize)>>20, "MiB")
}
// Create independent shards
shardsCh := make(chan [][]byte, c)
for i := 0; i < c; i++ {
rand.Seed(int64(i))
shards := make([][]byte, dataShards+parityShards)
for s := range shards {
shards[s] = make([]byte, shardSize)
}
for s := 0; s < dataShards; s++ {
fillRandom(shards[s])
}
shardsCh <- shards
}
b.SetBytes(int64(shardSize * (dataShards + parityShards)))
b.SetParallelism(c)
b.ReportAllocs()
b.ResetTimer()
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
shards := <-shardsCh
err = r.Encode(shards)
if err != nil {
b.Fatal(err)
}
shardsCh <- shards
}
})
}
Generate AVX2 code (#141) Replaces AVX2 up to 10x8 configurations with specific generated functions. If code size is a concern `-tags=nogen` can be used. Biggest speedup when not memory constrained. ``` benchmark old MB/s new MB/s speedup BenchmarkEncode_8x5x8M 5895.75 9648.18 1.64x BenchmarkEncode_8x5x8M-4 16773.41 17220.67 1.03x BenchmarkEncode_8x5x8M-16 18263.12 17176.28 0.94x BenchmarkEncode_8x6x8M 5075.89 8548.39 1.68x BenchmarkEncode_8x6x8M-4 14559.83 15370.95 1.06x BenchmarkEncode_8x6x8M-16 16183.37 15291.98 0.94x BenchmarkEncode_8x7x8M 4481.18 7015.60 1.57x BenchmarkEncode_8x7x8M-4 12835.35 13695.90 1.07x BenchmarkEncode_8x7x8M-16 14246.94 13737.36 0.96x BenchmarkEncode_8x8x05M 5569.95 7947.70 1.43x BenchmarkEncode_8x8x05M-4 17334.91 25271.37 1.46x BenchmarkEncode_8x8x05M-16 29349.42 35043.36 1.19x BenchmarkEncode_8x8x1M 4830.58 7891.32 1.63x BenchmarkEncode_8x8x1M-4 17531.36 27371.42 1.56x BenchmarkEncode_8x8x1M-16 29593.98 39241.09 1.33x BenchmarkEncode_8x8x8M 3953.66 6584.26 1.67x BenchmarkEncode_8x8x8M-4 11527.34 12331.23 1.07x BenchmarkEncode_8x8x8M-16 12718.89 12173.08 0.96x BenchmarkEncode_8x8x32M 3927.51 6195.91 1.58x BenchmarkEncode_8x8x32M-4 11490.85 11424.39 0.99x BenchmarkEncode_8x8x32M-16 12506.09 11888.55 0.95x benchmark old MB/s new MB/s speedup BenchmarkParallel_8x8x64K 5490.24 6959.57 1.27x BenchmarkParallel_8x8x64K-4 21078.94 29557.51 1.40x BenchmarkParallel_8x8x64K-16 57508.45 73672.54 1.28x BenchmarkParallel_8x8x1M 4755.49 7667.84 1.61x BenchmarkParallel_8x8x1M-4 11818.66 12013.49 1.02x BenchmarkParallel_8x8x1M-16 12923.12 12109.42 0.94x BenchmarkParallel_8x8x8M 3973.94 6525.85 1.64x BenchmarkParallel_8x8x8M-4 11725.68 11312.46 0.96x BenchmarkParallel_8x8x8M-16 12608.20 11484.98 0.91x BenchmarkParallel_8x3x1M 14139.71 17993.04 1.27x BenchmarkParallel_8x3x1M-4 21805.97 23053.92 1.06x BenchmarkParallel_8x3x1M-16 24673.05 23596.71 0.96x BenchmarkParallel_8x4x1M 10617.88 14474.54 1.36x BenchmarkParallel_8x4x1M-4 18635.82 18965.65 1.02x 
BenchmarkParallel_8x4x1M-16 21518.12 20171.47 0.94x BenchmarkParallel_8x5x1M 8669.88 11833.96 1.36x BenchmarkParallel_8x5x1M-4 16321.00 17500.30 1.07x BenchmarkParallel_8x5x1M-16 17267.16 17191.04 1.00x ```
2020-05-20 13:48:34 +03:00
// The BenchmarkParallel_<data>x<parity>x<size> benchmarks below all delegate
// to benchmarkParallel; <size> is the per-shard size passed as shardSize.

// BenchmarkParallel_8x8x64K uses 8 data shards, 8 parity shards and 64 KiB shards.
func BenchmarkParallel_8x8x64K(b *testing.B) { benchmarkParallel(b, 8, 8, 64<<10) }

// BenchmarkParallel_8x8x05M uses 8 data shards, 8 parity shards and 512 KiB shards.
func BenchmarkParallel_8x8x05M(b *testing.B) { benchmarkParallel(b, 8, 8, 512<<10) }

// BenchmarkParallel_20x10x05M uses 20 data shards, 10 parity shards and 512 KiB shards.
func BenchmarkParallel_20x10x05M(b *testing.B) { benchmarkParallel(b, 20, 10, 512<<10) }

// BenchmarkParallel_8x8x1M uses 8 data shards, 8 parity shards and 1 MiB shards.
func BenchmarkParallel_8x8x1M(b *testing.B) { benchmarkParallel(b, 8, 8, 1<<20) }

// BenchmarkParallel_8x8x8M uses 8 data shards, 8 parity shards and 8 MiB shards.
func BenchmarkParallel_8x8x8M(b *testing.B) { benchmarkParallel(b, 8, 8, 8<<20) }

// BenchmarkParallel_8x8x32M uses 8 data shards, 8 parity shards and 32 MiB shards.
func BenchmarkParallel_8x8x32M(b *testing.B) { benchmarkParallel(b, 8, 8, 32<<20) }

// BenchmarkParallel_8x3x1M uses 8 data shards, 3 parity shards and 1 MiB shards.
func BenchmarkParallel_8x3x1M(b *testing.B) { benchmarkParallel(b, 8, 3, 1<<20) }

// BenchmarkParallel_8x4x1M uses 8 data shards, 4 parity shards and 1 MiB shards.
func BenchmarkParallel_8x4x1M(b *testing.B) { benchmarkParallel(b, 8, 4, 1<<20) }

// BenchmarkParallel_8x5x1M uses 8 data shards, 5 parity shards and 1 MiB shards.
func BenchmarkParallel_8x5x1M(b *testing.B) { benchmarkParallel(b, 8, 5, 1<<20) }