Generate AVX2 code (#141)

Replaces AVX2 up to 10x8 configurations with specific generated functions.

If code size is a concern `-tags=nogen` can be used.

Biggest speedup when not memory constrained.
```
benchmark                                old MB/s      new MB/s      speedup
BenchmarkEncode_8x5x8M                   5895.75       9648.18       1.64x
BenchmarkEncode_8x5x8M-4                 16773.41      17220.67      1.03x
BenchmarkEncode_8x5x8M-16                18263.12      17176.28      0.94x
BenchmarkEncode_8x6x8M                   5075.89       8548.39       1.68x
BenchmarkEncode_8x6x8M-4                 14559.83      15370.95      1.06x
BenchmarkEncode_8x6x8M-16                16183.37      15291.98      0.94x
BenchmarkEncode_8x7x8M                   4481.18       7015.60       1.57x
BenchmarkEncode_8x7x8M-4                 12835.35      13695.90      1.07x
BenchmarkEncode_8x7x8M-16                14246.94      13737.36      0.96x 
BenchmarkEncode_8x8x05M                  5569.95       7947.70       1.43x
BenchmarkEncode_8x8x05M-4                17334.91      25271.37      1.46x
BenchmarkEncode_8x8x05M-16               29349.42      35043.36      1.19x
BenchmarkEncode_8x8x1M                   4830.58       7891.32       1.63x
BenchmarkEncode_8x8x1M-4                 17531.36      27371.42      1.56x
BenchmarkEncode_8x8x1M-16                29593.98      39241.09      1.33x
BenchmarkEncode_8x8x8M                   3953.66       6584.26       1.67x
BenchmarkEncode_8x8x8M-4                 11527.34      12331.23      1.07x
BenchmarkEncode_8x8x8M-16                12718.89      12173.08      0.96x
BenchmarkEncode_8x8x32M                  3927.51       6195.91       1.58x
BenchmarkEncode_8x8x32M-4                11490.85      11424.39      0.99x
BenchmarkEncode_8x8x32M-16               12506.09      11888.55      0.95x

benchmark                          old MB/s     new MB/s     speedup
BenchmarkParallel_8x8x64K          5490.24      6959.57      1.27x
BenchmarkParallel_8x8x64K-4        21078.94     29557.51     1.40x
BenchmarkParallel_8x8x64K-16       57508.45     73672.54     1.28x
BenchmarkParallel_8x8x1M           4755.49      7667.84      1.61x
BenchmarkParallel_8x8x1M-4         11818.66     12013.49     1.02x
BenchmarkParallel_8x8x1M-16        12923.12     12109.42     0.94x
BenchmarkParallel_8x8x8M           3973.94      6525.85      1.64x
BenchmarkParallel_8x8x8M-4         11725.68     11312.46     0.96x
BenchmarkParallel_8x8x8M-16        12608.20     11484.98     0.91x
BenchmarkParallel_8x3x1M           14139.71     17993.04     1.27x
BenchmarkParallel_8x3x1M-4         21805.97     23053.92     1.06x
BenchmarkParallel_8x3x1M-16        24673.05     23596.71     0.96x
BenchmarkParallel_8x4x1M           10617.88     14474.54     1.36x
BenchmarkParallel_8x4x1M-4         18635.82     18965.65     1.02x
BenchmarkParallel_8x4x1M-16        21518.12     20171.47     0.94x
BenchmarkParallel_8x5x1M           8669.88      11833.96     1.36x
BenchmarkParallel_8x5x1M-4         16321.00     17500.30     1.07x
BenchmarkParallel_8x5x1M-16        17267.16     17191.04     1.00x
```
master
Klaus Post 2020-05-20 03:48:34 -07:00 committed by GitHub
parent 01b307ec91
commit 7daa20bf74
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 19565 additions and 23 deletions

View File

@ -900,3 +900,30 @@ func galExp(a byte, n int) byte {
}
return expTable[logResult]
}
func genAvx2Matrix(matrixRows [][]byte, inputs, outputs int, dst []byte) []byte {
if !avx2CodeGen {
panic("codegen not enabled")
}
total := inputs * outputs
// Duplicated in+out
wantBytes := total * 32 * 2
if cap(dst) < wantBytes {
dst = make([]byte, wantBytes)
} else {
dst = dst[:wantBytes]
}
for i, row := range matrixRows[:outputs] {
for j, idx := range row[:inputs] {
dstIdx := (j*outputs + i) * 64
lo := mulTableLow[idx][:]
hi := mulTableHigh[idx][:]
copy(dst[dstIdx:], lo)
copy(dst[dstIdx+16:], lo)
copy(dst[dstIdx+32:], hi)
copy(dst[dstIdx+48:], hi)
}
}
return dst
}

View File

@ -7,7 +7,9 @@
package reedsolomon
import "sync"
import (
"sync"
)
//go:noescape
func _galMulAVX512Parallel81(in, out [][]byte, matrix *[matrixSize81]byte, addTo bool)
@ -224,7 +226,7 @@ func galMulAVX512LastInput(inputOffset int, inputEnd int, outputOffset int, outp
// Perform the same as codeSomeShards, but taking advantage of
// AVX512 parallelism for up to 4x faster execution as compared to AVX2
func (r reedSolomon) codeSomeShardsAvx512(matrixRows, inputs, outputs [][]byte, outputCount, byteCount int) {
func (r *reedSolomon) codeSomeShardsAvx512(matrixRows, inputs, outputs [][]byte, outputCount, byteCount int) {
// Process using no goroutines
start, end := 0, r.o.perRound
if end > byteCount {
@ -271,7 +273,7 @@ func (r reedSolomon) codeSomeShardsAvx512(matrixRows, inputs, outputs [][]byte,
// Perform the same as codeSomeShards, but taking advantage of
// AVX512 parallelism for up to 4x faster execution as compared to AVX2
func (r reedSolomon) codeSomeShardsAvx512P(matrixRows, inputs, outputs [][]byte, outputCount, byteCount int) {
func (r *reedSolomon) codeSomeShardsAvx512P(matrixRows, inputs, outputs [][]byte, outputCount, byteCount int) {
var wg sync.WaitGroup
do := byteCount / r.o.maxGoroutines
if do < r.o.minSplitSize {

408
galois_gen_amd64.go Normal file
View File

@ -0,0 +1,408 @@
// Code generated by command: go run gen.go -out galois_gen_amd64.s -stubs galois_gen_amd64.go. DO NOT EDIT.
// +build !appengine
// +build !noasm
// +build !nogen
// +build gc
package reedsolomon
// mulAvxTwo_1x1 takes 1 inputs and produces 1 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_1x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_1x2 takes 1 inputs and produces 2 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_1x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_1x3 takes 1 inputs and produces 3 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_1x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_1x4 takes 1 inputs and produces 4 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_1x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_1x5 takes 1 inputs and produces 5 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_1x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_1x6 takes 1 inputs and produces 6 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_1x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_1x7 takes 1 inputs and produces 7 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_1x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_1x8 takes 1 inputs and produces 8 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_1x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_2x1 takes 2 inputs and produces 1 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_2x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_2x2 takes 2 inputs and produces 2 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_2x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_2x3 takes 2 inputs and produces 3 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_2x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_2x4 takes 2 inputs and produces 4 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_2x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_2x5 takes 2 inputs and produces 5 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_2x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_2x6 takes 2 inputs and produces 6 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_2x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_2x7 takes 2 inputs and produces 7 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_2x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_2x8 takes 2 inputs and produces 8 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_2x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_3x1 takes 3 inputs and produces 1 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_3x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_3x2 takes 3 inputs and produces 2 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_3x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_3x3 takes 3 inputs and produces 3 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_3x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_3x4 takes 3 inputs and produces 4 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_3x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_3x5 takes 3 inputs and produces 5 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_3x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_3x6 takes 3 inputs and produces 6 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_3x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_3x7 takes 3 inputs and produces 7 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_3x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_3x8 takes 3 inputs and produces 8 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_3x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_4x1 takes 4 inputs and produces 1 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_4x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_4x2 takes 4 inputs and produces 2 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_4x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_4x3 takes 4 inputs and produces 3 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_4x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_4x4 takes 4 inputs and produces 4 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_4x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_4x5 takes 4 inputs and produces 5 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_4x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_4x6 takes 4 inputs and produces 6 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_4x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_4x7 takes 4 inputs and produces 7 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_4x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_4x8 takes 4 inputs and produces 8 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_4x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_5x1 takes 5 inputs and produces 1 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_5x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_5x2 takes 5 inputs and produces 2 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_5x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_5x3 takes 5 inputs and produces 3 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_5x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_5x4 takes 5 inputs and produces 4 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_5x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_5x5 takes 5 inputs and produces 5 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_5x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_5x6 takes 5 inputs and produces 6 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_5x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_5x7 takes 5 inputs and produces 7 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_5x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_5x8 takes 5 inputs and produces 8 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_5x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_6x1 takes 6 inputs and produces 1 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_6x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_6x2 takes 6 inputs and produces 2 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_6x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_6x3 takes 6 inputs and produces 3 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_6x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_6x4 takes 6 inputs and produces 4 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_6x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_6x5 takes 6 inputs and produces 5 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_6x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_6x6 takes 6 inputs and produces 6 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_6x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_6x7 takes 6 inputs and produces 7 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_6x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_6x8 takes 6 inputs and produces 8 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_6x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_7x1 takes 7 inputs and produces 1 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_7x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_7x2 takes 7 inputs and produces 2 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_7x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_7x3 takes 7 inputs and produces 3 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_7x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_7x4 takes 7 inputs and produces 4 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_7x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_7x5 takes 7 inputs and produces 5 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_7x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_7x6 takes 7 inputs and produces 6 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_7x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_7x7 takes 7 inputs and produces 7 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_7x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_7x8 takes 7 inputs and produces 8 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_7x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_8x1 takes 8 inputs and produces 1 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_8x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_8x2 takes 8 inputs and produces 2 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_8x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_8x3 takes 8 inputs and produces 3 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_8x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_8x4 takes 8 inputs and produces 4 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_8x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_8x5 takes 8 inputs and produces 5 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_8x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_8x6 takes 8 inputs and produces 6 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_8x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_8x7 takes 8 inputs and produces 7 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_8x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_8x8 takes 8 inputs and produces 8 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_8x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_9x1 takes 9 inputs and produces 1 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_9x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_9x2 takes 9 inputs and produces 2 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_9x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_9x3 takes 9 inputs and produces 3 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_9x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_9x4 takes 9 inputs and produces 4 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_9x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_9x5 takes 9 inputs and produces 5 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_9x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_9x6 takes 9 inputs and produces 6 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_9x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_9x7 takes 9 inputs and produces 7 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_9x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_9x8 takes 9 inputs and produces 8 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_9x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_10x1 takes 10 inputs and produces 1 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_10x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_10x2 takes 10 inputs and produces 2 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_10x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_10x3 takes 10 inputs and produces 3 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_10x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_10x4 takes 10 inputs and produces 4 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_10x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_10x5 takes 10 inputs and produces 5 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_10x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_10x6 takes 10 inputs and produces 6 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_10x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_10x7 takes 10 inputs and produces 7 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_10x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
// mulAvxTwo_10x8 takes 10 inputs and produces 8 outputs.
// The output is initialized to 0.
//go:noescape
func mulAvxTwo_10x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)

18526
galois_gen_amd64.s Normal file

File diff suppressed because it is too large Load Diff

11
galois_gen_none.go Normal file
View File

@ -0,0 +1,11 @@
//+build !amd64 noasm appengine gccgo nogen
package reedsolomon
const maxAvx2Inputs = 0
const maxAvx2Outputs = 0
const avx2CodeGen = false
func galMulSlicesAvx2(matrix []byte, in, out [][]byte, start, stop int) int {
panic("avx2 codegen not available")
}

293
galois_gen_switch_amd64.go Normal file
View File

@ -0,0 +1,293 @@
// Code generated by command: go generate gen.go. DO NOT EDIT.
// +build !appengine
// +build !noasm
// +build gc
// +build !nogen
package reedsolomon
import "fmt"
const avx2CodeGen = true
const maxAvx2Inputs = 10
const maxAvx2Outputs = 8
func galMulSlicesAvx2(matrix []byte, in, out [][]byte, start, stop int) int {
n := stop - start
n = (n >> 5) << 5
switch len(in) {
case 1:
switch len(out) {
case 1:
mulAvxTwo_1x1(matrix, in, out, start, n)
return n
case 2:
mulAvxTwo_1x2(matrix, in, out, start, n)
return n
case 3:
mulAvxTwo_1x3(matrix, in, out, start, n)
return n
case 4:
mulAvxTwo_1x4(matrix, in, out, start, n)
return n
case 5:
mulAvxTwo_1x5(matrix, in, out, start, n)
return n
case 6:
mulAvxTwo_1x6(matrix, in, out, start, n)
return n
case 7:
mulAvxTwo_1x7(matrix, in, out, start, n)
return n
case 8:
mulAvxTwo_1x8(matrix, in, out, start, n)
return n
}
case 2:
switch len(out) {
case 1:
mulAvxTwo_2x1(matrix, in, out, start, n)
return n
case 2:
mulAvxTwo_2x2(matrix, in, out, start, n)
return n
case 3:
mulAvxTwo_2x3(matrix, in, out, start, n)
return n
case 4:
mulAvxTwo_2x4(matrix, in, out, start, n)
return n
case 5:
mulAvxTwo_2x5(matrix, in, out, start, n)
return n
case 6:
mulAvxTwo_2x6(matrix, in, out, start, n)
return n
case 7:
mulAvxTwo_2x7(matrix, in, out, start, n)
return n
case 8:
mulAvxTwo_2x8(matrix, in, out, start, n)
return n
}
case 3:
switch len(out) {
case 1:
mulAvxTwo_3x1(matrix, in, out, start, n)
return n
case 2:
mulAvxTwo_3x2(matrix, in, out, start, n)
return n
case 3:
mulAvxTwo_3x3(matrix, in, out, start, n)
return n
case 4:
mulAvxTwo_3x4(matrix, in, out, start, n)
return n
case 5:
mulAvxTwo_3x5(matrix, in, out, start, n)
return n
case 6:
mulAvxTwo_3x6(matrix, in, out, start, n)
return n
case 7:
mulAvxTwo_3x7(matrix, in, out, start, n)
return n
case 8:
mulAvxTwo_3x8(matrix, in, out, start, n)
return n
}
case 4:
switch len(out) {
case 1:
mulAvxTwo_4x1(matrix, in, out, start, n)
return n
case 2:
mulAvxTwo_4x2(matrix, in, out, start, n)
return n
case 3:
mulAvxTwo_4x3(matrix, in, out, start, n)
return n
case 4:
mulAvxTwo_4x4(matrix, in, out, start, n)
return n
case 5:
mulAvxTwo_4x5(matrix, in, out, start, n)
return n
case 6:
mulAvxTwo_4x6(matrix, in, out, start, n)
return n
case 7:
mulAvxTwo_4x7(matrix, in, out, start, n)
return n
case 8:
mulAvxTwo_4x8(matrix, in, out, start, n)
return n
}
case 5:
switch len(out) {
case 1:
mulAvxTwo_5x1(matrix, in, out, start, n)
return n
case 2:
mulAvxTwo_5x2(matrix, in, out, start, n)
return n
case 3:
mulAvxTwo_5x3(matrix, in, out, start, n)
return n
case 4:
mulAvxTwo_5x4(matrix, in, out, start, n)
return n
case 5:
mulAvxTwo_5x5(matrix, in, out, start, n)
return n
case 6:
mulAvxTwo_5x6(matrix, in, out, start, n)
return n
case 7:
mulAvxTwo_5x7(matrix, in, out, start, n)
return n
case 8:
mulAvxTwo_5x8(matrix, in, out, start, n)
return n
}
case 6:
switch len(out) {
case 1:
mulAvxTwo_6x1(matrix, in, out, start, n)
return n
case 2:
mulAvxTwo_6x2(matrix, in, out, start, n)
return n
case 3:
mulAvxTwo_6x3(matrix, in, out, start, n)
return n
case 4:
mulAvxTwo_6x4(matrix, in, out, start, n)
return n
case 5:
mulAvxTwo_6x5(matrix, in, out, start, n)
return n
case 6:
mulAvxTwo_6x6(matrix, in, out, start, n)
return n
case 7:
mulAvxTwo_6x7(matrix, in, out, start, n)
return n
case 8:
mulAvxTwo_6x8(matrix, in, out, start, n)
return n
}
case 7:
switch len(out) {
case 1:
mulAvxTwo_7x1(matrix, in, out, start, n)
return n
case 2:
mulAvxTwo_7x2(matrix, in, out, start, n)
return n
case 3:
mulAvxTwo_7x3(matrix, in, out, start, n)
return n
case 4:
mulAvxTwo_7x4(matrix, in, out, start, n)
return n
case 5:
mulAvxTwo_7x5(matrix, in, out, start, n)
return n
case 6:
mulAvxTwo_7x6(matrix, in, out, start, n)
return n
case 7:
mulAvxTwo_7x7(matrix, in, out, start, n)
return n
case 8:
mulAvxTwo_7x8(matrix, in, out, start, n)
return n
}
case 8:
switch len(out) {
case 1:
mulAvxTwo_8x1(matrix, in, out, start, n)
return n
case 2:
mulAvxTwo_8x2(matrix, in, out, start, n)
return n
case 3:
mulAvxTwo_8x3(matrix, in, out, start, n)
return n
case 4:
mulAvxTwo_8x4(matrix, in, out, start, n)
return n
case 5:
mulAvxTwo_8x5(matrix, in, out, start, n)
return n
case 6:
mulAvxTwo_8x6(matrix, in, out, start, n)
return n
case 7:
mulAvxTwo_8x7(matrix, in, out, start, n)
return n
case 8:
mulAvxTwo_8x8(matrix, in, out, start, n)
return n
}
case 9:
switch len(out) {
case 1:
mulAvxTwo_9x1(matrix, in, out, start, n)
return n
case 2:
mulAvxTwo_9x2(matrix, in, out, start, n)
return n
case 3:
mulAvxTwo_9x3(matrix, in, out, start, n)
return n
case 4:
mulAvxTwo_9x4(matrix, in, out, start, n)
return n
case 5:
mulAvxTwo_9x5(matrix, in, out, start, n)
return n
case 6:
mulAvxTwo_9x6(matrix, in, out, start, n)
return n
case 7:
mulAvxTwo_9x7(matrix, in, out, start, n)
return n
case 8:
mulAvxTwo_9x8(matrix, in, out, start, n)
return n
}
case 10:
switch len(out) {
case 1:
mulAvxTwo_10x1(matrix, in, out, start, n)
return n
case 2:
mulAvxTwo_10x2(matrix, in, out, start, n)
return n
case 3:
mulAvxTwo_10x3(matrix, in, out, start, n)
return n
case 4:
mulAvxTwo_10x4(matrix, in, out, start, n)
return n
case 5:
mulAvxTwo_10x5(matrix, in, out, start, n)
return n
case 6:
mulAvxTwo_10x6(matrix, in, out, start, n)
return n
case 7:
mulAvxTwo_10x7(matrix, in, out, start, n)
return n
case 8:
mulAvxTwo_10x8(matrix, in, out, start, n)
return n
}
}
panic(fmt.Sprintf("unhandled size: %dx%d", len(in), len(out)))
}

View File

@ -4,10 +4,10 @@
package reedsolomon
func (r reedSolomon) codeSomeShardsAvx512(matrixRows, inputs, outputs [][]byte, outputCount, byteCount int) {
func (r *reedSolomon) codeSomeShardsAvx512(matrixRows, inputs, outputs [][]byte, outputCount, byteCount int) {
panic("codeSomeShardsAvx512 should not be called if built without asm")
}
func (r reedSolomon) codeSomeShardsAvx512P(matrixRows, inputs, outputs [][]byte, outputCount, byteCount int) {
func (r *reedSolomon) codeSomeShardsAvx512P(matrixRows, inputs, outputs [][]byte, outputCount, byteCount int) {
panic("codeSomeShardsAvx512P should not be called if built without asm")
}

249
gen.go Normal file
View File

@ -0,0 +1,249 @@
//+build generate
//go:generate go run gen.go -out galois_gen_amd64.s -stubs galois_gen_amd64.go
//go:generate gofmt -w galois_gen_switch_amd64.go
package main
import (
"bufio"
"fmt"
"os"
. "github.com/mmcloughlin/avo/build"
"github.com/mmcloughlin/avo/buildtags"
. "github.com/mmcloughlin/avo/operand"
"github.com/mmcloughlin/avo/reg"
)
// Technically we can do slightly bigger, but we stay reasonable.
const inputMax = 10
const outputMax = 8
var switchDefs [inputMax][outputMax]string
var switchDefsX [inputMax][outputMax]string
const perLoopBits = 5
const perLoop = 1 << perLoopBits
func main() {
Constraint(buildtags.Not("appengine").ToConstraint())
Constraint(buildtags.Not("noasm").ToConstraint())
Constraint(buildtags.Not("nogen").ToConstraint())
Constraint(buildtags.Term("gc").ToConstraint())
for i := 1; i <= inputMax; i++ {
for j := 1; j <= outputMax; j++ {
//genMulAvx2(fmt.Sprintf("mulAvxTwoXor_%dx%d", i, j), i, j, true)
genMulAvx2(fmt.Sprintf("mulAvxTwo_%dx%d", i, j), i, j, false)
}
}
f, err := os.Create("galois_gen_switch_amd64.go")
if err != nil {
panic(err)
}
defer f.Close()
w := bufio.NewWriter(f)
defer w.Flush()
w.WriteString(`// Code generated by command: go generate ` + os.Getenv("GOFILE") + `. DO NOT EDIT.
// +build !appengine
// +build !noasm
// +build gc
// +build !nogen
package reedsolomon
import "fmt"
`)
w.WriteString("const avx2CodeGen = true\n")
w.WriteString(fmt.Sprintf("const maxAvx2Inputs = %d\nconst maxAvx2Outputs = %d\n", inputMax, outputMax))
w.WriteString(`
func galMulSlicesAvx2(matrix []byte, in, out [][]byte, start, stop int) int {
n := stop-start
`)
w.WriteString(fmt.Sprintf("n = (n>>%d)<<%d\n\n", perLoopBits, perLoopBits))
w.WriteString(`switch len(in) {
`)
for in, defs := range switchDefs[:] {
w.WriteString(fmt.Sprintf(" case %d:\n switch len(out) {\n", in+1))
for out, def := range defs[:] {
w.WriteString(fmt.Sprintf(" case %d:\n", out+1))
w.WriteString(def)
}
w.WriteString("}\n")
}
w.WriteString(`}
panic(fmt.Sprintf("unhandled size: %dx%d", len(in), len(out)))
}
`)
Generate()
}
func genMulAvx2(name string, inputs int, outputs int, xor bool) {
total := inputs * outputs
doc := []string{
fmt.Sprintf("%s takes %d inputs and produces %d outputs.", name, inputs, outputs),
}
if !xor {
doc = append(doc, "The output is initialized to 0.")
}
// Load shuffle masks on every use.
var loadNone bool
// Use registers for destination registers.
var regDst = true
// lo, hi, 1 in, 1 out, 2 tmp, 1 mask
est := total*2 + outputs + 5
if outputs == 1 {
// We don't need to keep a copy of the input if only 1 output.
est -= 2
}
if est > 16 {
loadNone = true
// We run out of GP registers first, now.
if inputs+outputs > 12 {
regDst = false
}
}
TEXT(name, 0, fmt.Sprintf("func(matrix []byte, in [][]byte, out [][]byte, start, n int)"))
// SWITCH DEFINITION:
s := fmt.Sprintf(" mulAvxTwo_%dx%d(matrix, in, out, start, n)\n", inputs, outputs)
s += fmt.Sprintf("\t\t\t\treturn n\n")
switchDefs[inputs-1][outputs-1] = s
if loadNone {
Comment("Loading no tables to registers")
} else {
// loadNone == false
Comment("Loading all tables to registers")
}
Doc(doc...)
Pragma("noescape")
Commentf("Full registers estimated %d YMM used", est)
length := Load(Param("n"), GP64())
matrixBase := GP64()
MOVQ(Param("matrix").Base().MustAddr(), matrixBase)
SHRQ(U8(perLoopBits), length)
TESTQ(length, length)
JZ(LabelRef(name + "_end"))
dst := make([]reg.VecVirtual, outputs)
dstPtr := make([]reg.GPVirtual, outputs)
outBase := Param("out").Base().MustAddr()
outSlicePtr := GP64()
MOVQ(outBase, outSlicePtr)
for i := range dst {
dst[i] = YMM()
if !regDst {
continue
}
ptr := GP64()
MOVQ(Mem{Base: outSlicePtr, Disp: i * 24}, ptr)
dstPtr[i] = ptr
}
inLo := make([]reg.VecVirtual, total)
inHi := make([]reg.VecVirtual, total)
for i := range inLo {
if loadNone {
break
}
tableLo := YMM()
tableHi := YMM()
VMOVDQU(Mem{Base: matrixBase, Disp: i * 64}, tableLo)
VMOVDQU(Mem{Base: matrixBase, Disp: i*64 + 32}, tableHi)
inLo[i] = tableLo
inHi[i] = tableHi
}
inPtrs := make([]reg.GPVirtual, inputs)
inSlicePtr := GP64()
MOVQ(Param("in").Base().MustAddr(), inSlicePtr)
for i := range inPtrs {
ptr := GP64()
MOVQ(Mem{Base: inSlicePtr, Disp: i * 24}, ptr)
inPtrs[i] = ptr
}
tmpMask := GP64()
MOVQ(U32(15), tmpMask)
lowMask := YMM()
MOVQ(tmpMask, lowMask.AsX())
VPBROADCASTB(lowMask.AsX(), lowMask)
offset := GP64()
MOVQ(Param("start").MustAddr(), offset)
Label(name + "_loop")
if xor {
Commentf("Load %d outputs", outputs)
} else {
Commentf("Clear %d outputs", outputs)
}
for i := range dst {
if xor {
if regDst {
VMOVDQU(Mem{Base: dstPtr[i], Index: offset, Scale: 1}, dst[i])
continue
}
ptr := GP64()
MOVQ(outBase, ptr)
VMOVDQU(Mem{Base: ptr, Index: offset, Scale: 1}, dst[i])
} else {
VPXOR(dst[i], dst[i], dst[i])
}
}
lookLow, lookHigh := YMM(), YMM()
inLow, inHigh := YMM(), YMM()
for i := range inPtrs {
Commentf("Load and process 32 bytes from input %d to %d outputs", i, outputs)
VMOVDQU(Mem{Base: inPtrs[i], Index: offset, Scale: 1}, inLow)
VPSRLQ(U8(4), inLow, inHigh)
VPAND(lowMask, inLow, inLow)
VPAND(lowMask, inHigh, inHigh)
for j := range dst {
if loadNone {
VMOVDQU(Mem{Base: matrixBase, Disp: 64 * (i*outputs + j)}, lookLow)
VMOVDQU(Mem{Base: matrixBase, Disp: 32 + 64*(i*outputs+j)}, lookHigh)
VPSHUFB(inLow, lookLow, lookLow)
VPSHUFB(inHigh, lookHigh, lookHigh)
} else {
VPSHUFB(inLow, inLo[i*outputs+j], lookLow)
VPSHUFB(inHigh, inHi[i*outputs+j], lookHigh)
}
VPXOR(lookLow, lookHigh, lookLow)
VPXOR(lookLow, dst[j], dst[j])
}
}
Commentf("Store %d outputs", outputs)
for i := range dst {
if regDst {
VMOVDQU(dst[i], Mem{Base: dstPtr[i], Index: offset, Scale: 1})
continue
}
ptr := GP64()
MOVQ(Mem{Base: outSlicePtr, Disp: i * 24}, ptr)
VMOVDQU(dst[i], Mem{Base: ptr, Index: offset, Scale: 1})
}
Comment("Prepare for next loop")
ADDQ(U8(perLoop), offset)
DECQ(length)
JNZ(LabelRef(name + "_loop"))
VZEROUPPER()
Label(name + "_end")
RET()
}

4
go.mod
View File

@ -2,4 +2,6 @@ module github.com/klauspost/reedsolomon
go 1.14
require github.com/klauspost/cpuid v1.2.4
require (
github.com/klauspost/cpuid v1.2.4
)

2
go.sum
View File

@ -1,4 +1,2 @@
github.com/klauspost/cpuid v1.2.3 h1:CCtW0xUnWGVINKvE/WWOYKdsPV6mawAtvQuSl8guwQs=
github.com/klauspost/cpuid v1.2.3/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
github.com/klauspost/cpuid v1.2.4 h1:EBfaK0SWSwk+fgk6efYFWdzl8MwRWoOO1gkmiaTXPW4=
github.com/klauspost/cpuid v1.2.4/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=

View File

@ -113,6 +113,7 @@ type reedSolomon struct {
tree inversionTree
parity [][]byte
o options
mPool sync.Pool
}
// ErrInvShardNum will be returned by New, if you attempt to create
@ -339,6 +340,11 @@ func New(dataShards, parityShards int, opts ...Option) (Encoder, error) {
r.parity[i] = r.m[dataShards+i]
}
if avx2CodeGen && r.o.useAVX2 {
r.mPool.New = func() interface{} {
return make([]byte, r.Shards*2*32)
}
}
return &r, err
}
@ -353,7 +359,7 @@ var ErrTooFewShards = errors.New("too few shards given")
// Each shard is a byte array, and they must all be the same size.
// The parity shards will always be overwritten and the data shards
// will remain the same.
func (r reedSolomon) Encode(shards [][]byte) error {
func (r *reedSolomon) Encode(shards [][]byte) error {
if len(shards) != r.Shards {
return ErrTooFewShards
}
@ -374,7 +380,7 @@ func (r reedSolomon) Encode(shards [][]byte) error {
// ErrInvalidInput is returned if invalid input parameter of Update.
var ErrInvalidInput = errors.New("invalid input")
func (r reedSolomon) Update(shards [][]byte, newDatashards [][]byte) error {
func (r *reedSolomon) Update(shards [][]byte, newDatashards [][]byte) error {
if len(shards) != r.Shards {
return ErrTooFewShards
}
@ -414,7 +420,7 @@ func (r reedSolomon) Update(shards [][]byte, newDatashards [][]byte) error {
return nil
}
func (r reedSolomon) updateParityShards(matrixRows, oldinputs, newinputs, outputs [][]byte, outputCount, byteCount int) {
func (r *reedSolomon) updateParityShards(matrixRows, oldinputs, newinputs, outputs [][]byte, outputCount, byteCount int) {
if r.o.maxGoroutines > 1 && byteCount > r.o.minSplitSize {
r.updateParityShardsP(matrixRows, oldinputs, newinputs, outputs, outputCount, byteCount)
return
@ -434,7 +440,7 @@ func (r reedSolomon) updateParityShards(matrixRows, oldinputs, newinputs, output
}
}
func (r reedSolomon) updateParityShardsP(matrixRows, oldinputs, newinputs, outputs [][]byte, outputCount, byteCount int) {
func (r *reedSolomon) updateParityShardsP(matrixRows, oldinputs, newinputs, outputs [][]byte, outputCount, byteCount int) {
var wg sync.WaitGroup
do := byteCount / r.o.maxGoroutines
if do < r.o.minSplitSize {
@ -468,7 +474,7 @@ func (r reedSolomon) updateParityShardsP(matrixRows, oldinputs, newinputs, outpu
// Verify returns true if the parity shards contain the right data.
// The data is the same format as Encode. No data is modified.
func (r reedSolomon) Verify(shards [][]byte) (bool, error) {
func (r *reedSolomon) Verify(shards [][]byte) (bool, error) {
if len(shards) != r.Shards {
return false, ErrTooFewShards
}
@ -493,7 +499,10 @@ func (r reedSolomon) Verify(shards [][]byte) (bool, error) {
// The number of outputs computed, and the
// number of matrix rows used, is determined by
// outputCount, which is the number of outputs to compute.
func (r reedSolomon) codeSomeShards(matrixRows, inputs, outputs [][]byte, outputCount, byteCount int) {
func (r *reedSolomon) codeSomeShards(matrixRows, inputs, outputs [][]byte, outputCount, byteCount int) {
if len(outputs) == 0 {
return
}
switch {
case r.o.useAVX512 && r.o.maxGoroutines > 1 && byteCount > r.o.minSplitSize && len(inputs) >= 4 && len(outputs) >= 2:
r.codeSomeShardsAvx512P(matrixRows, inputs, outputs, outputCount, byteCount)
@ -511,6 +520,13 @@ func (r reedSolomon) codeSomeShards(matrixRows, inputs, outputs [][]byte, output
if end > len(inputs[0]) {
end = len(inputs[0])
}
if avx2CodeGen && r.o.useAVX2 && byteCount >= 32 && len(inputs) > 1 && len(outputs) > 1 && len(inputs) <= maxAvx2Inputs && len(outputs) <= maxAvx2Outputs {
m := genAvx2Matrix(matrixRows, len(inputs), len(outputs), r.mPool.Get().([]byte))
start += galMulSlicesAvx2(m, inputs, outputs, 0, byteCount)
r.mPool.Put(m)
end = len(inputs[0])
}
for start < len(inputs[0]) {
for c := 0; c < r.DataShards; c++ {
in := inputs[c][start:end]
@ -532,7 +548,7 @@ func (r reedSolomon) codeSomeShards(matrixRows, inputs, outputs [][]byte, output
// Perform the same as codeSomeShards, but split the workload into
// several goroutines.
func (r reedSolomon) codeSomeShardsP(matrixRows, inputs, outputs [][]byte, outputCount, byteCount int) {
func (r *reedSolomon) codeSomeShardsP(matrixRows, inputs, outputs [][]byte, outputCount, byteCount int) {
var wg sync.WaitGroup
do := byteCount / r.o.maxGoroutines
if do < r.o.minSplitSize {
@ -541,6 +557,11 @@ func (r reedSolomon) codeSomeShardsP(matrixRows, inputs, outputs [][]byte, outpu
// Make sizes divisible by 64
do = (do + 63) & (^63)
start := 0
var avx2Matrix []byte
if avx2CodeGen && r.o.useAVX2 && byteCount >= 32 && len(inputs) > 1 && len(outputs) > 1 && len(inputs) <= maxAvx2Inputs && len(outputs) <= maxAvx2Outputs {
avx2Matrix = genAvx2Matrix(matrixRows, len(inputs), len(outputs), r.mPool.Get().([]byte))
defer r.mPool.Put(avx2Matrix)
}
for start < byteCount {
if start+do > byteCount {
do = byteCount - start
@ -548,6 +569,10 @@ func (r reedSolomon) codeSomeShardsP(matrixRows, inputs, outputs [][]byte, outpu
wg.Add(1)
go func(start, stop int) {
if avx2CodeGen && r.o.useAVX2 && stop-start >= 32 && len(inputs) > 1 && len(outputs) > 1 && len(inputs) <= maxAvx2Inputs && len(outputs) <= maxAvx2Outputs {
start += galMulSlicesAvx2(avx2Matrix, inputs, outputs, start, stop)
}
lstart, lstop := start, start+r.o.perRound
if lstop > stop {
lstop = stop
@ -579,7 +604,7 @@ func (r reedSolomon) codeSomeShardsP(matrixRows, inputs, outputs [][]byte, outpu
// checkSomeShards is mostly the same as codeSomeShards,
// except this will check values and return
// as soon as a difference is found.
func (r reedSolomon) checkSomeShards(matrixRows, inputs, toCheck [][]byte, outputCount, byteCount int) bool {
func (r *reedSolomon) checkSomeShards(matrixRows, inputs, toCheck [][]byte, outputCount, byteCount int) bool {
if r.o.maxGoroutines > 1 && byteCount > r.o.minSplitSize {
return r.checkSomeShardsP(matrixRows, inputs, toCheck, outputCount, byteCount)
}
@ -602,7 +627,7 @@ func (r reedSolomon) checkSomeShards(matrixRows, inputs, toCheck [][]byte, outpu
return true
}
func (r reedSolomon) checkSomeShardsP(matrixRows, inputs, toCheck [][]byte, outputCount, byteCount int) bool {
func (r *reedSolomon) checkSomeShardsP(matrixRows, inputs, toCheck [][]byte, outputCount, byteCount int) bool {
same := true
var mu sync.RWMutex // For above
@ -706,7 +731,7 @@ func shardSize(shards [][]byte) int {
//
// The reconstructed shard set is complete, but integrity is not verified.
// Use the Verify function to check if data set is ok.
func (r reedSolomon) Reconstruct(shards [][]byte) error {
func (r *reedSolomon) Reconstruct(shards [][]byte) error {
return r.reconstruct(shards, false)
}
@ -725,7 +750,7 @@ func (r reedSolomon) Reconstruct(shards [][]byte) error {
//
// As the reconstructed shard set may contain missing parity shards,
// calling the Verify function is likely to fail.
func (r reedSolomon) ReconstructData(shards [][]byte) error {
func (r *reedSolomon) ReconstructData(shards [][]byte) error {
return r.reconstruct(shards, true)
}
@ -737,7 +762,7 @@ func (r reedSolomon) ReconstructData(shards [][]byte) error {
//
// If there are too few shards to reconstruct the missing
// ones, ErrTooFewShards will be returned.
func (r reedSolomon) reconstruct(shards [][]byte, dataOnly bool) error {
func (r *reedSolomon) reconstruct(shards [][]byte, dataOnly bool) error {
if len(shards) != r.Shards {
return ErrTooFewShards
}
@ -896,7 +921,7 @@ var ErrShortData = errors.New("not enough data to fill the number of requested s
//
// The data will not be copied, except for the last shard, so you
// should not modify the data of the input slice afterwards.
func (r reedSolomon) Split(data []byte) ([][]byte, error) {
func (r *reedSolomon) Split(data []byte) ([][]byte, error) {
if len(data) == 0 {
return nil, ErrShortData
}
@ -945,7 +970,7 @@ var ErrReconstructRequired = errors.New("reconstruction required as one or more
// If there are to few shards given, ErrTooFewShards will be returned.
// If the total data size is less than outSize, ErrShortData will be returned.
// If one or more required data shards are nil, ErrReconstructRequired will be returned.
func (r reedSolomon) Join(dst io.Writer, shards [][]byte, outSize int) error {
func (r *reedSolomon) Join(dst io.Writer, shards [][]byte, outSize int) error {
// Do we have enough shards?
if len(shards) < r.DataShards {
return ErrTooFewShards

View File

@ -180,7 +180,7 @@ func TestEncoding(t *testing.T) {
// matrix sizes to test.
// note that par1 matric will fail on some combinations.
var testSizes = [][2]int{{1, 1}, {1, 2}, {3, 3}, {3, 1}, {5, 3}, {8, 4}, {10, 30}, {14, 7}, {41, 17}, {49, 1}}
var testSizes = [][2]int{{1, 1}, {1, 2}, {3, 3}, {3, 1}, {5, 3}, {8, 4}, {10, 30}, {12, 10}, {14, 7}, {41, 17}, {49, 1}}
var testDataSizes = []int{10, 100, 1000, 10001, 100003, 1000055}
var testDataSizesShort = []int{10, 10001, 100003}
@ -1546,6 +1546,7 @@ func benchmarkParallel(b *testing.B, dataShards, parityShards, shardSize int) {
})
}
func BenchmarkParallel_8x8x64K(b *testing.B) { benchmarkParallel(b, 8, 8, 64<<10) }
func BenchmarkParallel_8x8x05M(b *testing.B) { benchmarkParallel(b, 8, 8, 512<<10) }
func BenchmarkParallel_20x10x05M(b *testing.B) { benchmarkParallel(b, 20, 10, 512<<10) }
func BenchmarkParallel_8x8x1M(b *testing.B) { benchmarkParallel(b, 8, 8, 1<<20) }