Add command-line args to disable asm for tests. (#116)

* Add command-line test args
2020-04-22 15:38:21 +02:00 · 2020-04-22 15:38:21 +02:00 · d2cfcb8065
parent 0abe9de20c
commit d2cfcb8065
6 changed files with 88 additions and 92 deletions
--- a/galois.go
+++ b/galois.go
@ -852,9 +852,6 @@ func galMultiply(a, b byte) byte {
 	return mulTable[a][b]
 }

-// amd64 indicates whether we are on an amd64 platform.
-var amd64 bool
-
 // Original function:
 /*
 // galMultiply multiplies to elements of the field.
--- a/galoisAvx512_amd64.go
+++ b/galoisAvx512_amd64.go
@ -13,10 +13,6 @@ func _galMulAVX512Parallel82(in, out [][]byte, matrix *[matrixSize82]byte, addTo
 //go:noescape
 func _galMulAVX512Parallel84(in, out [][]byte, matrix *[matrixSize84]byte, addTo bool)

-func init() {
-	amd64 = true
-}
-
 const (
 	dimIn        = 8                            // Number of input rows processed simultaneously
 	dimOut82     = 2                            // Number of output rows processed simultaneously for x2 routine
--- a/galoisAvx512_amd64_test.go
+++ b/galoisAvx512_amd64_test.go
@ -17,7 +17,7 @@ import (
 func testGaloisAvx512Parallelx2(t *testing.T, inputSize int) {

 	if !defaultOptions.useAVX512 {
-		return
+		t.Skip("AVX512 not detected")
 	}

 	rand.Seed(time.Now().UnixNano())
@ -116,7 +116,7 @@ func TestGaloisAvx512Parallel82(t *testing.T) { testGaloisAvx512Parallelx2(t, 8)
 func testGaloisAvx512Parallelx4(t *testing.T, inputSize int) {

 	if !defaultOptions.useAVX512 {
-		return
+		t.Skip("AVX512 not detected")
 	}

 	rand.Seed(time.Now().UnixNano())
@ -221,7 +221,7 @@ func TestGaloisAvx512Parallel84(t *testing.T) { testGaloisAvx512Parallelx4(t, 8)
 func testCodeSomeShardsAvx512WithLength(t *testing.T, ds, ps, l int) {

 	if !defaultOptions.useAVX512 {
-		return
+		t.Skip("AVX512 not detected")
 	}

 	var data = make([]byte, l)
@ -246,7 +246,7 @@ func testCodeSomeShardsAvx512WithLength(t *testing.T, ds, ps, l int) {
 func testCodeSomeShardsAvx512(t *testing.T, ds, ps int) {

 	if !defaultOptions.useAVX512 {
-		return
+		t.Skip("AVX512 not detected")
 	}
 	step := 1
 	if testing.Short() {
@ -311,54 +311,3 @@ func TestCodeSomeShardsAvx512_ManyxMany(t *testing.T) {
 		}
 	}
 }
-
-func benchmarkAvx512Encode(b *testing.B, dataShards, parityShards, shardSize int) {
-
-	if !defaultOptions.useAVX512 {
-		return
-	}
-
-	enc, err := New(dataShards, parityShards)
-	if err != nil {
-		b.Fatal(err)
-	}
-	r := enc.(*reedSolomon) // need to access private methods
-	shards := make([][]byte, dataShards+parityShards)
-	for s := range shards {
-		shards[s] = make([]byte, shardSize)
-	}
-
-	rand.Seed(0)
-	for s := 0; s < dataShards; s++ {
-		fillRandom(shards[s])
-	}
-
-	b.SetBytes(int64(shardSize * dataShards))
-	b.ResetTimer()
-	for i := 0; i < b.N; i++ {
-		// Do the coding.
-		r.codeSomeShardsAvx512(r.parity, shards[0:r.DataShards], shards[r.DataShards:], r.ParityShards, len(shards[0]))
-	}
-}
-
-// Benchmark various combination of data shards and parity shards for AVX512 accelerated code
-func BenchmarkEncodeAvx512_8x4x8M(b *testing.B)   { benchmarkAvx512Encode(b, 8, 4, 8*1024*1024) }
-func BenchmarkEncodeAvx512_12x4x12M(b *testing.B) { benchmarkAvx512Encode(b, 12, 4, 12*1024*1024) }
-func BenchmarkEncodeAvx512_16x4x16M(b *testing.B) { benchmarkAvx512Encode(b, 16, 4, 16*1024*1024) }
-func BenchmarkEncodeAvx512_16x4x32M(b *testing.B) { benchmarkAvx512Encode(b, 16, 4, 32*1024*1024) }
-func BenchmarkEncodeAvx512_16x4x64M(b *testing.B) { benchmarkAvx512Encode(b, 16, 4, 64*1024*1024) }
-
-func BenchmarkEncodeAvx512_8x5x8M(b *testing.B)  { benchmarkAvx512Encode(b, 8, 5, 8*1024*1024) }
-func BenchmarkEncodeAvx512_8x6x8M(b *testing.B)  { benchmarkAvx512Encode(b, 8, 6, 8*1024*1024) }
-func BenchmarkEncodeAvx512_8x7x8M(b *testing.B)  { benchmarkAvx512Encode(b, 8, 7, 8*1024*1024) }
-func BenchmarkEncodeAvx512_8x9x8M(b *testing.B)  { benchmarkAvx512Encode(b, 8, 9, 8*1024*1024) }
-func BenchmarkEncodeAvx512_8x10x8M(b *testing.B) { benchmarkAvx512Encode(b, 8, 10, 8*1024*1024) }
-func BenchmarkEncodeAvx512_8x11x8M(b *testing.B) { benchmarkAvx512Encode(b, 8, 11, 8*1024*1024) }
-
-func BenchmarkEncodeAvx512_8x8x05M(b *testing.B) { benchmarkAvx512Encode(b, 8, 8, 1*1024*1024/2) }
-func BenchmarkEncodeAvx512_8x8x1M(b *testing.B)  { benchmarkAvx512Encode(b, 8, 8, 1*1024*1024) }
-func BenchmarkEncodeAvx512_8x8x8M(b *testing.B)  { benchmarkAvx512Encode(b, 8, 8, 8*1024*1024) }
-func BenchmarkEncodeAvx512_8x8x32M(b *testing.B) { benchmarkAvx512Encode(b, 8, 8, 32*1024*1024) }
-
-func BenchmarkEncodeAvx512_24x8x24M(b *testing.B) { benchmarkAvx512Encode(b, 24, 8, 24*1024*1024) }
-func BenchmarkEncodeAvx512_24x8x48M(b *testing.B) { benchmarkAvx512Encode(b, 24, 8, 48*1024*1024) }
--- a/galois_noasm.go
+++ b/galois_noasm.go
@ -29,6 +29,10 @@ func sliceXor(in, out []byte, sse2 bool) {
 	}
 }

+func init() {
+	defaultOptions.useAVX512 = false
+}
+
 func (r reedSolomon) codeSomeShardsAvx512(matrixRows, inputs, outputs [][]byte, outputCount, byteCount int) {
 	panic("unreachable")
 }
--- a/options.go
+++ b/options.go
@ -21,17 +21,18 @@ type options struct {
 var defaultOptions = options{
 	maxGoroutines: 384,
 	minSplitSize:  1024,
+
+	// Detect CPU capabilities.
+	useSSSE3:  cpuid.CPU.SSSE3(),
+	useSSE2:   cpuid.CPU.SSE2(),
+	useAVX2:   cpuid.CPU.AVX2(),
+	useAVX512: cpuid.CPU.AVX512F() && cpuid.CPU.AVX512BW(),
 }

 func init() {
 	if runtime.GOMAXPROCS(0) <= 1 {
 		defaultOptions.maxGoroutines = 1
 	}
-	// Detect CPU capabilities.
-	defaultOptions.useSSSE3 = cpuid.CPU.SSSE3()
-	defaultOptions.useSSE2 = cpuid.CPU.SSE2()
-	defaultOptions.useAVX2 = cpuid.CPU.AVX2()
-	defaultOptions.useAVX512 = cpuid.CPU.AVX512F() && cpuid.CPU.AVX512BW() && amd64
 }

 // WithMaxGoroutines is the maximum number of goroutines number for encoding & decoding.
@ -71,7 +72,7 @@ func WithMinSplitSize(n int) Option {
 	}
 }

-func withSSE3(enabled bool) Option {
+func withSSSE3(enabled bool) Option {
 	return func(o *options) {
 		o.useSSSE3 = enabled
 	}
--- a/reedsolomon_test.go
+++ b/reedsolomon_test.go
@ -9,13 +9,41 @@ package reedsolomon

 import (
 	"bytes"
+	"flag"
 	"fmt"
 	"math/rand"
+	"os"
 	"runtime"
 	"sync"
 	"testing"
 )

+var noSSE2 = flag.Bool("no-sse2", !defaultOptions.useSSE2, "Disable SSE2")
+var noSSSE3 = flag.Bool("no-ssse3", !defaultOptions.useSSSE3, "Disable SSSE3")
+var noAVX2 = flag.Bool("no-avx2", !defaultOptions.useAVX2, "Disable AVX2")
+var noAVX512 = flag.Bool("no-avx512", !defaultOptions.useAVX512, "Disable AVX512")
+
+func TestMain(m *testing.M) {
+	flag.Parse()
+	os.Exit(m.Run())
+}
+
+func testOptions(o ...Option) []Option {
+	if *noSSSE3 {
+		o = append(o, withSSSE3(false))
+	}
+	if *noSSE2 {
+		o = append(o, withSSE2(false))
+	}
+	if *noAVX2 {
+		o = append(o, withAVX2(false))
+	}
+	if *noAVX512 {
+		o = append(o, withAVX512(false))
+	}
+	return o
+}
+
 func isIncreasingAndContainsDataRow(indices []int) bool {
 	cols := len(indices)
 	for i := 0; i < cols-1; i++ {
@ -109,17 +137,17 @@ func testOpts() [][]Option {
 	}
 	opts := [][]Option{
 		{WithPAR1Matrix()}, {WithCauchyMatrix()},
-		{WithMaxGoroutines(1), WithMinSplitSize(500), withSSE3(false), withAVX2(false)},
-		{WithMaxGoroutines(5000), WithMinSplitSize(50), withSSE3(false), withAVX2(false)},
-		{WithMaxGoroutines(5000), WithMinSplitSize(500000), withSSE3(false), withAVX2(false)},
-		{WithMaxGoroutines(1), WithMinSplitSize(500000), withSSE3(false), withAVX2(false)},
+		{WithMaxGoroutines(1), WithMinSplitSize(500), withSSSE3(false), withAVX2(false)},
+		{WithMaxGoroutines(5000), WithMinSplitSize(50), withSSSE3(false), withAVX2(false)},
+		{WithMaxGoroutines(5000), WithMinSplitSize(500000), withSSSE3(false), withAVX2(false)},
+		{WithMaxGoroutines(1), WithMinSplitSize(500000), withSSSE3(false), withAVX2(false)},
 		{WithAutoGoroutines(50000), WithMinSplitSize(500)},
 	}
 	for _, o := range opts[:] {
 		if defaultOptions.useSSSE3 {
 			n := make([]Option, len(o), len(o)+1)
 			copy(n, o)
-			n = append(n, withSSE3(true))
+			n = append(n, withSSSE3(true))
 			opts = append(opts, n)
 		}
 		if defaultOptions.useAVX2 {
@ -143,7 +171,7 @@ func TestEncoding(t *testing.T) {

 func testEncoding(t *testing.T, o ...Option) {
 	perShard := 50000
-	r, err := New(10, 3, o...)
+	r, err := New(10, 3, testOptions(o...)...)
 	if err != nil {
 		t.Fatal(err)
 	}
@ -193,7 +221,7 @@ func TestUpdate(t *testing.T) {

 func testUpdate(t *testing.T, o ...Option) {
 	perShard := 50000
-	r, err := New(10, 3, o...)
+	r, err := New(10, 3, testOptions(o...)...)
 	if err != nil {
 		t.Fatal(err)
 	}
@ -297,7 +325,7 @@ func TestReconstruct(t *testing.T) {

 func testReconstruct(t *testing.T, o ...Option) {
 	perShard := 50000
-	r, err := New(10, 3, o...)
+	r, err := New(10, 3, testOptions(o...)...)
 	if err != nil {
 		t.Fatal(err)
 	}
@ -378,7 +406,7 @@ func TestReconstructData(t *testing.T) {

 func testReconstructData(t *testing.T, o ...Option) {
 	perShard := 100000
-	r, err := New(8, 5, o...)
+	r, err := New(8, 5, testOptions(o...)...)
 	if err != nil {
 		t.Fatal(err)
 	}
@ -486,7 +514,7 @@ func testReconstructData(t *testing.T, o ...Option) {

 func TestReconstructPAR1Singular(t *testing.T) {
 	perShard := 50
-	r, err := New(4, 4, WithPAR1Matrix())
+	r, err := New(4, 4, testOptions(WithPAR1Matrix())...)
 	if err != nil {
 		t.Fatal(err)
 	}
@ -532,7 +560,7 @@ func TestVerify(t *testing.T) {

 func testVerify(t *testing.T, o ...Option) {
 	perShard := 33333
-	r, err := New(10, 4, o...)
+	r, err := New(10, 4, testOptions(o...)...)
 	if err != nil {
 		t.Fatal(err)
 	}
@ -594,7 +622,7 @@ func testVerify(t *testing.T, o ...Option) {
 }

 func TestOneEncode(t *testing.T) {
-	codec, err := New(5, 5)
+	codec, err := New(5, 5, testOptions()...)
 	if err != nil {
 		t.Fatal(err)
 	}
@ -656,7 +684,7 @@ func fillRandom(p []byte) {
 }

 func benchmarkEncode(b *testing.B, dataShards, parityShards, shardSize int) {
-	r, err := New(dataShards, parityShards, WithAutoGoroutines(shardSize))
+	r, err := New(dataShards, parityShards, testOptions(WithAutoGoroutines(shardSize))...)
 	if err != nil {
 		b.Fatal(err)
 	}
@ -722,8 +750,29 @@ func BenchmarkEncode17x3x16M(b *testing.B) {
 	benchmarkEncode(b, 17, 3, 16*1024*1024)
 }

+func BenchmarkEncode_8x4x8M(b *testing.B)   { benchmarkEncode(b, 8, 4, 8*1024*1024) }
+func BenchmarkEncode_12x4x12M(b *testing.B) { benchmarkEncode(b, 12, 4, 12*1024*1024) }
+func BenchmarkEncode_16x4x16M(b *testing.B) { benchmarkEncode(b, 16, 4, 16*1024*1024) }
+func BenchmarkEncode_16x4x32M(b *testing.B) { benchmarkEncode(b, 16, 4, 32*1024*1024) }
+func BenchmarkEncode_16x4x64M(b *testing.B) { benchmarkEncode(b, 16, 4, 64*1024*1024) }
+
+func BenchmarkEncode_8x5x8M(b *testing.B)  { benchmarkEncode(b, 8, 5, 8*1024*1024) }
+func BenchmarkEncode_8x6x8M(b *testing.B)  { benchmarkEncode(b, 8, 6, 8*1024*1024) }
+func BenchmarkEncode_8x7x8M(b *testing.B)  { benchmarkEncode(b, 8, 7, 8*1024*1024) }
+func BenchmarkEncode_8x9x8M(b *testing.B)  { benchmarkEncode(b, 8, 9, 8*1024*1024) }
+func BenchmarkEncode_8x10x8M(b *testing.B) { benchmarkEncode(b, 8, 10, 8*1024*1024) }
+func BenchmarkEncode_8x11x8M(b *testing.B) { benchmarkEncode(b, 8, 11, 8*1024*1024) }
+
+func BenchmarkEncode_8x8x05M(b *testing.B) { benchmarkEncode(b, 8, 8, 1*1024*1024/2) }
+func BenchmarkEncode_8x8x1M(b *testing.B)  { benchmarkEncode(b, 8, 8, 1*1024*1024) }
+func BenchmarkEncode_8x8x8M(b *testing.B)  { benchmarkEncode(b, 8, 8, 8*1024*1024) }
+func BenchmarkEncode_8x8x32M(b *testing.B) { benchmarkEncode(b, 8, 8, 32*1024*1024) }
+
+func BenchmarkEncode_24x8x24M(b *testing.B) { benchmarkEncode(b, 24, 8, 24*1024*1024) }
+func BenchmarkEncode_24x8x48M(b *testing.B) { benchmarkEncode(b, 24, 8, 48*1024*1024) }
+
 func benchmarkVerify(b *testing.B, dataShards, parityShards, shardSize int) {
-	r, err := New(dataShards, parityShards, WithAutoGoroutines(shardSize))
+	r, err := New(dataShards, parityShards, testOptions(WithAutoGoroutines(shardSize))...)
 	if err != nil {
 		b.Fatal(err)
 	}
@ -794,7 +843,7 @@ func corruptRandom(shards [][]byte, dataShards, parityShards int) {
 }

 func benchmarkReconstruct(b *testing.B, dataShards, parityShards, shardSize int) {
-	r, err := New(dataShards, parityShards, WithAutoGoroutines(shardSize))
+	r, err := New(dataShards, parityShards, testOptions(WithAutoGoroutines(shardSize))...)
 	if err != nil {
 		b.Fatal(err)
 	}
@ -867,7 +916,7 @@ func corruptRandomData(shards [][]byte, dataShards, parityShards int) {
 }

 func benchmarkReconstructData(b *testing.B, dataShards, parityShards, shardSize int) {
-	r, err := New(dataShards, parityShards, WithAutoGoroutines(shardSize))
+	r, err := New(dataShards, parityShards, testOptions(WithAutoGoroutines(shardSize))...)
 	if err != nil {
 		b.Fatal(err)
 	}
@ -933,7 +982,7 @@ func BenchmarkReconstructData10x4x16M(b *testing.B) {
 }

 func benchmarkReconstructP(b *testing.B, dataShards, parityShards, shardSize int) {
-	r, err := New(dataShards, parityShards, WithMaxGoroutines(1))
+	r, err := New(dataShards, parityShards, testOptions(WithMaxGoroutines(1))...)
 	if err != nil {
 		b.Fatal(err)
 	}
@ -990,7 +1039,7 @@ func testEncoderReconstruct(t *testing.T, o ...Option) {
 	fillRandom(data)

 	// Create 5 data slices of 50000 elements each
-	enc, err := New(5, 3, o...)
+	enc, err := New(5, 3, testOptions(o...)...)
 	if err != nil {
 		t.Fatal(err)
 	}
@ -1066,7 +1115,7 @@ func TestSplitJoin(t *testing.T) {
 	rand.Seed(0)
 	fillRandom(data)

-	enc, _ := New(5, 3)
+	enc, _ := New(5, 3, testOptions()...)
 	shards, err := enc.Split(data)
 	if err != nil {
 		t.Fatal(err)
@ -1106,7 +1155,7 @@ func TestSplitJoin(t *testing.T) {
 func TestCodeSomeShards(t *testing.T) {
 	var data = make([]byte, 250000)
 	fillRandom(data)
-	enc, _ := New(5, 3)
+	enc, _ := New(5, 3, testOptions()...)
 	r := enc.(*reedSolomon) // need to access private methods
 	shards, _ := enc.Split(data)

@ -1141,7 +1190,7 @@ func TestStandardMatrices(t *testing.T) {
 					continue
 				}
 				sh := shards[:i+j]
-				r, err := New(i, j, WithCauchyMatrix())
+				r, err := New(i, j, testOptions(WithCauchyMatrix())...)
 				if err != nil {
 					// We are not supposed to write to t from goroutines.
 					t.Fatal("creating matrix size", i, j, ":", err)
@ -1267,7 +1316,7 @@ func TestPar1Matrices(t *testing.T) {
 					continue
 				}
 				sh := shards[:i+j]
-				r, err := New(i, j, WithPAR1Matrix())
+				r, err := New(i, j, testOptions(WithPAR1Matrix())...)
 				if err != nil {
 					// We are not supposed to write to t from goroutines.
 					t.Fatal("creating matrix size", i, j, ":", err)
@ -1334,7 +1383,7 @@ func TestNew(t *testing.T) {
 		{256, int(^uint(0) >> 1), errInvalidRowSize},
 	}
 	for _, test := range tests {
-		_, err := New(test.data, test.parity)
+		_, err := New(test.data, test.parity, testOptions()...)
 		if err != test.err {
 			t.Errorf("New(%v, %v): expected %v, got %v", test.data, test.parity, test.err, err)
 		}
@ -1372,7 +1421,7 @@ func BenchmarkSplit17x3x272M(b *testing.B) {
 }

 func benchmarkSplit(b *testing.B, shards, parity, dataSize int) {
-	r, err := New(shards, parity)
+	r, err := New(shards, parity, testOptions(WithAutoGoroutines(dataSize))...)
 	if err != nil {
 		b.Fatal(err)
 	}