reedsolomon-go/options.go

121 lines
3.0 KiB
Go
Raw Normal View History

package reedsolomon
import (
"runtime"
"github.com/klauspost/cpuid"
)
// Option allows to override processing parameters.
type Option func(*options)
type options struct {
maxGoroutines int
minSplitSize int
useAVX512, useAVX2, useSSSE3, useSSE2 bool
usePAR1Matrix bool
useCauchy bool
shardSize int
}
var defaultOptions = options{
maxGoroutines: 384,
minSplitSize: -1,
// Detect CPU capabilities.
useSSSE3: cpuid.CPU.SSSE3(),
useSSE2: cpuid.CPU.SSE2(),
useAVX2: cpuid.CPU.AVX2(),
useAVX512: cpuid.CPU.AVX512F() && cpuid.CPU.AVX512BW(),
}
func init() {
if runtime.GOMAXPROCS(0) <= 1 {
defaultOptions.maxGoroutines = 1
}
}
// WithMaxGoroutines is the maximum number of goroutines number for encoding & decoding.
// Jobs will be split into this many parts, unless each goroutine would have to process
// less than minSplitSize bytes (set with WithMinSplitSize).
// For the best speed, keep this well above the GOMAXPROCS number for more fine grained
// scheduling.
// If n <= 0, it is ignored.
func WithMaxGoroutines(n int) Option {
return func(o *options) {
if n > 0 {
o.maxGoroutines = n
}
}
}
Split blocks into size divisible by 16 Older systems (typically without AVX2) are more sensitive to misaligned load+stores. Add parameter to automatically set the number of goroutines. name old time/op new time/op delta Encode10x2x10000-8 18.4µs ± 1% 16.1µs ± 1% -12.43% (p=0.000 n=9+9) Encode100x20x10000-8 692µs ± 1% 608µs ± 1% -12.10% (p=0.000 n=10+10) Encode17x3x1M-8 1.78ms ± 5% 1.49ms ± 1% -16.63% (p=0.000 n=10+10) Encode10x4x16M-8 21.5ms ± 5% 19.6ms ± 4% -8.74% (p=0.000 n=10+9) Encode5x2x1M-8 343µs ± 2% 267µs ± 2% -22.22% (p=0.000 n=9+10) Encode10x2x1M-8 858µs ± 5% 701µs ± 5% -18.34% (p=0.000 n=10+10) Encode10x4x1M-8 1.34ms ± 1% 1.16ms ± 1% -13.19% (p=0.000 n=9+9) Encode50x20x1M-8 30.3ms ± 4% 25.0ms ± 2% -17.51% (p=0.000 n=10+8) Encode17x3x16M-8 26.9ms ± 1% 24.5ms ± 4% -9.13% (p=0.000 n=8+10) name old speed new speed delta Encode10x2x10000-8 5.45GB/s ± 1% 6.22GB/s ± 1% +14.20% (p=0.000 n=9+9) Encode100x20x10000-8 1.44GB/s ± 1% 1.64GB/s ± 1% +13.77% (p=0.000 n=10+10) Encode17x3x1M-8 10.0GB/s ± 5% 12.0GB/s ± 1% +19.88% (p=0.000 n=10+10) Encode10x4x16M-8 7.81GB/s ± 5% 8.56GB/s ± 5% +9.58% (p=0.000 n=10+9) Encode5x2x1M-8 15.3GB/s ± 2% 19.6GB/s ± 2% +28.57% (p=0.000 n=9+10) Encode10x2x1M-8 12.2GB/s ± 5% 15.0GB/s ± 5% +22.45% (p=0.000 n=10+10) Encode10x4x1M-8 7.84GB/s ± 1% 9.03GB/s ± 1% +15.19% (p=0.000 n=9+9) Encode50x20x1M-8 1.73GB/s ± 4% 2.09GB/s ± 4% +20.59% (p=0.000 n=10+9) Encode17x3x16M-8 10.6GB/s ± 1% 11.7GB/s ± 4% +10.12% (p=0.000 n=8+10)
2017-11-18 19:37:40 +03:00
// WithAutoGoroutines will adjust the number of goroutines for optimal speed with a
// specific shard size.
// Send in the shard size you expect to send. Other shard sizes will work, but may not
// run at the optimal speed.
// Overwrites WithMaxGoroutines.
// If shardSize <= 0, it is ignored.
func WithAutoGoroutines(shardSize int) Option {
return func(o *options) {
o.shardSize = shardSize
}
}
// WithMinSplitSize is the minimum encoding size in bytes per goroutine.
// By default this parameter is determined by CPU cache characteristics.
// See WithMaxGoroutines on how jobs are split.
// If n <= 0, it is ignored.
func WithMinSplitSize(n int) Option {
return func(o *options) {
if n > 0 {
o.minSplitSize = n
}
}
}
func withSSSE3(enabled bool) Option {
return func(o *options) {
o.useSSSE3 = enabled
}
}
func withAVX2(enabled bool) Option {
return func(o *options) {
o.useAVX2 = enabled
}
}
func withSSE2(enabled bool) Option {
return func(o *options) {
o.useSSE2 = enabled
}
}
func withAVX512(enabled bool) Option {
return func(o *options) {
o.useAVX512 = enabled
}
}
// WithPAR1Matrix causes the encoder to build the matrix how PARv1
// does. Note that the method they use is buggy, and may lead to cases
// where recovery is impossible, even if there are enough parity
// shards.
func WithPAR1Matrix() Option {
return func(o *options) {
o.usePAR1Matrix = true
o.useCauchy = false
}
}
// WithCauchyMatrix will make the encoder build a Cauchy style matrix.
// The output of this is not compatible with the standard output.
// A Cauchy matrix is faster to generate. This does not affect data throughput,
// but will result in slightly faster start-up time.
func WithCauchyMatrix() Option {
return func(o *options) {
o.useCauchy = true
o.usePAR1Matrix = false
}
}