reedsolomon-go/galois_gen_switch_amd64.go

294 lines
5.9 KiB
Go
Raw Normal View History

Generate AVX2 code (#141) Replaces AVX2 up to 10x8 configurations with specific generated functions. If code size is a concern `-tags=nogen` can be used. Biggest speedup when not memory constrained. ``` benchmark old MB/s new MB/s speedup BenchmarkEncode_8x5x8M 5895.75 9648.18 1.64x BenchmarkEncode_8x5x8M-4 16773.41 17220.67 1.03x BenchmarkEncode_8x5x8M-16 18263.12 17176.28 0.94x BenchmarkEncode_8x6x8M 5075.89 8548.39 1.68x BenchmarkEncode_8x6x8M-4 14559.83 15370.95 1.06x BenchmarkEncode_8x6x8M-16 16183.37 15291.98 0.94x BenchmarkEncode_8x7x8M 4481.18 7015.60 1.57x BenchmarkEncode_8x7x8M-4 12835.35 13695.90 1.07x BenchmarkEncode_8x7x8M-16 14246.94 13737.36 0.96x BenchmarkEncode_8x8x05M 5569.95 7947.70 1.43x BenchmarkEncode_8x8x05M-4 17334.91 25271.37 1.46x BenchmarkEncode_8x8x05M-16 29349.42 35043.36 1.19x BenchmarkEncode_8x8x1M 4830.58 7891.32 1.63x BenchmarkEncode_8x8x1M-4 17531.36 27371.42 1.56x BenchmarkEncode_8x8x1M-16 29593.98 39241.09 1.33x BenchmarkEncode_8x8x8M 3953.66 6584.26 1.67x BenchmarkEncode_8x8x8M-4 11527.34 12331.23 1.07x BenchmarkEncode_8x8x8M-16 12718.89 12173.08 0.96x BenchmarkEncode_8x8x32M 3927.51 6195.91 1.58x BenchmarkEncode_8x8x32M-4 11490.85 11424.39 0.99x BenchmarkEncode_8x8x32M-16 12506.09 11888.55 0.95x benchmark old MB/s new MB/s speedup BenchmarkParallel_8x8x64K 5490.24 6959.57 1.27x BenchmarkParallel_8x8x64K-4 21078.94 29557.51 1.40x BenchmarkParallel_8x8x64K-16 57508.45 73672.54 1.28x BenchmarkParallel_8x8x1M 4755.49 7667.84 1.61x BenchmarkParallel_8x8x1M-4 11818.66 12013.49 1.02x BenchmarkParallel_8x8x1M-16 12923.12 12109.42 0.94x BenchmarkParallel_8x8x8M 3973.94 6525.85 1.64x BenchmarkParallel_8x8x8M-4 11725.68 11312.46 0.96x BenchmarkParallel_8x8x8M-16 12608.20 11484.98 0.91x BenchmarkParallel_8x3x1M 14139.71 17993.04 1.27x BenchmarkParallel_8x3x1M-4 21805.97 23053.92 1.06x BenchmarkParallel_8x3x1M-16 24673.05 23596.71 0.96x BenchmarkParallel_8x4x1M 10617.88 14474.54 1.36x BenchmarkParallel_8x4x1M-4 18635.82 18965.65 1.02x 
BenchmarkParallel_8x4x1M-16 21518.12 20171.47 0.94x BenchmarkParallel_8x5x1M 8669.88 11833.96 1.36x BenchmarkParallel_8x5x1M-4 16321.00 17500.30 1.07x BenchmarkParallel_8x5x1M-16 17267.16 17191.04 1.00x ```
2020-05-20 13:48:34 +03:00
// Code generated by command: go generate gen.go. DO NOT EDIT.
// +build !appengine
// +build !noasm
// +build gc
// +build !nogen
package reedsolomon
import "fmt"
const (
	// avx2CodeGen is true in this build of the file.
	// NOTE(review): presumably signals to the rest of the package that the
	// generated AVX2 routines are compiled in — confirm against its users.
	avx2CodeGen = true

	// maxAvx2Inputs equals the largest len(in) that galMulSlicesAvx2
	// has a case for.
	maxAvx2Inputs = 10

	// maxAvx2Outputs equals the largest len(out) that galMulSlicesAvx2
	// has a case for.
	maxAvx2Outputs = 8
)
// galMulSlicesAvx2 selects the generated mulAvxTwo_<inputs>x<outputs> routine
// whose suffix matches len(in) and len(out) and invokes it once.
//
// The span stop-start is rounded down to a multiple of 32; that rounded
// length is what gets passed to the routine and what is returned to the
// caller. The remainder (fewer than 32) is not touched here.
// NOTE(review): presumably the caller processes that tail separately —
// confirm against the call site.
//
// It panics with "unhandled size: <inputs>x<outputs>" when len(in) is not in
// 1..10 or len(out) is not in 1..8.
func galMulSlicesAvx2(matrix []byte, in, out [][]byte, start, stop int) int {
	// Clear the low five bits: round the span down to a multiple of 32.
	aligned := (stop - start) &^ 31
	inputs, outputs := len(in), len(out)

	// known is cleared by any default arm, i.e. when no generated routine
	// exists for this inputs/outputs combination.
	known := true

	switch inputs {
	case 1:
		switch outputs {
		case 1:
			mulAvxTwo_1x1(matrix, in, out, start, aligned)
		case 2:
			mulAvxTwo_1x2(matrix, in, out, start, aligned)
		case 3:
			mulAvxTwo_1x3(matrix, in, out, start, aligned)
		case 4:
			mulAvxTwo_1x4(matrix, in, out, start, aligned)
		case 5:
			mulAvxTwo_1x5(matrix, in, out, start, aligned)
		case 6:
			mulAvxTwo_1x6(matrix, in, out, start, aligned)
		case 7:
			mulAvxTwo_1x7(matrix, in, out, start, aligned)
		case 8:
			mulAvxTwo_1x8(matrix, in, out, start, aligned)
		default:
			known = false
		}
	case 2:
		switch outputs {
		case 1:
			mulAvxTwo_2x1(matrix, in, out, start, aligned)
		case 2:
			mulAvxTwo_2x2(matrix, in, out, start, aligned)
		case 3:
			mulAvxTwo_2x3(matrix, in, out, start, aligned)
		case 4:
			mulAvxTwo_2x4(matrix, in, out, start, aligned)
		case 5:
			mulAvxTwo_2x5(matrix, in, out, start, aligned)
		case 6:
			mulAvxTwo_2x6(matrix, in, out, start, aligned)
		case 7:
			mulAvxTwo_2x7(matrix, in, out, start, aligned)
		case 8:
			mulAvxTwo_2x8(matrix, in, out, start, aligned)
		default:
			known = false
		}
	case 3:
		switch outputs {
		case 1:
			mulAvxTwo_3x1(matrix, in, out, start, aligned)
		case 2:
			mulAvxTwo_3x2(matrix, in, out, start, aligned)
		case 3:
			mulAvxTwo_3x3(matrix, in, out, start, aligned)
		case 4:
			mulAvxTwo_3x4(matrix, in, out, start, aligned)
		case 5:
			mulAvxTwo_3x5(matrix, in, out, start, aligned)
		case 6:
			mulAvxTwo_3x6(matrix, in, out, start, aligned)
		case 7:
			mulAvxTwo_3x7(matrix, in, out, start, aligned)
		case 8:
			mulAvxTwo_3x8(matrix, in, out, start, aligned)
		default:
			known = false
		}
	case 4:
		switch outputs {
		case 1:
			mulAvxTwo_4x1(matrix, in, out, start, aligned)
		case 2:
			mulAvxTwo_4x2(matrix, in, out, start, aligned)
		case 3:
			mulAvxTwo_4x3(matrix, in, out, start, aligned)
		case 4:
			mulAvxTwo_4x4(matrix, in, out, start, aligned)
		case 5:
			mulAvxTwo_4x5(matrix, in, out, start, aligned)
		case 6:
			mulAvxTwo_4x6(matrix, in, out, start, aligned)
		case 7:
			mulAvxTwo_4x7(matrix, in, out, start, aligned)
		case 8:
			mulAvxTwo_4x8(matrix, in, out, start, aligned)
		default:
			known = false
		}
	case 5:
		switch outputs {
		case 1:
			mulAvxTwo_5x1(matrix, in, out, start, aligned)
		case 2:
			mulAvxTwo_5x2(matrix, in, out, start, aligned)
		case 3:
			mulAvxTwo_5x3(matrix, in, out, start, aligned)
		case 4:
			mulAvxTwo_5x4(matrix, in, out, start, aligned)
		case 5:
			mulAvxTwo_5x5(matrix, in, out, start, aligned)
		case 6:
			mulAvxTwo_5x6(matrix, in, out, start, aligned)
		case 7:
			mulAvxTwo_5x7(matrix, in, out, start, aligned)
		case 8:
			mulAvxTwo_5x8(matrix, in, out, start, aligned)
		default:
			known = false
		}
	case 6:
		switch outputs {
		case 1:
			mulAvxTwo_6x1(matrix, in, out, start, aligned)
		case 2:
			mulAvxTwo_6x2(matrix, in, out, start, aligned)
		case 3:
			mulAvxTwo_6x3(matrix, in, out, start, aligned)
		case 4:
			mulAvxTwo_6x4(matrix, in, out, start, aligned)
		case 5:
			mulAvxTwo_6x5(matrix, in, out, start, aligned)
		case 6:
			mulAvxTwo_6x6(matrix, in, out, start, aligned)
		case 7:
			mulAvxTwo_6x7(matrix, in, out, start, aligned)
		case 8:
			mulAvxTwo_6x8(matrix, in, out, start, aligned)
		default:
			known = false
		}
	case 7:
		switch outputs {
		case 1:
			mulAvxTwo_7x1(matrix, in, out, start, aligned)
		case 2:
			mulAvxTwo_7x2(matrix, in, out, start, aligned)
		case 3:
			mulAvxTwo_7x3(matrix, in, out, start, aligned)
		case 4:
			mulAvxTwo_7x4(matrix, in, out, start, aligned)
		case 5:
			mulAvxTwo_7x5(matrix, in, out, start, aligned)
		case 6:
			mulAvxTwo_7x6(matrix, in, out, start, aligned)
		case 7:
			mulAvxTwo_7x7(matrix, in, out, start, aligned)
		case 8:
			mulAvxTwo_7x8(matrix, in, out, start, aligned)
		default:
			known = false
		}
	case 8:
		switch outputs {
		case 1:
			mulAvxTwo_8x1(matrix, in, out, start, aligned)
		case 2:
			mulAvxTwo_8x2(matrix, in, out, start, aligned)
		case 3:
			mulAvxTwo_8x3(matrix, in, out, start, aligned)
		case 4:
			mulAvxTwo_8x4(matrix, in, out, start, aligned)
		case 5:
			mulAvxTwo_8x5(matrix, in, out, start, aligned)
		case 6:
			mulAvxTwo_8x6(matrix, in, out, start, aligned)
		case 7:
			mulAvxTwo_8x7(matrix, in, out, start, aligned)
		case 8:
			mulAvxTwo_8x8(matrix, in, out, start, aligned)
		default:
			known = false
		}
	case 9:
		switch outputs {
		case 1:
			mulAvxTwo_9x1(matrix, in, out, start, aligned)
		case 2:
			mulAvxTwo_9x2(matrix, in, out, start, aligned)
		case 3:
			mulAvxTwo_9x3(matrix, in, out, start, aligned)
		case 4:
			mulAvxTwo_9x4(matrix, in, out, start, aligned)
		case 5:
			mulAvxTwo_9x5(matrix, in, out, start, aligned)
		case 6:
			mulAvxTwo_9x6(matrix, in, out, start, aligned)
		case 7:
			mulAvxTwo_9x7(matrix, in, out, start, aligned)
		case 8:
			mulAvxTwo_9x8(matrix, in, out, start, aligned)
		default:
			known = false
		}
	case 10:
		switch outputs {
		case 1:
			mulAvxTwo_10x1(matrix, in, out, start, aligned)
		case 2:
			mulAvxTwo_10x2(matrix, in, out, start, aligned)
		case 3:
			mulAvxTwo_10x3(matrix, in, out, start, aligned)
		case 4:
			mulAvxTwo_10x4(matrix, in, out, start, aligned)
		case 5:
			mulAvxTwo_10x5(matrix, in, out, start, aligned)
		case 6:
			mulAvxTwo_10x6(matrix, in, out, start, aligned)
		case 7:
			mulAvxTwo_10x7(matrix, in, out, start, aligned)
		case 8:
			mulAvxTwo_10x8(matrix, in, out, start, aligned)
		default:
			known = false
		}
	default:
		known = false
	}

	if !known {
		panic(fmt.Sprintf("unhandled size: %dx%d", inputs, outputs))
	}
	return aligned
}