Reduce generated code (#185)

* Reduce generated code

Use a define (with hacks)
master
Klaus Post 2022-03-24 05:25:40 -07:00 committed by GitHub
parent daf81ef0bd
commit 2f19c81be4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 6743 additions and 58408 deletions

View File

@ -87,4 +87,5 @@ jobs:
run: go test -no-avx512 -no-avx2 -no-ssse3 -short -race .
- name: Test Microarch v4
shell: bash {0}
run: go run testlevel.go 4;if [ $? -eq 0 ]; then GOAMD64=v4 go test -no-avx512 ./...; else true; fi

View File

@ -21,7 +21,19 @@ func main() {
if err != nil {
log.Fatalln(err)
}
data = bytes.Replace(data, []byte("\t// #"), []byte("#"), -1)
data = bytes.ReplaceAll(data, []byte("\t// #"), []byte("#"))
data = bytes.ReplaceAll(data, []byte("\t// @"), []byte(""))
data = bytes.ReplaceAll(data, []byte("VPTERNLOGQ"), []byte("XOR3WAY("))
split := bytes.Split(data, []byte("\n"))
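// The VPTERNLOGQ -> "XOR3WAY(" rewrite above leaves each macro call
// unterminated, so append the closing ')' to every line that contains one.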
want := []byte("\tXOR3WAY(")
for i, b := range split {
if bytes.Contains(b, want) {
b = []byte(string(b) + ")")
split[i] = b
}
}
data = bytes.Join(split, []byte("\n"))
data, err = asmfmt.Format(bytes.NewBuffer(data))
if err != nil {
log.Fatalln(err)

View File

@ -38,6 +38,17 @@ func main() {
Constraint(buildtags.Not("nogen").ToConstraint())
Constraint(buildtags.Term("gc").ToConstraint())
TEXT("_dummy_", 0, "func()")
Comment("#ifdef GOAMD64_v4")
Comment("#define XOR3WAY(ignore, a, b, dst)\\")
Comment("@\tVPTERNLOGD $0x96, a, b, dst")
Comment("#else")
Comment("#define XOR3WAY(ignore, a, b, dst)\\")
Comment("@\tVPXOR a, dst, dst\\")
Comment("@\tVPXOR b, dst, dst")
Comment("#endif")
RET()
const perLoopBits = 6
const perLoop = 1 << perLoopBits
@ -123,13 +134,8 @@ func galMulSlicesAvx2Xor(matrix []byte, in, out [][]byte, start, stop int) int {
// VPXOR3way emits code that XORs a, b and dst together and leaves the
// result in dst (dst = a ^ b ^ dst).
//
// NOTE(review): this listing appears to be a rendered diff with removed and
// added lines interleaved. The #ifdef/#else/#endif sequence is presumably the
// previous body and the trailing VPTERNLOGQ call the new one - confirm
// against the actual file before relying on the order shown here.
func VPXOR3way(a, b, dst reg.VecVirtual) {
Comment("#ifdef GOAMD64_v4")
// AVX512F and AVX512VL required.
// The 0x96 immediate is the ternary-logic truth table for a three-input XOR.
VPTERNLOGD(U8(0x96), a, b, dst)
Comment("#else")
VPXOR(a, dst, dst) // dst = a^dst
VPXOR(b, dst, dst) // dst = (a^dst)^b
Comment("#endif")
// VPTERNLOGQ is only a placeholder here: the post-processing step textually
// replaces the mnemonic with an XOR3WAY( macro invocation, whose first
// parameter is named "ignore", so the immediate passed here is presumably
// irrelevant to the final assembly.
VPTERNLOGQ(U8(0), a, b, dst)
}
func genMulAvx2(name string, inputs int, outputs int, xor bool) {

View File

@ -5,6 +5,8 @@
package reedsolomon
func _dummy_()
// mulAvxTwo_1x1 takes 1 inputs and produces 1 outputs.
// The output is initialized to 0.
//go:noescape

File diff suppressed because it is too large Load Diff