parent
daf81ef0bd
commit
2f19c81be4
|
@ -87,4 +87,5 @@ jobs:
|
|||
run: go test -no-avx512 -no-avx2 -no-ssse3 -short -race .
|
||||
|
||||
- name: Test Microarch v4
|
||||
shell: bash {0}
|
||||
run: go run testlevel.go 4;if [ $? -eq 0 ]; then GOAMD64=v4 go test -no-avx512 ./...; else true; fi
|
||||
|
|
|
@ -21,7 +21,19 @@ func main() {
|
|||
if err != nil {
|
||||
log.Fatalln(err)
|
||||
}
|
||||
data = bytes.Replace(data, []byte("\t// #"), []byte("#"), -1)
|
||||
data = bytes.ReplaceAll(data, []byte("\t// #"), []byte("#"))
|
||||
data = bytes.ReplaceAll(data, []byte("\t// @"), []byte(""))
|
||||
data = bytes.ReplaceAll(data, []byte("VPTERNLOGQ"), []byte("XOR3WAY("))
|
||||
split := bytes.Split(data, []byte("\n"))
|
||||
// Add closing ')'
|
||||
want := []byte("\tXOR3WAY(")
|
||||
for i, b := range split {
|
||||
if bytes.Contains(b, want) {
|
||||
b = []byte(string(b) + ")")
|
||||
split[i] = b
|
||||
}
|
||||
}
|
||||
data = bytes.Join(split, []byte("\n"))
|
||||
data, err = asmfmt.Format(bytes.NewBuffer(data))
|
||||
if err != nil {
|
||||
log.Fatalln(err)
|
||||
|
|
20
_gen/gen.go
20
_gen/gen.go
|
@ -38,6 +38,17 @@ func main() {
|
|||
Constraint(buildtags.Not("nogen").ToConstraint())
|
||||
Constraint(buildtags.Term("gc").ToConstraint())
|
||||
|
||||
TEXT("_dummy_", 0, "func()")
|
||||
Comment("#ifdef GOAMD64_v4")
|
||||
Comment("#define XOR3WAY(ignore, a, b, dst)\\")
|
||||
Comment("@\tVPTERNLOGD $0x96, a, b, dst")
|
||||
Comment("#else")
|
||||
Comment("#define XOR3WAY(ignore, a, b, dst)\\")
|
||||
Comment("@\tVPXOR a, dst, dst\\")
|
||||
Comment("@\tVPXOR b, dst, dst")
|
||||
Comment("#endif")
|
||||
RET()
|
||||
|
||||
const perLoopBits = 6
|
||||
const perLoop = 1 << perLoopBits
|
||||
|
||||
|
@ -123,13 +134,8 @@ func galMulSlicesAvx2Xor(matrix []byte, in, out [][]byte, start, stop int) int {
|
|||
|
||||
// VPXOR3way emits a three-way XOR of a, b and dst, leaving the result in dst.
|
||||
func VPXOR3way(a, b, dst reg.VecVirtual) {
|
||||
Comment("#ifdef GOAMD64_v4")
|
||||
// AVX512F and AVX512VL required
|
||||
VPTERNLOGD(U8(0x96), a, b, dst)
|
||||
Comment("#else")
|
||||
VPXOR(a, dst, dst) // dst = a^dst
|
||||
VPXOR(b, dst, dst) // dst = (a^dst)^b
|
||||
Comment("#endif")
|
||||
// VPTERNLOGQ is only a placeholder with the same operand shape: post-processing rewrites the mnemonic to the XOR3WAY macro, which ignores the immediate.
|
||||
VPTERNLOGQ(U8(0), a, b, dst)
|
||||
}
|
||||
|
||||
func genMulAvx2(name string, inputs int, outputs int, xor bool) {
|
||||
|
|
|
@ -5,6 +5,8 @@
|
|||
|
||||
package reedsolomon
|
||||
|
||||
func _dummy_()
|
||||
|
||||
// mulAvxTwo_1x1 takes 1 inputs and produces 1 outputs.
|
||||
// The output is initialized to 0.
|
||||
//go:noescape
|
||||
|
|
65114
galois_gen_amd64.s
65114
galois_gen_amd64.s
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue