From cb7a0b5aef66fe985815e7ece52a934d8d354f19 Mon Sep 17 00:00:00 2001 From: Klaus Post Date: Wed, 6 May 2020 11:14:25 +0200 Subject: [PATCH] Do fast by one multiplication (#130) When multiplying by one we can use faster math. --- galois_amd64.go | 13 +++++++++++-- galois_arm64.go | 10 +++++++++- galois_noasm.go | 16 +++++++++++++--- galois_ppc64le.go | 10 +++++++++- galois_test.go | 6 ++++-- reedsolomon.go | 4 ++-- reedsolomon_test.go | 5 ++++- 7 files changed, 52 insertions(+), 12 deletions(-) diff --git a/galois_amd64.go b/galois_amd64.go index bf7faca..f757f9d 100644 --- a/galois_amd64.go +++ b/galois_amd64.go @@ -53,6 +53,10 @@ func galMulSSSE3Xor(low, high, in, out []byte) { const bigSwitchover = 128 func galMulSlice(c byte, in, out []byte, o *options) { + if c == 1 { + copy(out, in) + return + } if o.useAVX2 { if len(in) >= bigSwitchover { galMulAVX2_64(mulTableLow[c][:], mulTableHigh[c][:], in, out) @@ -80,6 +84,11 @@ func galMulSlice(c byte, in, out []byte, o *options) { } func galMulSliceXor(c byte, in, out []byte, o *options) { + if c == 1 { + sliceXor(in, out, o) + return + } + if o.useAVX2 { if len(in) >= bigSwitchover { galMulAVX2Xor_64(mulTableLow[c][:], mulTableHigh[c][:], in, out) @@ -107,8 +116,8 @@ func galMulSliceXor(c byte, in, out []byte, o *options) { } // slice galois add -func sliceXor(in, out []byte, sse2 bool) { - if sse2 { +func sliceXor(in, out []byte, o *options) { + if o.useSSE2 { if len(in) >= bigSwitchover { sSE2XorSlice_64(in, out) done := (len(in) >> 6) << 6 diff --git a/galois_arm64.go b/galois_arm64.go index a85ef85..898d112 100644 --- a/galois_arm64.go +++ b/galois_arm64.go @@ -14,6 +14,10 @@ func galMulNEON(c uint64, in, out []byte) func galMulXorNEON(c uint64, in, out []byte) func galMulSlice(c byte, in, out []byte, o *options) { + if c == 1 { + copy(out, in) + return + } var done int galMulNEON(uint64(c), in, out) done = (len(in) >> 5) << 5 @@ -28,6 +32,10 @@ func galMulSlice(c byte, in, out []byte, o *options) { } func galMulSliceXor(c byte, in, out []byte, o *options) { + if c == 1 { + sliceXor(in, out, o) + return + } var done int galMulXorNEON(uint64(c), in, out) done = (len(in) >> 5) << 5 @@ -42,7 +50,7 @@ func galMulSliceXor(c byte, in, out []byte, o *options) { } // slice galois add -func sliceXor(in, out []byte, sse2 bool) { +func sliceXor(in, out []byte, o *options) { for n, input := range in { out[n] ^= input } diff --git a/galois_noasm.go b/galois_noasm.go index 7f88de2..1d00e06 100644 --- a/galois_noasm.go +++ b/galois_noasm.go @@ -7,23 +7,33 @@ package reedsolomon func galMulSlice(c byte, in, out []byte, o *options) { - mt := mulTable[c][:256] out = out[:len(in)] + if c == 1 { + copy(out, in) + return + } + mt := mulTable[c][:256] for n, input := range in { out[n] = mt[input] } } func galMulSliceXor(c byte, in, out []byte, o *options) { - mt := mulTable[c][:256] out = out[:len(in)] + if c == 1 { + for n, input := range in { + out[n] ^= input + } + return + } + mt := mulTable[c][:256] for n, input := range in { out[n] ^= mt[input] } } // slice galois add -func sliceXor(in, out []byte, sse2 bool) { +func sliceXor(in, out []byte, o *options) { for n, input := range in { out[n] ^= input } diff --git a/galois_ppc64le.go b/galois_ppc64le.go index bd5dc0b..70f93d6 100644 --- a/galois_ppc64le.go +++ b/galois_ppc64le.go @@ -32,6 +32,10 @@ func galMulPpcXor(low, high, in, out []byte) { */ func galMulSlice(c byte, in, out []byte, o *options) { + if c == 1 { + copy(out, in) + return + } done := (len(in) >> 4) << 4 if done > 0 { galMulPpc(mulTableLow[c][:], mulTableHigh[c][:], in[:done], out) @@ -46,6 +50,10 @@ func galMulSlice(c byte, in, out []byte, o *options) { } func galMulSliceXor(c byte, in, out []byte, o *options) { + if c == 1 { + sliceXor(in, out, o) + return + } done := (len(in) >> 4) << 4 if done > 0 { galMulPpcXor(mulTableLow[c][:], mulTableHigh[c][:], in[:done], out) @@ -60,7 +68,7 @@ func galMulSliceXor(c byte, in, out []byte, o *options) { } // slice galois add -func sliceXor(in, out []byte, sse2 bool) { +func sliceXor(in, out []byte, o *options) { for n, input := range in { out[n] ^= input } diff --git a/galois_test.go b/galois_test.go index 1e1a5d1..c7889d4 100644 --- a/galois_test.go +++ b/galois_test.go @@ -190,7 +190,9 @@ func TestSliceGalAdd(t *testing.T) { for i := range expect { expect[i] = in[i] ^ out[i] } - sliceXor(in, out, false) + noSSE2 := defaultOptions + noSSE2.useSSE2 = false + sliceXor(in, out, &noSSE2) if 0 != bytes.Compare(out, expect) { t.Errorf("got %#v, expected %#v", out, expect) } @@ -198,7 +200,7 @@ func TestSliceGalAdd(t *testing.T) { for i := range expect { expect[i] = in[i] ^ out[i] } - sliceXor(in, out, true) + sliceXor(in, out, &defaultOptions) if 0 != bytes.Compare(out, expect) { t.Errorf("got %#v, expected %#v", out, expect) } diff --git a/reedsolomon.go b/reedsolomon.go index 9bfbb26..b2d1145 100644 --- a/reedsolomon.go +++ b/reedsolomon.go @@ -392,7 +392,7 @@ func (r reedSolomon) updateParityShards(matrixRows, oldinputs, newinputs, output } oldin := oldinputs[c] // oldinputs data will be change - sliceXor(in, oldin, r.o.useSSE2) + sliceXor(in, oldin, &r.o) for iRow := 0; iRow < outputCount; iRow++ { galMulSliceXor(matrixRows[iRow][c], oldin, outputs[iRow], &r.o) } @@ -419,7 +419,7 @@ func (r reedSolomon) updateParityShardsP(matrixRows, oldinputs, newinputs, outpu } oldin := oldinputs[c] // oldinputs data will be change - sliceXor(in[start:stop], oldin[start:stop], r.o.useSSE2) + sliceXor(in[start:stop], oldin[start:stop], &r.o) for iRow := 0; iRow < outputCount; iRow++ { galMulSliceXor(matrixRows[iRow][c], oldin[start:stop], outputs[iRow][start:stop], &r.o) } diff --git a/reedsolomon_test.go b/reedsolomon_test.go index a39d379..cf6daea 100644 --- a/reedsolomon_test.go +++ b/reedsolomon_test.go @@ -1171,7 +1171,10 @@ func TestCodeSomeShards(t *testing.T) { } func TestStandardMatrices(t *testing.T) { - t.Skip("Skipping slow matrix check (~2 min)") + if testing.Short() || runtime.GOMAXPROCS(0) < 4 { + // Runtime ~15s. + t.Skip("Skipping slow matrix check") + } var wg sync.WaitGroup wg.Add(256 - 1) for i := 1; i < 256; i++ {