From b933ef1add0b9ebb756b52a03ba5017b68c486e4 Mon Sep 17 00:00:00 2001
From: Vitaliy Filippov <vitalif@yourcmc.ru>
Date: Mon, 15 Aug 2022 14:27:17 +0300
Subject: [PATCH] Implement jerasure algorithm of matrix generation for
 interoperability

---
 options.go          | 17 +++++++++-
 reedsolomon.go      | 83 +++++++++++++++++++++++++++++++++++++++++++++
 reedsolomon_test.go | 29 ++++++++++++++++
 3 files changed, 128 insertions(+), 1 deletion(-)

diff --git a/options.go b/options.go
index 0a33397..ec79ec7 100644
--- a/options.go
+++ b/options.go
@@ -16,6 +16,7 @@ type options struct {
 	perRound      int
 
 	useAVX512, useAVX2, useSSSE3, useSSE2 bool
+	useJerasureMatrix                     bool
 	usePAR1Matrix                         bool
 	useCauchy                             bool
 	fastOneParity                         bool
@@ -163,12 +164,25 @@ func WithAVX512(enabled bool) Option {
 	}
 }
 
+// WithJerasureMatrix causes the encoder to build the Reed-Solomon-Vandermonde
+// matrix in the same way as the Jerasure library does.
+// With this method the first row and the first column of the coding matrix
+// contain only 1's, so the first parity chunk is always the XOR of all data chunks.
+func WithJerasureMatrix() Option {
+	return func(cfg *options) {
+		// Select Jerasure and switch off the other matrix choices.
+		cfg.usePAR1Matrix, cfg.useCauchy = false, false
+		cfg.useJerasureMatrix = true
+	}
+}
+
 // WithPAR1Matrix causes the encoder to build the matrix how PARv1
 // does. Note that the method they use is buggy, and may lead to cases
 // where recovery is impossible, even if there are enough parity
 // shards.
 func WithPAR1Matrix() Option {
 	return func(o *options) {
+		o.useJerasureMatrix = false
 		o.usePAR1Matrix = true
 		o.useCauchy = false
 	}
@@ -180,8 +194,9 @@ func WithPAR1Matrix() Option {
 // but will result in slightly faster start-up time.
 func WithCauchyMatrix() Option {
 	return func(o *options) {
-		o.useCauchy = true
+		o.useJerasureMatrix = false
 		o.usePAR1Matrix = false
+		o.useCauchy = true
 	}
 }
 
diff --git a/reedsolomon.go b/reedsolomon.go
index 3c8e03a..4f6afb6 100644
--- a/reedsolomon.go
+++ b/reedsolomon.go
@@ -191,6 +191,87 @@ func buildMatrix(dataShards, totalShards int) (matrix, error) {
 	return vm.Multiply(topInv)
 }
 
+// buildMatrixJerasure creates the same encoding matrix as the Jerasure library.
+//
+// The top square of the matrix is guaranteed to be an identity
+// matrix, which means that the data shards are unchanged after
+// encoding. It expects 0 < dataShards <= totalShards.
+func buildMatrixJerasure(dataShards, totalShards int) (matrix, error) {
+	// Start with a Vandermonde matrix.  This matrix would work,
+	// in theory, but doesn't have the property that the data
+	// shards are unchanged after encoding.
+	vm, err := vandermonde(totalShards, dataShards)
+	if err != nil {
+		return nil, err
+	}
+
+	// Jerasure does this: first row is always 100..00
+	vm[0][0] = 1
+	for i := 1; i < dataShards; i++ {
+		vm[0][i] = 0
+	}
+	// last row is always 000..01
+	for i := 0; i < dataShards-1; i++ {
+		vm[totalShards-1][i] = 0
+	}
+	vm[totalShards-1][dataShards-1] = 1
+
+	for i := 0; i < dataShards; i++ {
+		// Find the row where i'th col is not 0
+		r := i
+		for ; r < totalShards && vm[r][i] == 0; r++ {
+		}
+		if r != i {
+			// Swap it with i'th row if not already
+			vm[r], vm[i] = vm[i], vm[r]
+		}
+		// Multiply by the inverted matrix (same as vm.Multiply(vm[0:dataShards].Invert()))
+		if vm[i][i] != 1 {
+			// Make vm[i][i] = 1 by dividing the column by vm[i][i]
+			tmp := galDivide(1, vm[i][i])
+			for j := 0; j < totalShards; j++ {
+				vm[j][i] = galMultiply(vm[j][i], tmp)
+			}
+		}
+		for j := 0; j < dataShards; j++ {
+			// Make vm[i][j] = 0 where j != i by adding vm[i][j]*vm[.][i] to each column
+			tmp := vm[i][j]
+			if j != i && tmp != 0 {
+				for r := 0; r < totalShards; r++ {
+					vm[r][j] = galAdd(vm[r][j], galMultiply(tmp, vm[r][i]))
+				}
+			}
+		}
+	}
+
+	// Without parity rows there is no vm[dataShards] row to normalize below.
+	if totalShards == dataShards {
+		return vm, nil
+	}
+
+	// Make vm[dataShards] row all ones - divide each column j by vm[dataShards][j]
+	for j := 0; j < dataShards; j++ {
+		if tmp := vm[dataShards][j]; tmp != 1 {
+			inv := galDivide(1, tmp)
+			for i := dataShards; i < totalShards; i++ {
+				vm[i][j] = galMultiply(vm[i][j], inv)
+			}
+		}
+	}
+
+	// Make vm[dataShards...totalShards-1][0] column all ones - divide each row
+	for i := dataShards + 1; i < totalShards; i++ {
+		if tmp := vm[i][0]; tmp != 1 {
+			inv := galDivide(1, tmp)
+			for j := 0; j < dataShards; j++ {
+				vm[i][j] = galMultiply(vm[i][j], inv)
+			}
+		}
+	}
+
+	return vm, nil
+}
+
 // buildMatrixPAR1 creates the matrix to use for encoding according to
 // the PARv1 spec, given the number of data shards and the number of
 // total shards. Note that the method they use is buggy, and may lead
@@ -323,6 +404,8 @@ func New(dataShards, parityShards int, opts ...Option) (Encoder, error) {
 		r.m, err = buildMatrixCauchy(dataShards, r.Shards)
 	case r.o.usePAR1Matrix:
 		r.m, err = buildMatrixPAR1(dataShards, r.Shards)
+	case r.o.useJerasureMatrix:
+		r.m, err = buildMatrixJerasure(dataShards, r.Shards)
 	default:
 		r.m, err = buildMatrix(dataShards, r.Shards)
 	}
diff --git a/reedsolomon_test.go b/reedsolomon_test.go
index 6004985..57cb0bd 100644
--- a/reedsolomon_test.go
+++ b/reedsolomon_test.go
@@ -110,6 +110,35 @@ func findSingularSubMatrix(m matrix) (matrix, error) {
 	return nil, nil
 }
 
+// TestBuildMatrixJerasure compares the 8+4 matrix with rows expected from Jerasure.
+func TestBuildMatrixJerasure(t *testing.T) {
+	const dataShards, parityShards = 8, 4
+	m, err := buildMatrixJerasure(dataShards, dataShards+parityShards)
+	if err != nil {
+		t.Fatal(err)
+	}
+	want := matrix{
+		{1, 1, 1, 1, 1, 1, 1, 1},
+		{1, 55, 39, 73, 84, 181, 225, 217},
+		{1, 39, 217, 161, 92, 60, 172, 90},
+		{1, 172, 70, 235, 143, 34, 200, 101},
+	}
+	for row := 0; row < dataShards; row++ {
+		for col := 0; col < dataShards; col++ {
+			if (row == col && m[row][col] != 1) || (row != col && m[row][col] != 0) {
+				t.Fatal("Top part of the matrix is not identity")
+			}
+		}
+	}
+	for row := 0; row < parityShards; row++ {
+		for col := 0; col < dataShards; col++ {
+			if m[dataShards+row][col] != want[row][col] {
+				t.Fatal("Coding matrix for EC 8+4 differs from Jerasure")
+			}
+		}
+	}
+}
+
 func TestBuildMatrixPAR1Singular(t *testing.T) {
 	totalShards := 8
 	dataShards := 4