Add streaming API examples.

master
klauspost 2015-10-27 13:56:36 +01:00
parent 7d91d8d953
commit 1d573f2541
3 changed files with 320 additions and 0 deletions

View File

@ -15,6 +15,17 @@ go build simple-decoder.go
go build simple-encoder.go
```
# Streamin API examples
There are streaming examples of the same functionality, which streams data instead of keeping it in memory.
To build the executables use:
```bash
go build stream-decoder.go
go build stream-encoder.go
```
## Shortcomings
* If the file size of the input isn't diviable by the number of data shards
the output will contain extra zeroes

167
examples/stream-decoder.go Normal file
View File

@ -0,0 +1,167 @@
//+build ignore
// Copyright 2015, Klaus Post, see LICENSE for details.
//
// Stream decoder example.
//
// The decoder reverses the process of "stream-encoder.go"
//
// To build an executable use:
//
// go build stream-decoder.go
//
// Simple Encoder/Decoder Shortcomings:
// * If the file size of the input isn't dividable by the number of data shards
// the output will contain extra zeroes
//
// * If the shard numbers isn't the same for the decoder as in the
// encoder, invalid output will be generated.
//
// * If values have changed in a shard, it cannot be reconstructed.
//
// * If two shards have been swapped, reconstruction will always fail.
// You need to supply the shards in the same order as they were given to you.
//
// The solution for this is to save a metadata file containing:
//
// * File size.
// * The number of data/parity shards.
// * HASH of each shard.
// * Order of the shards.
//
// If you save these properties, you should abe able to detect file corruption
// in a shard and be able to reconstruct your data if you have the needed number of shards left.
package main
import (
"flag"
"fmt"
"io"
"os"
"path/filepath"
"github.com/klauspost/reedsolomon"
)
var dataShards = flag.Int("data", 4, "Number of shards to split the data into")
var parShards = flag.Int("par", 2, "Number of parity shards")
var outFile = flag.String("out", "", "Alternative output path/file")
func init() {
flag.Usage = func() {
fmt.Fprintf(os.Stderr, "Usage of %s:\n", os.Args[0])
fmt.Fprintf(os.Stderr, " %s [-flags] basefile.ext\nDo not add the number to the filename.\n", os.Args[0])
fmt.Fprintf(os.Stderr, "Valid flags:\n")
flag.PrintDefaults()
}
}
func main() {
// Parse flags
flag.Parse()
args := flag.Args()
if len(args) != 1 {
fmt.Fprintf(os.Stderr, "Error: No filenames given\n")
flag.Usage()
os.Exit(1)
}
fname := args[0]
// Create matrix
enc, err := reedsolomon.NewStream(*dataShards, *parShards)
checkErr(err)
// Open the inputs
shards, size, err := openInput(*dataShards, *parShards, fname)
checkErr(err)
// Verify the shards
ok, err := enc.Verify(shards)
if ok {
fmt.Println("No reconstruction needed")
} else {
fmt.Println("Verification failed. Reconstructing data")
shards, size, err = openInput(*dataShards, *parShards, fname)
checkErr(err)
// Create out destination writers
out := make([]io.Writer, len(shards))
for i := range out {
if shards[i] == nil {
dir, _ := filepath.Split(fname)
outfn := fmt.Sprintf("%s.%d", fname, i)
fmt.Println("Creating", outfn)
out[i], err = os.Create(filepath.Join(dir, outfn))
checkErr(err)
}
}
err = enc.Reconstruct(shards, out)
if err != nil {
fmt.Println("Reconstruct failed -", err)
os.Exit(1)
}
// Close output.
for i := range out {
if out[i] != nil {
err := out[i].(*os.File).Close()
checkErr(err)
}
}
shards, size, err = openInput(*dataShards, *parShards, fname)
ok, err = enc.Verify(shards)
if !ok {
fmt.Println("Verification failed after reconstruction, data likely corrupted:", err)
os.Exit(1)
}
checkErr(err)
}
// Join the shards and write them
outfn := *outFile
if outfn == "" {
outfn = fname
}
fmt.Println("Writing data to", outfn)
f, err := os.Create(outfn)
checkErr(err)
shards, size, err = openInput(*dataShards, *parShards, fname)
checkErr(err)
// We don't know the exact filesize.
err = enc.Join(f, shards, int64(*dataShards)*size)
checkErr(err)
}
func openInput(dataShards, parShards int, fname string) (r []io.Reader, size int64, err error) {
// Create shards and load the data.
shards := make([]io.Reader, dataShards+parShards)
for i := range shards {
infn := fmt.Sprintf("%s.%d", fname, i)
fmt.Println("Opening", infn)
f, err := os.Open(infn)
if err != nil {
fmt.Println("Error reading file", err)
shards[i] = nil
continue
} else {
shards[i] = f
}
stat, err := f.Stat()
checkErr(err)
if stat.Size() > 0 {
size = stat.Size()
} else {
shards[i] = nil
}
}
return shards, size, nil
}
func checkErr(err error) {
if err != nil {
fmt.Fprintf(os.Stderr, "Error: %s", err.Error())
os.Exit(2)
}
}

142
examples/stream-encoder.go Normal file
View File

@ -0,0 +1,142 @@
//+build ignore
// Copyright 2015, Klaus Post, see LICENSE for details.
//
// Simple stream encoder example
//
// The encoder encodes a single file into a number of shards
// To reverse the process see "stream-decoder.go"
//
// To build an executable use:
//
// go build stream-encoder.go
//
// Simple Encoder/Decoder Shortcomings:
// * If the file size of the input isn't dividable by the number of data shards
// the output will contain extra zeroes
//
// * If the shard numbers isn't the same for the decoder as in the
// encoder, invalid output will be generated.
//
// * If values have changed in a shard, it cannot be reconstructed.
//
// * If two shards have been swapped, reconstruction will always fail.
// You need to supply the shards in the same order as they were given to you.
//
// The solution for this is to save a metadata file containing:
//
// * File size.
// * The number of data/parity shards.
// * HASH of each shard.
// * Order of the shards.
//
// If you save these properties, you should abe able to detect file corruption
// in a shard and be able to reconstruct your data if you have the needed number of shards left.
package main
import (
"flag"
"fmt"
"os"
"path/filepath"
"io"
"github.com/klauspost/reedsolomon"
)
var dataShards = flag.Int("data", 4, "Number of shards to split the data into, must be below 257.")
var parShards = flag.Int("par", 2, "Number of parity shards")
var outDir = flag.String("out", "", "Alternative output directory")
func init() {
flag.Usage = func() {
fmt.Fprintf(os.Stderr, "Usage of %s:\n", os.Args[0])
fmt.Fprintf(os.Stderr, " %s [-flags] filename.ext\n\n", os.Args[0])
fmt.Fprintf(os.Stderr, "Valid flags:\n")
flag.PrintDefaults()
}
}
func main() {
// Parse command line parameters.
flag.Parse()
args := flag.Args()
if len(args) != 1 {
fmt.Fprintf(os.Stderr, "Error: No input filename given\n")
flag.Usage()
os.Exit(1)
}
if *dataShards > 257 {
fmt.Fprintf(os.Stderr, "Error: Too many data shards\n")
os.Exit(1)
}
fname := args[0]
// Create encoding matrix.
enc, err := reedsolomon.NewStream(*dataShards, *parShards)
checkErr(err)
fmt.Println("Opening", fname)
f, err := os.Open(fname)
checkErr(err)
instat, err := f.Stat()
checkErr(err)
shards := *dataShards + *parShards
out := make([]*os.File, shards)
// Create the resulting files.
dir, file := filepath.Split(fname)
if *outDir != "" {
dir = *outDir
}
for i := range out {
outfn := fmt.Sprintf("%s.%d", file, i)
fmt.Println("Creating", outfn)
out[i], err = os.Create(filepath.Join(dir, outfn))
checkErr(err)
}
// Split into files.
data := make([]io.Writer, *dataShards)
for i := range data {
data[i] = out[i]
}
// Do the split
err = enc.Split(f, data, instat.Size())
checkErr(err)
// Close and re-open the files.
input := make([]io.Reader, *dataShards)
for i := range data {
out[i].Close()
f, err := os.Open(out[i].Name())
checkErr(err)
input[i] = f
defer f.Close()
}
// Create parity output writers
parity := make([]io.Writer, *parShards)
for i := range parity {
parity[i] = out[*dataShards+i]
defer out[*dataShards+i].Close()
}
// Encode parity
err = enc.Encode(input, parity)
checkErr(err)
fmt.Printf("File split into %d data + %d parity shards.\n", *dataShards, *parShards)
}
func checkErr(err error) {
if err != nil {
fmt.Fprintf(os.Stderr, "Error: %s", err.Error())
os.Exit(2)
}
}