Merge pull request #5368 from heyitsanthony/sshot-hash

v3rpc, etcdctl: snapshot integrity hash
release-3.0
Anthony Romano 2016-05-16 13:09:02 -07:00
commit f6e5fe6877
4 changed files with 99 additions and 2 deletions

View File

@ -18,6 +18,8 @@ $ etcdctl --endpoints $ENDPOINT snapshot save snapshot.db
To restore a cluster, all that is needed is a single snapshot "db" file. A cluster restore with `etcdctl snapshot restore` creates new etcd data directories; all members should restore using the same snapshot. Restoring overwrites some snapshot metadata (specifically, the member ID and cluster ID); the member loses its former identity. This metadata overwrite prevents the new member from inadvertently joining an existing cluster. Therefore in order to start a cluster from a snapshot, the restore must start a new logical cluster.
Snapshot integrity may optionally be verified at restore time. If the snapshot is taken with `etcdctl snapshot save`, it will have an integrity hash that is checked by `etcdctl snapshot restore`. If the snapshot is copied from the data directory, there is no integrity hash and it can only be restored by passing `--skip-hash-check`.
A restore initializes a new member of a new cluster, with a fresh cluster configuration using `etcd`'s cluster configuration flags, but preserves the contents of the etcd keyspace. Continuing from the previous example, the following creates new etcd data directories (`m1.etcd`, `m2.etcd`, `m3.etcd`) for a three member cluster:
```sh

View File

@ -52,6 +52,38 @@ func snapshotTest(cx ctlCtx) {
}
}
// TestCtlV3SnapshotCorrupt runs snapshotCorruptTest through the testCtl
// harness.
func TestCtlV3SnapshotCorrupt(t *testing.T) {
	testCtl(t, snapshotCorruptTest)
}
// snapshotCorruptTest saves a snapshot to a local file, overwrites the first
// 512 bytes of that file with zeros, and then runs "snapshot restore" on it,
// expecting the command output to contain "expected sha256".
//
// Both the snapshot file and the "snap.etcd" restore directory are removed
// when the test returns.
func snapshotCorruptTest(cx ctlCtx) {
	fpath := "test.snapshot"
	defer os.RemoveAll(fpath)

	if err := ctlV3SnapshotSave(cx, fpath); err != nil {
		cx.t.Fatalf("snapshotCorruptTest ctlV3SnapshotSave error (%v)", err)
	}

	// corrupt file: the handle is opened without O_APPEND or O_TRUNC, so the
	// zero block overwrites the beginning of the snapshot in place.
	f, oerr := os.OpenFile(fpath, os.O_WRONLY, 0)
	if oerr != nil {
		cx.t.Fatal(oerr)
	}
	_, werr := f.Write(make([]byte, 512))
	// Close before inspecting the write error so the handle is released on
	// every path, and surface a close failure instead of dropping it.
	cerr := f.Close()
	if werr != nil {
		cx.t.Fatal(werr)
	}
	if cerr != nil {
		cx.t.Fatal(cerr)
	}

	defer os.RemoveAll("snap.etcd")
	restoreArgs := append(cx.PrefixArgs(),
		"snapshot", "restore",
		"--data-dir", "snap.etcd",
		fpath)
	if serr := spawnWithExpect(restoreArgs, "expected sha256"); serr != nil {
		cx.t.Fatal(serr)
	}
}
func ctlV3SnapshotSave(cx ctlCtx, fpath string) error {
cmdArgs := append(cx.PrefixArgs(), "snapshot", "save", fpath)
return spawnWithExpect(cmdArgs, fmt.Sprintf("Snapshot saved at %s", fpath))

View File

@ -15,6 +15,7 @@
package command
import (
"crypto/sha256"
"encoding/binary"
"encoding/json"
"fmt"
@ -22,6 +23,7 @@ import (
"io"
"os"
"path"
"reflect"
"strings"
"github.com/boltdb/bolt"
@ -50,6 +52,7 @@ var (
restoreDataDir string
restorePeerURLs string
restoreName string
skipHashCheck bool
)
// NewSnapshotCommand returns the cobra command for "snapshot".
@ -94,6 +97,7 @@ func NewSnapshotRestoreCommand() *cobra.Command {
cmd.Flags().StringVar(&restoreClusterToken, "initial-cluster-token", "etcd-cluster", "Initial cluster token for the etcd cluster during restore bootstrap.")
cmd.Flags().StringVar(&restorePeerURLs, "initial-advertise-peer-urls", defaultInitialAdvertisePeerURLs, "List of this member's peer URLs to advertise to the rest of the cluster.")
cmd.Flags().StringVar(&restoreName, "name", defaultName, "Human-readable name for this member.")
cmd.Flags().BoolVar(&skipHashCheck, "skip-hash-check", false, "Ignore snapshot integrity hash value (required if copied from data directory).")
return cmd
}
@ -191,7 +195,7 @@ func initialClusterFromName(name string) string {
if name == "" {
n = defaultName
}
return fmt.Sprintf("%s=http://localhost:2380", n, n)
return fmt.Sprintf("%s=http://localhost:2380", n)
}
// makeWAL creates a WAL for the initial cluster
@ -261,18 +265,65 @@ func makeDB(snapdir, dbfile string) {
}
defer f.Close()
// get snapshot integrity hash
if _, err := f.Seek(-sha256.Size, os.SEEK_END); err != nil {
ExitWithError(ExitIO, err)
}
sha := make([]byte, sha256.Size)
if _, err := f.Read(sha); err != nil {
ExitWithError(ExitIO, err)
}
if _, err := f.Seek(0, os.SEEK_SET); err != nil {
ExitWithError(ExitIO, err)
}
if err := os.MkdirAll(snapdir, 0755); err != nil {
ExitWithError(ExitIO, err)
}
dbpath := path.Join(snapdir, "db")
db, dberr := os.OpenFile(dbpath, os.O_WRONLY|os.O_CREATE, 0600)
db, dberr := os.OpenFile(dbpath, os.O_RDWR|os.O_CREATE, 0600)
if dberr != nil {
ExitWithError(ExitIO, dberr)
}
if _, err := io.Copy(db, f); err != nil {
ExitWithError(ExitIO, err)
}
// truncate away integrity hash, if any.
off, serr := db.Seek(0, os.SEEK_END)
if serr != nil {
ExitWithError(ExitIO, serr)
}
hasHash := (off % 512) == sha256.Size
if hasHash {
if err := db.Truncate(off - sha256.Size); err != nil {
ExitWithError(ExitIO, err)
}
}
if !hasHash && !skipHashCheck {
err := fmt.Errorf("snapshot missing hash but --skip-hash-check=false")
ExitWithError(ExitBadArgs, err)
}
if hasHash && !skipHashCheck {
// check for match
if _, err := db.Seek(0, os.SEEK_SET); err != nil {
ExitWithError(ExitIO, err)
}
h := sha256.New()
if _, err := io.Copy(h, db); err != nil {
ExitWithError(ExitIO, err)
}
dbsha := h.Sum(nil)
if !reflect.DeepEqual(sha, dbsha) {
err := fmt.Errorf("expected sha256 %v, got %v", sha, dbsha)
ExitWithError(ExitInvalidInput, err)
}
}
// db hash is OK, can now modify DB so it can be part of a new cluster
db.Close()
// update consistentIndex so applies go through on etcdserver despite
@ -285,6 +336,7 @@ func makeDB(snapdir, dbfile string) {
_, _, err := s.TxnDeleteRange(id, k, nil)
return err
}
// delete stored members from old cluster since using new members
btx.UnsafeForEach([]byte("members"), del)
btx.UnsafeForEach([]byte("members_removed"), del)

View File

@ -15,6 +15,7 @@
package v3rpc
import (
"crypto/sha256"
"io"
"github.com/coreos/etcd/etcdserver"
@ -81,6 +82,8 @@ func (ms *maintenanceServer) Snapshot(sr *pb.SnapshotRequest, srv pb.Maintenance
pw.Close()
}()
// send file data
h := sha256.New()
br := int64(0)
buf := make([]byte, 32*1024)
sz := snap.Size()
@ -97,6 +100,14 @@ func (ms *maintenanceServer) Snapshot(sr *pb.SnapshotRequest, srv pb.Maintenance
if err = srv.Send(resp); err != nil {
return togRPCError(err)
}
h.Write(buf[:n])
}
// send sha
sha := h.Sum(nil)
hresp := &pb.SnapshotResponse{RemainingBytes: 0, Blob: sha}
if err := srv.Send(hresp); err != nil {
return togRPCError(err)
}
return nil