From 4ddcc36057a42cf7c53a9cbe7bbbc8e5ea36f70b Mon Sep 17 00:00:00 2001 From: Gyuho Lee Date: Thu, 14 May 2020 18:39:42 -0700 Subject: [PATCH] etcdserver/api/v3rpc: document, clean up snapshot sends Signed-off-by: Gyuho Lee --- etcdserver/api/v3rpc/maintenance.go | 54 ++++++++++++++++++++++++----- 1 file changed, 46 insertions(+), 8 deletions(-) diff --git a/etcdserver/api/v3rpc/maintenance.go b/etcdserver/api/v3rpc/maintenance.go index da0043206..076f7dad6 100644 --- a/etcdserver/api/v3rpc/maintenance.go +++ b/etcdserver/api/v3rpc/maintenance.go @@ -18,7 +18,9 @@ import ( "context" "crypto/sha256" "io" + "time" + "github.com/dustin/go-humanize" "go.etcd.io/etcd/v3/auth" "go.etcd.io/etcd/v3/etcdserver" "go.etcd.io/etcd/v3/etcdserver/api/v3rpc/rpctypes" @@ -94,6 +96,9 @@ func (ms *maintenanceServer) Defragment(ctx context.Context, sr *pb.DefragmentRe return &pb.DefragmentResponse{}, nil } +// big enough size to hold >1 OS pages in the buffer +const snapshotSendBufferSize = 32 * 1024 + func (ms *maintenanceServer) Snapshot(sr *pb.SnapshotRequest, srv pb.Maintenance_SnapshotServer) error { snap := ms.bg.Backend().Snapshot() pr, pw := io.Pipe() @@ -108,19 +113,41 @@ func (ms *maintenanceServer) Snapshot(sr *pb.SnapshotRequest, srv pb.Maintenance pw.Close() }() - // send file data + // record SHA digest of snapshot data + // used for integrity checks during snapshot restore operation h := sha256.New() - br := int64(0) - buf := make([]byte, 32*1024) - sz := snap.Size() - for br < sz { + + // buffer just holds read bytes from stream + // response size is multiple of OS page size, fetched in boltdb + // e.g. 4*1024 + buf := make([]byte, snapshotSendBufferSize) + + sent := int64(0) + total := snap.Size() + size := humanize.Bytes(uint64(total)) + + start := time.Now() + ms.lg.Info("sending database snapshot to client", + zap.Int64("total-bytes", total), + zap.String("size", size), + ) + for total-sent > 0 { n, err := io.ReadFull(pr, buf) if err != nil && err != io.EOF && err != io.ErrUnexpectedEOF { return togRPCError(err) } - br += int64(n) + sent += int64(n) + + // if total is x * snapshotSendBufferSize. it is possible that + // resp.RemainingBytes == 0 + // resp.Blob == zero byte but not nil + // does this make server response sent to client nil in proto + // and client stops receiving from snapshot stream before + // server sends snapshot SHA? + // No, the client will still receive non-nil response + // until server closes the stream with EOF resp := &pb.SnapshotResponse{ - RemainingBytes: uint64(sz - br), + RemainingBytes: uint64(total - sent), Blob: buf[:n], } if err = srv.Send(resp); err != nil { @@ -129,13 +156,24 @@ func (ms *maintenanceServer) Snapshot(sr *pb.SnapshotRequest, srv pb.Maintenance h.Write(buf[:n]) } - // send sha + // send SHA digest for integrity checks + // during snapshot restore operation sha := h.Sum(nil) + + ms.lg.Info("sending database sha256 checksum to client", + zap.Int64("total-bytes", total), + zap.Int("checksum-size", len(sha)), + ) hresp := &pb.SnapshotResponse{RemainingBytes: 0, Blob: sha} if err := srv.Send(hresp); err != nil { return togRPCError(err) } + ms.lg.Info("successfully sent database snapshot to client", + zap.Int64("total-bytes", total), + zap.String("size", size), + zap.String("took", humanize.Time(start)), + ) return nil }