rafthttp: support structured logger

Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
release-3.4
Gyuho Lee 2018-04-16 03:58:46 -07:00
parent fdbedacc83
commit c68f625353
15 changed files with 748 additions and 132 deletions

View File

@ -28,6 +28,8 @@ import (
"github.com/coreos/etcd/raft/raftpb" "github.com/coreos/etcd/raft/raftpb"
"github.com/coreos/etcd/raftsnap" "github.com/coreos/etcd/raftsnap"
"github.com/coreos/etcd/version" "github.com/coreos/etcd/version"
"go.uber.org/zap"
) )
const ( const (
@ -59,6 +61,8 @@ type writerToResponse interface {
} }
type pipelineHandler struct { type pipelineHandler struct {
lg *zap.Logger
localID types.ID
tr Transporter tr Transporter
r Raft r Raft
cid types.ID cid types.ID
@ -69,9 +73,11 @@ type pipelineHandler struct {
// //
// The handler reads out the raft message from request body, // The handler reads out the raft message from request body,
// and forwards it to the given raft state machine for processing. // and forwards it to the given raft state machine for processing.
func newPipelineHandler(tr Transporter, r Raft, cid types.ID) http.Handler { func newPipelineHandler(t *Transport, r Raft, cid types.ID) http.Handler {
return &pipelineHandler{ return &pipelineHandler{
tr: tr, lg: t.Logger,
localID: t.ID,
tr: t,
r: r, r: r,
cid: cid, cid: cid,
} }
@ -86,7 +92,7 @@ func (h *pipelineHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
w.Header().Set("X-Etcd-Cluster-ID", h.cid.String()) w.Header().Set("X-Etcd-Cluster-ID", h.cid.String())
if err := checkClusterCompatibilityFromHeader(r.Header, h.cid); err != nil { if err := checkClusterCompatibilityFromHeader(h.lg, h.localID, r.Header, h.cid); err != nil {
http.Error(w, err.Error(), http.StatusPreconditionFailed) http.Error(w, err.Error(), http.StatusPreconditionFailed)
return return
} }
@ -98,7 +104,15 @@ func (h *pipelineHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
limitedr := pioutil.NewLimitedBufferReader(r.Body, connReadLimitByte) limitedr := pioutil.NewLimitedBufferReader(r.Body, connReadLimitByte)
b, err := ioutil.ReadAll(limitedr) b, err := ioutil.ReadAll(limitedr)
if err != nil { if err != nil {
if h.lg != nil {
h.lg.Warn(
"failed to read Raft message",
zap.String("local-member-id", h.localID.String()),
zap.Error(err),
)
} else {
plog.Errorf("failed to read raft message (%v)", err) plog.Errorf("failed to read raft message (%v)", err)
}
http.Error(w, "error reading raft message", http.StatusBadRequest) http.Error(w, "error reading raft message", http.StatusBadRequest)
recvFailures.WithLabelValues(r.RemoteAddr).Inc() recvFailures.WithLabelValues(r.RemoteAddr).Inc()
return return
@ -106,7 +120,15 @@ func (h *pipelineHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
var m raftpb.Message var m raftpb.Message
if err := m.Unmarshal(b); err != nil { if err := m.Unmarshal(b); err != nil {
if h.lg != nil {
h.lg.Warn(
"failed to unmarshal Raft message",
zap.String("local-member-id", h.localID.String()),
zap.Error(err),
)
} else {
plog.Errorf("failed to unmarshal raft message (%v)", err) plog.Errorf("failed to unmarshal raft message (%v)", err)
}
http.Error(w, "error unmarshaling raft message", http.StatusBadRequest) http.Error(w, "error unmarshaling raft message", http.StatusBadRequest)
recvFailures.WithLabelValues(r.RemoteAddr).Inc() recvFailures.WithLabelValues(r.RemoteAddr).Inc()
return return
@ -119,7 +141,15 @@ func (h *pipelineHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
case writerToResponse: case writerToResponse:
v.WriteTo(w) v.WriteTo(w)
default: default:
if h.lg != nil {
h.lg.Warn(
"failed to process Raft message",
zap.String("local-member-id", h.localID.String()),
zap.Error(err),
)
} else {
plog.Warningf("failed to process raft message (%v)", err) plog.Warningf("failed to process raft message (%v)", err)
}
http.Error(w, "error processing raft message", http.StatusInternalServerError) http.Error(w, "error processing raft message", http.StatusInternalServerError)
w.(http.Flusher).Flush() w.(http.Flusher).Flush()
// disconnect the http stream // disconnect the http stream
@ -134,17 +164,22 @@ func (h *pipelineHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
} }
type snapshotHandler struct { type snapshotHandler struct {
lg *zap.Logger
tr Transporter tr Transporter
r Raft r Raft
snapshotter *raftsnap.Snapshotter snapshotter *raftsnap.Snapshotter
localID types.ID
cid types.ID cid types.ID
} }
func newSnapshotHandler(tr Transporter, r Raft, snapshotter *raftsnap.Snapshotter, cid types.ID) http.Handler { func newSnapshotHandler(t *Transport, r Raft, snapshotter *raftsnap.Snapshotter, cid types.ID) http.Handler {
return &snapshotHandler{ return &snapshotHandler{
tr: tr, lg: t.Logger,
tr: t,
r: r, r: r,
snapshotter: snapshotter, snapshotter: snapshotter,
localID: t.ID,
cid: cid, cid: cid,
} }
} }
@ -167,7 +202,7 @@ func (h *snapshotHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
w.Header().Set("X-Etcd-Cluster-ID", h.cid.String()) w.Header().Set("X-Etcd-Cluster-ID", h.cid.String())
if err := checkClusterCompatibilityFromHeader(r.Header, h.cid); err != nil { if err := checkClusterCompatibilityFromHeader(h.lg, h.localID, r.Header, h.cid); err != nil {
http.Error(w, err.Error(), http.StatusPreconditionFailed) http.Error(w, err.Error(), http.StatusPreconditionFailed)
return return
} }
@ -179,7 +214,16 @@ func (h *snapshotHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
m, err := dec.decodeLimit(uint64(1 << 63)) m, err := dec.decodeLimit(uint64(1 << 63))
if err != nil { if err != nil {
msg := fmt.Sprintf("failed to decode raft message (%v)", err) msg := fmt.Sprintf("failed to decode raft message (%v)", err)
plog.Errorf(msg) if h.lg != nil {
h.lg.Warn(
"failed to decode Raft message",
zap.String("local-member-id", h.localID.String()),
zap.String("remote-snapshot-sender-id", types.ID(m.From).String()),
zap.Error(err),
)
} else {
plog.Error(msg)
}
http.Error(w, msg, http.StatusBadRequest) http.Error(w, msg, http.StatusBadRequest)
recvFailures.WithLabelValues(r.RemoteAddr).Inc() recvFailures.WithLabelValues(r.RemoteAddr).Inc()
return return
@ -188,22 +232,61 @@ func (h *snapshotHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
receivedBytes.WithLabelValues(types.ID(m.From).String()).Add(float64(m.Size())) receivedBytes.WithLabelValues(types.ID(m.From).String()).Add(float64(m.Size()))
if m.Type != raftpb.MsgSnap { if m.Type != raftpb.MsgSnap {
if h.lg != nil {
h.lg.Warn(
"unexpected Raft message type",
zap.String("local-member-id", h.localID.String()),
zap.String("remote-snapshot-sender-id", types.ID(m.From).String()),
zap.String("message-type", m.Type.String()),
)
} else {
plog.Errorf("unexpected raft message type %s on snapshot path", m.Type) plog.Errorf("unexpected raft message type %s on snapshot path", m.Type)
}
http.Error(w, "wrong raft message type", http.StatusBadRequest) http.Error(w, "wrong raft message type", http.StatusBadRequest)
return return
} }
if h.lg != nil {
h.lg.Info(
"receiving database snapshot",
zap.String("local-member-id", h.localID.String()),
zap.String("remote-snapshot-sender-id", types.ID(m.From).String()),
zap.Uint64("snapshot-index", m.Snapshot.Metadata.Index),
)
} else {
plog.Infof("receiving database snapshot [index:%d, from %s] ...", m.Snapshot.Metadata.Index, types.ID(m.From)) plog.Infof("receiving database snapshot [index:%d, from %s] ...", m.Snapshot.Metadata.Index, types.ID(m.From))
}
// save incoming database snapshot. // save incoming database snapshot.
n, err := h.snapshotter.SaveDBFrom(r.Body, m.Snapshot.Metadata.Index) n, err := h.snapshotter.SaveDBFrom(r.Body, m.Snapshot.Metadata.Index)
if err != nil { if err != nil {
msg := fmt.Sprintf("failed to save KV snapshot (%v)", err) msg := fmt.Sprintf("failed to save KV snapshot (%v)", err)
if h.lg != nil {
h.lg.Warn(
"failed to save KV snapshot",
zap.String("local-member-id", h.localID.String()),
zap.String("remote-snapshot-sender-id", types.ID(m.From).String()),
zap.Error(err),
)
} else {
plog.Error(msg) plog.Error(msg)
}
http.Error(w, msg, http.StatusInternalServerError) http.Error(w, msg, http.StatusInternalServerError)
return return
} }
receivedBytes.WithLabelValues(types.ID(m.From).String()).Add(float64(n)) receivedBytes.WithLabelValues(types.ID(m.From).String()).Add(float64(n))
if h.lg != nil {
h.lg.Info(
"received and saved database snapshot",
zap.String("local-member-id", h.localID.String()),
zap.String("remote-snapshot-sender-id", types.ID(m.From).String()),
zap.Uint64("snapshot-index", m.Snapshot.Metadata.Index),
)
} else {
plog.Infof("received and saved database snapshot [index: %d, from: %s] successfully", m.Snapshot.Metadata.Index, types.ID(m.From)) plog.Infof("received and saved database snapshot [index: %d, from: %s] successfully", m.Snapshot.Metadata.Index, types.ID(m.From))
}
if err := h.r.Process(context.TODO(), m); err != nil { if err := h.r.Process(context.TODO(), m); err != nil {
switch v := err.(type) { switch v := err.(type) {
@ -213,17 +296,28 @@ func (h *snapshotHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
v.WriteTo(w) v.WriteTo(w)
default: default:
msg := fmt.Sprintf("failed to process raft message (%v)", err) msg := fmt.Sprintf("failed to process raft message (%v)", err)
plog.Warningf(msg) if h.lg != nil {
h.lg.Warn(
"failed to process Raft message",
zap.String("local-member-id", h.localID.String()),
zap.String("remote-snapshot-sender-id", types.ID(m.From).String()),
zap.Error(err),
)
} else {
plog.Error(msg)
}
http.Error(w, msg, http.StatusInternalServerError) http.Error(w, msg, http.StatusInternalServerError)
} }
return return
} }
// Write StatusNoContent header after the message has been processed by // Write StatusNoContent header after the message has been processed by
// raft, which facilitates the client to report MsgSnap status. // raft, which facilitates the client to report MsgSnap status.
w.WriteHeader(http.StatusNoContent) w.WriteHeader(http.StatusNoContent)
} }
type streamHandler struct { type streamHandler struct {
lg *zap.Logger
tr *Transport tr *Transport
peerGetter peerGetter peerGetter peerGetter
r Raft r Raft
@ -231,9 +325,10 @@ type streamHandler struct {
cid types.ID cid types.ID
} }
func newStreamHandler(tr *Transport, pg peerGetter, r Raft, id, cid types.ID) http.Handler { func newStreamHandler(t *Transport, pg peerGetter, r Raft, id, cid types.ID) http.Handler {
return &streamHandler{ return &streamHandler{
tr: tr, lg: t.Logger,
tr: t,
peerGetter: pg, peerGetter: pg,
r: r, r: r,
id: id, id: id,
@ -251,7 +346,7 @@ func (h *streamHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
w.Header().Set("X-Server-Version", version.Version) w.Header().Set("X-Server-Version", version.Version)
w.Header().Set("X-Etcd-Cluster-ID", h.cid.String()) w.Header().Set("X-Etcd-Cluster-ID", h.cid.String())
if err := checkClusterCompatibilityFromHeader(r.Header, h.cid); err != nil { if err := checkClusterCompatibilityFromHeader(h.lg, h.tr.ID, r.Header, h.cid); err != nil {
http.Error(w, err.Error(), http.StatusPreconditionFailed) http.Error(w, err.Error(), http.StatusPreconditionFailed)
return return
} }
@ -263,7 +358,16 @@ func (h *streamHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
case streamTypeMessage.endpoint(): case streamTypeMessage.endpoint():
t = streamTypeMessage t = streamTypeMessage
default: default:
if h.lg != nil {
h.lg.Debug(
"ignored unexpected streaming request path",
zap.String("local-member-id", h.tr.ID.String()),
zap.String("remote-peer-id-stream-handler", h.id.String()),
zap.String("path", r.URL.Path),
)
} else {
plog.Debugf("ignored unexpected streaming request path %s", r.URL.Path) plog.Debugf("ignored unexpected streaming request path %s", r.URL.Path)
}
http.Error(w, "invalid path", http.StatusNotFound) http.Error(w, "invalid path", http.StatusNotFound)
return return
} }
@ -271,12 +375,31 @@ func (h *streamHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
fromStr := path.Base(r.URL.Path) fromStr := path.Base(r.URL.Path)
from, err := types.IDFromString(fromStr) from, err := types.IDFromString(fromStr)
if err != nil { if err != nil {
if h.lg != nil {
h.lg.Warn(
"failed to parse path into ID",
zap.String("local-member-id", h.tr.ID.String()),
zap.String("remote-peer-id-stream-handler", h.id.String()),
zap.String("path", fromStr),
zap.Error(err),
)
} else {
plog.Errorf("failed to parse from %s into ID (%v)", fromStr, err) plog.Errorf("failed to parse from %s into ID (%v)", fromStr, err)
}
http.Error(w, "invalid from", http.StatusNotFound) http.Error(w, "invalid from", http.StatusNotFound)
return return
} }
if h.r.IsIDRemoved(uint64(from)) { if h.r.IsIDRemoved(uint64(from)) {
if h.lg != nil {
h.lg.Warn(
"rejected stream from remote peer because it was removed",
zap.String("local-member-id", h.tr.ID.String()),
zap.String("remote-peer-id-stream-handler", h.id.String()),
zap.String("remote-peer-id-from", from.String()),
)
} else {
plog.Warningf("rejected the stream from peer %s since it was removed", from) plog.Warningf("rejected the stream from peer %s since it was removed", from)
}
http.Error(w, "removed member", http.StatusGone) http.Error(w, "removed member", http.StatusGone)
return return
} }
@ -290,14 +413,35 @@ func (h *streamHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
if urls := r.Header.Get("X-PeerURLs"); urls != "" { if urls := r.Header.Get("X-PeerURLs"); urls != "" {
h.tr.AddRemote(from, strings.Split(urls, ",")) h.tr.AddRemote(from, strings.Split(urls, ","))
} }
if h.lg != nil {
h.lg.Warn(
"failed to find remote peer in cluster",
zap.String("local-member-id", h.tr.ID.String()),
zap.String("remote-peer-id-stream-handler", h.id.String()),
zap.String("remote-peer-id-from", from.String()),
zap.String("cluster-id", h.cid.String()),
)
} else {
plog.Errorf("failed to find member %s in cluster %s", from, h.cid) plog.Errorf("failed to find member %s in cluster %s", from, h.cid)
}
http.Error(w, "error sender not found", http.StatusNotFound) http.Error(w, "error sender not found", http.StatusNotFound)
return return
} }
wto := h.id.String() wto := h.id.String()
if gto := r.Header.Get("X-Raft-To"); gto != wto { if gto := r.Header.Get("X-Raft-To"); gto != wto {
if h.lg != nil {
h.lg.Warn(
"ignored streaming request; ID mismatch",
zap.String("local-member-id", h.tr.ID.String()),
zap.String("remote-peer-id-stream-handler", h.id.String()),
zap.String("remote-peer-id-header", gto),
zap.String("remote-peer-id-from", from.String()),
zap.String("cluster-id", h.cid.String()),
)
} else {
plog.Errorf("streaming request ignored (ID mismatch got %s want %s)", gto, wto) plog.Errorf("streaming request ignored (ID mismatch got %s want %s)", gto, wto)
}
http.Error(w, "to field mismatch", http.StatusPreconditionFailed) http.Error(w, "to field mismatch", http.StatusPreconditionFailed)
return return
} }
@ -321,13 +465,66 @@ func (h *streamHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
// It checks whether the version of local member is compatible with // It checks whether the version of local member is compatible with
// the versions in the header, and whether the cluster ID of local member // the versions in the header, and whether the cluster ID of local member
// matches the one in the header. // matches the one in the header.
func checkClusterCompatibilityFromHeader(header http.Header, cid types.ID) error { func checkClusterCompatibilityFromHeader(lg *zap.Logger, localID types.ID, header http.Header, cid types.ID) error {
if err := checkVersionCompability(header.Get("X-Server-From"), serverVersion(header), minClusterVersion(header)); err != nil { remoteName := header.Get("X-Server-From")
remoteServer := serverVersion(header)
remoteVs := ""
if remoteServer != nil {
remoteVs = remoteServer.String()
}
remoteMinClusterVer := minClusterVersion(header)
remoteMinClusterVs := ""
if remoteMinClusterVer != nil {
remoteMinClusterVs = remoteMinClusterVer.String()
}
localServer, localMinCluster, err := checkVersionCompatibility(remoteName, remoteServer, remoteMinClusterVer)
localVs := ""
if localServer != nil {
localVs = localServer.String()
}
localMinClusterVs := ""
if localMinCluster != nil {
localMinClusterVs = localMinCluster.String()
}
if err != nil {
if lg != nil {
lg.Warn(
"failed to check version compatibility",
zap.String("local-member-id", localID.String()),
zap.String("local-member-cluster-id", cid.String()),
zap.String("local-member-server-version", localVs),
zap.String("local-member-server-minimum-cluster-version", localMinClusterVs),
zap.String("remote-peer-server-name", remoteName),
zap.String("remote-peer-server-version", remoteVs),
zap.String("remote-peer-server-minimum-cluster-version", remoteMinClusterVs),
zap.Error(err),
)
} else {
plog.Errorf("request version incompatibility (%v)", err) plog.Errorf("request version incompatibility (%v)", err)
}
return errIncompatibleVersion return errIncompatibleVersion
} }
if gcid := header.Get("X-Etcd-Cluster-ID"); gcid != cid.String() { if gcid := header.Get("X-Etcd-Cluster-ID"); gcid != cid.String() {
if lg != nil {
lg.Warn(
"request cluster ID mismatch",
zap.String("local-member-id", localID.String()),
zap.String("local-member-cluster-id", cid.String()),
zap.String("local-member-server-version", localVs),
zap.String("local-member-server-minimum-cluster-version", localMinClusterVs),
zap.String("remote-peer-server-name", remoteName),
zap.String("remote-peer-server-version", remoteVs),
zap.String("remote-peer-server-minimum-cluster-version", remoteMinClusterVs),
zap.String("remote-peer-cluster-id", gcid),
)
} else {
plog.Errorf("request cluster ID mismatch (got %s want %s)", gcid, cid) plog.Errorf("request cluster ID mismatch (got %s want %s)", gcid, cid)
}
return errClusterIDMismatch return errClusterIDMismatch
} }
return nil return nil

View File

@ -31,6 +31,8 @@ import (
"github.com/coreos/etcd/raft/raftpb" "github.com/coreos/etcd/raft/raftpb"
"github.com/coreos/etcd/raftsnap" "github.com/coreos/etcd/raftsnap"
"github.com/coreos/etcd/version" "github.com/coreos/etcd/version"
"go.uber.org/zap"
) )
func TestServeRaftPrefix(t *testing.T) { func TestServeRaftPrefix(t *testing.T) {
@ -151,7 +153,7 @@ func TestServeRaftPrefix(t *testing.T) {
req.Header.Set("X-Etcd-Cluster-ID", tt.clusterID) req.Header.Set("X-Etcd-Cluster-ID", tt.clusterID)
req.Header.Set("X-Server-Version", version.Version) req.Header.Set("X-Server-Version", version.Version)
rw := httptest.NewRecorder() rw := httptest.NewRecorder()
h := newPipelineHandler(NewNopTransporter(), tt.p, types.ID(0)) h := newPipelineHandler(&Transport{Logger: zap.NewExample()}, tt.p, types.ID(0))
// goroutine because the handler panics to disconnect on raft error // goroutine because the handler panics to disconnect on raft error
donec := make(chan struct{}) donec := make(chan struct{})

View File

@ -25,6 +25,7 @@ import (
"github.com/coreos/etcd/raft/raftpb" "github.com/coreos/etcd/raft/raftpb"
"github.com/coreos/etcd/raftsnap" "github.com/coreos/etcd/raftsnap"
"go.uber.org/zap"
"golang.org/x/time/rate" "golang.org/x/time/rate"
) )
@ -93,8 +94,12 @@ type Peer interface {
// A pipeline is a series of http clients that send http requests to the remote. // A pipeline is a series of http clients that send http requests to the remote.
// It is only used when the stream has not been established. // It is only used when the stream has not been established.
type peer struct { type peer struct {
lg *zap.Logger
localID types.ID
// id of the remote raft peer node // id of the remote raft peer node
id types.ID id types.ID
r Raft r Raft
status *peerStatus status *peerStatus
@ -118,17 +123,27 @@ type peer struct {
stopc chan struct{} stopc chan struct{}
} }
func startPeer(transport *Transport, urls types.URLs, peerID types.ID, fs *stats.FollowerStats) *peer { func startPeer(t *Transport, urls types.URLs, peerID types.ID, fs *stats.FollowerStats) *peer {
if t.Logger != nil {
t.Logger.Info("starting remote peer", zap.String("remote-peer-id", peerID.String()))
} else {
plog.Infof("starting peer %s...", peerID) plog.Infof("starting peer %s...", peerID)
defer plog.Infof("started peer %s", peerID) }
defer func() {
if t.Logger != nil {
t.Logger.Info("started remote peer", zap.String("remote-peer-id", peerID.String()))
} else {
plog.Infof("started peer %s", peerID)
}
}()
status := newPeerStatus(peerID) status := newPeerStatus(t.Logger, peerID)
picker := newURLPicker(urls) picker := newURLPicker(urls)
errorc := transport.ErrorC errorc := t.ErrorC
r := transport.Raft r := t.Raft
pipeline := &pipeline{ pipeline := &pipeline{
peerID: peerID, peerID: peerID,
tr: transport, tr: t,
picker: picker, picker: picker,
status: status, status: status,
followerStats: fs, followerStats: fs,
@ -138,14 +153,16 @@ func startPeer(transport *Transport, urls types.URLs, peerID types.ID, fs *stats
pipeline.start() pipeline.start()
p := &peer{ p := &peer{
lg: t.Logger,
localID: t.ID,
id: peerID, id: peerID,
r: r, r: r,
status: status, status: status,
picker: picker, picker: picker,
msgAppV2Writer: startStreamWriter(peerID, status, fs, r), msgAppV2Writer: startStreamWriter(t.Logger, t.ID, peerID, status, fs, r),
writer: startStreamWriter(peerID, status, fs, r), writer: startStreamWriter(t.Logger, t.ID, peerID, status, fs, r),
pipeline: pipeline, pipeline: pipeline,
snapSender: newSnapshotSender(transport, picker, peerID, status), snapSender: newSnapshotSender(t, picker, peerID, status),
recvc: make(chan raftpb.Message, recvBufSize), recvc: make(chan raftpb.Message, recvBufSize),
propc: make(chan raftpb.Message, maxPendingProposals), propc: make(chan raftpb.Message, maxPendingProposals),
stopc: make(chan struct{}), stopc: make(chan struct{}),
@ -158,8 +175,12 @@ func startPeer(transport *Transport, urls types.URLs, peerID types.ID, fs *stats
select { select {
case mm := <-p.recvc: case mm := <-p.recvc:
if err := r.Process(ctx, mm); err != nil { if err := r.Process(ctx, mm); err != nil {
if t.Logger != nil {
t.Logger.Warn("failed to process Raft message", zap.Error(err))
} else {
plog.Warningf("failed to process raft message (%v)", err) plog.Warningf("failed to process raft message (%v)", err)
} }
}
case <-p.stopc: case <-p.stopc:
return return
} }
@ -183,24 +204,26 @@ func startPeer(transport *Transport, urls types.URLs, peerID types.ID, fs *stats
}() }()
p.msgAppV2Reader = &streamReader{ p.msgAppV2Reader = &streamReader{
lg: t.Logger,
peerID: peerID, peerID: peerID,
typ: streamTypeMsgAppV2, typ: streamTypeMsgAppV2,
tr: transport, tr: t,
picker: picker, picker: picker,
status: status, status: status,
recvc: p.recvc, recvc: p.recvc,
propc: p.propc, propc: p.propc,
rl: rate.NewLimiter(transport.DialRetryFrequency, 1), rl: rate.NewLimiter(t.DialRetryFrequency, 1),
} }
p.msgAppReader = &streamReader{ p.msgAppReader = &streamReader{
lg: t.Logger,
peerID: peerID, peerID: peerID,
typ: streamTypeMessage, typ: streamTypeMessage,
tr: transport, tr: t,
picker: picker, picker: picker,
status: status, status: status,
recvc: p.recvc, recvc: p.recvc,
propc: p.propc, propc: p.propc,
rl: rate.NewLimiter(transport.DialRetryFrequency, 1), rl: rate.NewLimiter(t.DialRetryFrequency, 1),
} }
p.msgAppV2Reader.start() p.msgAppV2Reader.start()
@ -227,9 +250,32 @@ func (p *peer) send(m raftpb.Message) {
p.r.ReportSnapshot(m.To, raft.SnapshotFailure) p.r.ReportSnapshot(m.To, raft.SnapshotFailure)
} }
if p.status.isActive() { if p.status.isActive() {
if p.lg != nil {
p.lg.Warn(
"dropped internal Raft message since sending buffer is full (overloaded network)",
zap.String("message-type", m.Type.String()),
zap.String("local-member-id", p.localID.String()),
zap.String("from", types.ID(m.From).String()),
zap.String("remote-peer-id", types.ID(p.id).String()),
zap.Bool("remote-peer-active", p.status.isActive()),
)
} else {
plog.MergeWarningf("dropped internal raft message to %s since %s's sending buffer is full (bad/overloaded network)", p.id, name) plog.MergeWarningf("dropped internal raft message to %s since %s's sending buffer is full (bad/overloaded network)", p.id, name)
} }
} else {
if p.lg != nil {
p.lg.Warn(
"dropped internal Raft message since sending buffer is full (overloaded network)",
zap.String("message-type", m.Type.String()),
zap.String("local-member-id", p.localID.String()),
zap.String("from", types.ID(m.From).String()),
zap.String("remote-peer-id", types.ID(p.id).String()),
zap.Bool("remote-peer-active", p.status.isActive()),
)
} else {
plog.Debugf("dropped %s to %s since %s's sending buffer is full", m.Type, p.id, name) plog.Debugf("dropped %s to %s since %s's sending buffer is full", m.Type, p.id, name)
}
}
sentFailures.WithLabelValues(types.ID(m.To).String()).Inc() sentFailures.WithLabelValues(types.ID(m.To).String()).Inc()
} }
} }
@ -250,8 +296,12 @@ func (p *peer) attachOutgoingConn(conn *outgoingConn) {
case streamTypeMessage: case streamTypeMessage:
ok = p.writer.attach(conn) ok = p.writer.attach(conn)
default: default:
if p.lg != nil {
p.lg.Panic("unknown stream type", zap.String("type", conn.t.String()))
} else {
plog.Panicf("unhandled stream type %s", conn.t) plog.Panicf("unhandled stream type %s", conn.t)
} }
}
if !ok { if !ok {
conn.Close() conn.Close()
} }
@ -279,8 +329,19 @@ func (p *peer) Resume() {
} }
func (p *peer) stop() { func (p *peer) stop() {
if p.lg != nil {
p.lg.Info("stopping remote peer", zap.String("remote-peer-id", p.id.String()))
} else {
plog.Infof("stopping peer %s...", p.id) plog.Infof("stopping peer %s...", p.id)
defer plog.Infof("stopped peer %s", p.id) }
defer func() {
if p.lg != nil {
p.lg.Info("stopped remote peer", zap.String("remote-peer-id", p.id.String()))
} else {
plog.Infof("stopped peer %s", p.id)
}
}()
close(p.stopc) close(p.stopc)
p.cancel() p.cancel()

View File

@ -15,11 +15,14 @@
package rafthttp package rafthttp
import ( import (
"errors"
"fmt" "fmt"
"sync" "sync"
"time" "time"
"github.com/coreos/etcd/pkg/types" "github.com/coreos/etcd/pkg/types"
"go.uber.org/zap"
) )
type failureType struct { type failureType struct {
@ -28,23 +31,26 @@ type failureType struct {
} }
type peerStatus struct { type peerStatus struct {
lg *zap.Logger
id types.ID id types.ID
mu sync.Mutex // protect variables below mu sync.Mutex // protect variables below
active bool active bool
since time.Time since time.Time
} }
func newPeerStatus(id types.ID) *peerStatus { func newPeerStatus(lg *zap.Logger, id types.ID) *peerStatus {
return &peerStatus{ return &peerStatus{lg: lg, id: id}
id: id,
}
} }
func (s *peerStatus) activate() { func (s *peerStatus) activate() {
s.mu.Lock() s.mu.Lock()
defer s.mu.Unlock() defer s.mu.Unlock()
if !s.active { if !s.active {
if s.lg != nil {
s.lg.Info("peer became active", zap.String("peer-id", s.id.String()))
} else {
plog.Infof("peer %s became active", s.id) plog.Infof("peer %s became active", s.id)
}
s.active = true s.active = true
s.since = time.Now() s.since = time.Now()
} }
@ -55,13 +61,19 @@ func (s *peerStatus) deactivate(failure failureType, reason string) {
defer s.mu.Unlock() defer s.mu.Unlock()
msg := fmt.Sprintf("failed to %s %s on %s (%s)", failure.action, s.id, failure.source, reason) msg := fmt.Sprintf("failed to %s %s on %s (%s)", failure.action, s.id, failure.source, reason)
if s.active { if s.active {
if s.lg != nil {
s.lg.Warn("peer became inactive", zap.String("peer-id", s.id.String()), zap.Error(errors.New(msg)))
} else {
plog.Errorf(msg) plog.Errorf(msg)
plog.Infof("peer %s became inactive", s.id) plog.Infof("peer %s became inactive", s.id)
}
s.active = false s.active = false
s.since = time.Time{} s.since = time.Time{}
return return
} }
plog.Debugf(msg) if s.lg != nil {
s.lg.Warn("peer deactivated again", zap.String("peer-id", s.id.String()), zap.Error(errors.New(msg)))
}
} }
func (s *peerStatus) isActive() bool { func (s *peerStatus) isActive() bool {

View File

@ -27,6 +27,8 @@ import (
"github.com/coreos/etcd/pkg/types" "github.com/coreos/etcd/pkg/types"
"github.com/coreos/etcd/raft" "github.com/coreos/etcd/raft"
"github.com/coreos/etcd/raft/raftpb" "github.com/coreos/etcd/raft/raftpb"
"go.uber.org/zap"
) )
const ( const (
@ -64,14 +66,32 @@ func (p *pipeline) start() {
for i := 0; i < connPerPipeline; i++ { for i := 0; i < connPerPipeline; i++ {
go p.handle() go p.handle()
} }
if p.tr != nil && p.tr.Logger != nil {
p.tr.Logger.Info(
"started HTTP pipelining with remote peer",
zap.String("local-member-id", p.tr.ID.String()),
zap.String("remote-peer-id", p.peerID.String()),
)
} else {
plog.Infof("started HTTP pipelining with peer %s", p.peerID) plog.Infof("started HTTP pipelining with peer %s", p.peerID)
} }
}
func (p *pipeline) stop() { func (p *pipeline) stop() {
close(p.stopc) close(p.stopc)
p.wg.Wait() p.wg.Wait()
if p.tr != nil && p.tr.Logger != nil {
p.tr.Logger.Info(
"stopped HTTP pipelining with remote peer",
zap.String("local-member-id", p.tr.ID.String()),
zap.String("remote-peer-id", p.peerID.String()),
)
} else {
plog.Infof("stopped HTTP pipelining with peer %s", p.peerID) plog.Infof("stopped HTTP pipelining with peer %s", p.peerID)
} }
}
func (p *pipeline) handle() { func (p *pipeline) handle() {
defer p.wg.Done() defer p.wg.Done()

View File

@ -24,6 +24,8 @@ import (
"testing" "testing"
"time" "time"
"go.uber.org/zap"
"github.com/coreos/etcd/etcdserver/stats" "github.com/coreos/etcd/etcdserver/stats"
"github.com/coreos/etcd/pkg/testutil" "github.com/coreos/etcd/pkg/testutil"
"github.com/coreos/etcd/pkg/types" "github.com/coreos/etcd/pkg/types"
@ -301,7 +303,7 @@ func startTestPipeline(tr *Transport, picker *urlPicker) *pipeline {
peerID: types.ID(1), peerID: types.ID(1),
tr: tr, tr: tr,
picker: picker, picker: picker,
status: newPeerStatus(types.ID(1)), status: newPeerStatus(zap.NewExample(), types.ID(1)),
raft: &fakeRaft{}, raft: &fakeRaft{},
followerStats: &stats.FollowerStats{}, followerStats: &stats.FollowerStats{},
errorc: make(chan error, 1), errorc: make(chan error, 1),

View File

@ -18,6 +18,7 @@ import (
"time" "time"
"github.com/xiang90/probing" "github.com/xiang90/probing"
"go.uber.org/zap"
) )
var ( var (
@ -28,7 +29,7 @@ var (
statusErrorInterval = 5 * time.Second statusErrorInterval = 5 * time.Second
) )
func addPeerToProber(p probing.Prober, id string, us []string) { func addPeerToProber(lg *zap.Logger, p probing.Prober, id string, us []string) {
hus := make([]string, len(us)) hus := make([]string, len(us))
for i := range us { for i := range us {
hus[i] = us[i] + ProbingPrefix hus[i] = us[i] + ProbingPrefix
@ -38,27 +39,50 @@ func addPeerToProber(p probing.Prober, id string, us []string) {
s, err := p.Status(id) s, err := p.Status(id)
if err != nil { if err != nil {
plog.Errorf("failed to add peer %s into prober", id) if lg != nil {
lg.Warn("failed to add peer into prober", zap.String("remote-peer-id", id))
} else { } else {
go monitorProbingStatus(s, id) plog.Errorf("failed to add peer %s into prober", id)
}
} else {
go monitorProbingStatus(lg, s, id)
} }
} }
func monitorProbingStatus(s probing.Status, id string) { func monitorProbingStatus(lg *zap.Logger, s probing.Status, id string) {
// set the first interval short to log error early. // set the first interval short to log error early.
interval := statusErrorInterval interval := statusErrorInterval
for { for {
select { select {
case <-time.After(interval): case <-time.After(interval):
if !s.Health() { if !s.Health() {
if lg != nil {
lg.Warn(
"prober detected unhealthy status",
zap.String("remote-peer-id", id),
zap.Duration("rtt", s.SRTT()),
zap.Error(s.Err()),
)
} else {
plog.Warningf("health check for peer %s could not connect: %v", id, s.Err()) plog.Warningf("health check for peer %s could not connect: %v", id, s.Err())
}
interval = statusErrorInterval interval = statusErrorInterval
} else { } else {
interval = statusMonitoringInterval interval = statusMonitoringInterval
} }
if s.ClockDiff() > time.Second { if s.ClockDiff() > time.Second {
if lg != nil {
lg.Warn(
"prober found high clock drift",
zap.String("remote-peer-id", id),
zap.Duration("clock-drift", s.SRTT()),
zap.Duration("rtt", s.ClockDiff()),
zap.Error(s.Err()),
)
} else {
plog.Warningf("the clock difference against peer %s is too high [%v > %v]", id, s.ClockDiff(), time.Second) plog.Warningf("the clock difference against peer %s is too high [%v > %v]", id, s.ClockDiff(), time.Second)
} }
}
rtts.WithLabelValues(id).Observe(s.SRTT().Seconds()) rtts.WithLabelValues(id).Observe(s.SRTT().Seconds())
case <-s.StopNotify(): case <-s.StopNotify():
return return

View File

@ -17,9 +17,13 @@ package rafthttp
import ( import (
"github.com/coreos/etcd/pkg/types" "github.com/coreos/etcd/pkg/types"
"github.com/coreos/etcd/raft/raftpb" "github.com/coreos/etcd/raft/raftpb"
"go.uber.org/zap"
) )
type remote struct { type remote struct {
lg *zap.Logger
localID types.ID
id types.ID id types.ID
status *peerStatus status *peerStatus
pipeline *pipeline pipeline *pipeline
@ -27,7 +31,7 @@ type remote struct {
func startRemote(tr *Transport, urls types.URLs, id types.ID) *remote { func startRemote(tr *Transport, urls types.URLs, id types.ID) *remote {
picker := newURLPicker(urls) picker := newURLPicker(urls)
status := newPeerStatus(id) status := newPeerStatus(tr.Logger, id)
pipeline := &pipeline{ pipeline := &pipeline{
peerID: id, peerID: id,
tr: tr, tr: tr,
@ -39,6 +43,8 @@ func startRemote(tr *Transport, urls types.URLs, id types.ID) *remote {
pipeline.start() pipeline.start()
return &remote{ return &remote{
lg: tr.Logger,
localID: tr.ID,
id: id, id: id,
status: status, status: status,
pipeline: pipeline, pipeline: pipeline,
@ -50,9 +56,32 @@ func (g *remote) send(m raftpb.Message) {
case g.pipeline.msgc <- m: case g.pipeline.msgc <- m:
default: default:
if g.status.isActive() { if g.status.isActive() {
if g.lg != nil {
g.lg.Warn(
"dropped internal Raft message since sending buffer is full (overloaded network)",
zap.String("message-type", m.Type.String()),
zap.String("local-member-id", g.localID.String()),
zap.String("from", types.ID(m.From).String()),
zap.String("remote-peer-id", types.ID(g.id).String()),
zap.Bool("remote-peer-active", g.status.isActive()),
)
} else {
plog.MergeWarningf("dropped internal raft message to %s since sending buffer is full (bad/overloaded network)", g.id) plog.MergeWarningf("dropped internal raft message to %s since sending buffer is full (bad/overloaded network)", g.id)
} }
} else {
if g.lg != nil {
g.lg.Warn(
"dropped Raft message since sending buffer is full (overloaded network)",
zap.String("message-type", m.Type.String()),
zap.String("local-member-id", g.localID.String()),
zap.String("from", types.ID(m.From).String()),
zap.String("remote-peer-id", types.ID(g.id).String()),
zap.Bool("remote-peer-active", g.status.isActive()),
)
} else {
plog.Debugf("dropped %s to %s since sending buffer is full", m.Type, g.id) plog.Debugf("dropped %s to %s since sending buffer is full", m.Type, g.id)
}
}
sentFailures.WithLabelValues(types.ID(m.To).String()).Inc() sentFailures.WithLabelValues(types.ID(m.To).String()).Inc()
} }
} }

View File

@ -27,6 +27,8 @@ import (
"github.com/coreos/etcd/pkg/types" "github.com/coreos/etcd/pkg/types"
"github.com/coreos/etcd/raft" "github.com/coreos/etcd/raft"
"github.com/coreos/etcd/raftsnap" "github.com/coreos/etcd/raftsnap"
"go.uber.org/zap"
) )
var ( var (
@ -66,18 +68,35 @@ func (s *snapshotSender) stop() { close(s.stopc) }
func (s *snapshotSender) send(merged raftsnap.Message) { func (s *snapshotSender) send(merged raftsnap.Message) {
m := merged.Message m := merged.Message
body := createSnapBody(merged) body := createSnapBody(s.tr.Logger, merged)
defer body.Close() defer body.Close()
u := s.picker.pick() u := s.picker.pick()
req := createPostRequest(u, RaftSnapshotPrefix, body, "application/octet-stream", s.tr.URLs, s.from, s.cid) req := createPostRequest(u, RaftSnapshotPrefix, body, "application/octet-stream", s.tr.URLs, s.from, s.cid)
if s.tr.Logger != nil {
s.tr.Logger.Info(
"sending database snapshot",
zap.Uint64("snapshot-index", m.Snapshot.Metadata.Index),
zap.String("remote-peer-id", types.ID(m.To).String()),
)
} else {
plog.Infof("start to send database snapshot [index: %d, to %s]...", m.Snapshot.Metadata.Index, types.ID(m.To)) plog.Infof("start to send database snapshot [index: %d, to %s]...", m.Snapshot.Metadata.Index, types.ID(m.To))
}
err := s.post(req) err := s.post(req)
defer merged.CloseWithError(err) defer merged.CloseWithError(err)
if err != nil { if err != nil {
if s.tr.Logger != nil {
s.tr.Logger.Warn(
"failed to send database snapshot",
zap.Uint64("snapshot-index", m.Snapshot.Metadata.Index),
zap.String("remote-peer-id", types.ID(m.To).String()),
zap.Error(err),
)
} else {
plog.Warningf("database snapshot [index: %d, to: %s] failed to be sent out (%v)", m.Snapshot.Metadata.Index, types.ID(m.To), err) plog.Warningf("database snapshot [index: %d, to: %s] failed to be sent out (%v)", m.Snapshot.Metadata.Index, types.ID(m.To), err)
}
// errMemberRemoved is a critical error since a removed member should // errMemberRemoved is a critical error since a removed member should
// always be stopped. So we use reportCriticalError to report it to errorc. // always be stopped. So we use reportCriticalError to report it to errorc.
@ -97,7 +116,16 @@ func (s *snapshotSender) send(merged raftsnap.Message) {
} }
s.status.activate() s.status.activate()
s.r.ReportSnapshot(m.To, raft.SnapshotFinish) s.r.ReportSnapshot(m.To, raft.SnapshotFinish)
if s.tr.Logger != nil {
s.tr.Logger.Info(
"sent database snapshot",
zap.Uint64("snapshot-index", m.Snapshot.Metadata.Index),
zap.String("remote-peer-id", types.ID(m.To).String()),
)
} else {
plog.Infof("database snapshot [index: %d, to: %s] sent out successfully", m.Snapshot.Metadata.Index, types.ID(m.To)) plog.Infof("database snapshot [index: %d, to: %s] sent out successfully", m.Snapshot.Metadata.Index, types.ID(m.To))
}
sentBytes.WithLabelValues(types.ID(m.To).String()).Add(float64(merged.TotalSize)) sentBytes.WithLabelValues(types.ID(m.To).String()).Add(float64(merged.TotalSize))
} }
@ -142,13 +170,17 @@ func (s *snapshotSender) post(req *http.Request) (err error) {
} }
} }
func createSnapBody(merged raftsnap.Message) io.ReadCloser { func createSnapBody(lg *zap.Logger, merged raftsnap.Message) io.ReadCloser {
buf := new(bytes.Buffer) buf := new(bytes.Buffer)
enc := &messageEncoder{w: buf} enc := &messageEncoder{w: buf}
// encode raft message // encode raft message
if err := enc.encode(&merged.Message); err != nil { if err := enc.encode(&merged.Message); err != nil {
if lg != nil {
lg.Panic("failed to encode message", zap.Error(err))
} else {
plog.Panicf("encode message error (%v)", err) plog.Panicf("encode message error (%v)", err)
} }
}
return &pioutil.ReaderAndCloser{ return &pioutil.ReaderAndCloser{
Reader: io.MultiReader(buf, merged.ReadCloser), Reader: io.MultiReader(buf, merged.ReadCloser),

View File

@ -28,6 +28,8 @@ import (
"github.com/coreos/etcd/pkg/types" "github.com/coreos/etcd/pkg/types"
"github.com/coreos/etcd/raft/raftpb" "github.com/coreos/etcd/raft/raftpb"
"github.com/coreos/etcd/raftsnap" "github.com/coreos/etcd/raftsnap"
"go.uber.org/zap"
) )
type strReaderCloser struct{ *strings.Reader } type strReaderCloser struct{ *strings.Reader }
@ -102,12 +104,12 @@ func testSnapshotSend(t *testing.T, sm *raftsnap.Message) (bool, []os.FileInfo)
r := &fakeRaft{} r := &fakeRaft{}
tr := &Transport{pipelineRt: &http.Transport{}, ClusterID: types.ID(1), Raft: r} tr := &Transport{pipelineRt: &http.Transport{}, ClusterID: types.ID(1), Raft: r}
ch := make(chan struct{}, 1) ch := make(chan struct{}, 1)
h := &syncHandler{newSnapshotHandler(tr, r, raftsnap.New(d), types.ID(1)), ch} h := &syncHandler{newSnapshotHandler(tr, r, raftsnap.New(zap.NewExample(), d), types.ID(1)), ch}
srv := httptest.NewServer(h) srv := httptest.NewServer(h)
defer srv.Close() defer srv.Close()
picker := mustNewURLPicker(t, []string{srv.URL}) picker := mustNewURLPicker(t, []string{srv.URL})
snapsend := newSnapshotSender(tr, picker, types.ID(1), newPeerStatus(types.ID(1))) snapsend := newSnapshotSender(tr, picker, types.ID(1), newPeerStatus(zap.NewExample(), types.ID(1)))
defer snapsend.stop() defer snapsend.stop()
snapsend.send(*sm) snapsend.send(*sm)

View File

@ -25,8 +25,6 @@ import (
"sync" "sync"
"time" "time"
"golang.org/x/time/rate"
"github.com/coreos/etcd/etcdserver/stats" "github.com/coreos/etcd/etcdserver/stats"
"github.com/coreos/etcd/pkg/httputil" "github.com/coreos/etcd/pkg/httputil"
"github.com/coreos/etcd/pkg/transport" "github.com/coreos/etcd/pkg/transport"
@ -35,6 +33,8 @@ import (
"github.com/coreos/etcd/version" "github.com/coreos/etcd/version"
"github.com/coreos/go-semver/semver" "github.com/coreos/go-semver/semver"
"go.uber.org/zap"
"golang.org/x/time/rate"
) )
const ( const (
@ -105,7 +105,11 @@ type outgoingConn struct {
// streamWriter writes messages to the attached outgoingConn. // streamWriter writes messages to the attached outgoingConn.
type streamWriter struct { type streamWriter struct {
lg *zap.Logger
localID types.ID
peerID types.ID peerID types.ID
status *peerStatus status *peerStatus
fs *stats.FollowerStats fs *stats.FollowerStats
r Raft r Raft
@ -122,9 +126,13 @@ type streamWriter struct {
// startStreamWriter creates a streamWrite and starts a long running go-routine that accepts // startStreamWriter creates a streamWrite and starts a long running go-routine that accepts
// messages and writes to the attached outgoing connection. // messages and writes to the attached outgoing connection.
func startStreamWriter(id types.ID, status *peerStatus, fs *stats.FollowerStats, r Raft) *streamWriter { func startStreamWriter(lg *zap.Logger, local, id types.ID, status *peerStatus, fs *stats.FollowerStats, r Raft) *streamWriter {
w := &streamWriter{ w := &streamWriter{
lg: lg,
localID: local,
peerID: id, peerID: id,
status: status, status: status,
fs: fs, fs: fs,
r: r, r: r,
@ -150,7 +158,15 @@ func (cw *streamWriter) run() {
defer tickc.Stop() defer tickc.Stop()
unflushed := 0 unflushed := 0
if cw.lg != nil {
cw.lg.Info(
"started stream writer with remote peer",
zap.String("local-member-id", cw.localID.String()),
zap.String("remote-peer-id", cw.peerID.String()),
)
} else {
plog.Infof("started streaming with peer %s (writer)", cw.peerID) plog.Infof("started streaming with peer %s (writer)", cw.peerID)
}
for { for {
select { select {
@ -169,7 +185,16 @@ func (cw *streamWriter) run() {
sentFailures.WithLabelValues(cw.peerID.String()).Inc() sentFailures.WithLabelValues(cw.peerID.String()).Inc()
cw.close() cw.close()
if cw.lg != nil {
cw.lg.Warn(
"lost TCP streaming connection with remote peer",
zap.String("stream-writer-type", t.String()),
zap.String("local-member-id", cw.localID.String()),
zap.String("remote-peer-id", cw.peerID.String()),
)
} else {
plog.Warningf("lost the TCP streaming connection with peer %s (%s writer)", cw.peerID, t) plog.Warningf("lost the TCP streaming connection with peer %s (%s writer)", cw.peerID, t)
}
heartbeatc, msgc = nil, nil heartbeatc, msgc = nil, nil
case m := <-msgc: case m := <-msgc:
@ -191,7 +216,16 @@ func (cw *streamWriter) run() {
cw.status.deactivate(failureType{source: t.String(), action: "write"}, err.Error()) cw.status.deactivate(failureType{source: t.String(), action: "write"}, err.Error())
cw.close() cw.close()
if cw.lg != nil {
cw.lg.Warn(
"lost TCP streaming connection with remote peer",
zap.String("stream-writer-type", t.String()),
zap.String("local-member-id", cw.localID.String()),
zap.String("remote-peer-id", cw.peerID.String()),
)
} else {
plog.Warningf("lost the TCP streaming connection with peer %s (%s writer)", cw.peerID, t) plog.Warningf("lost the TCP streaming connection with peer %s (%s writer)", cw.peerID, t)
}
heartbeatc, msgc = nil, nil heartbeatc, msgc = nil, nil
cw.r.ReportUnreachable(m.To) cw.r.ReportUnreachable(m.To)
sentFailures.WithLabelValues(cw.peerID.String()).Inc() sentFailures.WithLabelValues(cw.peerID.String()).Inc()
@ -216,15 +250,50 @@ func (cw *streamWriter) run() {
cw.mu.Unlock() cw.mu.Unlock()
if closed { if closed {
if cw.lg != nil {
cw.lg.Warn(
"closed TCP streaming connection with remote peer",
zap.String("stream-writer-type", t.String()),
zap.String("local-member-id", cw.localID.String()),
zap.String("remote-peer-id", cw.peerID.String()),
)
} else {
plog.Warningf("closed an existing TCP streaming connection with peer %s (%s writer)", cw.peerID, t) plog.Warningf("closed an existing TCP streaming connection with peer %s (%s writer)", cw.peerID, t)
} }
}
if cw.lg != nil {
cw.lg.Warn(
"established TCP streaming connection with remote peer",
zap.String("stream-writer-type", t.String()),
zap.String("local-member-id", cw.localID.String()),
zap.String("remote-peer-id", cw.peerID.String()),
)
} else {
plog.Infof("established a TCP streaming connection with peer %s (%s writer)", cw.peerID, t) plog.Infof("established a TCP streaming connection with peer %s (%s writer)", cw.peerID, t)
}
heartbeatc, msgc = tickc.C, cw.msgc heartbeatc, msgc = tickc.C, cw.msgc
case <-cw.stopc: case <-cw.stopc:
if cw.close() { if cw.close() {
if cw.lg != nil {
cw.lg.Warn(
"closed TCP streaming connection with remote peer",
zap.String("stream-writer-type", t.String()),
zap.String("remote-peer-id", cw.peerID.String()),
)
} else {
plog.Infof("closed the TCP streaming connection with peer %s (%s writer)", cw.peerID, t) plog.Infof("closed the TCP streaming connection with peer %s (%s writer)", cw.peerID, t)
} }
}
if cw.lg != nil {
cw.lg.Warn(
"stopped TCP streaming connection with remote peer",
zap.String("stream-writer-type", t.String()),
zap.String("remote-peer-id", cw.peerID.String()),
)
} else {
plog.Infof("stopped streaming with peer %s (writer)", cw.peerID) plog.Infof("stopped streaming with peer %s (writer)", cw.peerID)
}
close(cw.done) close(cw.done)
return return
} }
@ -248,8 +317,16 @@ func (cw *streamWriter) closeUnlocked() bool {
return false return false
} }
if err := cw.closer.Close(); err != nil { if err := cw.closer.Close(); err != nil {
if cw.lg != nil {
cw.lg.Warn(
"failed to close connection with remote peer",
zap.String("remote-peer-id", cw.peerID.String()),
zap.Error(err),
)
} else {
plog.Errorf("peer %s (writer) connection close error: %v", cw.peerID, err) plog.Errorf("peer %s (writer) connection close error: %v", cw.peerID, err)
} }
}
if len(cw.msgc) > 0 { if len(cw.msgc) > 0 {
cw.r.ReportUnreachable(uint64(cw.peerID)) cw.r.ReportUnreachable(uint64(cw.peerID))
} }
@ -275,6 +352,8 @@ func (cw *streamWriter) stop() {
// streamReader is a long-running go-routine that dials to the remote stream // streamReader is a long-running go-routine that dials to the remote stream
// endpoint and reads messages from the response body returned. // endpoint and reads messages from the response body returned.
type streamReader struct { type streamReader struct {
lg *zap.Logger
peerID types.ID peerID types.ID
typ streamType typ streamType
@ -310,7 +389,18 @@ func (cr *streamReader) start() {
func (cr *streamReader) run() { func (cr *streamReader) run() {
t := cr.typ t := cr.typ
if cr.lg != nil {
cr.lg.Info(
"started stream reader with remote peer",
zap.String("stream-reader-type", t.String()),
zap.String("local-member-id", cr.tr.ID.String()),
zap.String("remote-peer-id", cr.peerID.String()),
)
} else {
plog.Infof("started streaming with peer %s (%s reader)", cr.peerID, t) plog.Infof("started streaming with peer %s (%s reader)", cr.peerID, t)
}
for { for {
rc, err := cr.dial(t) rc, err := cr.dial(t)
if err != nil { if err != nil {
@ -319,9 +409,28 @@ func (cr *streamReader) run() {
} }
} else { } else {
cr.status.activate() cr.status.activate()
if cr.lg != nil {
cr.lg.Info(
"established TCP streaming connection with remote peer",
zap.String("stream-reader-type", cr.typ.String()),
zap.String("local-member-id", cr.tr.ID.String()),
zap.String("remote-peer-id", cr.peerID.String()),
)
} else {
plog.Infof("established a TCP streaming connection with peer %s (%s reader)", cr.peerID, cr.typ) plog.Infof("established a TCP streaming connection with peer %s (%s reader)", cr.peerID, cr.typ)
}
err = cr.decodeLoop(rc, t) err = cr.decodeLoop(rc, t)
if cr.lg != nil {
cr.lg.Warn(
"lost TCP streaming connection with remote peer",
zap.String("stream-reader-type", cr.typ.String()),
zap.String("local-member-id", cr.tr.ID.String()),
zap.String("remote-peer-id", cr.peerID.String()),
zap.Error(err),
)
} else {
plog.Warningf("lost the TCP streaming connection with peer %s (%s reader)", cr.peerID, cr.typ) plog.Warningf("lost the TCP streaming connection with peer %s (%s reader)", cr.peerID, cr.typ)
}
switch { switch {
// all data is read out // all data is read out
case err == io.EOF: case err == io.EOF:
@ -334,15 +443,34 @@ func (cr *streamReader) run() {
// Wait for a while before new dial attempt // Wait for a while before new dial attempt
err = cr.rl.Wait(cr.ctx) err = cr.rl.Wait(cr.ctx)
if cr.ctx.Err() != nil { if cr.ctx.Err() != nil {
if cr.lg != nil {
cr.lg.Info(
"stopped stream reader with remote peer",
zap.String("stream-reader-type", t.String()),
zap.String("local-member-id", cr.tr.ID.String()),
zap.String("remote-peer-id", cr.peerID.String()),
)
} else {
plog.Infof("stopped streaming with peer %s (%s reader)", cr.peerID, t) plog.Infof("stopped streaming with peer %s (%s reader)", cr.peerID, t)
}
close(cr.done) close(cr.done)
return return
} }
if err != nil { if err != nil {
if cr.lg != nil {
cr.lg.Warn(
"rate limit on stream reader with remote peer",
zap.String("stream-reader-type", t.String()),
zap.String("local-member-id", cr.tr.ID.String()),
zap.String("remote-peer-id", cr.peerID.String()),
zap.Error(err),
)
} else {
plog.Errorf("streaming with peer %s (%s reader) rate limiter error: %v", cr.peerID, t, err) plog.Errorf("streaming with peer %s (%s reader) rate limiter error: %v", cr.peerID, t, err)
} }
} }
} }
}
func (cr *streamReader) decodeLoop(rc io.ReadCloser, t streamType) error { func (cr *streamReader) decodeLoop(rc io.ReadCloser, t streamType) error {
var dec decoder var dec decoder
@ -353,8 +481,12 @@ func (cr *streamReader) decodeLoop(rc io.ReadCloser, t streamType) error {
case streamTypeMessage: case streamTypeMessage:
dec = &messageDecoder{r: rc} dec = &messageDecoder{r: rc}
default: default:
if cr.lg != nil {
cr.lg.Panic("unknown stream type", zap.String("type", t.String()))
} else {
plog.Panicf("unhandled stream type %s", t) plog.Panicf("unhandled stream type %s", t)
} }
}
select { select {
case <-cr.ctx.Done(): case <-cr.ctx.Done():
cr.mu.Unlock() cr.mu.Unlock()
@ -402,9 +534,32 @@ func (cr *streamReader) decodeLoop(rc io.ReadCloser, t streamType) error {
case recvc <- m: case recvc <- m:
default: default:
if cr.status.isActive() { if cr.status.isActive() {
if cr.lg != nil {
cr.lg.Warn(
"dropped internal Raft message since receiving buffer is full (overloaded network)",
zap.String("message-type", m.Type.String()),
zap.String("local-member-id", cr.tr.ID.String()),
zap.String("from", types.ID(m.From).String()),
zap.String("remote-peer-id", types.ID(m.To).String()),
zap.Bool("remote-peer-active", cr.status.isActive()),
)
} else {
plog.MergeWarningf("dropped internal raft message from %s since receiving buffer is full (overloaded network)", types.ID(m.From)) plog.MergeWarningf("dropped internal raft message from %s since receiving buffer is full (overloaded network)", types.ID(m.From))
} }
} else {
if cr.lg != nil {
cr.lg.Warn(
"dropped Raft message since receiving buffer is full (overloaded network)",
zap.String("message-type", m.Type.String()),
zap.String("local-member-id", cr.tr.ID.String()),
zap.String("from", types.ID(m.From).String()),
zap.String("remote-peer-id", types.ID(m.To).String()),
zap.Bool("remote-peer-active", cr.status.isActive()),
)
} else {
plog.Debugf("dropped %s from %s since receiving buffer is full", m.Type, types.ID(m.From)) plog.Debugf("dropped %s from %s since receiving buffer is full", m.Type, types.ID(m.From))
}
}
recvFailures.WithLabelValues(types.ID(m.From).String()).Inc() recvFailures.WithLabelValues(types.ID(m.From).String()).Inc()
} }
} }
@ -467,12 +622,15 @@ func (cr *streamReader) dial(t streamType) (io.ReadCloser, error) {
cr.picker.unreachable(u) cr.picker.unreachable(u)
reportCriticalError(errMemberRemoved, cr.errorc) reportCriticalError(errMemberRemoved, cr.errorc)
return nil, errMemberRemoved return nil, errMemberRemoved
case http.StatusOK: case http.StatusOK:
return resp.Body, nil return resp.Body, nil
case http.StatusNotFound: case http.StatusNotFound:
httputil.GracefulClose(resp) httputil.GracefulClose(resp)
cr.picker.unreachable(u) cr.picker.unreachable(u)
return nil, fmt.Errorf("peer %s failed to find local node %s", cr.peerID, cr.tr.ID) return nil, fmt.Errorf("peer %s failed to find local node %s", cr.peerID, cr.tr.ID)
case http.StatusPreconditionFailed: case http.StatusPreconditionFailed:
b, err := ioutil.ReadAll(resp.Body) b, err := ioutil.ReadAll(resp.Body)
if err != nil { if err != nil {
@ -484,15 +642,38 @@ func (cr *streamReader) dial(t streamType) (io.ReadCloser, error) {
switch strings.TrimSuffix(string(b), "\n") { switch strings.TrimSuffix(string(b), "\n") {
case errIncompatibleVersion.Error(): case errIncompatibleVersion.Error():
if cr.lg != nil {
cr.lg.Warn(
"request sent was ignored by remote peer due to server version incompatibility",
zap.String("local-member-id", cr.tr.ID.String()),
zap.String("remote-peer-id", cr.peerID.String()),
zap.Error(errIncompatibleVersion),
)
} else {
plog.Errorf("request sent was ignored by peer %s (server version incompatible)", cr.peerID) plog.Errorf("request sent was ignored by peer %s (server version incompatible)", cr.peerID)
}
return nil, errIncompatibleVersion return nil, errIncompatibleVersion
case errClusterIDMismatch.Error(): case errClusterIDMismatch.Error():
if cr.lg != nil {
cr.lg.Warn(
"request sent was ignored by remote peer due to cluster ID mismatch",
zap.String("remote-peer-id", cr.peerID.String()),
zap.String("remote-peer-cluster-id", resp.Header.Get("X-Etcd-Cluster-ID")),
zap.String("local-member-id", cr.tr.ID.String()),
zap.String("local-member-cluster-id", cr.tr.ClusterID.String()),
zap.Error(errClusterIDMismatch),
)
} else {
plog.Errorf("request sent was ignored (cluster ID mismatch: peer[%s]=%s, local=%s)", plog.Errorf("request sent was ignored (cluster ID mismatch: peer[%s]=%s, local=%s)",
cr.peerID, resp.Header.Get("X-Etcd-Cluster-ID"), cr.tr.ClusterID) cr.peerID, resp.Header.Get("X-Etcd-Cluster-ID"), cr.tr.ClusterID)
}
return nil, errClusterIDMismatch return nil, errClusterIDMismatch
default: default:
return nil, fmt.Errorf("unhandled error %q when precondition failed", string(b)) return nil, fmt.Errorf("unhandled error %q when precondition failed", string(b))
} }
default: default:
httputil.GracefulClose(resp) httputil.GracefulClose(resp)
cr.picker.unreachable(u) cr.picker.unreachable(u)
@ -503,9 +684,18 @@ func (cr *streamReader) dial(t streamType) (io.ReadCloser, error) {
func (cr *streamReader) close() { func (cr *streamReader) close() {
if cr.closer != nil { if cr.closer != nil {
if err := cr.closer.Close(); err != nil { if err := cr.closer.Close(); err != nil {
if cr.lg != nil {
cr.lg.Warn(
"failed to close remote peer connection",
zap.String("local-member-id", cr.tr.ID.String()),
zap.String("remote-peer-id", cr.peerID.String()),
zap.Error(err),
)
} else {
plog.Errorf("peer %s (reader) connection close error: %v", cr.peerID, err) plog.Errorf("peer %s (reader) connection close error: %v", cr.peerID, err)
} }
} }
}
cr.closer = nil cr.closer = nil
} }

View File

@ -33,6 +33,7 @@ import (
"github.com/coreos/etcd/version" "github.com/coreos/etcd/version"
"github.com/coreos/go-semver/semver" "github.com/coreos/go-semver/semver"
"go.uber.org/zap"
"golang.org/x/time/rate" "golang.org/x/time/rate"
) )
@ -40,7 +41,7 @@ import (
// to streamWriter. After that, streamWriter can use it to send messages // to streamWriter. After that, streamWriter can use it to send messages
// continuously, and closes it when stopped. // continuously, and closes it when stopped.
func TestStreamWriterAttachOutgoingConn(t *testing.T) { func TestStreamWriterAttachOutgoingConn(t *testing.T) {
sw := startStreamWriter(types.ID(1), newPeerStatus(types.ID(1)), &stats.FollowerStats{}, &fakeRaft{}) sw := startStreamWriter(zap.NewExample(), types.ID(0), types.ID(1), newPeerStatus(zap.NewExample(), types.ID(1)), &stats.FollowerStats{}, &fakeRaft{})
// the expected initial state of streamWriter is not working // the expected initial state of streamWriter is not working
if _, ok := sw.writec(); ok { if _, ok := sw.writec(); ok {
t.Errorf("initial working status = %v, want false", ok) t.Errorf("initial working status = %v, want false", ok)
@ -92,7 +93,7 @@ func TestStreamWriterAttachOutgoingConn(t *testing.T) {
// TestStreamWriterAttachBadOutgoingConn tests that streamWriter with bad // TestStreamWriterAttachBadOutgoingConn tests that streamWriter with bad
// outgoingConn will close the outgoingConn and fall back to non-working status. // outgoingConn will close the outgoingConn and fall back to non-working status.
func TestStreamWriterAttachBadOutgoingConn(t *testing.T) { func TestStreamWriterAttachBadOutgoingConn(t *testing.T) {
sw := startStreamWriter(types.ID(1), newPeerStatus(types.ID(1)), &stats.FollowerStats{}, &fakeRaft{}) sw := startStreamWriter(zap.NewExample(), types.ID(0), types.ID(1), newPeerStatus(zap.NewExample(), types.ID(1)), &stats.FollowerStats{}, &fakeRaft{})
defer sw.stop() defer sw.stop()
wfc := newFakeWriteFlushCloser(errors.New("blah")) wfc := newFakeWriteFlushCloser(errors.New("blah"))
sw.attach(&outgoingConn{t: streamTypeMessage, Writer: wfc, Flusher: wfc, Closer: wfc}) sw.attach(&outgoingConn{t: streamTypeMessage, Writer: wfc, Flusher: wfc, Closer: wfc})
@ -196,7 +197,7 @@ func TestStreamReaderStopOnDial(t *testing.T) {
picker: mustNewURLPicker(t, []string{"http://localhost:2380"}), picker: mustNewURLPicker(t, []string{"http://localhost:2380"}),
errorc: make(chan error, 1), errorc: make(chan error, 1),
typ: streamTypeMessage, typ: streamTypeMessage,
status: newPeerStatus(types.ID(2)), status: newPeerStatus(zap.NewExample(), types.ID(2)),
rl: rate.NewLimiter(rate.Every(100*time.Millisecond), 1), rl: rate.NewLimiter(rate.Every(100*time.Millisecond), 1),
} }
tr.onResp = func() { tr.onResp = func() {
@ -303,7 +304,7 @@ func TestStream(t *testing.T) {
srv := httptest.NewServer(h) srv := httptest.NewServer(h)
defer srv.Close() defer srv.Close()
sw := startStreamWriter(types.ID(1), newPeerStatus(types.ID(1)), &stats.FollowerStats{}, &fakeRaft{}) sw := startStreamWriter(zap.NewExample(), types.ID(0), types.ID(1), newPeerStatus(zap.NewExample(), types.ID(1)), &stats.FollowerStats{}, &fakeRaft{})
defer sw.stop() defer sw.stop()
h.sw = sw h.sw = sw
@ -315,7 +316,7 @@ func TestStream(t *testing.T) {
typ: tt.t, typ: tt.t,
tr: tr, tr: tr,
picker: picker, picker: picker,
status: newPeerStatus(types.ID(2)), status: newPeerStatus(zap.NewExample(), types.ID(2)),
recvc: recvc, recvc: recvc,
propc: propc, propc: propc,
rl: rate.NewLimiter(rate.Every(100*time.Millisecond), 1), rl: rate.NewLimiter(rate.Every(100*time.Millisecond), 1),

View File

@ -30,6 +30,7 @@ import (
"github.com/coreos/pkg/capnslog" "github.com/coreos/pkg/capnslog"
"github.com/xiang90/probing" "github.com/xiang90/probing"
"go.uber.org/zap"
"golang.org/x/time/rate" "golang.org/x/time/rate"
) )
@ -98,6 +99,8 @@ type Transporter interface {
// User needs to call Start before calling other functions, and call // User needs to call Start before calling other functions, and call
// Stop when the Transport is no longer used. // Stop when the Transport is no longer used.
type Transport struct { type Transport struct {
Logger *zap.Logger
DialTimeout time.Duration // maximum duration before timing out dial of the request DialTimeout time.Duration // maximum duration before timing out dial of the request
// DialRetryFrequency defines the frequency of streamReader dial retrial attempts; // DialRetryFrequency defines the frequency of streamReader dial retrial attempts;
// a distinct rate limiter is created per every peer (default value: 10 events/sec) // a distinct rate limiter is created per every peer (default value: 10 events/sec)
@ -197,9 +200,17 @@ func (t *Transport) Send(msgs []raftpb.Message) {
continue continue
} }
if t.Logger != nil {
t.Logger.Debug(
"ignored message send request; unknown remote peer target",
zap.String("type", m.Type.String()),
zap.String("unknown-target-peer-id", to.String()),
)
} else {
plog.Debugf("ignored message %s (sent to unknown peer %s)", m.Type, to) plog.Debugf("ignored message %s (sent to unknown peer %s)", m.Type, to)
} }
} }
}
func (t *Transport) Stop() { func (t *Transport) Stop() {
t.mu.Lock() t.mu.Lock()
@ -268,8 +279,12 @@ func (t *Transport) AddRemote(id types.ID, us []string) {
} }
urls, err := types.NewURLs(us) urls, err := types.NewURLs(us)
if err != nil { if err != nil {
if t.Logger != nil {
t.Logger.Panic("failed NewURLs", zap.Strings("urls", us), zap.Error(err))
} else {
plog.Panicf("newURLs %+v should never fail: %+v", us, err) plog.Panicf("newURLs %+v should never fail: %+v", us, err)
} }
}
t.remotes[id] = startRemote(t, urls, id) t.remotes[id] = startRemote(t, urls, id)
} }
@ -285,14 +300,22 @@ func (t *Transport) AddPeer(id types.ID, us []string) {
} }
urls, err := types.NewURLs(us) urls, err := types.NewURLs(us)
if err != nil { if err != nil {
if t.Logger != nil {
t.Logger.Panic("failed NewURLs", zap.Strings("urls", us), zap.Error(err))
} else {
plog.Panicf("newURLs %+v should never fail: %+v", us, err) plog.Panicf("newURLs %+v should never fail: %+v", us, err)
} }
}
fs := t.LeaderStats.Follower(id.String()) fs := t.LeaderStats.Follower(id.String())
t.peers[id] = startPeer(t, urls, id, fs) t.peers[id] = startPeer(t, urls, id, fs)
addPeerToProber(t.prober, id.String(), us) addPeerToProber(t.Logger, t.prober, id.String(), us)
if t.Logger != nil {
t.Logger.Info("added remote peer", zap.String("remote-peer-id", id.String()))
} else {
plog.Infof("added peer %s", id) plog.Infof("added peer %s", id)
} }
}
func (t *Transport) RemovePeer(id types.ID) { func (t *Transport) RemovePeer(id types.ID) {
t.mu.Lock() t.mu.Lock()
@ -312,14 +335,23 @@ func (t *Transport) RemoveAllPeers() {
func (t *Transport) removePeer(id types.ID) { func (t *Transport) removePeer(id types.ID) {
if peer, ok := t.peers[id]; ok { if peer, ok := t.peers[id]; ok {
peer.stop() peer.stop()
} else {
if t.Logger != nil {
t.Logger.Panic("unexpected removal of unknown remote peer", zap.String("remote-peer-id", id.String()))
} else { } else {
plog.Panicf("unexpected removal of unknown peer '%d'", id) plog.Panicf("unexpected removal of unknown peer '%d'", id)
} }
}
delete(t.peers, id) delete(t.peers, id)
delete(t.LeaderStats.Followers, id.String()) delete(t.LeaderStats.Followers, id.String())
t.prober.Remove(id.String()) t.prober.Remove(id.String())
if t.Logger != nil {
t.Logger.Info("removed remote peer", zap.String("remote-peer-id", id.String()))
} else {
plog.Infof("removed peer %s", id) plog.Infof("removed peer %s", id)
} }
}
func (t *Transport) UpdatePeer(id types.ID, us []string) { func (t *Transport) UpdatePeer(id types.ID, us []string) {
t.mu.Lock() t.mu.Lock()
@ -330,14 +362,23 @@ func (t *Transport) UpdatePeer(id types.ID, us []string) {
} }
urls, err := types.NewURLs(us) urls, err := types.NewURLs(us)
if err != nil { if err != nil {
if t.Logger != nil {
t.Logger.Panic("failed NewURLs", zap.Strings("urls", us), zap.Error(err))
} else {
plog.Panicf("newURLs %+v should never fail: %+v", us, err) plog.Panicf("newURLs %+v should never fail: %+v", us, err)
} }
}
t.peers[id].update(urls) t.peers[id].update(urls)
t.prober.Remove(id.String()) t.prober.Remove(id.String())
addPeerToProber(t.prober, id.String(), us) addPeerToProber(t.Logger, t.prober, id.String(), us)
if t.Logger != nil {
t.Logger.Info("updated remote peer", zap.String("remote-peer-id", id.String()))
} else {
plog.Infof("updated peer %s", id) plog.Infof("updated peer %s", id)
} }
}
func (t *Transport) ActiveSince(id types.ID) time.Time { func (t *Transport) ActiveSince(id types.ID) time.Time {
t.mu.RLock() t.mu.RLock()
@ -425,7 +466,7 @@ func NewSnapTransporter(snapDir string) (Transporter, <-chan raftsnap.Message) {
} }
func (s *snapTransporter) SendSnapshot(m raftsnap.Message) { func (s *snapTransporter) SendSnapshot(m raftsnap.Message) {
ss := raftsnap.New(s.snapDir) ss := raftsnap.New(zap.NewExample(), s.snapDir)
ss.SaveDBFrom(m.ReadCloser, m.Snapshot.Metadata.Index+1) ss.SaveDBFrom(m.ReadCloser, m.Snapshot.Metadata.Index+1)
m.CloseWithError(nil) m.CloseWithError(nil)
s.snapDoneC <- m s.snapDoneC <- m

View File

@ -150,18 +150,21 @@ func minClusterVersion(h http.Header) *semver.Version {
return semver.Must(semver.NewVersion(verStr)) return semver.Must(semver.NewVersion(verStr))
} }
// checkVersionCompability checks whether the given version is compatible // checkVersionCompatibility checks whether the given version is compatible
// with the local version. // with the local version.
func checkVersionCompability(name string, server, minCluster *semver.Version) error { func checkVersionCompatibility(name string, server, minCluster *semver.Version) (
localServer := semver.Must(semver.NewVersion(version.Version)) localServer *semver.Version,
localMinCluster := semver.Must(semver.NewVersion(version.MinClusterVersion)) localMinCluster *semver.Version,
err error) {
localServer = semver.Must(semver.NewVersion(version.Version))
localMinCluster = semver.Must(semver.NewVersion(version.MinClusterVersion))
if compareMajorMinorVersion(server, localMinCluster) == -1 { if compareMajorMinorVersion(server, localMinCluster) == -1 {
return fmt.Errorf("remote version is too low: remote[%s]=%s, local=%s", name, server, localServer) return localServer, localMinCluster, fmt.Errorf("remote version is too low: remote[%s]=%s, local=%s", name, server, localServer)
} }
if compareMajorMinorVersion(minCluster, localServer) == 1 { if compareMajorMinorVersion(minCluster, localServer) == 1 {
return fmt.Errorf("local version is too low: remote[%s]=%s, local=%s", name, server, localServer) return localServer, localMinCluster, fmt.Errorf("local version is too low: remote[%s]=%s, local=%s", name, server, localServer)
} }
return nil return localServer, localMinCluster, nil
} }
// setPeerURLsHeader reports local urls for peer discovery // setPeerURLsHeader reports local urls for peer discovery

View File

@ -188,7 +188,7 @@ func TestCheckVersionCompatibility(t *testing.T) {
}, },
} }
for i, tt := range tests { for i, tt := range tests {
err := checkVersionCompability("", tt.server, tt.minCluster) _, _, err := checkVersionCompatibility("", tt.server, tt.minCluster)
if ok := err == nil; ok != tt.wok { if ok := err == nil; ok != tt.wok {
t.Errorf("#%d: ok = %v, want %v", i, ok, tt.wok) t.Errorf("#%d: ok = %v, want %v", i, ok, tt.wok)
} }