Merge pull request #2439 from xiang90/metrics

Metrics
release-2.1
Xiang Li 2015-03-05 15:55:34 -08:00
commit 8e76ccf979
3 changed files with 31 additions and 9 deletions

View File

@ -23,23 +23,39 @@ import (
) )
var ( var (
msgWriteDuration = prometheus.NewSummaryVec( msgSentDuration = prometheus.NewSummaryVec(
prometheus.SummaryOpts{ prometheus.SummaryOpts{
Name: "rafthttp_message_sending_latency_microseconds", Name: "rafthttp_message_sent_latency_microseconds",
Help: "message sending latency distributions.", Help: "message sent latency distributions.",
}, },
[]string{"channel", "remoteID", "msgType"}, []string{"channel", "remoteID", "msgType"},
) )
msgSentFailed = prometheus.NewCounterVec(prometheus.CounterOpts{
Name: "rafthttp_message_sent_failed_total",
Help: "The total number of failed messages sent.",
},
[]string{"channel", "remoteID", "msgType"},
)
) )
func init() { func init() {
prometheus.MustRegister(msgWriteDuration) prometheus.MustRegister(msgSentDuration)
prometheus.MustRegister(msgSentFailed)
} }
func reportSendingDuration(channel string, m raftpb.Message, duration time.Duration) { func reportSentDuration(channel string, m raftpb.Message, duration time.Duration) {
typ := m.Type.String() typ := m.Type.String()
if isLinkHeartbeatMessage(m) { if isLinkHeartbeatMessage(m) {
typ = "MsgLinkHeartbeat" typ = "MsgLinkHeartbeat"
} }
msgWriteDuration.WithLabelValues(channel, types.ID(m.To).String(), typ).Observe(float64(duration.Nanoseconds() / int64(time.Microsecond))) msgSentDuration.WithLabelValues(channel, types.ID(m.To).String(), typ).Observe(float64(duration.Nanoseconds() / int64(time.Microsecond)))
}
func reportSentFailure(channel string, m raftpb.Message) {
typ := m.Type.String()
if isLinkHeartbeatMessage(m) {
typ = "MsgLinkHeartbeat"
}
msgSentFailed.WithLabelValues(channel, types.ID(m.To).String(), typ).Inc()
} }

View File

@ -91,6 +91,8 @@ func (p *pipeline) handle() {
p.Lock() p.Lock()
if err != nil { if err != nil {
reportSentFailure(pipelineMsg, m)
if p.errored == nil || p.errored.Error() != err.Error() { if p.errored == nil || p.errored.Error() != err.Error() {
log.Printf("pipeline: error posting to %s: %v", p.id, err) log.Printf("pipeline: error posting to %s: %v", p.id, err)
p.errored = err p.errored = err
@ -118,7 +120,7 @@ func (p *pipeline) handle() {
if isMsgSnap(m) { if isMsgSnap(m) {
p.r.ReportSnapshot(m.To, raft.SnapshotFinish) p.r.ReportSnapshot(m.To, raft.SnapshotFinish)
} }
reportSendingDuration(pipelineMsg, m, time.Since(start)) reportSentDuration(pipelineMsg, m, time.Since(start))
} }
p.Unlock() p.Unlock()
} }

View File

@ -101,13 +101,15 @@ func (cw *streamWriter) run() {
case <-heartbeatc: case <-heartbeatc:
start := time.Now() start := time.Now()
if err := enc.encode(linkHeartbeatMessage); err != nil { if err := enc.encode(linkHeartbeatMessage); err != nil {
reportSentFailure(string(t), linkHeartbeatMessage)
log.Printf("rafthttp: failed to heartbeat on stream %s due to %v. waiting for a new stream to be established.", t, err) log.Printf("rafthttp: failed to heartbeat on stream %s due to %v. waiting for a new stream to be established.", t, err)
cw.resetCloser() cw.resetCloser()
heartbeatc, msgc = nil, nil heartbeatc, msgc = nil, nil
continue continue
} }
flusher.Flush() flusher.Flush()
reportSendingDuration(string(t), linkHeartbeatMessage, time.Since(start)) reportSentDuration(string(t), linkHeartbeatMessage, time.Since(start))
case m := <-msgc: case m := <-msgc:
if t == streamTypeMsgApp && m.Term != msgAppTerm { if t == streamTypeMsgApp && m.Term != msgAppTerm {
// TODO: reasonable retry logic // TODO: reasonable retry logic
@ -119,6 +121,8 @@ func (cw *streamWriter) run() {
} }
start := time.Now() start := time.Now()
if err := enc.encode(m); err != nil { if err := enc.encode(m); err != nil {
reportSentFailure(string(t), m)
log.Printf("rafthttp: failed to send message on stream %s due to %v. waiting for a new stream to be established.", t, err) log.Printf("rafthttp: failed to send message on stream %s due to %v. waiting for a new stream to be established.", t, err)
cw.resetCloser() cw.resetCloser()
heartbeatc, msgc = nil, nil heartbeatc, msgc = nil, nil
@ -126,7 +130,7 @@ func (cw *streamWriter) run() {
continue continue
} }
flusher.Flush() flusher.Flush()
reportSendingDuration(string(t), m, time.Since(start)) reportSentDuration(string(t), m, time.Since(start))
case conn := <-cw.connc: case conn := <-cw.connc:
cw.resetCloser() cw.resetCloser()
t = conn.t t = conn.t