Merge pull request #2439 from xiang90/metrics

Metrics
release-2.1
Xiang Li 2015-03-05 15:55:34 -08:00
commit 8e76ccf979
3 changed files with 31 additions and 9 deletions

View File

@ -23,23 +23,39 @@ import (
)
var (
msgWriteDuration = prometheus.NewSummaryVec(
msgSentDuration = prometheus.NewSummaryVec(
prometheus.SummaryOpts{
Name: "rafthttp_message_sending_latency_microseconds",
Help: "message sending latency distributions.",
Name: "rafthttp_message_sent_latency_microseconds",
Help: "message sent latency distributions.",
},
[]string{"channel", "remoteID", "msgType"},
)
msgSentFailed = prometheus.NewCounterVec(prometheus.CounterOpts{
Name: "rafthttp_message_sent_failed_total",
Help: "The total number of failed messages sent.",
},
[]string{"channel", "remoteID", "msgType"},
)
)
func init() {
prometheus.MustRegister(msgWriteDuration)
prometheus.MustRegister(msgSentDuration)
prometheus.MustRegister(msgSentFailed)
}
func reportSendingDuration(channel string, m raftpb.Message, duration time.Duration) {
func reportSentDuration(channel string, m raftpb.Message, duration time.Duration) {
typ := m.Type.String()
if isLinkHeartbeatMessage(m) {
typ = "MsgLinkHeartbeat"
}
msgWriteDuration.WithLabelValues(channel, types.ID(m.To).String(), typ).Observe(float64(duration.Nanoseconds() / int64(time.Microsecond)))
msgSentDuration.WithLabelValues(channel, types.ID(m.To).String(), typ).Observe(float64(duration.Nanoseconds() / int64(time.Microsecond)))
}
func reportSentFailure(channel string, m raftpb.Message) {
typ := m.Type.String()
if isLinkHeartbeatMessage(m) {
typ = "MsgLinkHeartbeat"
}
msgSentFailed.WithLabelValues(channel, types.ID(m.To).String(), typ).Inc()
}

View File

@ -91,6 +91,8 @@ func (p *pipeline) handle() {
p.Lock()
if err != nil {
reportSentFailure(pipelineMsg, m)
if p.errored == nil || p.errored.Error() != err.Error() {
log.Printf("pipeline: error posting to %s: %v", p.id, err)
p.errored = err
@ -118,7 +120,7 @@ func (p *pipeline) handle() {
if isMsgSnap(m) {
p.r.ReportSnapshot(m.To, raft.SnapshotFinish)
}
reportSendingDuration(pipelineMsg, m, time.Since(start))
reportSentDuration(pipelineMsg, m, time.Since(start))
}
p.Unlock()
}

View File

@ -101,13 +101,15 @@ func (cw *streamWriter) run() {
case <-heartbeatc:
start := time.Now()
if err := enc.encode(linkHeartbeatMessage); err != nil {
reportSentFailure(string(t), linkHeartbeatMessage)
log.Printf("rafthttp: failed to heartbeat on stream %s due to %v. waiting for a new stream to be established.", t, err)
cw.resetCloser()
heartbeatc, msgc = nil, nil
continue
}
flusher.Flush()
reportSendingDuration(string(t), linkHeartbeatMessage, time.Since(start))
reportSentDuration(string(t), linkHeartbeatMessage, time.Since(start))
case m := <-msgc:
if t == streamTypeMsgApp && m.Term != msgAppTerm {
// TODO: reasonable retry logic
@ -119,6 +121,8 @@ func (cw *streamWriter) run() {
}
start := time.Now()
if err := enc.encode(m); err != nil {
reportSentFailure(string(t), m)
log.Printf("rafthttp: failed to send message on stream %s due to %v. waiting for a new stream to be established.", t, err)
cw.resetCloser()
heartbeatc, msgc = nil, nil
@ -126,7 +130,7 @@ func (cw *streamWriter) run() {
continue
}
flusher.Flush()
reportSendingDuration(string(t), m, time.Since(start))
reportSentDuration(string(t), m, time.Since(start))
case conn := <-cw.connc:
cw.resetCloser()
t = conn.t