Compare commits
7 Commits
Author | SHA1 | Date | |
---|---|---|---|
![]() |
e211fb6de3 | ||
![]() |
fb7e274309 | ||
![]() |
4a61fcf42d | ||
![]() |
4c8fa30dda | ||
![]() |
01c4f35b30 | ||
![]() |
15e9510d2c | ||
![]() |
09b7fd4975 |
@@ -1,6 +1,50 @@
|
||||
# Monitoring etcd
|
||||
|
||||
Each etcd server exports metrics under the `/metrics` path on its client port.
|
||||
Each etcd server provides local monitoring information on its client port through HTTP endpoints. The monitoring data is useful for both system health checking and cluster debugging.
|
||||
|
||||
## Debug endpoint
|
||||
|
||||
If `--debug` is set, the etcd server exports debugging information on its client port under the `/debug` path. Take care when setting `--debug`, since there will be degraded performance and verbose logging.
|
||||
|
||||
The `/debug/pprof` endpoint is the standard Go runtime profiling endpoint. This can be used to profile CPU, heap, mutex, and goroutine utilization. For example, here `go tool pprof` gets the top 10 functions where etcd spends its time:
|
||||
|
||||
```sh
|
||||
$ go tool pprof http://localhost:2379/debug/pprof/profile
|
||||
Fetching profile from http://localhost:2379/debug/pprof/profile
|
||||
Please wait... (30s)
|
||||
Saved profile in /home/etcd/pprof/pprof.etcd.localhost:2379.samples.cpu.001.pb.gz
|
||||
Entering interactive mode (type "help" for commands)
|
||||
(pprof) top10
|
||||
310ms of 480ms total (64.58%)
|
||||
Showing top 10 nodes out of 157 (cum >= 10ms)
|
||||
flat flat% sum% cum cum%
|
||||
130ms 27.08% 27.08% 130ms 27.08% runtime.futex
|
||||
70ms 14.58% 41.67% 70ms 14.58% syscall.Syscall
|
||||
20ms 4.17% 45.83% 20ms 4.17% github.com/coreos/etcd/cmd/vendor/golang.org/x/net/http2/hpack.huffmanDecode
|
||||
20ms 4.17% 50.00% 30ms 6.25% runtime.pcvalue
|
||||
20ms 4.17% 54.17% 50ms 10.42% runtime.schedule
|
||||
10ms 2.08% 56.25% 10ms 2.08% github.com/coreos/etcd/cmd/vendor/github.com/coreos/etcd/etcdserver.(*EtcdServer).AuthInfoFromCtx
|
||||
10ms 2.08% 58.33% 10ms 2.08% github.com/coreos/etcd/cmd/vendor/github.com/coreos/etcd/etcdserver.(*EtcdServer).Lead
|
||||
10ms 2.08% 60.42% 10ms 2.08% github.com/coreos/etcd/cmd/vendor/github.com/coreos/etcd/pkg/wait.(*timeList).Trigger
|
||||
10ms 2.08% 62.50% 10ms 2.08% github.com/coreos/etcd/cmd/vendor/github.com/prometheus/client_golang/prometheus.(*MetricVec).hashLabelValues
|
||||
10ms 2.08% 64.58% 10ms 2.08% github.com/coreos/etcd/cmd/vendor/golang.org/x/net/http2.(*Framer).WriteHeaders
|
||||
```
|
||||
|
||||
The `/debug/requests` endpoint gives gRPC traces and performance statistics through a web browser. For example, here is a `Range` request for the key `abc`:
|
||||
|
||||
```
|
||||
When Elapsed (s)
|
||||
2017/08/18 17:34:51.999317 0.000244 /etcdserverpb.KV/Range
|
||||
17:34:51.999382 . 65 ... RPC: from 127.0.0.1:47204 deadline:4.999377747s
|
||||
17:34:51.999395 . 13 ... recv: key:"abc"
|
||||
17:34:51.999499 . 104 ... OK
|
||||
17:34:51.999535 . 36 ... sent: header:<cluster_id:14841639068965178418 member_id:10276657743932975437 revision:15 raft_term:17 > kvs:<key:"abc" create_revision:6 mod_revision:14 version:9 value:"asda" > count:1
|
||||
```
|
||||
|
||||
## Metrics endpoint
|
||||
|
||||
Each etcd server exports metrics under the `/metrics` path on its client port and optionally on interfaces given by `--listen-metrics-urls`.
|
||||
|
||||
The metrics can be fetched with `curl`:
|
||||
|
||||
@@ -75,8 +119,6 @@ Access: proxy
|
||||
|
||||
Then import the default [etcd dashboard template][template] and customize. For instance, if the Prometheus data source name is `my-etcd`, the `datasource` field values in JSON also need to be `my-etcd`.
|
||||
|
||||
See the [demo][demo].
|
||||
|
||||
Sample dashboard:
|
||||
|
||||

|
||||
@@ -85,4 +127,3 @@ Sample dashboard:
|
||||
[prometheus]: https://prometheus.io/
|
||||
[grafana]: http://grafana.org/
|
||||
[template]: ./grafana.json
|
||||
[demo]: http://dash.etcd.io/dashboard/db/test-etcd
|
||||
|
@@ -372,12 +372,7 @@ func (c *httpClusterClient) Do(ctx context.Context, act httpAction) (*http.Respo
|
||||
if err == context.Canceled || err == context.DeadlineExceeded {
|
||||
return nil, nil, err
|
||||
}
|
||||
if isOneShot {
|
||||
return nil, nil, err
|
||||
}
|
||||
continue
|
||||
}
|
||||
if resp.StatusCode/100 == 5 {
|
||||
} else if resp.StatusCode/100 == 5 {
|
||||
switch resp.StatusCode {
|
||||
case http.StatusInternalServerError, http.StatusServiceUnavailable:
|
||||
// TODO: make sure this is a no leader response
|
||||
@@ -385,11 +380,17 @@ func (c *httpClusterClient) Do(ctx context.Context, act httpAction) (*http.Respo
|
||||
default:
|
||||
cerr.Errors = append(cerr.Errors, fmt.Errorf("client: etcd member %s returns server error [%s]", eps[k].String(), http.StatusText(resp.StatusCode)))
|
||||
}
|
||||
if isOneShot {
|
||||
return nil, nil, cerr.Errors[0]
|
||||
err = cerr.Errors[0]
|
||||
}
|
||||
if err != nil {
|
||||
if !isOneShot {
|
||||
continue
|
||||
}
|
||||
c.Lock()
|
||||
c.pinned = (k + 1) % leps
|
||||
c.Unlock()
|
||||
return nil, nil, err
|
||||
}
|
||||
if k != pinned {
|
||||
c.Lock()
|
||||
c.pinned = k
|
||||
|
@@ -16,6 +16,7 @@ package client
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"math/rand"
|
||||
@@ -305,6 +306,8 @@ func TestHTTPClusterClientDo(t *testing.T) {
|
||||
fakeURL := url.URL{}
|
||||
tests := []struct {
|
||||
client *httpClusterClient
|
||||
ctx context.Context
|
||||
|
||||
wantCode int
|
||||
wantErr error
|
||||
wantPinned int
|
||||
@@ -395,10 +398,30 @@ func TestHTTPClusterClientDo(t *testing.T) {
|
||||
wantCode: http.StatusTeapot,
|
||||
wantPinned: 1,
|
||||
},
|
||||
|
||||
// 500-level errors cause one shot Do to fallthrough to next endpoint
|
||||
{
|
||||
client: &httpClusterClient{
|
||||
endpoints: []url.URL{fakeURL, fakeURL},
|
||||
clientFactory: newStaticHTTPClientFactory(
|
||||
[]staticHTTPResponse{
|
||||
{resp: http.Response{StatusCode: http.StatusBadGateway}},
|
||||
{resp: http.Response{StatusCode: http.StatusTeapot}},
|
||||
},
|
||||
),
|
||||
rand: rand.New(rand.NewSource(0)),
|
||||
},
|
||||
ctx: context.WithValue(context.Background(), &oneShotCtxValue, &oneShotCtxValue),
|
||||
wantErr: fmt.Errorf("client: etcd member returns server error [Bad Gateway]"),
|
||||
wantPinned: 1,
|
||||
},
|
||||
}
|
||||
|
||||
for i, tt := range tests {
|
||||
resp, _, err := tt.client.Do(context.Background(), nil)
|
||||
if tt.ctx == nil {
|
||||
tt.ctx = context.Background()
|
||||
}
|
||||
resp, _, err := tt.client.Do(tt.ctx, nil)
|
||||
if !reflect.DeepEqual(tt.wantErr, err) {
|
||||
t.Errorf("#%d: got err=%v, want=%v", i, err, tt.wantErr)
|
||||
continue
|
||||
@@ -407,11 +430,9 @@ func TestHTTPClusterClientDo(t *testing.T) {
|
||||
if resp == nil {
|
||||
if tt.wantCode != 0 {
|
||||
t.Errorf("#%d: resp is nil, want=%d", i, tt.wantCode)
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
if resp.StatusCode != tt.wantCode {
|
||||
} else if resp.StatusCode != tt.wantCode {
|
||||
t.Errorf("#%d: resp code=%d, want=%d", i, resp.StatusCode, tt.wantCode)
|
||||
continue
|
||||
}
|
||||
|
@@ -198,21 +198,15 @@ func getRevTest(cx ctlCtx) {
|
||||
}
|
||||
|
||||
func getKeysOnlyTest(cx ctlCtx) {
|
||||
var (
|
||||
kvs = []kv{{"key1", "val1"}}
|
||||
)
|
||||
for i := range kvs {
|
||||
if err := ctlV3Put(cx, kvs[i].key, kvs[i].val, ""); err != nil {
|
||||
cx.t.Fatalf("getKeysOnlyTest #%d: ctlV3Put error (%v)", i, err)
|
||||
if err := ctlV3Put(cx, "key", "val", ""); err != nil {
|
||||
cx.t.Fatal(err)
|
||||
}
|
||||
cmdArgs := append(cx.PrefixArgs(), []string{"get", "--keys-only", "key"}...)
|
||||
if err := spawnWithExpect(cmdArgs, "key"); err != nil {
|
||||
cx.t.Fatal(err)
|
||||
}
|
||||
|
||||
cmdArgs := append(cx.PrefixArgs(), "get")
|
||||
cmdArgs = append(cmdArgs, []string{"--prefix", "--keys-only", "key"}...)
|
||||
|
||||
err := spawnWithExpects(cmdArgs, []string{"key1", ""}...)
|
||||
if err != nil {
|
||||
cx.t.Fatalf("getKeysOnlyTest : error (%v)", err)
|
||||
if err := spawnWithExpects(cmdArgs, "val"); err == nil {
|
||||
cx.t.Fatalf("got value but passed --keys-only")
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -189,7 +189,9 @@ func RangeRequestToOp(r *pb.RangeRequest) clientv3.Op {
|
||||
if r.CountOnly {
|
||||
opts = append(opts, clientv3.WithCountOnly())
|
||||
}
|
||||
|
||||
if r.KeysOnly {
|
||||
opts = append(opts, clientv3.WithKeysOnly())
|
||||
}
|
||||
if r.Serializable {
|
||||
opts = append(opts, clientv3.WithSerializable())
|
||||
}
|
||||
|
@@ -26,7 +26,7 @@ import (
|
||||
var (
|
||||
// MinClusterVersion is the min cluster version this etcd binary is compatible with.
|
||||
MinClusterVersion = "3.0.0"
|
||||
Version = "3.2.7"
|
||||
Version = "3.2.8"
|
||||
APIVersion = "unknown"
|
||||
|
||||
// Git SHA Value will be set during build
|
||||
|
Reference in New Issue
Block a user