Compare commits

...

14 Commits

Author SHA1 Message Date
Gyuho Lee
ba92a0e70f version: 3.2.31
Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2020-08-18 09:37:40 -07:00
Gyuho Lee
55db5b49e8 etcdserver: add OS level FD metrics
Similar counts are exposed via Prometheus.
This adds the one that are perceived by etcd server.

e.g.

os_fd_limit 120000
os_fd_used 14
process_cpu_seconds_total 0.31
process_max_fds 120000
process_open_fds 17

Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2020-08-18 09:37:11 -07:00
Gyuho Lee
b7243f0175 pkg/runtime: optimize FDUsage by removing sort
No need sort when we just want the counts.

Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2020-08-18 09:35:58 -07:00
Gyuho Lee
7619b2f744 etcdserver/etcdserverpb: change protobuf field type from int to int64
ref. https://github.com/etcd-io/etcd/pull/12106

Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2020-08-18 09:34:43 -07:00
Jingyi Hu
17acb61209 Merge pull request #11691 from wswcfan/fix-etcd-3.2-to-3.3-upgrade-bug
etcdserver: fix LeaseRevoke may fail to apply when authentication is enabled and upgrading cluster from etcd-3.2 to etcd-3.3
2020-05-22 03:28:16 +08:00
shawwang
6e77b87c06 auth, etcdserver: attaching a fake root token when calling LeaseRevoke
fix LeaseRevoke may fail to apply when authentication is enabled
and upgrading cluster from etcd-3.2 to etcd-3.3 (#11691)
2020-05-11 23:49:14 +08:00
Gyuho Lee
b7644ae5f0 version: 3.2.30
Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2020-04-01 10:48:30 -07:00
Gyuho Lee
325f2d253c wal: add "etcd_wal_writes_bytes_total"
Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2020-04-01 10:48:27 -07:00
Gyuho Lee
b05103392d pkg/ioutil: add "FlushN"
Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2020-04-01 10:44:18 -07:00
Gyuho Lee
b69c0023c9 version: 3.2.29
Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2020-03-18 17:19:14 -07:00
Gyuho Lee
35d1d2d58e etcdserver/api/v3rpc: handle api version metadata, add metrics
ref.
https://github.com/etcd-io/etcd/pull/11687

Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2020-03-18 17:19:14 -07:00
Gyuho Lee
20b27a887d clientv3: embed api version in metadata
ref.
https://github.com/etcd-io/etcd/pull/11687

Signed-off-by: Gyuho Lee <leegyuho@amazon.com>

clientv3: fix racy writes to context key

=== RUN   TestWatchOverlapContextCancel

==================

WARNING: DATA RACE

Write at 0x00c42110dd40 by goroutine 99:

  runtime.mapassign()

      /usr/local/go/src/runtime/hashmap.go:485 +0x0

  github.com/coreos/etcd/clientv3.metadataSet()

      /go/src/github.com/coreos/etcd/gopath/src/github.com/coreos/etcd/clientv3/ctx.go:61 +0x8c

  github.com/coreos/etcd/clientv3.withVersion()

      /go/src/github.com/coreos/etcd/gopath/src/github.com/coreos/etcd/clientv3/ctx.go:47 +0x137

  github.com/coreos/etcd/clientv3.newStreamClientInterceptor.func1()

      /go/src/github.com/coreos/etcd/gopath/src/github.com/coreos/etcd/clientv3/client.go:309 +0x81

  google.golang.org/grpc.NewClientStream()

      /go/src/github.com/coreos/etcd/gopath/src/google.golang.org/grpc/stream.go:101 +0x10e

  github.com/coreos/etcd/etcdserver/etcdserverpb.(*watchClient).Watch()

      /go/src/github.com/coreos/etcd/gopath/src/github.com/coreos/etcd/etcdserver/etcdserverpb/rpc.pb.go:3193 +0xe9

  github.com/coreos/etcd/clientv3.(*watchGrpcStream).openWatchClient()

      /go/src/github.com/coreos/etcd/gopath/src/github.com/coreos/etcd/clientv3/watch.go:788 +0x143

  github.com/coreos/etcd/clientv3.(*watchGrpcStream).newWatchClient()

      /go/src/github.com/coreos/etcd/gopath/src/github.com/coreos/etcd/clientv3/watch.go:700 +0x5c3

  github.com/coreos/etcd/clientv3.(*watchGrpcStream).run()

      /go/src/github.com/coreos/etcd/gopath/src/github.com/coreos/etcd/clientv3/watch.go:431 +0x12b

Previous read at 0x00c42110dd40 by goroutine 130:

  reflect.maplen()

      /usr/local/go/src/runtime/hashmap.go:1165 +0x0

  reflect.Value.MapKeys()

      /usr/local/go/src/reflect/value.go:1090 +0x43b

  fmt.(*pp).printValue()

      /usr/local/go/src/fmt/print.go:741 +0x1885

  fmt.(*pp).printArg()

      /usr/local/go/src/fmt/print.go:682 +0x1b1

  fmt.(*pp).doPrintf()

      /usr/local/go/src/fmt/print.go:998 +0x1cad

  fmt.Sprintf()

      /usr/local/go/src/fmt/print.go:196 +0x77

  github.com/coreos/etcd/clientv3.streamKeyFromCtx()

      /go/src/github.com/coreos/etcd/gopath/src/github.com/coreos/etcd/clientv3/watch.go:825 +0xc8

  github.com/coreos/etcd/clientv3.(*watcher).Watch()

      /go/src/github.com/coreos/etcd/gopath/src/github.com/coreos/etcd/clientv3/watch.go:265 +0x426

  github.com/coreos/etcd/clientv3/integration.testWatchOverlapContextCancel.func1()

      /go/src/github.com/coreos/etcd/gopath/src/github.com/coreos/etcd/clientv3/integration/watch_test.go:959 +0x23e

Goroutine 99 (running) created at:

  github.com/coreos/etcd/clientv3.(*watcher).newWatcherGrpcStream()

      /go/src/github.com/coreos/etcd/gopath/src/github.com/coreos/etcd/clientv3/watch.go:236 +0x59d

  github.com/coreos/etcd/clientv3.(*watcher).Watch()

      /go/src/github.com/coreos/etcd/gopath/src/github.com/coreos/etcd/clientv3/watch.go:278 +0xbb6

  github.com/coreos/etcd/clientv3/integration.testWatchOverlapContextCancel.func1()

      /go/src/github.com/coreos/etcd/gopath/src/github.com/coreos/etcd/clientv3/integration/watch_test.go:959 +0x23e

Goroutine 130 (running) created at:

  github.com/coreos/etcd/clientv3/integration.testWatchOverlapContextCancel()

      /go/src/github.com/coreos/etcd/gopath/src/github.com/coreos/etcd/clientv3/integration/watch_test.go:979 +0x76d

  github.com/coreos/etcd/clientv3/integration.TestWatchOverlapContextCancel()

      /go/src/github.com/coreos/etcd/gopath/src/github.com/coreos/etcd/clientv3/integration/watch_test.go:922 +0x44

  testing.tRunner()

      /usr/local/go/src/testing/testing.go:657 +0x107

==================

Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2020-03-18 17:19:14 -07:00
Gyuho Lee
42d749057d etcdserver/api/etcdhttp: log server-side /health checks
ref.
https://github.com/etcd-io/etcd/pull/11704

Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2020-03-18 17:19:14 -07:00
Joe Betz
a0d497b54a mvcc/backend: Delete orphaned db.tmp files before defrag 2020-02-13 12:44:59 -08:00
20 changed files with 363 additions and 34 deletions

View File

@@ -162,6 +162,9 @@ type AuthStore interface {
// AuthInfoFromTLS gets AuthInfo from TLS info of gRPC's context
AuthInfoFromTLS(ctx context.Context) *AuthInfo
// WithRoot generates and installs a token that can be used as a root credential
WithRoot(ctx context.Context) context.Context
}
type TokenProvider interface {
@@ -1070,3 +1073,40 @@ func NewTokenProvider(tokenOpts string, indexWaiter func(uint64) <-chan struct{}
return nil, ErrInvalidAuthOpts
}
}
func (as *authStore) WithRoot(ctx context.Context) context.Context {
if !as.isAuthEnabled() {
return ctx
}
var ctxForAssign context.Context
if ts, ok := as.tokenProvider.(*tokenSimple); ok && ts != nil {
ctx1 := context.WithValue(ctx, "index", uint64(0))
prefix, err := ts.genTokenPrefix()
if err != nil {
plog.Errorf("failed to generate prefix of internally used token")
return ctx
}
ctxForAssign = context.WithValue(ctx1, "simpleToken", prefix)
} else {
ctxForAssign = ctx
}
token, err := as.tokenProvider.assign(ctxForAssign, "root", as.Revision())
if err != nil {
// this must not happen
plog.Errorf("failed to assign token for lease revoking: %s", err)
return ctx
}
mdMap := map[string]string{
"token": token,
}
tokenMD := metadata.New(mdMap)
// clean up tls info to ensure using root credential
ctx = peer.NewContext(ctx, nil)
// use "mdIncomingKey{}" since it's called from local etcdserver
return metadata.NewIncomingContext(ctx, tokenMD)
}

View File

@@ -32,7 +32,6 @@ import (
"google.golang.org/grpc/codes"
"google.golang.org/grpc/credentials"
"google.golang.org/grpc/keepalive"
"google.golang.org/grpc/metadata"
"google.golang.org/grpc/status"
)
@@ -269,7 +268,11 @@ func (c *Client) dialSetupOpts(endpoint string, dopts ...grpc.DialOption) (opts
}
return conn, err
}
opts = append(opts, grpc.WithDialer(f))
opts = append(opts,
grpc.WithDialer(f),
grpc.WithUnaryInterceptor(newUnaryClientInterceptor()),
grpc.WithStreamInterceptor(newStreamClientInterceptor()),
)
creds := c.creds
if _, _, scheme := parseEndpoint(endpoint); len(scheme) != 0 {
@@ -284,6 +287,30 @@ func (c *Client) dialSetupOpts(endpoint string, dopts ...grpc.DialOption) (opts
return opts
}
func newUnaryClientInterceptor() grpc.UnaryClientInterceptor {
return func(ctx context.Context,
method string,
req, reply interface{},
cc *grpc.ClientConn,
invoker grpc.UnaryInvoker,
opts ...grpc.CallOption) error {
ctx = withVersion(ctx)
return invoker(ctx, method, req, reply, cc, opts...)
}
}
func newStreamClientInterceptor() grpc.StreamClientInterceptor {
return func(ctx context.Context,
desc *grpc.StreamDesc,
cc *grpc.ClientConn,
method string,
streamer grpc.Streamer,
opts ...grpc.CallOption) (grpc.ClientStream, error) {
ctx = withVersion(ctx)
return streamer(ctx, desc, cc, method, opts...)
}
}
// Dial connects to a single endpoint using the client's config.
func (c *Client) Dial(endpoint string) (*grpc.ClientConn, error) {
return c.dial(endpoint)
@@ -356,13 +383,6 @@ func (c *Client) dial(endpoint string, dopts ...grpc.DialOption) (*grpc.ClientCo
return conn, nil
}
// WithRequireLeader requires client requests to only succeed
// when the cluster has a leader.
func WithRequireLeader(ctx context.Context) context.Context {
md := metadata.Pairs(rpctypes.MetadataRequireLeaderKey, rpctypes.MetadataHasLeader)
return metadata.NewOutgoingContext(ctx, md)
}
func newClient(cfg *Config) (*Client, error) {
if cfg == nil {
cfg = &Config{}

64
clientv3/ctx.go Normal file
View File

@@ -0,0 +1,64 @@
// Copyright 2020 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package clientv3
import (
"context"
"strings"
"github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
"github.com/coreos/etcd/version"
"google.golang.org/grpc/metadata"
)
// WithRequireLeader requires client requests to only succeed
// when the cluster has a leader.
func WithRequireLeader(ctx context.Context) context.Context {
md, ok := metadata.FromOutgoingContext(ctx)
if !ok { // no outgoing metadata ctx key, create one
md = metadata.Pairs(rpctypes.MetadataRequireLeaderKey, rpctypes.MetadataHasLeader)
return metadata.NewOutgoingContext(ctx, md)
}
copied := md.Copy() // avoid racey updates
// overwrite/add 'hasleader' key/value
metadataSet(copied, rpctypes.MetadataRequireLeaderKey, rpctypes.MetadataHasLeader)
return metadata.NewOutgoingContext(ctx, copied)
}
// embeds client version
func withVersion(ctx context.Context) context.Context {
md, ok := metadata.FromOutgoingContext(ctx)
if !ok { // no outgoing metadata ctx key, create one
md = metadata.Pairs(rpctypes.MetadataClientAPIVersionKey, version.APIVersion)
return metadata.NewOutgoingContext(ctx, md)
}
copied := md.Copy() // avoid racey updates
// overwrite/add version key/value
metadataSet(copied, rpctypes.MetadataClientAPIVersionKey, version.APIVersion)
return metadata.NewOutgoingContext(ctx, copied)
}
func metadataGet(md metadata.MD, k string) []string {
k = strings.ToLower(k)
return md[k]
}
func metadataSet(md metadata.MD, k string, vals ...string) {
if len(vals) == 0 {
return
}
k = strings.ToLower(k)
md[k] = vals
}

67
clientv3/ctx_test.go Normal file
View File

@@ -0,0 +1,67 @@
// Copyright 2020 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package clientv3
import (
"context"
"reflect"
"testing"
"github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
"github.com/coreos/etcd/version"
"google.golang.org/grpc/metadata"
)
func TestMetadataWithRequireLeader(t *testing.T) {
ctx := context.TODO()
md, ok := metadata.FromOutgoingContext(ctx)
if ok {
t.Fatal("expected no outgoing metadata ctx key")
}
// add a conflicting key with some other value
md = metadata.Pairs(rpctypes.MetadataRequireLeaderKey, "invalid")
// add a key, and expect not be overwritten
metadataSet(md, "hello", "1", "2")
ctx = metadata.NewOutgoingContext(ctx, md)
// expect overwrites but still keep other keys
ctx = WithRequireLeader(ctx)
md, ok = metadata.FromOutgoingContext(ctx)
if !ok {
t.Fatal("expected outgoing metadata ctx key")
}
if ss := metadataGet(md, rpctypes.MetadataRequireLeaderKey); !reflect.DeepEqual(ss, []string{rpctypes.MetadataHasLeader}) {
t.Fatalf("unexpected metadata for %q %v", rpctypes.MetadataRequireLeaderKey, ss)
}
if ss := metadataGet(md, "hello"); !reflect.DeepEqual(ss, []string{"1", "2"}) {
t.Fatalf("unexpected metadata for 'hello' %v", ss)
}
}
func TestMetadataWithClientAPIVersion(t *testing.T) {
ctx := withVersion(WithRequireLeader(context.TODO()))
md, ok := metadata.FromOutgoingContext(ctx)
if !ok {
t.Fatal("expected outgoing metadata ctx key")
}
if ss := metadataGet(md, rpctypes.MetadataRequireLeaderKey); !reflect.DeepEqual(ss, []string{rpctypes.MetadataHasLeader}) {
t.Fatalf("unexpected metadata for %q %v", rpctypes.MetadataRequireLeaderKey, ss)
}
if ss := metadataGet(md, rpctypes.MetadataClientAPIVersionKey); !reflect.DeepEqual(ss, []string{version.APIVersion}) {
t.Fatalf("unexpected metadata for %q %v", rpctypes.MetadataClientAPIVersionKey, ss)
}
}

View File

@@ -82,22 +82,29 @@ func init() {
func healthHandler(server *etcdserver.EtcdServer) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
if !allowMethod(w, r, "GET") {
plog.Warningf("/health error (status code %d)", http.StatusMethodNotAllowed)
return
}
if uint64(server.Leader()) == raft.None {
http.Error(w, `{"health": "false"}`, http.StatusServiceUnavailable)
plog.Warningf("/health error; no leader (status code %d)", http.StatusServiceUnavailable)
healthFailed.Inc()
return
}
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
defer cancel()
if _, err := server.Do(ctx, etcdserverpb.Request{Method: "QGET"}); err != nil {
http.Error(w, `{"health": "false"}`, http.StatusServiceUnavailable)
plog.Warningf("/health error; QGET failed %v (status code %d)", err, http.StatusServiceUnavailable)
healthFailed.Inc()
return
}
w.WriteHeader(http.StatusOK)
w.Write([]byte(`{"health": "true"}`))
plog.Infof("/health OK (status code %d)", http.StatusOK)
healthSuccess.Inc()
}
}

View File

@@ -15,16 +15,16 @@
package v3rpc
import (
"strings"
"sync"
"time"
"github.com/coreos/etcd/etcdserver"
"github.com/coreos/etcd/etcdserver/api"
"github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
"github.com/coreos/etcd/pkg/types"
"github.com/coreos/etcd/raft"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
"github.com/coreos/pkg/capnslog"
prometheus "github.com/grpc-ecosystem/go-grpc-prometheus"
"golang.org/x/net/context"
@@ -50,6 +50,12 @@ func newUnaryInterceptor(s *etcdserver.EtcdServer) grpc.UnaryServerInterceptor {
md, ok := metadata.FromIncomingContext(ctx)
if ok {
ver, vs := "unknown", metadataGet(md, rpctypes.MetadataClientAPIVersionKey)
if len(vs) > 0 {
ver = vs[0]
}
clientRequests.WithLabelValues("unary", ver).Inc()
if ks := md[rpctypes.MetadataRequireLeaderKey]; len(ks) > 0 && ks[0] == rpctypes.MetadataHasLeader {
if s.Leader() == types.ID(raft.None) {
return nil, rpctypes.ErrGRPCNoLeader
@@ -177,6 +183,12 @@ func newStreamInterceptor(s *etcdserver.EtcdServer) grpc.StreamServerInterceptor
md, ok := metadata.FromIncomingContext(ss.Context())
if ok {
ver, vs := "unknown", metadataGet(md, rpctypes.MetadataClientAPIVersionKey)
if len(vs) > 0 {
ver = vs[0]
}
clientRequests.WithLabelValues("stream", ver).Inc()
if ks := md[rpctypes.MetadataRequireLeaderKey]; len(ks) > 0 && ks[0] == rpctypes.MetadataHasLeader {
if s.Leader() == types.ID(raft.None) {
return rpctypes.ErrGRPCNoLeader
@@ -195,7 +207,6 @@ func newStreamInterceptor(s *etcdserver.EtcdServer) grpc.StreamServerInterceptor
smap.mu.Unlock()
cancel()
}()
}
}
@@ -251,3 +262,8 @@ func monitorLeader(s *etcdserver.EtcdServer) *streamsMap {
return smap
}
func metadataGet(md metadata.MD, k string) []string {
k = strings.ToLower(k)
return md[k]
}

View File

@@ -30,9 +30,19 @@ var (
Name: "client_grpc_received_bytes_total",
Help: "The total number of bytes received from grpc clients.",
})
clientRequests = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: "etcd",
Subsystem: "server",
Name: "client_requests_total",
Help: "The total number of client requests per client version.",
},
[]string{"type", "client_api_version"},
)
)
func init() {
prometheus.MustRegister(sentBytes)
prometheus.MustRegister(receivedBytes)
prometheus.MustRegister(clientRequests)
}

View File

@@ -17,4 +17,6 @@ package rpctypes
var (
MetadataRequireLeaderKey = "hasleader"
MetadataHasLeader = "true"
MetadataClientAPIVersionKey = "client-api-version"
)

View File

@@ -135,7 +135,7 @@ type loggableValueCompare struct {
Result Compare_CompareResult `protobuf:"varint,1,opt,name=result,proto3,enum=etcdserverpb.Compare_CompareResult"`
Target Compare_CompareTarget `protobuf:"varint,2,opt,name=target,proto3,enum=etcdserverpb.Compare_CompareTarget"`
Key []byte `protobuf:"bytes,3,opt,name=key,proto3"`
ValueSize int `protobuf:"bytes,7,opt,name=value_size,proto3"`
ValueSize int64 `protobuf:"varint,7,opt,name=value_size,proto3"`
}
func newLoggableValueCompare(c *Compare, cv *Compare_Value) *loggableValueCompare {
@@ -143,7 +143,7 @@ func newLoggableValueCompare(c *Compare, cv *Compare_Value) *loggableValueCompar
c.Result,
c.Target,
c.Key,
len(cv.Value),
int64(len(cv.Value)),
}
}
@@ -156,7 +156,7 @@ func (*loggableValueCompare) ProtoMessage() {}
// To preserve proto encoding of the key bytes, a faked out proto type is used here.
type loggablePutRequest struct {
Key []byte `protobuf:"bytes,1,opt,name=key,proto3"`
ValueSize int `protobuf:"varint,2,opt,name=value_size,proto3"`
ValueSize int64 `protobuf:"varint,2,opt,name=value_size,proto3"`
Lease int64 `protobuf:"varint,3,opt,name=lease,proto3"`
PrevKv bool `protobuf:"varint,4,opt,name=prev_kv,proto3"`
IgnoreValue bool `protobuf:"varint,5,opt,name=ignore_value,proto3"`
@@ -166,7 +166,7 @@ type loggablePutRequest struct {
func NewLoggablePutRequest(request *PutRequest) *loggablePutRequest {
return &loggablePutRequest{
request.Key,
len(request.Value),
int64(len(request.Value)),
request.Lease,
request.PrevKv,
request.IgnoreValue,

View File

@@ -123,6 +123,19 @@ var (
Help: "Server or member ID in hexadecimal format. 1 for 'server_id' label with current ID.",
},
[]string{"server_id"})
fdUsed = prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: "os",
Subsystem: "fd",
Name: "used",
Help: "The number of used file descriptors.",
})
fdLimit = prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: "os",
Subsystem: "fd",
Name: "limit",
Help: "The file descriptor limit.",
})
)
func init() {
@@ -142,6 +155,8 @@ func init() {
prometheus.MustRegister(currentVersion)
prometheus.MustRegister(currentGoVersion)
prometheus.MustRegister(serverID)
prometheus.MustRegister(fdUsed)
prometheus.MustRegister(fdLimit)
currentVersion.With(prometheus.Labels{
"server_version": version.Version,
@@ -152,7 +167,12 @@ func init() {
}
func monitorFileDescriptor(done <-chan struct{}) {
ticker := time.NewTicker(5 * time.Second)
// This ticker will check File Descriptor Requirements ,and count all fds in used.
// And recorded some logs when in used >= limit/5*4. Just recorded message.
// If fds was more than 10K,It's low performance due to FDUsage() works.
// So need to increase it.
// See https://github.com/etcd-io/etcd/issues/11969 for more detail.
ticker := time.NewTicker(10 * time.Minute)
defer ticker.Stop()
for {
used, err := runtime.FDUsage()
@@ -160,11 +180,13 @@ func monitorFileDescriptor(done <-chan struct{}) {
plog.Errorf("cannot monitor file descriptor usage (%v)", err)
return
}
fdUsed.Set(float64(used))
limit, err := runtime.FDLimit()
if err != nil {
plog.Errorf("cannot monitor file descriptor usage (%v)", err)
return
}
fdLimit.Set(float64(limit))
if used >= limit/5*4 {
plog.Warningf("80%% of the file descriptor limit is used [used = %d, limit = %d]", used, limit)
}

View File

@@ -222,6 +222,9 @@ func (s *EtcdServer) LeaseGrant(ctx context.Context, r *pb.LeaseGrantRequest) (*
}
func (s *EtcdServer) LeaseRevoke(ctx context.Context, r *pb.LeaseRevokeRequest) (*pb.LeaseRevokeResponse, error) {
// fix: LeaseRevoke may fail to apply when authentication is enabled and upgrading cluster from etcd-3.2 to etcd-3.3
// see https://github.com/etcd-io/etcd/issues/11689
ctx = s.authStore.WithRoot(ctx)
resp, err := s.raftRequestOnce(ctx, pb.InternalRaftRequest{LeaseRevoke: r})
if err != nil {
return nil, err

View File

@@ -307,21 +307,36 @@ func (b *backend) defrag() error {
b.batchTx.unsafeCommit(true)
b.batchTx.tx = nil
tmpdb, err := bolt.Open(b.db.Path()+".tmp", 0600, boltOpenOptions)
// Create a temporary file to ensure we start with a clean slate.
// Snapshotter.cleanupSnapdir cleans up any of these that are found during startup.
dir := filepath.Dir(b.db.Path())
temp, err := ioutil.TempFile(dir, "db.tmp.*")
if err != nil {
return err
}
// bbolt v1.3.1-coreos.5 does not provide options.OpenFile so we
// instead close the temp file and then let bolt reopen it.
tdbp := temp.Name()
if err := temp.Close(); err != nil {
return err
}
tmpdb, err := bolt.Open(tdbp, 0600, boltOpenOptions)
if err != nil {
return err
}
// gofail: var defragBeforeCopy struct{}
err = defragdb(b.db, tmpdb, defragLimit)
if err != nil {
tmpdb.Close()
os.RemoveAll(tmpdb.Path())
if rmErr := os.RemoveAll(tmpdb.Path()); rmErr != nil {
plog.Fatalf("failed to remove db.tmp after defragmentation completed: %v", rmErr)
}
return err
}
dbp := b.db.Path()
tdbp := tmpdb.Path()
err = b.db.Close()
if err != nil {
@@ -331,6 +346,7 @@ func (b *backend) defrag() error {
if err != nil {
plog.Fatalf("cannot close database (%s)", err)
}
// gofail: var defragBeforeRename struct{}
err = os.Rename(tdbp, dbp)
if err != nil {
plog.Fatalf("cannot rename database (%s)", err)

View File

@@ -95,12 +95,23 @@ func (pw *PageWriter) Write(p []byte) (n int, err error) {
return n, werr
}
// Flush flushes buffered data.
func (pw *PageWriter) Flush() error {
if pw.bufferedBytes == 0 {
return nil
}
_, err := pw.w.Write(pw.buf[:pw.bufferedBytes])
pw.pageOffset = (pw.pageOffset + pw.bufferedBytes) % pw.pageBytes
pw.bufferedBytes = 0
_, err := pw.flush()
return err
}
// FlushN flushes buffered data and returns the number of written bytes.
func (pw *PageWriter) FlushN() (int, error) {
return pw.flush()
}
func (pw *PageWriter) flush() (int, error) {
if pw.bufferedBytes == 0 {
return 0, nil
}
n, err := pw.w.Write(pw.buf[:pw.bufferedBytes])
pw.pageOffset = (pw.pageOffset + pw.bufferedBytes) % pw.pageBytes
pw.bufferedBytes = 0
return n, err
}

View File

@@ -16,7 +16,7 @@
package runtime
import (
"io/ioutil"
"os"
"syscall"
)
@@ -29,9 +29,20 @@ func FDLimit() (uint64, error) {
}
func FDUsage() (uint64, error) {
fds, err := ioutil.ReadDir("/proc/self/fd")
return countFiles("/proc/self/fd")
}
// countFiles reads the directory named by dirname and returns the count.
// This is same as stdlib "io/ioutil.ReadDir" but without sorting.
func countFiles(dirname string) (uint64, error) {
f, err := os.Open(dirname)
if err != nil {
return 0, err
}
return uint64(len(fds)), nil
list, err := f.Readdir(-1)
f.Close()
if err != nil {
return 0, err
}
return uint64(len(list)), nil
}

View File

@@ -172,6 +172,9 @@ func (s *Snapshotter) snapNames() ([]string, error) {
if err != nil {
return nil, err
}
if err = s.cleanupSnapdir(names); err != nil {
return nil, err
}
snaps := checkSuffix(names)
if len(snaps) == 0 {
return nil, ErrNoSnapshot
@@ -202,3 +205,17 @@ func renameBroken(path string) {
plog.Warningf("cannot rename broken snapshot file %v to %v: %v", path, brokenPath, err)
}
}
// cleanupSnapdir removes any files that should not be in the snapshot directory:
// - db.tmp prefixed files that can be orphaned by defragmentation
func (s *Snapshotter) cleanupSnapdir(filenames []string) error {
for _, filename := range filenames {
if strings.HasPrefix(filename, "db.tmp") {
plog.Infof("found orphaned defragmentation file; deleting: %s", filename)
if rmErr := os.Remove(filepath.Join(s.dir, filename)); rmErr != nil && !os.IsNotExist(rmErr) {
return fmt.Errorf("failed to remove orphaned defragmentation file %s: %v", filename, rmErr)
}
}
}
return nil
}

View File

@@ -26,7 +26,7 @@ import (
var (
// MinClusterVersion is the min cluster version this etcd binary is compatible with.
MinClusterVersion = "3.0.0"
Version = "3.2.28"
Version = "3.2.31"
APIVersion = "unknown"
// Git SHA Value will be set during build

View File

@@ -92,7 +92,8 @@ func (e *encoder) encode(rec *walpb.Record) error {
if padBytes != 0 {
data = append(data, make([]byte, padBytes)...)
}
_, err = e.bw.Write(data)
n, err = e.bw.Write(data)
walWriteBytes.Add(float64(n))
return err
}
@@ -108,13 +109,16 @@ func encodeFrameSize(dataBytes int) (lenField uint64, padBytes int) {
func (e *encoder) flush() error {
e.mu.Lock()
defer e.mu.Unlock()
return e.bw.Flush()
n, err := e.bw.FlushN()
e.mu.Unlock()
walWriteBytes.Add(float64(n))
return err
}
func writeUint64(w io.Writer, n uint64, buf []byte) error {
// http://golang.org/src/encoding/binary/binary.go
binary.LittleEndian.PutUint64(buf, n)
_, err := w.Write(buf)
nv, err := w.Write(buf)
walWriteBytes.Add(float64(nv))
return err
}

View File

@@ -24,8 +24,15 @@ var (
Help: "The latency distributions of fsync called by wal.",
Buckets: prometheus.ExponentialBuckets(0.001, 2, 14),
})
walWriteBytes = prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: "etcd",
Subsystem: "disk",
Name: "wal_write_bytes_total",
Help: "Total number of bytes written in WAL.",
})
)
func init() {
prometheus.MustRegister(syncDurations)
prometheus.MustRegister(walWriteBytes)
}

View File

@@ -18,6 +18,7 @@ import (
"io"
"os"
"path/filepath"
"time"
"github.com/coreos/etcd/pkg/fileutil"
"github.com/coreos/etcd/wal/walpb"
@@ -76,10 +77,14 @@ func Repair(dirpath string) bool {
plog.Errorf("could not repair %v, failed to truncate file", f.Name())
return false
}
start := time.Now()
if err = fileutil.Fsync(f.File); err != nil {
plog.Errorf("could not repair %v, failed to sync file", f.Name())
return false
}
syncDurations.Observe(time.Since(start).Seconds())
return true
default:
plog.Errorf("could not repair error (%v)", err)

View File

@@ -147,9 +147,13 @@ func Create(dirpath string, metadata []byte) (*WAL, error) {
if perr != nil {
return nil, perr
}
start := time.Now()
if perr = fileutil.Fsync(pdir); perr != nil {
return nil, perr
}
syncDurations.Observe(time.Since(start).Seconds())
if perr = pdir.Close(); err != nil {
return nil, perr
}
@@ -409,9 +413,12 @@ func (w *WAL) cut() error {
if err = os.Rename(newTail.Name(), fpath); err != nil {
return err
}
start := time.Now()
if err = fileutil.Fsync(w.dirFile); err != nil {
return err
}
syncDurations.Observe(time.Since(start).Seconds())
newTail.Close()