Compare commits

...

17 Commits

Author SHA1 Message Date
Joe Betz
b7ff47f9d5 version: bump up to 3.1.18 2018-06-15 09:47:04 -07:00
Gyuho Lee
fab24fbdab Merge pull request #9848 from wenjiaswe/automated-cherry-pick-of-#8960-upstream-release-3.1
Automated cherry pick of #8960
2018-06-13 16:49:48 -07:00
Joe Betz
b3ee996629 metrics: Add server_version metric 2018-06-13 16:31:18 -07:00
Gyuho Lee
06da6cf983 tests/semaphore.test.bash: update
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-06-13 14:42:45 -07:00
Gyuho Lee
9c00100550 Makefile: update
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-06-13 14:42:10 -07:00
Gyuho Lee
1d7a2ca520 Merge pull request #9838 from jpbetz/automated-cherry-pick-of-#9821-origin-release-3.1-1528833932
etcdserver: Automated cherry pick of detailed "took too long" warnings to release-3.1
2018-06-12 13:54:40 -07:00
Joe Betz
e90934ec71 etcdserver: Fix txn request 'took too long' warnings to use loggable request stringer 2018-06-12 13:22:45 -07:00
Joe Betz
23c5c71426 etcdserver: Add response byte size and range response count to took too long warning 2018-06-12 13:22:45 -07:00
Joe Betz
72a2483d42 etcdserver: Replace value contents with value_size in request took too long warning 2018-06-12 13:15:19 -07:00
Hitoshi Mitake
53eae781fe etcdserver: not print password in the warning message of expensive request
Fix https://github.com/coreos/etcd/issues/9635
2018-06-12 13:15:18 -07:00
Joe Betz
7b1b7def84 etcdserver: Fix to backport of #9288 for pre-RequestV2 code 2018-06-12 13:13:46 -07:00
Xiang
df000fd776 etcdserver: improve request took too long warning 2018-06-12 13:13:46 -07:00
Joe Betz
fd61be4814 version: bump up to 3.1.17+git 2018-06-06 10:36:22 -07:00
Joe Betz
781cc0be83 version: bump up to 3.1.17 2018-06-06 09:54:59 -07:00
Joe Betz
ebe351e3b4 Merge pull request #9808 from jpbetz/snapshot-recover-3.1
etcdserver: Backport snapshot recovery from #7917 to 3.1 branch
2018-06-05 16:13:40 -07:00
Joe Betz
e31510975a etcdserver: Backport snapshot recovery from #7917 to 3.1 branch 2018-06-04 21:52:26 -07:00
Joe Betz
43b0cafcb6 version: bump up to 3.1.16+git 2018-05-31 12:54:30 -07:00
11 changed files with 393 additions and 31 deletions

View File

@@ -20,12 +20,16 @@ clean:
rm -f ./codecov
rm -rf ./agent-*
rm -rf ./covdir
rm -f ./*.coverprofile
rm -f ./*.log
rm -f ./bin/Dockerfile-release
rm -rf ./bin/*.etcd
rm -rf ./default.etcd
rm -rf ./tests/e2e/default.etcd
rm -rf ./gopath
rm -rf ./gopath.proto
rm -rf ./release
rm -f ./snapshot/localhost:*
rm -f ./integration/127.0.0.1:* ./integration/localhost:*
rm -f ./clientv3/integration/127.0.0.1:* ./clientv3/integration/localhost:*
rm -f ./clientv3/ordering/127.0.0.1:* ./clientv3/ordering/localhost:*
@@ -46,7 +50,8 @@ docker-remove:
GO_VERSION ?= 1.10.1
# GO_VERSION ?= 1.10.3
GO_VERSION ?= 1.8.7
ETCD_VERSION ?= $(shell git rev-parse --short HEAD || echo "GitNotFound")
TEST_SUFFIX = $(shell date +%s | base64 | head -c 15)
@@ -61,16 +66,16 @@ endif
# Example:
# GO_VERSION=1.8.7 make build-docker-test
# GO_VERSION=1.9.5 make build-docker-test
# GO_VERSION=1.9.7 make build-docker-test
# make build-docker-test
#
# gcloud docker -- login -u _json_key -p "$(cat /etc/gcp-key-etcd-development.json)" https://gcr.io
# GO_VERSION=1.8.7 make push-docker-test
# GO_VERSION=1.9.5 make push-docker-test
# GO_VERSION=1.9.7 make push-docker-test
# make push-docker-test
#
# gsutil -m acl ch -u allUsers:R -r gs://artifacts.etcd-development.appspot.com
# GO_VERSION=1.9.5 make pull-docker-test
# GO_VERSION=1.9.7 make pull-docker-test
# make pull-docker-test
build-docker-test:
@@ -118,7 +123,7 @@ compile-setup-gopath-with-docker-test:
#
# Local machine:
# TEST_OPTS="PASSES='fmt'" make test
# TEST_OPTS="PASSES='fmt bom dep compile build unit'" make test
# TEST_OPTS="PASSES='fmt bom dep build unit'" make test
# TEST_OPTS="PASSES='build unit release integration_e2e functional'" make test
# TEST_OPTS="PASSES='build grpcproxy'" make test
#
@@ -128,7 +133,7 @@ compile-setup-gopath-with-docker-test:
# TEST_OPTS="VERBOSE=2 PASSES='unit'" make docker-test
#
# Travis CI (test with docker):
# TEST_OPTS="PASSES='fmt bom dep compile build unit'" make docker-test
# TEST_OPTS="PASSES='fmt bom dep build unit'" make docker-test
#
# Semaphore CI (test with docker):
# TEST_OPTS="PASSES='build unit release integration_e2e functional'" make docker-test

View File

@@ -95,6 +95,9 @@ func (s *EtcdServer) newApplierV3() applierV3 {
func (a *applierV3backend) Apply(r *pb.InternalRaftRequest) *applyResult {
ar := &applyResult{}
defer func(start time.Time) {
warnOfExpensiveRequest(start, &pb.InternalRaftStringer{Request: r}, ar.resp, ar.err)
}(time.Now())
// call into a.s.applyV3.F instead of a.F so upper appliers can check individual calls
switch {

View File

@@ -105,10 +105,12 @@ func (a *applierV2store) Sync(r *pb.Request) Response {
return Response{}
}
// applyV2Request interprets r as a call to store.X and returns a Response interpreted
// from store.Event
// applyV2Request interprets r as a call to v2store.X
// and returns a Response interpreted from v2store.Event
func (s *EtcdServer) applyV2Request(r *pb.Request) Response {
defer warnOfExpensiveRequest(time.Now(), r, nil, nil)
toTTLOptions(r)
switch r.Method {
case "POST":
return s.applyV2.Post(r)

83
etcdserver/backend.go Normal file
View File

@@ -0,0 +1,83 @@
// Copyright 2017 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package etcdserver
import (
"fmt"
"os"
"path/filepath"
"time"
"github.com/coreos/etcd/lease"
"github.com/coreos/etcd/mvcc"
"github.com/coreos/etcd/mvcc/backend"
"github.com/coreos/etcd/raft/raftpb"
"github.com/coreos/etcd/snap"
)
func newBackend(cfg *ServerConfig) backend.Backend {
return backend.NewDefaultBackend(backendPath(cfg))
}
func backendPath(cfg *ServerConfig) string {
return filepath.Join(cfg.SnapDir(), "db")
}
// openSnapshotBackend renames a snapshot db to the current etcd db and opens it.
func openSnapshotBackend(cfg *ServerConfig, ss *snap.Snapshotter, snapshot raftpb.Snapshot) (backend.Backend, error) {
snapPath, err := ss.DBFilePath(snapshot.Metadata.Index)
if err != nil {
return nil, fmt.Errorf("failed to find database snapshot file (%v)", err)
}
if err := os.Rename(snapPath, backendPath(cfg)); err != nil {
return nil, fmt.Errorf("failed to rename database snapshot file (%v)", err)
}
return openBackend(cfg), nil
}
// openBackend returns a backend using the current etcd db.
func openBackend(cfg *ServerConfig) backend.Backend {
fn := backendPath(cfg)
beOpened := make(chan backend.Backend)
go func() {
beOpened <- newBackend(cfg)
}()
select {
case be := <-beOpened:
return be
case <-time.After(10 * time.Second):
plog.Warningf("another etcd process is using %q and holds the file lock, or loading backend file is taking >10 seconds", fn)
plog.Warningf("waiting for it to exit before starting...")
}
return <-beOpened
}
// recoverBackendSnapshot recovers the DB from a snapshot in case etcd crashes
// before updating the backend db after persisting raft snapshot to disk,
// violating the invariant snapshot.Metadata.Index < db.consistentIndex. In this
// case, replace the db with the snapshot db sent by the leader.
func recoverSnapshotBackend(cfg *ServerConfig, oldbe backend.Backend, snapshot raftpb.Snapshot) (backend.Backend, error) {
var cIndex consistentIndex
kv := mvcc.New(oldbe, &lease.FakeLessor{}, &cIndex)
defer kv.Close()
if snapshot.Metadata.Index <= kv.ConsistentIndex() {
return oldbe, nil
}
oldbe.Close()
return openSnapshotBackend(cfg, snap.New(cfg.SnapDir()), snapshot)
}

View File

@@ -0,0 +1,175 @@
// Copyright 2018 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package etcdserverpb
import (
"fmt"
"strings"
proto "github.com/golang/protobuf/proto"
)
// InternalRaftStringer implements custom proto Stringer:
// redact password, replace value fields with value_size fields.
type InternalRaftStringer struct {
Request *InternalRaftRequest
}
func (as *InternalRaftStringer) String() string {
switch {
case as.Request.LeaseGrant != nil:
return fmt.Sprintf("header:<%s> lease_grant:<ttl:%d-second id:%016x>",
as.Request.Header.String(),
as.Request.LeaseGrant.TTL,
as.Request.LeaseGrant.ID,
)
case as.Request.LeaseRevoke != nil:
return fmt.Sprintf("header:<%s> lease_revoke:<id:%016x>",
as.Request.Header.String(),
as.Request.LeaseRevoke.ID,
)
case as.Request.Authenticate != nil:
return fmt.Sprintf("header:<%s> authenticate:<name:%s simple_token:%s>",
as.Request.Header.String(),
as.Request.Authenticate.Name,
as.Request.Authenticate.SimpleToken,
)
case as.Request.AuthUserAdd != nil:
return fmt.Sprintf("header:<%s> auth_user_add:<name:%s>",
as.Request.Header.String(),
as.Request.AuthUserAdd.Name,
)
case as.Request.AuthUserChangePassword != nil:
return fmt.Sprintf("header:<%s> auth_user_change_password:<name:%s>",
as.Request.Header.String(),
as.Request.AuthUserChangePassword.Name,
)
case as.Request.Put != nil:
return fmt.Sprintf("header:<%s> put:<%s>",
as.Request.Header.String(),
newLoggablePutRequest(as.Request.Put).String(),
)
case as.Request.Txn != nil:
return fmt.Sprintf("header:<%s> txn:<%s>",
as.Request.Header.String(),
NewLoggableTxnRequest(as.Request.Txn).String(),
)
default:
// nothing to redact
}
return as.Request.String()
}
// txnRequestStringer implements a custom proto String to replace value bytes fields with value size
// fields in any nested txn and put operations.
type txnRequestStringer struct {
Request *TxnRequest
}
func NewLoggableTxnRequest(request *TxnRequest) *txnRequestStringer {
return &txnRequestStringer{request}
}
func (as *txnRequestStringer) String() string {
var compare []string
for _, c := range as.Request.Compare {
switch cv := c.TargetUnion.(type) {
case *Compare_Value:
compare = append(compare, newLoggableValueCompare(c, cv).String())
default:
// nothing to redact
compare = append(compare, c.String())
}
}
var success []string
for _, s := range as.Request.Success {
success = append(success, newLoggableRequestOp(s).String())
}
var failure []string
for _, f := range as.Request.Failure {
failure = append(failure, newLoggableRequestOp(f).String())
}
return fmt.Sprintf("compare:<%s> success:<%s> failure:<%s>",
strings.Join(compare, " "),
strings.Join(success, " "),
strings.Join(failure, " "),
)
}
// requestOpStringer implements a custom proto String to replace value bytes fields with value
// size fields in any nested txn and put operations.
type requestOpStringer struct {
Op *RequestOp
}
func newLoggableRequestOp(op *RequestOp) *requestOpStringer {
return &requestOpStringer{op}
}
func (as *requestOpStringer) String() string {
switch op := as.Op.Request.(type) {
case *RequestOp_RequestPut:
return fmt.Sprintf("request_put:<%s>", newLoggablePutRequest(op.RequestPut).String())
default:
// nothing to redact
}
return as.Op.String()
}
// loggableValueCompare implements a custom proto String for Compare.Value union member types to
// replace the value bytes field with a value size field.
// To preserve proto encoding of the key and range_end bytes, a faked out proto type is used here.
type loggableValueCompare struct {
Result Compare_CompareResult `protobuf:"varint,1,opt,name=result,proto3,enum=etcdserverpb.Compare_CompareResult"`
Target Compare_CompareTarget `protobuf:"varint,2,opt,name=target,proto3,enum=etcdserverpb.Compare_CompareTarget"`
Key []byte `protobuf:"bytes,3,opt,name=key,proto3"`
ValueSize int `protobuf:"bytes,7,opt,name=value_size,proto3"`
}
func newLoggableValueCompare(c *Compare, cv *Compare_Value) *loggableValueCompare {
return &loggableValueCompare{
c.Result,
c.Target,
c.Key,
len(cv.Value),
}
}
func (m *loggableValueCompare) Reset() { *m = loggableValueCompare{} }
func (m *loggableValueCompare) String() string { return proto.CompactTextString(m) }
func (*loggableValueCompare) ProtoMessage() {}
// loggablePutRequest implements a custom proto String to replace value bytes field with a value
// size field.
// To preserve proto encoding of the key bytes, a faked out proto type is used here.
type loggablePutRequest struct {
Key []byte `protobuf:"bytes,1,opt,name=key,proto3"`
ValueSize int `protobuf:"varint,2,opt,name=value_size,proto3"`
Lease int64 `protobuf:"varint,3,opt,name=lease,proto3"`
PrevKv bool `protobuf:"varint,4,opt,name=prev_kv,proto3"`
}
func newLoggablePutRequest(request *PutRequest) *loggablePutRequest {
return &loggablePutRequest{
request.Key,
len(request.Value),
request.Lease,
request.PrevKv,
}
}
func (m *loggablePutRequest) Reset() { *m = loggablePutRequest{} }
func (m *loggablePutRequest) String() string { return proto.CompactTextString(m) }
func (*loggablePutRequest) ProtoMessage() {}

View File

@@ -18,6 +18,7 @@ import (
"time"
"github.com/coreos/etcd/pkg/runtime"
"github.com/coreos/etcd/version"
"github.com/prometheus/client_golang/prometheus"
)
@@ -64,6 +65,13 @@ var (
Name: "proposals_failed_total",
Help: "The total number of failed proposals seen.",
})
currentVersion = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Namespace: "etcd",
Subsystem: "server",
Name: "version",
Help: "Which version is running. 1 for 'server_version' label with current version.",
},
[]string{"server_version"})
)
func init() {
@@ -74,6 +82,11 @@ func init() {
prometheus.MustRegister(proposalsApplied)
prometheus.MustRegister(proposalsPending)
prometheus.MustRegister(proposalsFailed)
prometheus.MustRegister(currentVersion)
currentVersion.With(prometheus.Labels{
"server_version": version.Version,
}).Set(1)
}
func monitorFileDescriptor(done <-chan struct{}) {

View File

@@ -378,6 +378,10 @@ func NewServer(cfg *ServerConfig) (srv *EtcdServer, err error) {
plog.Panicf("recovered store from snapshot error: %v", err)
}
plog.Infof("recovered store from snapshot at index %d", snapshot.Metadata.Index)
if be, err = recoverSnapshotBackend(cfg, be, *snapshot); err != nil {
plog.Panicf("recovering backend from snapshot error: %v", err)
}
}
cfg.Print()
if !cfg.ForceNewCluster {
@@ -796,14 +800,8 @@ func (s *EtcdServer) run() {
func (s *EtcdServer) applyAll(ep *etcdProgress, apply *apply) {
s.applySnapshot(ep, apply)
st := time.Now()
s.applyEntries(ep, apply)
d := time.Since(st)
entriesNum := len(apply.entries)
if entriesNum != 0 && d > time.Duration(entriesNum)*warnApplyDuration {
plog.Warningf("apply entries took too long [%v for %d entries]", d, len(apply.entries))
plog.Warningf("avoid queries with large range/delete range!")
}
proposalsApplied.Set(float64(ep.appliedi))
s.applyWait.Trigger(ep.appliedi)
// wait for the raft routine to finish the disk writes before triggering a

View File

@@ -15,11 +15,16 @@
package etcdserver
import (
"fmt"
"reflect"
"strings"
"time"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
"github.com/coreos/etcd/etcdserver/membership"
"github.com/coreos/etcd/pkg/types"
"github.com/coreos/etcd/rafthttp"
"github.com/golang/protobuf/proto"
)
// isConnectedToQuorumSince checks whether the local member is connected to the
@@ -95,3 +100,55 @@ func (nc *notifier) notify(err error) {
nc.err = err
close(nc.c)
}
func warnOfExpensiveRequest(now time.Time, reqStringer fmt.Stringer, respMsg proto.Message, err error) {
var resp string
if !isNil(respMsg) {
resp = fmt.Sprintf("size:%d", proto.Size(respMsg))
}
warnOfExpensiveGenericRequest(now, reqStringer, "", resp, err)
}
func warnOfExpensiveReadOnlyTxnRequest(now time.Time, r *pb.TxnRequest, txnResponse *pb.TxnResponse, err error) {
reqStringer := pb.NewLoggableTxnRequest(r)
var resp string
if !isNil(txnResponse) {
var resps []string
for _, r := range txnResponse.Responses {
switch op := r.Response.(type) {
case *pb.ResponseOp_ResponseRange:
resps = append(resps, fmt.Sprintf("range_response_count:%d", len(op.ResponseRange.Kvs)))
default:
// only range responses should be in a read only txn request
}
}
resp = fmt.Sprintf("responses:<%s> size:%d", strings.Join(resps, " "), proto.Size(txnResponse))
}
warnOfExpensiveGenericRequest(now, reqStringer, "read-only range ", resp, err)
}
func warnOfExpensiveReadOnlyRangeRequest(now time.Time, reqStringer fmt.Stringer, rangeResponse *pb.RangeResponse, err error) {
var resp string
if !isNil(rangeResponse) {
resp = fmt.Sprintf("range_response_count:%d size:%d", len(rangeResponse.Kvs), proto.Size(rangeResponse))
}
warnOfExpensiveGenericRequest(now, reqStringer, "read-only range ", resp, err)
}
func warnOfExpensiveGenericRequest(now time.Time, reqStringer fmt.Stringer, prefix string, resp string, err error) {
// TODO: add metrics
d := time.Since(now)
if d > warnApplyDuration {
var result string
if err != nil {
result = fmt.Sprintf("error:%v", err)
} else {
result = resp
}
plog.Warningf("%srequest %q with result %q took too long (%v) to execute", prefix, reqStringer.String(), result, d)
}
}
func isNil(msg proto.Message) bool {
return msg == nil || reflect.ValueOf(msg).IsNil()
}

View File

@@ -95,21 +95,25 @@ func (s *EtcdServer) Range(ctx context.Context, r *pb.RangeRequest) (*pb.RangeRe
if s.ClusterVersion() == nil || s.ClusterVersion().LessThan(newRangeClusterVersion) {
return s.legacyRange(ctx, r)
}
var resp *pb.RangeResponse
var err error
defer func(start time.Time) {
warnOfExpensiveReadOnlyRangeRequest(start, r, resp, err)
}(time.Now())
if !r.Serializable {
err := s.linearizableReadNotify(ctx)
err = s.linearizableReadNotify(ctx)
if err != nil {
return nil, err
}
}
var resp *pb.RangeResponse
var err error
chk := func(ai *auth.AuthInfo) error {
return s.authStore.IsRangePermitted(ai, r.Key, r.RangeEnd)
}
get := func() { resp, err = s.applyV3Base.Range(noTxn, r) }
if serr := s.doSerialize(ctx, chk, get); serr != nil {
return nil, serr
err = serr
return nil, err
}
return resp, err
}
@@ -178,6 +182,11 @@ func (s *EtcdServer) Txn(ctx context.Context, r *pb.TxnRequest) (*pb.TxnResponse
chk := func(ai *auth.AuthInfo) error {
return checkTxnAuth(s.authStore, ai, r)
}
defer func(start time.Time) {
warnOfExpensiveReadOnlyTxnRequest(start, r, resp, err)
}(time.Now())
get := func() { resp, err = s.applyV3Base.Txn(r) }
if serr := s.doSerialize(ctx, chk, get); serr != nil {
return nil, serr

View File

@@ -5,17 +5,34 @@ if ! [[ "$0" =~ "tests/semaphore.test.bash" ]]; then
exit 255
fi
TEST_SUFFIX=$(date +%s | base64 | head -c 15)
<<COMMENT
# amd64-e2e
bash tests/semaphore.test.bash
TEST_OPTS="PASSES='build unit release integration_e2e functional' MANUAL_VER=v3.1.12"
if [ "$TEST_ARCH" == "386" ]; then
TEST_OPTS="GOARCH=386 PASSES='build unit integration_e2e'"
# 386-e2e
TEST_ARCH=386 bash tests/semaphore.test.bash
# grpc-proxy
TEST_OPTS="PASSES='build grpcproxy'" bash tests/semaphore.test.bash
# coverage
TEST_OPTS="coverage" bash tests/semaphore.test.bash
COMMENT
if [ -z "${TEST_OPTS}" ]; then
TEST_OPTS="PASSES='build release e2e' MANUAL_VER=v3.1.17"
fi
if [ "${TEST_ARCH}" == "386" ]; then
TEST_OPTS="GOARCH=386 PASSES='build e2e'"
fi
docker run \
--rm \
--volume=`pwd`:/go/src/github.com/coreos/etcd \
gcr.io/etcd-development/etcd-test:go1.8.7 \
/bin/bash -c "${TEST_OPTS} ./test 2>&1 | tee test-${TEST_SUFFIX}.log"
! egrep "(--- FAIL:|panic: test timed out|appears to have leaked)" -B50 -A10 test-${TEST_SUFFIX}.log
echo "Running tests with" ${TEST_OPTS}
if [ "${TEST_OPTS}" == "PASSES='build grpcproxy'" ]; then
echo "Skip proxy tests for this branch!"
exit 0
elif [ "${TEST_OPTS}" == "coverage" ]; then
echo "Skip coverage tests for this branch!"
exit 0
else
sudo HOST_TMP_DIR=/tmp TEST_OPTS="${TEST_OPTS}" make docker-test
fi

View File

@@ -26,7 +26,7 @@ import (
var (
// MinClusterVersion is the min cluster version this etcd binary is compatible with.
MinClusterVersion = "3.0.0"
Version = "3.1.16"
Version = "3.1.18"
APIVersion = "unknown"
// Git SHA Value will be set during build