etcd-tester: match more grpc errors

To prevent stressers from returning on errors caused by failure injections
release-3.0
Gyu-Ho Lee 2016-04-27 09:23:45 -07:00
parent 8a4c9c9da1
commit de7c18909f
1 changed file with 21 additions and 2 deletions


@@ -25,10 +25,12 @@ import (
 	"time"

 	clientV2 "github.com/coreos/etcd/client"
+	"github.com/coreos/etcd/etcdserver"
 	pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
 	"golang.org/x/net/context"
 	"google.golang.org/grpc"
 	"google.golang.org/grpc/grpclog"
+	"google.golang.org/grpc/transport"
 )

 func init() {
@@ -61,7 +63,8 @@ type stresser struct {
 }

 func (s *stresser) Stress() error {
-	conn, err := grpc.Dial(s.Endpoint, grpc.WithInsecure(), grpc.WithTimeout(5*time.Second))
+	// TODO: add backoff option
+	conn, err := grpc.Dial(s.Endpoint, grpc.WithInsecure())
 	if err != nil {
 		return fmt.Errorf("%v (%s)", err, s.Endpoint)
 	}
@@ -93,13 +96,29 @@ func (s *stresser) Stress() error {
 				})
 				putcancel()
 				if err != nil {
-					if grpc.ErrorDesc(err) == context.DeadlineExceeded.Error() {
+					shouldContinue := false
+					switch grpc.ErrorDesc(err) {
+					case context.DeadlineExceeded.Error():
 						// This retries when request is triggered at the same time as
 						// leader failure. When we terminate the leader, the request to
 						// that leader cannot be processed, and times out. Also requests
 						// to followers cannot be forwarded to the old leader, so timing out
 						// as well. We want to keep stressing until the cluster elects a
 						// new leader and start processing requests again.
+						shouldContinue = true
+					case etcdserver.ErrStopped.Error():
+						// one of the etcd nodes stopped from failure injection
+						shouldContinue = true
+					case transport.ErrConnClosing.Desc:
+						// server closed the transport (failure injected node)
+						shouldContinue = true
+					// default:
+					// errors from stresser.Cancel method:
+					// rpc error: code = 1 desc = context canceled (type grpc.rpcError)
+					// rpc error: code = 2 desc = grpc: the client connection is closing (type grpc.rpcError)
+					}
+					if shouldContinue {
						continue
					}
					return
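
The TODO in the dial hunk above is left unimplemented by this commit. Below is a minimal sketch of what a backoff could look like, assuming a plain retry loop around grpc.Dial; the dialWithBackoff helper, its parameters, and the package name are illustrative, not part of etcd.

// Hypothetical sketch only; the commit itself does not add backoff.
package stress

import (
	"time"

	"google.golang.org/grpc"
)

// dialWithBackoff retries a blocking grpc.Dial with exponentially growing
// waits between attempts, capped at maxWait, giving up after attempts tries.
func dialWithBackoff(endpoint string, attempts int, base, maxWait time.Duration) (*grpc.ClientConn, error) {
	var lastErr error
	wait := base
	for i := 0; i < attempts; i++ {
		// WithBlock makes Dial wait for the connection (or the timeout),
		// so a failure here is worth backing off on.
		conn, err := grpc.Dial(endpoint,
			grpc.WithInsecure(),
			grpc.WithBlock(),
			grpc.WithTimeout(5*time.Second))
		if err == nil {
			return conn, nil
		}
		lastErr = err
		time.Sleep(wait)
		if wait *= 2; wait > maxWait {
			wait = maxWait
		}
	}
	return nil, lastErr
}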
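
The new switch is effectively a predicate over grpc.ErrorDesc: any error that is an expected side effect of failure injection keeps the stresser looping. A self-contained sketch of that predicate follows; the isRetriableStressError name and the package clause are hypothetical, while the matched errors are exactly the ones from the diff.

// Hypothetical helper mirroring the switch added in Stress; not part of the commit.
package stress

import (
	"github.com/coreos/etcd/etcdserver"
	"golang.org/x/net/context"
	"google.golang.org/grpc"
	"google.golang.org/grpc/transport"
)

// isRetriableStressError reports whether a failed Put is an expected side
// effect of failure injection, meaning the stresser should keep going.
func isRetriableStressError(err error) bool {
	switch grpc.ErrorDesc(err) {
	case context.DeadlineExceeded.Error():
		// the request raced with a leader failure and timed out
		return true
	case etcdserver.ErrStopped.Error():
		// the target etcd member was stopped by failure injection
		return true
	case transport.ErrConnClosing.Desc:
		// the stopped member closed its gRPC transport
		return true
	}
	return false
}

With such a helper the retry path in Stress reduces to one if statement, while the errors listed under the commented-out default case (the ones raised by stresser.Cancel) still fall through and make the stresser return.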