functional/tester: clean up runner logging

Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
release-3.4
Gyuho Lee 2018-04-09 10:40:27 -07:00
parent 054721166f
commit 2922116be5
6 changed files with 104 additions and 24 deletions

View File

@ -245,11 +245,7 @@ func (clus *Cluster) updateStresserChecker() {
clus.checker = newNoChecker()
}
clus.lg.Info(
"updated stressers",
zap.Int("round", clus.rd),
zap.Int("case", clus.cs),
)
clus.lg.Info("updated stressers")
}
func (clus *Cluster) checkConsistency() (err error) {
@ -646,6 +642,7 @@ func (clus *Cluster) defrag() error {
"defrag ALL PASS",
zap.Int("round", clus.rd),
zap.Int("case", clus.cs),
zap.Int("case-total", len(clus.failures)),
)
return nil
}

View File

@ -51,6 +51,7 @@ func (clus *Cluster) Run() {
"round FAIL",
zap.Int("round", clus.rd),
zap.Int("case", clus.cs),
zap.Int("case-total", len(clus.failures)),
zap.Error(err),
)
if clus.cleanup() != nil {
@ -74,6 +75,7 @@ func (clus *Cluster) Run() {
"compact START",
zap.Int("round", clus.rd),
zap.Int("case", clus.cs),
zap.Int("case-total", len(clus.failures)),
zap.Duration("timeout", timeout),
)
if err := clus.compact(revToCompact, timeout); err != nil {
@ -81,6 +83,7 @@ func (clus *Cluster) Run() {
"compact FAIL",
zap.Int("round", clus.rd),
zap.Int("case", clus.cs),
zap.Int("case-total", len(clus.failures)),
zap.Error(err),
)
if err = clus.cleanup(); err != nil {
@ -88,6 +91,7 @@ func (clus *Cluster) Run() {
"cleanup FAIL",
zap.Int("round", clus.rd),
zap.Int("case", clus.cs),
zap.Int("case-total", len(clus.failures)),
zap.Error(err),
)
return
@ -107,6 +111,7 @@ func (clus *Cluster) Run() {
"functional-tester PASS",
zap.Int("round", clus.rd),
zap.Int("case", clus.cs),
zap.Int("case-total", len(clus.failures)),
)
}
@ -119,8 +124,9 @@ func (clus *Cluster) doRound() error {
clus.lg.Info(
"round START",
zap.Int("round", clus.rd),
zap.Int("case", clus.cs),
zap.Int("case-total", len(clus.failures)),
zap.Strings("failures", clus.failureStrings()),
zap.Int("total-failures", len(clus.failures)),
)
for i, fa := range clus.failures {
clus.cs = i
@ -133,8 +139,8 @@ func (clus *Cluster) doRound() error {
"case START",
zap.Int("round", clus.rd),
zap.Int("case", clus.cs),
zap.Int("case-total", len(clus.failures)),
zap.String("desc", fa.Desc()),
zap.Int("total-failures", len(clus.failures)),
)
clus.lg.Info("wait health before injecting failures")
@ -149,6 +155,7 @@ func (clus *Cluster) doRound() error {
"stresser START",
zap.Int("round", clus.rd),
zap.Int("case", clus.cs),
zap.Int("case-total", len(clus.failures)),
zap.String("desc", fa.Desc()),
)
if err := clus.stresser.Stress(); err != nil {
@ -161,6 +168,7 @@ func (clus *Cluster) doRound() error {
"inject START",
zap.Int("round", clus.rd),
zap.Int("case", clus.cs),
zap.Int("case-total", len(clus.failures)),
zap.String("desc", fa.Desc()),
)
if err := fa.Inject(clus); err != nil {
@ -174,6 +182,7 @@ func (clus *Cluster) doRound() error {
"recover START",
zap.Int("round", clus.rd),
zap.Int("case", clus.cs),
zap.Int("case-total", len(clus.failures)),
zap.String("desc", fa.Desc()),
)
if err := fa.Recover(clus); err != nil {
@ -218,8 +227,8 @@ func (clus *Cluster) doRound() error {
"case PASS",
zap.Int("round", clus.rd),
zap.Int("case", clus.cs),
zap.Int("case-total", len(clus.failures)),
zap.String("desc", fa.Desc()),
zap.Int("total-failures", len(clus.failures)),
zap.Duration("took", time.Since(caseNow)),
)
}
@ -228,7 +237,7 @@ func (clus *Cluster) doRound() error {
"round ALL PASS",
zap.Int("round", clus.rd),
zap.Strings("failures", clus.failureStrings()),
zap.Int("total-failures", len(clus.failures)),
zap.Int("case-total", len(clus.failures)),
zap.Duration("took", time.Since(roundNow)),
)
return nil
@ -291,6 +300,7 @@ func (clus *Cluster) failed() {
"functional-tester FAIL",
zap.Int("round", clus.rd),
zap.Int("case", clus.cs),
zap.Int("case-total", len(clus.failures)),
)
clus.DestroyEtcdAgents()
os.Exit(2)
@ -310,6 +320,7 @@ func (clus *Cluster) cleanup() error {
"closing stressers before archiving failure data",
zap.Int("round", clus.rd),
zap.Int("case", clus.cs),
zap.Int("case-total", len(clus.failures)),
)
clus.stresser.Close()
@ -318,6 +329,7 @@ func (clus *Cluster) cleanup() error {
"cleanup FAIL",
zap.Int("round", clus.rd),
zap.Int("case", clus.cs),
zap.Int("case-total", len(clus.failures)),
zap.Error(err),
)
return err
@ -327,6 +339,7 @@ func (clus *Cluster) cleanup() error {
"restart FAIL",
zap.Int("round", clus.rd),
zap.Int("case", clus.cs),
zap.Int("case-total", len(clus.failures)),
zap.Error(err),
)
return err

View File

@ -52,6 +52,7 @@ func newStresser(clus *Cluster, m *rpcpb.Member) Stresser {
// TODO: Too intensive stressing clients can panic etcd member with
// 'out of memory' error. Put rate limits in server side.
stressers[i] = &keyStresser{
stype: rpcpb.StressType_KV,
lg: clus.lg,
m: m,
keySize: int(clus.Tester.StressKeySize),
@ -65,6 +66,7 @@ func newStresser(clus *Cluster, m *rpcpb.Member) Stresser {
case "LEASE":
stressers[i] = &leaseStresser{
stype: rpcpb.StressType_LEASE,
lg: clus.lg,
m: m,
numLeases: 10, // TODO: configurable
@ -84,6 +86,8 @@ func newStresser(clus *Cluster, m *rpcpb.Member) Stresser {
"--req-rate", fmt.Sprintf("%v", reqRate),
}
stressers[i] = newRunnerStresser(
rpcpb.StressType_ELECTION_RUNNER,
clus.lg,
clus.Tester.RunnerExecPath,
args,
clus.rateLimiter,
@ -102,7 +106,14 @@ func newStresser(clus *Cluster, m *rpcpb.Member) Stresser {
"--rounds=0", // runs forever
"--req-rate", fmt.Sprintf("%v", reqRate),
}
stressers[i] = newRunnerStresser(clus.Tester.RunnerExecPath, args, clus.rateLimiter, reqRate)
stressers[i] = newRunnerStresser(
rpcpb.StressType_WATCH_RUNNER,
clus.lg,
clus.Tester.RunnerExecPath,
args,
clus.rateLimiter,
reqRate,
)
case "LOCK_RACER_RUNNER":
reqRate := 100
@ -114,7 +125,14 @@ func newStresser(clus *Cluster, m *rpcpb.Member) Stresser {
"--rounds=0", // runs forever
"--req-rate", fmt.Sprintf("%v", reqRate),
}
stressers[i] = newRunnerStresser(clus.Tester.RunnerExecPath, args, clus.rateLimiter, reqRate)
stressers[i] = newRunnerStresser(
rpcpb.StressType_LOCK_RACER_RUNNER,
clus.lg,
clus.Tester.RunnerExecPath,
args,
clus.rateLimiter,
reqRate,
)
case "LEASE_RUNNER":
args := []string{
@ -122,7 +140,14 @@ func newStresser(clus *Cluster, m *rpcpb.Member) Stresser {
"--ttl=30",
"--endpoints", m.EtcdClientEndpoint,
}
stressers[i] = newRunnerStresser(clus.Tester.RunnerExecPath, args, clus.rateLimiter, 0)
stressers[i] = newRunnerStresser(
rpcpb.StressType_LEASE_RUNNER,
clus.lg,
clus.Tester.RunnerExecPath,
args,
clus.rateLimiter,
0,
)
}
}
return &compositeStresser{stressers}

View File

@ -34,7 +34,8 @@ import (
)
type keyStresser struct {
lg *zap.Logger
stype rpcpb.StressType
lg *zap.Logger
m *rpcpb.Member
@ -102,7 +103,8 @@ func (s *keyStresser) Stress() error {
}
s.lg.Info(
"key stresser START",
"stress START",
zap.String("stress-type", s.stype.String()),
zap.String("endpoint", s.m.EtcdClientEndpoint),
)
return nil
@ -156,7 +158,8 @@ func (s *keyStresser) run() {
return
default:
s.lg.Warn(
"key stresser exited with error",
"stress stopped",
zap.String("stress-type", s.stype.String()),
zap.String("endpoint", s.m.EtcdClientEndpoint),
zap.Error(err),
)
@ -188,7 +191,8 @@ func (s *keyStresser) Close() map[string]int {
s.emu.Unlock()
s.lg.Info(
"key stresser STOP",
"stress STOP",
zap.String("stress-type", s.stype.String()),
zap.String("endpoint", s.m.EtcdClientEndpoint),
)
return ess

View File

@ -38,7 +38,8 @@ const (
)
type leaseStresser struct {
lg *zap.Logger
stype rpcpb.StressType
lg *zap.Logger
m *rpcpb.Member
cli *clientv3.Client
@ -121,7 +122,8 @@ func (ls *leaseStresser) setupOnce() error {
func (ls *leaseStresser) Stress() error {
ls.lg.Info(
"lease stresser START",
"stress START",
zap.String("stress-type", ls.stype.String()),
zap.String("endpoint", ls.m.EtcdClientEndpoint),
)
@ -159,22 +161,26 @@ func (ls *leaseStresser) run() {
}
ls.lg.Debug(
"lease stresser is creating leases",
"stress creating leases",
zap.String("stress-type", ls.stype.String()),
zap.String("endpoint", ls.m.EtcdClientEndpoint),
)
ls.createLeases()
ls.lg.Debug(
"lease stresser created leases",
"stress created leases",
zap.String("stress-type", ls.stype.String()),
zap.String("endpoint", ls.m.EtcdClientEndpoint),
)
ls.lg.Debug(
"lease stresser is dropped leases",
"stress dropped leases",
zap.String("stress-type", ls.stype.String()),
zap.String("endpoint", ls.m.EtcdClientEndpoint),
)
ls.randomlyDropLeases()
ls.lg.Debug(
"lease stresser dropped leases",
"stress dropped leases",
zap.String("stress-type", ls.stype.String()),
zap.String("endpoint", ls.m.EtcdClientEndpoint),
)
}
@ -243,6 +249,7 @@ func (ls *leaseStresser) createLeaseWithKeys(ttl int64) (int64, error) {
if err != nil {
ls.lg.Debug(
"createLease failed",
zap.String("stress-type", ls.stype.String()),
zap.String("endpoint", ls.m.EtcdClientEndpoint),
zap.Error(err),
)
@ -251,6 +258,7 @@ func (ls *leaseStresser) createLeaseWithKeys(ttl int64) (int64, error) {
ls.lg.Debug(
"createLease created lease",
zap.String("stress-type", ls.stype.String()),
zap.String("endpoint", ls.m.EtcdClientEndpoint),
zap.String("lease-id", fmt.Sprintf("%016x", leaseID)),
)
@ -284,6 +292,7 @@ func (ls *leaseStresser) randomlyDropLeases() {
}
ls.lg.Debug(
"randomlyDropLease dropped a lease",
zap.String("stress-type", ls.stype.String()),
zap.String("endpoint", ls.m.EtcdClientEndpoint),
zap.String("lease-id", fmt.Sprintf("%016x", leaseID)),
)
@ -313,6 +322,7 @@ func (ls *leaseStresser) keepLeaseAlive(leaseID int64) {
case <-ls.ctx.Done():
ls.lg.Debug(
"keepLeaseAlive context canceled",
zap.String("stress-type", ls.stype.String()),
zap.String("endpoint", ls.m.EtcdClientEndpoint),
zap.String("lease-id", fmt.Sprintf("%016x", leaseID)),
zap.Error(ls.ctx.Err()),
@ -327,6 +337,7 @@ func (ls *leaseStresser) keepLeaseAlive(leaseID int64) {
ls.aliveLeases.remove(leaseID)
ls.lg.Debug(
"keepLeaseAlive lease has not been renewed, dropped it",
zap.String("stress-type", ls.stype.String()),
zap.String("endpoint", ls.m.EtcdClientEndpoint),
zap.String("lease-id", fmt.Sprintf("%016x", leaseID)),
)
@ -337,6 +348,7 @@ func (ls *leaseStresser) keepLeaseAlive(leaseID int64) {
if err != nil {
ls.lg.Debug(
"keepLeaseAlive lease creates stream error",
zap.String("stress-type", ls.stype.String()),
zap.String("endpoint", ls.m.EtcdClientEndpoint),
zap.String("lease-id", fmt.Sprintf("%016x", leaseID)),
zap.Error(err),
@ -350,6 +362,7 @@ func (ls *leaseStresser) keepLeaseAlive(leaseID int64) {
if err != nil {
ls.lg.Debug(
"keepLeaseAlive failed to receive lease keepalive response",
zap.String("stress-type", ls.stype.String()),
zap.String("endpoint", ls.m.EtcdClientEndpoint),
zap.String("lease-id", fmt.Sprintf("%016x", leaseID)),
zap.Error(err),
@ -359,6 +372,7 @@ func (ls *leaseStresser) keepLeaseAlive(leaseID int64) {
ls.lg.Debug(
"keepLeaseAlive waiting on lease stream",
zap.String("stress-type", ls.stype.String()),
zap.String("endpoint", ls.m.EtcdClientEndpoint),
zap.String("lease-id", fmt.Sprintf("%016x", leaseID)),
)
@ -367,6 +381,7 @@ func (ls *leaseStresser) keepLeaseAlive(leaseID int64) {
if respRC == nil {
ls.lg.Debug(
"keepLeaseAlive received nil lease keepalive response",
zap.String("stress-type", ls.stype.String()),
zap.String("endpoint", ls.m.EtcdClientEndpoint),
zap.String("lease-id", fmt.Sprintf("%016x", leaseID)),
)
@ -378,6 +393,7 @@ func (ls *leaseStresser) keepLeaseAlive(leaseID int64) {
if respRC.TTL <= 0 {
ls.lg.Debug(
"keepLeaseAlive stream received lease keepalive response TTL <= 0",
zap.String("stress-type", ls.stype.String()),
zap.String("endpoint", ls.m.EtcdClientEndpoint),
zap.String("lease-id", fmt.Sprintf("%016x", leaseID)),
zap.Int64("ttl", respRC.TTL),
@ -388,6 +404,7 @@ func (ls *leaseStresser) keepLeaseAlive(leaseID int64) {
// renew lease timestamp only if lease is present
ls.lg.Debug(
"keepLeaseAlive renewed a lease",
zap.String("stress-type", ls.stype.String()),
zap.String("endpoint", ls.m.EtcdClientEndpoint),
zap.String("lease-id", fmt.Sprintf("%016x", leaseID)),
)
@ -440,6 +457,7 @@ func (ls *leaseStresser) randomlyDropLease(leaseID int64) (bool, error) {
ls.lg.Debug(
"randomlyDropLease error",
zap.String("stress-type", ls.stype.String()),
zap.String("endpoint", ls.m.EtcdClientEndpoint),
zap.String("lease-id", fmt.Sprintf("%016x", leaseID)),
zap.Error(ls.ctx.Err()),
@ -457,7 +475,8 @@ func (ls *leaseStresser) Close() map[string]int {
ls.aliveWg.Wait()
ls.cli.Close()
ls.lg.Info(
"lease stresser STOP",
"stress STOP",
zap.String("stress-type", ls.stype.String()),
zap.String("endpoint", ls.m.EtcdClientEndpoint),
)
return nil

View File

@ -20,10 +20,16 @@ import (
"os/exec"
"syscall"
"github.com/coreos/etcd/functional/rpcpb"
"go.uber.org/zap"
"golang.org/x/time/rate"
)
type runnerStresser struct {
stype rpcpb.StressType
lg *zap.Logger
cmd *exec.Cmd
cmdStr string
args []string
@ -34,9 +40,17 @@ type runnerStresser struct {
donec chan struct{}
}
func newRunnerStresser(cmdStr string, args []string, rl *rate.Limiter, reqRate int) *runnerStresser {
func newRunnerStresser(
stype rpcpb.StressType,
lg *zap.Logger,
cmdStr string,
args []string,
rl *rate.Limiter,
reqRate int,
) *runnerStresser {
rl.SetLimit(rl.Limit() - rate.Limit(reqRate))
return &runnerStresser{
stype: stype,
cmdStr: cmdStr,
args: args,
rl: rl,
@ -71,6 +85,10 @@ func (rs *runnerStresser) setupOnce() (err error) {
}
func (rs *runnerStresser) Stress() (err error) {
rs.lg.Info(
"stress START",
zap.String("stress-type", rs.stype.String()),
)
if err = rs.setupOnce(); err != nil {
return err
}
@ -78,6 +96,10 @@ func (rs *runnerStresser) Stress() (err error) {
}
func (rs *runnerStresser) Pause() map[string]int {
rs.lg.Info(
"stress STOP",
zap.String("stress-type", rs.stype.String()),
)
syscall.Kill(rs.cmd.Process.Pid, syscall.SIGSTOP)
return nil
}