functional-tester: always clean up if tester encounters an error

The current tester does not clean up if any of the failure injection/recovery steps fails. If the tester fails to recover a dead node, it hangs in the next round because it keeps waiting for the cluster to become healthy, which is impossible while a node is down. To fix this issue, we always clean up if any error happens during a round, so that the cluster will be healthy for the next round.

FIX #6743
release-3.1
fanmin shi 2016-10-27 13:47:02 -07:00
parent 1b36162659
commit 7f5a7d1da5
1 changed files with 11 additions and 17 deletions

View File

@ -44,11 +44,10 @@ func (tt *tester) runLoop() {
tt.status.setRound(round)
roundTotalCounter.Inc()
if ok, err := tt.doRound(round); !ok {
if err != nil {
if tt.cleanup() != nil {
return
}
if err := tt.doRound(round); err != nil {
plog.Warningf("%s functional-tester returning with error (%v)", tt.logPrefix, err)
if tt.cleanup() != nil {
return
}
prevCompactRev = 0 // reset after clean up
continue
@ -83,42 +82,37 @@ func (tt *tester) runLoop() {
plog.Printf("%s functional-tester is finished", tt.logPrefix())
}
func (tt *tester) doRound(round int) (bool, error) {
func (tt *tester) doRound(round int) error {
for j, f := range tt.failures {
caseTotalCounter.WithLabelValues(f.Desc()).Inc()
tt.status.setCase(j)
if err := tt.cluster.WaitHealth(); err != nil {
plog.Printf("%s wait full health error: %v", tt.logPrefix(), err)
return false, nil
return fmt.Errorf("wait full health error: %v", err)
}
plog.Printf("%s injecting failure %q", tt.logPrefix(), f.Desc())
if err := f.Inject(tt.cluster, round); err != nil {
plog.Printf("%s injection error: %v", tt.logPrefix(), err)
return false, nil
return fmt.Errorf("injection error: %v", err)
}
plog.Printf("%s injected failure", tt.logPrefix())
plog.Printf("%s recovering failure %q", tt.logPrefix(), f.Desc())
if err := f.Recover(tt.cluster, round); err != nil {
plog.Printf("%s recovery error: %v", tt.logPrefix(), err)
return false, nil
return fmt.Errorf("recovery error: %v", err)
}
plog.Printf("%s wait until cluster is healthy", tt.logPrefix())
if err := tt.cluster.WaitHealth(); err != nil {
plog.Printf("%s wait full health error: %v", tt.logPrefix(), err)
return false, nil
return fmt.Errorf("wait full health error: %v", err)
}
plog.Printf("%s recovered failure", tt.logPrefix())
if err := tt.checkConsistency(); err != nil {
plog.Warningf("%s functional-tester returning with tt.checkConsistency error (%v)", tt.logPrefix(), err)
return false, err
return fmt.Errorf("tt.checkConsistency error (%v)", err)
}
plog.Printf("%s succeed!", tt.logPrefix())
}
return true, nil
return nil
}
func (tt *tester) updateRevision() error {