Merge pull request #8070 from heyitsanthony/etcdctl-cluster-health

ctlv2: report unhealthy in cluster-health if any node is unavailable
release-3.3
Anthony Romano 2017-06-09 14:57:03 -07:00 committed by GitHub
commit 933aa09b73
2 changed files with 28 additions and 13 deletions

View File

@@ -321,17 +321,31 @@ func TestCtlV2ClusterHealth(t *testing.T) {
}
}()
// has quorum
// all members available
if err := etcdctlClusterHealth(epc, "cluster is healthy"); err != nil {
t.Fatalf("cluster-health expected to be healthy (%v)", err)
}
// cut quorum
// missing members, has quorum
epc.procs[0].Stop()
epc.procs[1].Stop()
if err := etcdctlClusterHealth(epc, "cluster is unhealthy"); err != nil {
t.Fatalf("cluster-health expected to be unhealthy (%v)", err)
for i := 0; i < 3; i++ {
err := etcdctlClusterHealth(epc, "cluster is degraded")
if err == nil {
break
} else if i == 2 {
t.Fatalf("cluster-health expected to be degraded (%v)", err)
}
// possibly no leader yet; retry
time.Sleep(time.Second)
}
// no quorum
epc.procs[1].Stop()
if err := etcdctlClusterHealth(epc, "cluster is unavailable"); err != nil {
t.Fatalf("cluster-health expected to be unavailable (%v)", err)
}
epc.procs[0], epc.procs[1] = nil, nil
}

View File

@@ -70,7 +70,7 @@ func handleClusterHealth(c *cli.Context) error {
}
for {
health := false
healthyMembers := 0
for _, m := range ms {
if len(m.ClientURLs) == 0 {
fmt.Printf("member %s is unreachable: no available published client urls\n", m.ID)
@@ -105,8 +105,8 @@ func handleClusterHealth(c *cli.Context) error {
checked = true
if result.Health == "true" || nresult.Health {
health = true
fmt.Printf("member %s is healthy: got healthy result from %s\n", m.ID, url)
healthyMembers++
} else {
fmt.Printf("member %s is unhealthy: got unhealthy result from %s\n", m.ID, url)
}
@@ -116,19 +116,20 @@ func handleClusterHealth(c *cli.Context) error {
fmt.Printf("member %s is unreachable: %v are all unreachable\n", m.ID, m.ClientURLs)
}
}
if health {
switch healthyMembers {
case len(ms):
fmt.Println("cluster is healthy")
} else {
fmt.Println("cluster is unhealthy")
case 0:
fmt.Println("cluster is unavailable")
default:
fmt.Println("cluster is degraded")
}
if !forever {
if health {
if healthyMembers == len(ms) {
os.Exit(ExitSuccess)
return nil
}
os.Exit(ExitClusterNotHealthy)
return nil
}
fmt.Printf("\nnext check after 10 second...\n\n")