// etcd/tests/functional/standby_test.go

package test

import (
	"bytes"
	"fmt"
	"os"
	"testing"
	"time"

	"github.com/coreos/etcd/server"
	"github.com/coreos/etcd/store"
	"github.com/coreos/etcd/tests"
	"github.com/coreos/etcd/third_party/github.com/coreos/go-etcd/etcd"
	"github.com/coreos/etcd/third_party/github.com/stretchr/testify/assert"
)

// Create a full cluster and then change the active size.
func TestStandby(t *testing.T) {
	clusterSize := 15
	_, etcds, err := CreateCluster(clusterSize, &os.ProcAttr{Files: []*os.File{nil, os.Stdout, os.Stderr}}, false)
	if !assert.NoError(t, err) {
		t.Fatal("cannot create cluster")
	}
	defer DestroyCluster(etcds)
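
	// Shorten the standby sync interval via the admin config endpoint so that
	// standbys notice cluster changes quickly (the value appears to be in
	// seconds, matching the sleeps below).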
	resp, _ := tests.Put("http://localhost:7001/v2/admin/config", "application/json", bytes.NewBufferString(`{"syncInterval":1}`))
	if !assert.Equal(t, resp.StatusCode, 200) {
		t.FailNow()
	}
	time.Sleep(time.Second)

	c := etcd.NewClient(nil)
	c.SyncCluster()

	// Verify that only the default number of active machines (9) are
	// registered; the remaining nodes run as standbys.
	result, err := c.Get("_etcd/machines", false, true)
	assert.NoError(t, err)
	assert.Equal(t, len(result.Node.Nodes), 9)

	t.Log("Reconfigure with a smaller active size")
	resp, _ = tests.Put("http://localhost:7001/v2/admin/config", "application/json", bytes.NewBufferString(`{"activeSize":7, "syncInterval":1}`))
	if !assert.Equal(t, resp.StatusCode, 200) {
		t.FailNow()
	}
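
	// Shrinking from 9 to 7 active peers needs two demotions, and the monitor
	// appears to demote at most one peer per cycle, hence two timeouts below.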
	// Wait for two monitor cycles before checking for demotion.
	time.Sleep((2 * server.ActiveMonitorTimeout) + (2 * time.Second))

	// Verify that we now have seven peers.
	result, err = c.Get("_etcd/machines", false, true)
	assert.NoError(t, err)
	assert.Equal(t, len(result.Node.Nodes), 7)

	t.Log("Test the functionality of all servers")
	// Set key.
	time.Sleep(time.Second)
	if _, err := c.Set("foo", "bar", 0); err != nil {
		panic(err)
	}
	time.Sleep(time.Second)

	// Check that all peers and standbys have the value.
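	// Standbys are expected to forward or redirect client requests to an
	// active peer, so every client port should be able to serve the key.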
	for i := range etcds {
		resp, err := tests.Get(fmt.Sprintf("http://localhost:%d/v2/keys/foo", 4000+(i+1)))
		if assert.NoError(t, err) {
			body := tests.ReadBodyJSON(resp)
			if node, _ := body["node"].(map[string]interface{}); assert.NotNil(t, node) {
				assert.Equal(t, node["value"], "bar")
			}
		}
	}
t.Log("Reconfigure with larger active size and wait for join")
resp, _ = tests.Put("http://localhost:7001/v2/admin/config", "application/json", bytes.NewBufferString(`{"activeSize":8, "syncInterval":1}`))
if !assert.Equal(t, resp.StatusCode, 200) {
t.FailNow()
}
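
	// Give a standby roughly one sync interval (1s) plus a buffer to notice
	// the larger active size and rejoin the cluster.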
	time.Sleep((1 * time.Second) + (1 * time.Second))

	// Verify that exactly eight machines are in the cluster.
	result, err = c.Get("_etcd/machines", false, true)
	assert.NoError(t, err)
	assert.Equal(t, len(result.Node.Nodes), 8)
}

// Create a full cluster, disconnect a peer, wait for removal, wait for standby join.
func TestStandbyAutoJoin(t *testing.T) {
	clusterSize := 5
	_, etcds, err := CreateCluster(clusterSize, &os.ProcAttr{Files: []*os.File{nil, os.Stdout, os.Stderr}}, false)
	if err != nil {
		t.Fatal("cannot create cluster")
	}
	defer func() {
		// Wrap this in a closure so that it picks up the updated version of
		// the "etcds" variable.
		DestroyCluster(etcds)
	}()

	c := etcd.NewClient(nil)
	c.SyncCluster()
	time.Sleep(1 * time.Second)

	// Verify that we have five machines.
	result, err := c.Get("_etcd/machines", false, true)
	assert.NoError(t, err)
	assert.Equal(t, len(result.Node.Nodes), 5)
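
	// Dropping activeSize to 4 should demote one peer to standby; removeDelay
	// looks to bound how long an unresponsive peer is kept before removal.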
	// Reconfigure with a short remove delay (2 seconds).
	resp, _ := tests.Put("http://localhost:7001/v2/admin/config", "application/json", bytes.NewBufferString(`{"activeSize":4, "removeDelay":2, "syncInterval":1}`))
	if !assert.Equal(t, resp.StatusCode, 200) {
		t.FailNow()
	}

	// Wait for a monitor cycle before checking for removal.
	time.Sleep(server.ActiveMonitorTimeout + (1 * time.Second))

	// Verify that we now have four peers.
	result, err = c.Get("_etcd/machines", false, true)
	assert.NoError(t, err)
	assert.Equal(t, len(result.Node.Nodes), 4)

	// Kill one of the active peers and drop it from our process list so that
	// the deferred DestroyCluster does not try to stop it again.
	killedProc := etcds[1]
	etcds = append(etcds[:1], etcds[2:]...)
	if err := killedProc.Kill(); err != nil {
		panic(err.Error())
	}
	killedProc.Release()
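
	// The sleeps below roughly correspond to server.PeerActivityMonitorTimeout
	// (detecting the dead peer) plus one sync interval (a standby noticing the
	// open slot).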
	// Wait for it to get dropped.
	time.Sleep(server.PeerActivityMonitorTimeout + (1 * time.Second))

	// Wait for the standby to join.
	time.Sleep((1 * time.Second) + (1 * time.Second))

	// Verify that we have four peers.
	result, err = c.Get("_etcd/machines", true, true)
	assert.NoError(t, err)
	assert.Equal(t, len(result.Node.Nodes), 4)

	// Verify that node2 is not one of those peers.
	_, err = c.Get("_etcd/machines/node2", false, false)
	assert.Error(t, err)
}

// Create a full cluster and then change the active size gradually.
func TestStandbyGradualChange(t *testing.T) {
	clusterSize := 9
	_, etcds, err := CreateCluster(clusterSize, &os.ProcAttr{Files: []*os.File{nil, os.Stdout, os.Stderr}}, false)
	assert.NoError(t, err)
	defer DestroyCluster(etcds)
	if err != nil {
		t.Fatal("cannot create cluster")
	}
	time.Sleep(time.Second)

	c := etcd.NewClient(nil)
	c.SyncCluster()

	num := clusterSize
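
	// Shrink the active size one node at a time (down to 3), then grow it back
	// the same way, checking the machine list and key replication at each step.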
for inc := 0; inc < 2; inc++ {
for i := 0; i < 6; i++ {
// Verify that we just have i machines.
result, err := c.Get("_etcd/machines", false, true)
assert.NoError(t, err)
assert.Equal(t, len(result.Node.Nodes), num)
if inc == 0 {
num--
} else {
num++
}
t.Log("Reconfigure with active size", num)
resp, _ := tests.Put("http://localhost:7001/v2/admin/config", "application/json", bytes.NewBufferString(fmt.Sprintf(`{"activeSize":%d, "syncInterval":1}`, num)))
if !assert.Equal(t, resp.StatusCode, 200) {
t.FailNow()
}
if inc == 0 {
// Wait for monitor cycles before checking for demotion.
time.Sleep(server.ActiveMonitorTimeout + (1 * time.Second))
} else {
time.Sleep(time.Second + (1 * time.Second))
}
// Verify that we now have peers.
result, err = c.Get("_etcd/machines", false, true)
assert.NoError(t, err)
assert.Equal(t, len(result.Node.Nodes), num)
t.Log("Test the functionality of all servers")
// Set key.
if _, err := c.Set("foo", "bar", 0); err != nil {
panic(err)
}
time.Sleep(100 * time.Millisecond)
// Check that all peers and standbys have the value.
for i := range etcds {
resp, err := tests.Get(fmt.Sprintf("http://localhost:%d/v2/keys/foo", 4000+(i+1)))
if assert.NoError(t, err) {
body := tests.ReadBodyJSON(resp)
if node, _ := body["node"].(map[string]interface{}); assert.NotNil(t, node) {
assert.Equal(t, node["value"], "bar")
}
}
}
}
}
}

// Create a full cluster and then change the active size dramatically.
func TestStandbyDramaticChange(t *testing.T) {
	clusterSize := 9
	_, etcds, err := CreateCluster(clusterSize, &os.ProcAttr{Files: []*os.File{nil, os.Stdout, os.Stderr}}, false)
	assert.NoError(t, err)
	defer DestroyCluster(etcds)
	if err != nil {
		t.Fatal("cannot create cluster")
	}
	time.Sleep(time.Second)

	c := etcd.NewClient(nil)
	c.SyncCluster()

	num := clusterSize
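
	// Swing the active size down by six and back up by six in each round,
	// verifying the machine count and key replication after every change.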
for i := 0; i < 3; i++ {
for inc := 0; inc < 2; inc++ {
// Verify that we just have i machines.
result, err := c.Get("_etcd/machines", false, true)
assert.NoError(t, err)
assert.Equal(t, len(result.Node.Nodes), num)
if inc == 0 {
num -= 6
} else {
num += 6
}
t.Log("Reconfigure with active size", num)
resp, _ := tests.Put("http://localhost:7001/v2/admin/config", "application/json", bytes.NewBufferString(fmt.Sprintf(`{"activeSize":%d, "syncInterval":1}`, num)))
if !assert.Equal(t, resp.StatusCode, 200) {
t.FailNow()
}
if inc == 0 {
// Wait for monitor cycles before checking for demotion.
time.Sleep(6*server.ActiveMonitorTimeout + (1 * time.Second))
} else {
time.Sleep(time.Second + (1 * time.Second))
}
// Verify that we now have peers.
result, err = c.Get("_etcd/machines", false, true)
assert.NoError(t, err)
assert.Equal(t, len(result.Node.Nodes), num)
t.Log("Test the functionality of all servers")
// Set key.
if _, err := c.Set("foo", "bar", 0); err != nil {
panic(err)
}
time.Sleep(100 * time.Millisecond)
// Check that all peers and standbys have the value.
for i := range etcds {
resp, err := tests.Get(fmt.Sprintf("http://localhost:%d/v2/keys/foo", 4000+(i+1)))
if assert.NoError(t, err) {
body := tests.ReadBodyJSON(resp)
if node, _ := body["node"].(map[string]interface{}); assert.NotNil(t, node) {
assert.Equal(t, node["value"], "bar")
}
}
}
}
}
}
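
// Create a two-node cluster, remove one peer, then simulate a join request
// from a machine that never actually starts, and verify that the cluster ends
// up with a single active peer.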
func TestStandbyJoinMiss(t *testing.T) {
	clusterSize := 2
	_, etcds, err := CreateCluster(clusterSize, &os.ProcAttr{Files: []*os.File{nil, os.Stdout, os.Stderr}}, false)
	if err != nil {
		t.Fatal("cannot create cluster")
	}
	defer DestroyCluster(etcds)

	c := etcd.NewClient(nil)
	c.SyncCluster()
	time.Sleep(1 * time.Second)

	// Verify that we have two machines.
	result, err := c.Get("_etcd/machines", false, true)
	assert.NoError(t, err)
	assert.Equal(t, len(result.Node.Nodes), clusterSize)
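
	// Stretch removeDelay and syncInterval to 4 seconds each so the join and
	// removal steps below have time to play out between monitor cycles.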
resp, _ := tests.Put("http://localhost:7001/v2/admin/config", "application/json", bytes.NewBufferString(`{"removeDelay":4, "syncInterval":4}`))
if !assert.Equal(t, resp.StatusCode, 200) {
t.FailNow()
}
time.Sleep(time.Second)
resp, _ = tests.Delete("http://localhost:7001/v2/admin/machines/node2", "application/json", nil)
if !assert.Equal(t, resp.StatusCode, 200) {
t.FailNow()
}
// Wait for a monitor cycle before checking for removal.
time.Sleep(server.ActiveMonitorTimeout + (1 * time.Second))
// Verify that we now have one peer.
result, err = c.Get("_etcd/machines", false, true)
assert.NoError(t, err)
assert.Equal(t, len(result.Node.Nodes), 1)

	// Simulate a join failure: issue a join command for node2 even though that
	// machine is never started.
	_, err = server.NewClient(nil).AddMachine("http://localhost:7001",
		&server.JoinCommand{
			MinVersion: store.MinVersion(),
			MaxVersion: store.MaxVersion(),
			Name:       "node2",
			RaftURL:    "http://127.0.0.1:7002",
			EtcdURL:    "http://127.0.0.1:4002",
		})
	assert.NoError(t, err)
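
	// node2 never comes up, so waiting past the 4-second removeDelay should
	// give the cluster time to treat the phantom peer as inactive.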
	time.Sleep(6 * time.Second)

	go tests.Delete("http://localhost:7001/v2/admin/machines/node2", "application/json", nil)
	time.Sleep(time.Second)

	result, err = c.Get("_etcd/machines", false, true)
	assert.NoError(t, err)
	assert.Equal(t, len(result.Node.Nodes), 1)
}