feat(peer_server): forbid rejoining with different name

Or it will confuse the cluster, especially the heartbeat between nodes.
release-0.4
Yicheng Qin 2014-04-17 15:36:51 -07:00
parent b0ac8a4b4b
commit 0c95e1eabb
6 changed files with 115 additions and 20 deletions

View File

@ -24,15 +24,16 @@ import (
var errors = map[int]string{
// command related errors
EcodeKeyNotFound: "Key not found",
EcodeTestFailed: "Compare failed", //test and set
EcodeNotFile: "Not a file",
EcodeNoMorePeer: "Reached the max number of peers in the cluster",
EcodeNotDir: "Not a directory",
EcodeNodeExist: "Key already exists", // create
EcodeRootROnly: "Root is read only",
EcodeKeyIsPreserved: "The prefix of given key is a keyword in etcd",
EcodeDirNotEmpty: "Directory not empty",
EcodeKeyNotFound: "Key not found",
EcodeTestFailed: "Compare failed", //test and set
EcodeNotFile: "Not a file",
EcodeNoMorePeer: "Reached the max number of peers in the cluster",
EcodeNotDir: "Not a directory",
EcodeNodeExist: "Key already exists", // create
EcodeRootROnly: "Root is read only",
EcodeKeyIsPreserved: "The prefix of given key is a keyword in etcd",
EcodeDirNotEmpty: "Directory not empty",
EcodeExistingPeerAddr: "Peer address has existed",
// Post form related errors
EcodeValueRequired: "Value is Required in POST form",
@ -60,15 +61,16 @@ var errors = map[int]string{
}
const (
EcodeKeyNotFound = 100
EcodeTestFailed = 101
EcodeNotFile = 102
EcodeNoMorePeer = 103
EcodeNotDir = 104
EcodeNodeExist = 105
EcodeKeyIsPreserved = 106
EcodeRootROnly = 107
EcodeDirNotEmpty = 108
EcodeKeyNotFound = 100
EcodeTestFailed = 101
EcodeNotFile = 102
EcodeNoMorePeer = 103
EcodeNotDir = 104
EcodeNodeExist = 105
EcodeKeyIsPreserved = 106
EcodeRootROnly = 107
EcodeDirNotEmpty = 108
EcodeExistingPeerAddr = 109
EcodeValueRequired = 200
EcodePrevValueRequired = 201

View File

@ -63,6 +63,15 @@ func (c *JoinCommandV1) Apply(context raft.Context) (interface{}, error) {
return b, nil
}
// Check if the join command adds an instance that collides with existing one on peer URL.
peerURLs := ps.registry.PeerURLs(ps.raftServer.Leader(), c.Name)
for _, peerURL := range peerURLs {
if peerURL == c.EtcdURL {
log.Warnf("%v tries to join the cluster with existing URL %v", c.Name, c.EtcdURL)
return []byte{0}, etcdErr.NewError(etcdErr.EcodeExistingPeerAddr, c.EtcdURL, context.CommitIndex())
}
}
// Check peer number in the cluster
if ps.registry.PeerCount() >= ps.ClusterConfig().ActiveSize {
log.Debug("Reject join request from ", c.Name)
@ -137,6 +146,15 @@ func (c *JoinCommandV2) Apply(context raft.Context) (interface{}, error) {
return json.Marshal(msg)
}
// Check if the join command adds an instance that collides with existing one on peer URL.
peerURLs := ps.registry.PeerURLs(ps.raftServer.Leader(), c.Name)
for _, peerURL := range peerURLs {
if peerURL == c.PeerURL {
log.Warnf("%v tries to join the cluster with existing URL %v", c.Name, c.PeerURL)
return []byte{0}, etcdErr.NewError(etcdErr.EcodeExistingPeerAddr, c.PeerURL, context.CommitIndex())
}
}
// Check peer number in the cluster.
if ps.registry.PeerCount() >= ps.ClusterConfig().ActiveSize {
log.Debug("Join as standby ", c.Name)

View File

@ -10,6 +10,7 @@ import (
"net/url"
"sort"
"strconv"
"strings"
"sync"
"time"
@ -187,6 +188,12 @@ func (s *PeerServer) findCluster(discoverURL string, peers []string) {
// Try its best to find possible peers, and connect with them.
if !isNewNode {
// It is not allowed to join the cluster with existing peer address
// This prevents old node joining with different name by mistake.
if !s.checkPeerAddressNonconflict() {
log.Fatalf("%v is not allowed to join the cluster with existing URL %v", s.Config.Name, s.Config.URL)
}
// Take old nodes into account.
allPeers := s.getKnownPeers()
// Discover registered peers.
@ -426,6 +433,25 @@ func (s *PeerServer) Upgradable() error {
return nil
}
// checkPeerAddressNonconflict checks whether the peer address has existed with different name.
func (s *PeerServer) checkPeerAddressNonconflict() bool {
// there exists the (name, peer address) pair
if peerURL, ok := s.registry.PeerURL(s.Config.Name); ok {
if peerURL == s.Config.URL {
return true
}
}
// check all existing peer addresses
peerURLs := s.registry.PeerURLs(s.raftServer.Leader(), s.Config.Name)
for _, peerURL := range peerURLs {
if peerURL == s.Config.URL {
return false
}
}
return true
}
// Helper function to do discovery and return results in expected format
func (s *PeerServer) handleDiscovery(discoverURL string) (peers []string, err error) {
peers, err = discovery.Do(discoverURL, s.Config.Name, s.Config.URL)
@ -455,6 +481,8 @@ func (s *PeerServer) handleDiscovery(discoverURL string) (peers []string, err er
// getKnownPeers gets the previous peers from log
func (s *PeerServer) getKnownPeers() []string {
peers := s.registry.PeerURLs(s.raftServer.Leader(), s.Config.Name)
log.Infof("Peer URLs in log: %s / %s (%s)", s.raftServer.Leader(), s.Config.Name, strings.Join(peers, ","))
for i := range peers {
u, err := url.Parse(peers[i])
if err != nil {

View File

@ -300,8 +300,7 @@ func (r *Registry) urls(key, leaderName, selfName string, url func(key, name str
}
}
log.Infof("URLs: %s: %s / %s (%s)", key, leaderName, selfName, strings.Join(urls, ","))
log.Debugf("URLs: %s: %s / %s (%s)", key, leaderName, selfName, strings.Join(urls, ","))
return urls
}

View File

@ -146,3 +146,48 @@ func TestReplaceWithDifferentPeerAddress(t *testing.T) {
t.Fatal("Failed to set value in etcd cluster")
}
}
// Create a five nodes
// Let the sixth instance join with different name and existing peer address
func TestRejoinWithDifferentName(t *testing.T) {
procAttr := new(os.ProcAttr)
procAttr.Files = []*os.File{nil, os.Stdout, os.Stderr}
clusterSize := 5
argGroup, etcds, err := CreateCluster(clusterSize, procAttr, false)
if err != nil {
t.Fatal("cannot create cluster")
}
defer DestroyCluster(etcds)
time.Sleep(2 * time.Second)
num := rand.Int() % clusterSize
fmt.Println("join node 6 that collides with node", num+1)
// kill
etcds[num].Kill()
etcds[num].Release()
time.Sleep(time.Second)
for i := 0; i < 2; i++ {
// restart
if i == 0 {
etcds[num], err = os.StartProcess(EtcdBinPath, append(argGroup[num], "-name=node6", "-peers=127.0.0.1:7002"), procAttr)
} else {
etcds[num], err = os.StartProcess(EtcdBinPath, append(argGroup[num], "-f", "-name=node6", "-peers=127.0.0.1:7002"), procAttr)
}
if err != nil {
t.Fatal("fail starting etcd:", err)
}
timer := time.AfterFunc(10*time.Second, func() {
t.Fatal("new etcd should fail immediately")
})
etcds[num].Wait()
etcds[num] = nil
timer.Stop()
}
}

View File

@ -154,6 +154,9 @@ func CreateCluster(size int, procAttr *os.ProcAttr, ssl bool) ([][]string, []*os
// Destroy all the nodes in the cluster
func DestroyCluster(etcds []*os.Process) error {
for _, etcd := range etcds {
if etcd == nil {
continue
}
err := etcd.Kill()
if err != nil {
panic(err.Error())