commit
869d1a3c69
12
README.md
12
README.md
|
@ -355,6 +355,18 @@ We should see there are three nodes in the cluster
|
|||
0.0.0.0:4001,0.0.0.0:4002,0.0.0.0:4003
|
||||
```
|
||||
|
||||
Machine list is also available via this API
|
||||
|
||||
```sh
|
||||
curl -L http://127.0.0.1:4001/v1/keys/_etcd/machines
|
||||
```
|
||||
|
||||
```json
|
||||
[{"action":"GET","key":"/machines/node1","value":"0.0.0.0,7001,4001","index":4},{"action":"GET","key":"/machines/node3","value":"0.0.0.0,7002,4002","index":4},{"action":"GET","key":"/machines/node4","value":"0.0.0.0,7003,4003","index":4}]
|
||||
```
|
||||
|
||||
The key of the machine is based on the ```commit index``` when it was added. The value of the machine is ```hostname```, ```raft port``` and ```client port```.
|
||||
|
||||
Also try to get the current leader in the cluster
|
||||
|
||||
```
|
||||
|
|
|
@ -35,6 +35,14 @@ func Multiplexer(w http.ResponseWriter, req *http.Request) {
|
|||
func SetHttpHandler(w *http.ResponseWriter, req *http.Request) {
|
||||
key := req.URL.Path[len("/v1/keys/"):]
|
||||
|
||||
if store.CheckKeyword(key) {
|
||||
|
||||
(*w).WriteHeader(http.StatusBadRequest)
|
||||
|
||||
(*w).Write(newJsonError(400, "Set"))
|
||||
return
|
||||
}
|
||||
|
||||
debug("[recv] POST http://%v/v1/keys/%s", raftServer.Name(), key)
|
||||
|
||||
value := req.FormValue("value")
|
||||
|
@ -57,6 +65,7 @@ func SetHttpHandler(w *http.ResponseWriter, req *http.Request) {
|
|||
(*w).WriteHeader(http.StatusBadRequest)
|
||||
|
||||
(*w).Write(newJsonError(202, "Set"))
|
||||
return
|
||||
}
|
||||
|
||||
if len(prevValue) != 0 {
|
||||
|
@ -94,7 +103,8 @@ func dispatch(c Command, w *http.ResponseWriter, req *http.Request, client bool)
|
|||
if raftServer.State() == "leader" {
|
||||
if body, err := raftServer.Do(c); err != nil {
|
||||
if _, ok := err.(store.NotFoundError); ok {
|
||||
http.NotFound((*w), req)
|
||||
(*w).WriteHeader(http.StatusNotFound)
|
||||
(*w).Write(newJsonError(100, err.Error()))
|
||||
return
|
||||
}
|
||||
|
||||
|
@ -109,13 +119,19 @@ func dispatch(c Command, w *http.ResponseWriter, req *http.Request, client bool)
|
|||
(*w).Write(newJsonError(102, err.Error()))
|
||||
return
|
||||
}
|
||||
if err.Error() == errors[103] {
|
||||
(*w).WriteHeader(http.StatusBadRequest)
|
||||
(*w).Write(newJsonError(103, ""))
|
||||
return
|
||||
}
|
||||
(*w).WriteHeader(http.StatusInternalServerError)
|
||||
(*w).Write(newJsonError(300, err.Error()))
|
||||
return
|
||||
} else {
|
||||
|
||||
if body == nil {
|
||||
http.NotFound((*w), req)
|
||||
(*w).WriteHeader(http.StatusNotFound)
|
||||
(*w).Write(newJsonError(100, err.Error()))
|
||||
} else {
|
||||
body, ok := body.([]byte)
|
||||
// this should not happen
|
||||
|
@ -221,13 +237,14 @@ func GetHttpHandler(w *http.ResponseWriter, req *http.Request) {
|
|||
if body, err := command.Apply(raftServer); err != nil {
|
||||
|
||||
if _, ok := err.(store.NotFoundError); ok {
|
||||
http.NotFound((*w), req)
|
||||
(*w).WriteHeader(http.StatusNotFound)
|
||||
(*w).Write(newJsonError(100, err.Error()))
|
||||
return
|
||||
}
|
||||
|
||||
(*w).WriteHeader(http.StatusInternalServerError)
|
||||
(*w).Write(newJsonError(300, ""))
|
||||
return
|
||||
|
||||
} else {
|
||||
body, ok := body.([]byte)
|
||||
if !ok {
|
||||
|
@ -237,7 +254,6 @@ func GetHttpHandler(w *http.ResponseWriter, req *http.Request) {
|
|||
(*w).WriteHeader(http.StatusOK)
|
||||
(*w).Write(body)
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -274,7 +290,6 @@ func WatchHttpHandler(w http.ResponseWriter, req *http.Request) {
|
|||
if body, err := command.Apply(raftServer); err != nil {
|
||||
warn("Unable to do watch command: %v", err)
|
||||
w.WriteHeader(http.StatusInternalServerError)
|
||||
return
|
||||
} else {
|
||||
w.WriteHeader(http.StatusOK)
|
||||
|
||||
|
@ -284,7 +299,6 @@ func WatchHttpHandler(w http.ResponseWriter, req *http.Request) {
|
|||
}
|
||||
|
||||
w.Write(body)
|
||||
return
|
||||
}
|
||||
|
||||
}
|
||||
|
|
15
command.go
15
command.go
|
@ -119,12 +119,25 @@ func (c *JoinCommand) CommandName() string {
|
|||
|
||||
// Join a server to the cluster
|
||||
func (c *JoinCommand) Apply(raftServer *raft.Server) (interface{}, error) {
|
||||
|
||||
// check machine number in the cluster
|
||||
num := machineNum()
|
||||
if num == maxClusterSize {
|
||||
return []byte("join fail"), fmt.Errorf(errors[103])
|
||||
}
|
||||
|
||||
// add peer in raft
|
||||
err := raftServer.AddPeer(c.Name)
|
||||
|
||||
// add machine in etcd
|
||||
addMachine(c.Name, c.Hostname, c.RaftPort, c.ClientPort)
|
||||
|
||||
// add machine in etcd storage
|
||||
nodeName := fmt.Sprintf("%s%d", "node", raftServer.CommitIndex())
|
||||
key := path.Join("machines", nodeName)
|
||||
key := path.Join("_etcd/machines", nodeName)
|
||||
value := fmt.Sprintf("%s,%d,%d", c.Hostname, c.RaftPort, c.ClientPort)
|
||||
etcdStore.Set(key, value, time.Unix(0, 0), raftServer.CommitIndex())
|
||||
|
||||
return []byte("join success"), err
|
||||
}
|
||||
|
||||
|
|
6
error.go
6
error.go
|
@ -13,6 +13,8 @@ func init() {
|
|||
errors[100] = "Key Not Found"
|
||||
errors[101] = "The given PrevValue is not equal to the value of the key"
|
||||
errors[102] = "Not A File"
|
||||
errors[103] = "Reached the max number of machines in the cluster"
|
||||
|
||||
// Post form related errors
|
||||
errors[200] = "Value is Required in POST form"
|
||||
errors[201] = "PrevValue is Required in POST form"
|
||||
|
@ -21,6 +23,10 @@ func init() {
|
|||
// raft related errors
|
||||
errors[300] = "Raft Internal Error"
|
||||
errors[301] = "During Leader Election"
|
||||
|
||||
// keyword
|
||||
errors[400] = "The prefix of the given key is a keyword in etcd"
|
||||
|
||||
}
|
||||
|
||||
type jsonError struct {
|
||||
|
|
37
etcd.go
37
etcd.go
|
@ -16,6 +16,8 @@ import (
|
|||
"net"
|
||||
"net/http"
|
||||
"os"
|
||||
"os/signal"
|
||||
"runtime/pprof"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
@ -57,6 +59,10 @@ var snapshot bool
|
|||
|
||||
var retryTimes int
|
||||
|
||||
var maxClusterSize int
|
||||
|
||||
var cpuprofile string
|
||||
|
||||
func init() {
|
||||
flag.BoolVar(&verbose, "v", false, "verbose logging")
|
||||
flag.BoolVar(&veryVerbose, "vv", false, "very verbose logging")
|
||||
|
@ -86,6 +92,10 @@ func init() {
|
|||
flag.IntVar(&maxSize, "m", 1024, "the max size of result buffer")
|
||||
|
||||
flag.IntVar(&retryTimes, "r", 3, "the max retry attempts when trying to join a cluster")
|
||||
|
||||
flag.IntVar(&maxClusterSize, "maxsize", 9, "the max size of the cluster")
|
||||
|
||||
flag.StringVar(&cpuprofile, "cpuprofile", "", "write cpu profile to file")
|
||||
}
|
||||
|
||||
// CONSTANTS
|
||||
|
@ -156,6 +166,26 @@ var info *Info
|
|||
func main() {
|
||||
flag.Parse()
|
||||
|
||||
if cpuprofile != "" {
|
||||
f, err := os.Create(cpuprofile)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
pprof.StartCPUProfile(f)
|
||||
defer pprof.StopCPUProfile()
|
||||
|
||||
c := make(chan os.Signal, 1)
|
||||
signal.Notify(c, os.Interrupt)
|
||||
go func() {
|
||||
for sig := range c {
|
||||
fmt.Printf("captured %v, stopping profiler and exiting..", sig)
|
||||
pprof.StopCPUProfile()
|
||||
os.Exit(1)
|
||||
}
|
||||
}()
|
||||
|
||||
}
|
||||
|
||||
if veryVerbose {
|
||||
verbose = true
|
||||
raft.SetLogLevel(raft.Debug)
|
||||
|
@ -276,6 +306,10 @@ func startRaft(securityType int) {
|
|||
}
|
||||
err = joinCluster(raftServer, machine)
|
||||
if err != nil {
|
||||
if err.Error() == errors[103] {
|
||||
fmt.Println(err)
|
||||
os.Exit(1)
|
||||
}
|
||||
debug("cannot join to cluster via machine %s %s", machine, err)
|
||||
} else {
|
||||
success = true
|
||||
|
@ -602,6 +636,9 @@ func joinCluster(s *raft.Server, serverName string) error {
|
|||
debug("Send Join Request to %s", address)
|
||||
json.NewEncoder(&b).Encode(command)
|
||||
resp, err = t.Post(fmt.Sprintf("%s/join", address), &b)
|
||||
} else if resp.StatusCode == http.StatusBadRequest {
|
||||
debug("Reach max number machines in the cluster")
|
||||
return fmt.Errorf(errors[103])
|
||||
} else {
|
||||
return fmt.Errorf("Unable to join")
|
||||
}
|
||||
|
|
|
@ -2,6 +2,7 @@ package main
|
|||
|
||||
import (
|
||||
"fmt"
|
||||
"math/rand"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
@ -34,7 +35,7 @@ func TestKillLeader(t *testing.T) {
|
|||
|
||||
leader := "0.0.0.0:7001"
|
||||
|
||||
for i := 0; i < 200; i++ {
|
||||
for i := 0; i < 10; i++ {
|
||||
port, _ := strconv.Atoi(strings.Split(leader, ":")[1])
|
||||
num := port - 7001
|
||||
fmt.Println("kill server ", num)
|
||||
|
@ -58,5 +59,63 @@ func TestKillLeader(t *testing.T) {
|
|||
fmt.Println("Leader election time average is", avgTime, "with election timeout", ELECTIONTIMEOUT)
|
||||
etcds[num], err = os.StartProcess("etcd", argGroup[num], procAttr)
|
||||
}
|
||||
}
|
||||
|
||||
// TestKillRandom kills random machines in the cluster and
|
||||
// restart them after all other machines agree on the same leader
|
||||
func TestKillRandom(t *testing.T) {
|
||||
procAttr := new(os.ProcAttr)
|
||||
procAttr.Files = []*os.File{nil, os.Stdout, os.Stderr}
|
||||
|
||||
clusterSize := 9
|
||||
argGroup, etcds, err := createCluster(clusterSize, procAttr)
|
||||
|
||||
if err != nil {
|
||||
t.Fatal("cannot create cluster")
|
||||
}
|
||||
|
||||
defer destroyCluster(etcds)
|
||||
|
||||
leaderChan := make(chan string, 1)
|
||||
|
||||
time.Sleep(3 * time.Second)
|
||||
|
||||
go leaderMonitor(clusterSize, 4, leaderChan)
|
||||
|
||||
toKill := make(map[int]bool)
|
||||
|
||||
for i := 0; i < 20; i++ {
|
||||
fmt.Printf("TestKillRandom Round[%d/20]\n", i)
|
||||
|
||||
j := 0
|
||||
for {
|
||||
|
||||
r := rand.Int31n(9)
|
||||
if _, ok := toKill[int(r)]; !ok {
|
||||
j++
|
||||
toKill[int(r)] = true
|
||||
}
|
||||
|
||||
if j > 3 {
|
||||
break
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
for num, _ := range toKill {
|
||||
etcds[num].Kill()
|
||||
etcds[num].Release()
|
||||
}
|
||||
|
||||
<-leaderChan
|
||||
|
||||
for num, _ := range toKill {
|
||||
etcds[num], err = os.StartProcess("etcd", argGroup[num], procAttr)
|
||||
}
|
||||
|
||||
toKill = make(map[int]bool)
|
||||
}
|
||||
|
||||
<-leaderChan
|
||||
|
||||
}
|
||||
|
|
|
@ -28,3 +28,8 @@ func getClientAddr(name string) (string, bool) {
|
|||
|
||||
return addr, true
|
||||
}
|
||||
|
||||
// machineNum returns the number of machines in the cluster
|
||||
func machineNum() int {
|
||||
return len(machinesMap)
|
||||
}
|
||||
|
|
|
@ -0,0 +1,37 @@
|
|||
package store
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestKeywords(t *testing.T) {
|
||||
keyword := CheckKeyword("machines")
|
||||
if !keyword {
|
||||
t.Fatal("machines should be keyword")
|
||||
}
|
||||
|
||||
keyword = CheckKeyword("/machines")
|
||||
|
||||
if !keyword {
|
||||
t.Fatal("/machines should be keyword")
|
||||
}
|
||||
|
||||
keyword = CheckKeyword("/machines/")
|
||||
|
||||
if !keyword {
|
||||
t.Fatal("/machines/ contains keyword prefix")
|
||||
}
|
||||
|
||||
keyword = CheckKeyword("/machines/node1")
|
||||
|
||||
if !keyword {
|
||||
t.Fatal("/machines/* contains keyword prefix")
|
||||
}
|
||||
|
||||
keyword = CheckKeyword("/nokeyword/machines/node1")
|
||||
|
||||
if keyword {
|
||||
t.Fatal("this does not contain keyword prefix")
|
||||
}
|
||||
|
||||
}
|
|
@ -1,8 +1,33 @@
|
|||
package store
|
||||
|
||||
import (
|
||||
"path"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// keywords for internal useage
|
||||
// Key for string keyword; Value for only checking prefix
|
||||
var keywords = map[string]bool{
|
||||
"/acoounts": true,
|
||||
"/_etcd": true,
|
||||
"/ephemeralNodes": true,
|
||||
"/machines": true,
|
||||
}
|
||||
|
||||
// CheckKeyword will check if the key contains the keyword.
|
||||
// For now, we only check for prefix.
|
||||
func CheckKeyword(key string) bool {
|
||||
key = path.Clean("/" + key)
|
||||
|
||||
// find the second "/"
|
||||
i := strings.Index(key[1:], "/")
|
||||
|
||||
var prefix string
|
||||
|
||||
if i == -1 {
|
||||
prefix = key
|
||||
} else {
|
||||
prefix = key[:i+1]
|
||||
}
|
||||
_, ok := keywords[prefix]
|
||||
|
||||
return ok
|
||||
}
|
||||
|
|
|
@ -36,7 +36,7 @@ func TestSaveAndRecovery(t *testing.T) {
|
|||
|
||||
s := CreateStore(100)
|
||||
s.Set("foo", "bar", time.Unix(0, 0), 1)
|
||||
s.Set("foo2", "bar2", time.Now().Add(time.Second * 5), 2)
|
||||
s.Set("foo2", "bar2", time.Now().Add(time.Second*5), 2)
|
||||
state, err := s.Save()
|
||||
|
||||
if err != nil {
|
||||
|
|
18
test.go
18
test.go
|
@ -11,9 +11,10 @@ import (
|
|||
"time"
|
||||
)
|
||||
|
||||
var client = http.Client{Transport: &http.Transport{
|
||||
Dial: dialTimeoutFast,
|
||||
},
|
||||
var client = http.Client{
|
||||
Transport: &http.Transport{
|
||||
Dial: dialTimeoutFast,
|
||||
},
|
||||
}
|
||||
|
||||
// Sending set commands
|
||||
|
@ -33,8 +34,6 @@ func set(stop chan bool) {
|
|||
stopSet = true
|
||||
|
||||
default:
|
||||
fmt.Println("Set failed!")
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -97,12 +96,15 @@ func destroyCluster(etcds []*os.Process) error {
|
|||
func leaderMonitor(size int, allowDeadNum int, leaderChan chan string) {
|
||||
leaderMap := make(map[int]string)
|
||||
baseAddrFormat := "http://0.0.0.0:400%d/leader"
|
||||
|
||||
for {
|
||||
knownLeader := "unknown"
|
||||
dead := 0
|
||||
var i int
|
||||
|
||||
for i = 0; i < size; i++ {
|
||||
leader, err := getLeader(fmt.Sprintf(baseAddrFormat, i+1))
|
||||
|
||||
if err == nil {
|
||||
leaderMap[i] = leader
|
||||
|
||||
|
@ -112,14 +114,18 @@ func leaderMonitor(size int, allowDeadNum int, leaderChan chan string) {
|
|||
if leader != knownLeader {
|
||||
break
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
} else {
|
||||
dead++
|
||||
if dead > allowDeadNum {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if i == size {
|
||||
select {
|
||||
case <-leaderChan:
|
||||
|
@ -129,8 +135,10 @@ func leaderMonitor(size int, allowDeadNum int, leaderChan chan string) {
|
|||
}
|
||||
|
||||
}
|
||||
|
||||
time.Sleep(time.Millisecond * 10)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func getLeader(addr string) (string, error) {
|
||||
|
|
Loading…
Reference in New Issue