Compare commits
14 Commits
v3.4.0-rc.2...v3.4.0-rc.4
| SHA1 |
| --- |
| e5528acf57 |
| 9977550ae9 |
| 4d7a6e2755 |
| 5e8757c3c5 |
| 012e38fef3 |
| 41a2cfa122 |
| 9f8a1edf38 |
| 165ba72593 |
| 9c850ccef0 |
| 61d6efda4c |
| b76f149c35 |
| 5e33bb1a95 |
| 83bf125d93 |
| d23af41bca |
````diff
@@ -271,7 +271,10 @@ etcdctl --endpoints=$ENDPOINTS endpoint health
 <img src="https://storage.googleapis.com/etcd/demo/11_etcdctl_snapshot_2016051001.gif" alt="11_etcdctl_snapshot_2016051001"/>
+
+Snapshot can only be requested from one etcd node, so `--endpoints` flag should contain only one endpoint.
+
 ```
 ENDPOINTS=$HOST_1:2379
 etcdctl --endpoints=$ENDPOINTS snapshot save my.db
 
 Snapshot saved at my.db
 ```
````
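Once saved, the snapshot file can be inspected offline before it is ever used for a restore; a small usage sketch, assuming the `my.db` file produced above:

```
etcdctl snapshot status my.db --write-out=table
```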
```diff
@@ -13,26 +13,34 @@ Background
 
 Membership reconfiguration has been one of the biggest operational challenges. Let’s review common challenges.
 
 ### 1. New Cluster member overloads Leader
 
 A newly joined etcd member starts with no data, thus demanding more updates from leader until it catches up with leader’s logs. Then leader’s network is more likely to be overloaded, blocking or dropping leader heartbeats to followers. In such case, a follower may election-timeout to start a new leader election. That is, a cluster with a new member is more vulnerable to leader election. Both leader election and the subsequent update propagation to the new member are prone to causing periods of cluster unavailability (see *Figure 1*).
 
 ![server-learner-figure-01](https://storage.googleapis.com/etcd/docs/img/server-learner-figure-01.png)
 
 ### 2. Network Partitions scenarios
 
 What if network partition happens? It depends on leader partition. If the leader still maintains the active quorum, the cluster would continue to operate (see *Figure 2*).
 
 ![server-learner-figure-02](https://storage.googleapis.com/etcd/docs/img/server-learner-figure-02.png)
 
 #### 2.1 Leader isolation
 
 What if the leader becomes isolated from the rest of the cluster? Leader monitors progress of each follower. When leader loses connectivity from the quorum, it reverts back to follower which will affect the cluster availability (see *Figure 3*).
 
 ![server-learner-figure-03](https://storage.googleapis.com/etcd/docs/img/server-learner-figure-03.png)
 
-When a new node is added to 3 node cluster, the cluster size becomes 4 and the quorum size becomes 3. What if a new node had joined the cluster, and then network partition happens? It depends on which partition the new member gets located after partition. If the new node happens to be located in the same partition as leader’s, the leader still maintains the active quorum of 3. No leadership election happens, and no cluster availability gets affected (see *Figure 4*).
+When a new node is added to 3 node cluster, the cluster size becomes 4 and the quorum size becomes 3. What if a new node had joined the cluster, and then network partition happens? It depends on which partition the new member gets located after partition.
+
+#### 2.2 Cluster Split 3+1
+
+If the new node happens to be located in the same partition as leader’s, the leader still maintains the active quorum of 3. No leadership election happens, and no cluster availability gets affected (see *Figure 4*).
 
 ![server-learner-figure-04](https://storage.googleapis.com/etcd/docs/img/server-learner-figure-04.png)
 
 #### 2.3 Cluster Split 2+2
 
 If the cluster is 2-and-2 partitioned, then neither of partition maintains the quorum of 3. In this case, leadership election happens (see *Figure 5*).
 
 ![server-learner-figure-05](https://storage.googleapis.com/etcd/docs/img/server-learner-figure-05.png)
 
 #### 2.4 Quorum Lost
 
 What if network partition happens first, and then a new member gets added? A partitioned 3-node cluster already has one disconnected follower. When a new member is added, the quorum changes from 2 to 3. Now, this cluster has only 2 active nodes out of 4, thus losing quorum and starting a new leadership election (see *Figure 6*).
 
 ![server-learner-figure-06](https://storage.googleapis.com/etcd/docs/img/server-learner-figure-06.png)
@@ -43,6 +51,7 @@ Adding a new member to a 1-node cluster changes the quorum size to 2, immediately
 
 ![server-learner-figure-07](https://storage.googleapis.com/etcd/docs/img/server-learner-figure-07.png)
 
 ### 3. Cluster Misconfigurations
 
 An even worse case is when an added member is misconfigured. Membership reconfiguration is a two-step process: “etcdctl member add” and starting an etcd server process with the given peer URL. That is, “member add” command is applied regardless of URL, even when the URL value is invalid. If the first step is applied with invalid URLs, the second step cannot even start the new etcd. Once the cluster loses quorum, there is no way to revert the membership change (see *Figure 8*).
 
 ![server-learner-figure-08](https://storage.googleapis.com/etcd/docs/img/server-learner-figure-08.png)
```
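The arithmetic that recurs through these scenarios (quorum of 2 for 3 nodes, quorum of 3 for 4 nodes) is plain majority sizing. A minimal sketch, illustrative rather than etcd source:

```go
package main

import "fmt"

// quorum returns the majority size for a cluster of n voting members,
// matching the scenarios above: 3 -> 2, 4 -> 3.
func quorum(n int) int { return n/2 + 1 }

func main() {
	for _, n := range []int{1, 2, 3, 4, 5} {
		fmt.Printf("cluster=%d quorum=%d tolerates=%d failure(s)\n",
			n, quorum(n), n-quorum(n))
	}
}
```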
```diff
@@ -388,7 +388,10 @@ func (as *authStore) UserAdd(r *pb.AuthUserAddRequest) (*pb.AuthUserAddResponse,
 	var hashed []byte
 	var err error
 
-	if !r.Options.NoPassword {
+	if r.Options != nil && !r.Options.NoPassword {
 		hashed, err = bcrypt.GenerateFromPassword([]byte(r.Password), as.bcryptCost)
 		if err != nil {
 			if as.lg != nil {
```
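The one-line fix guards the `Options` pointer before dereferencing it: `!r.Options.NoPassword` alone panics when `Options` was never set. A minimal sketch of the hazard, using hypothetical stand-in types rather than the real protobuf-generated ones:

```go
package main

import "fmt"

// Hypothetical stand-ins for the generated request types in the diff.
type UserAddOptions struct{ NoPassword bool }

type AuthUserAddRequest struct {
	Name    string
	Options *UserAddOptions // may legitimately arrive nil on the wire
}

func main() {
	r := &AuthUserAddRequest{Name: "alice"} // Options left nil

	// Unguarded, `!r.Options.NoPassword` would panic here with a nil
	// pointer dereference; the guarded form from the fix is safe.
	if r.Options != nil && !r.Options.NoPassword {
		fmt.Println("would bcrypt-hash the password for", r.Name)
	} else {
		fmt.Println("skipping password hash for", r.Name)
	}
}
```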
```diff
@@ -129,8 +129,12 @@ func NewFromURLs(urls []string) (*Client, error) {
 // Close shuts down the client's etcd connections.
 func (c *Client) Close() error {
 	c.cancel()
-	c.Watcher.Close()
-	c.Lease.Close()
+	if c.Watcher != nil {
+		c.Watcher.Close()
+	}
+	if c.Lease != nil {
+		c.Lease.Close()
+	}
 	if c.resolverGroup != nil {
 		c.resolverGroup.Close()
 	}
```
```diff
@@ -156,3 +156,13 @@ func TestIsHaltErr(t *testing.T) {
 		t.Errorf("cancel on context should be Halted")
 	}
 }
+
+func TestCloseCtxClient(t *testing.T) {
+	ctx := context.Background()
+	c := NewCtxClient(ctx)
+	err := c.Close()
+	// Close returns ctx.toErr, a nil error means an open Done channel
+	if err == nil {
+		t.Errorf("failed to Close the client. %v", err)
+	}
+}
```
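The nil checks in `Close` matter because a `Client` can exist without its sub-clients ever being created: the new test builds one via `NewCtxClient` and closes it immediately. A minimal sketch of the failure mode, with hypothetical stand-in types (in clientv3, `Watcher` and `Lease` are interface fields, and a method call through a nil interface panics):

```go
package main

import "fmt"

// Lease mirrors an optional sub-client held as an interface field.
type Lease interface{ Close() error }

type client struct{ Lease Lease }

// closeUnguarded mirrors the pre-fix behavior: calling through a nil
// interface value panics at runtime.
func (c *client) closeUnguarded() { _ = c.Lease.Close() }

// closeGuarded mirrors the fixed behavior: skip components never created.
func (c *client) closeGuarded() {
	if c.Lease != nil {
		_ = c.Lease.Close()
	}
}

func main() {
	c := &client{} // Lease was never initialized
	c.closeGuarded() // safe: nothing to close

	defer func() { fmt.Println("recovered:", recover()) }()
	c.closeUnguarded() // panics: nil interface method call
}
```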
```diff
@@ -170,7 +170,10 @@ func (cfg *Config) setupLogging() error {
 	}
 
 	if !isJournal {
-		copied := logutil.AddOutputPaths(logutil.DefaultZapLoggerConfig, outputPaths, errOutputPaths)
+		copied := logutil.DefaultZapLoggerConfig
+		copied.OutputPaths = outputPaths
+		copied.ErrorOutputPaths = errOutputPaths
+		copied = logutil.MergeOutputPaths(copied)
 		copied.Level = zap.NewAtomicLevelAt(logutil.ConvertToZapLevel(cfg.LogLevel))
 		if cfg.Debug || cfg.LogLevel == "debug" {
 			// enable tracing even when "--debug --log-level info"
```
```diff
@@ -53,15 +53,12 @@ var DefaultZapLoggerConfig = zap.Config{
 	ErrorOutputPaths: []string{"stderr"},
 }
 
-// AddOutputPaths adds output paths to the existing output paths, resolving conflicts.
-func AddOutputPaths(cfg zap.Config, outputPaths, errorOutputPaths []string) zap.Config {
+// MergeOutputPaths merges logging output paths, resolving conflicts.
+func MergeOutputPaths(cfg zap.Config) zap.Config {
 	outputs := make(map[string]struct{})
 	for _, v := range cfg.OutputPaths {
 		outputs[v] = struct{}{}
 	}
-	for _, v := range outputPaths {
-		outputs[v] = struct{}{}
-	}
 	outputSlice := make([]string, 0)
 	if _, ok := outputs["/dev/null"]; ok {
 		// "/dev/null" to discard all
@@ -78,9 +75,6 @@ func AddOutputPaths(cfg zap.Config, outputPaths, errorOutputPaths []string) zap.
 	for _, v := range cfg.ErrorOutputPaths {
 		errOutputs[v] = struct{}{}
 	}
-	for _, v := range errorOutputPaths {
-		errOutputs[v] = struct{}{}
-	}
 	errOutputSlice := make([]string, 0)
 	if _, ok := errOutputs["/dev/null"]; ok {
 		// "/dev/null" to discard all
```
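Taken together, the two hunks move the merge responsibility into the config value itself: callers set `OutputPaths`/`ErrorOutputPaths` and hand the whole `zap.Config` to `MergeOutputPaths`. A standalone sketch of the merging behavior visible in the diff (duplicates collapse, `/dev/null` discards everything else); the helper name here is hypothetical:

```go
package main

import "fmt"

// mergePaths sketches the conflict resolution above: duplicate sinks
// collapse to one entry, and "/dev/null" anywhere in the list wins and
// discards every other sink.
func mergePaths(paths []string) []string {
	seen := make(map[string]struct{}, len(paths))
	merged := make([]string, 0, len(paths))
	for _, p := range paths {
		if p == "/dev/null" {
			return []string{"/dev/null"} // "/dev/null" to discard all
		}
		if _, ok := seen[p]; ok {
			continue // drop duplicates, keep first occurrence
		}
		seen[p] = struct{}{}
		merged = append(merged, p)
	}
	return merged
}

func main() {
	fmt.Println(mergePaths([]string{"stderr", "a.log", "stderr"})) // [stderr a.log]
	fmt.Println(mergePaths([]string{"stderr", "/dev/null"}))       // [/dev/null]
}
```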
```diff
@@ -265,7 +265,7 @@ func TestLogMaybeAppend(t *testing.T) {
 				t.Fatalf("unexpected error %v", err)
 			}
 			if !reflect.DeepEqual(tt.ents, gents) {
-				t.Errorf("%d: appended entries = %v, want %v", i, gents, tt.ents)
+				t.Errorf("#%d: appended entries = %v, want %v", i, gents, tt.ents)
 			}
 		}
 	}()
@@ -426,7 +426,7 @@ func TestUnstableEnts(t *testing.T) {
 
 		ents := raftLog.unstableEntries()
 		if l := len(ents); l > 0 {
-			raftLog.stableTo(ents[l-1].Index, ents[l-i].Term)
+			raftLog.stableTo(ents[l-1].Index, ents[l-1].Term)
 		}
 		if !reflect.DeepEqual(ents, tt.wents) {
 			t.Errorf("#%d: unstableEnts = %+v, want %+v", i, ents, tt.wents)
@@ -671,13 +671,13 @@ func TestIsOutOfBounds(t *testing.T) {
 			}()
 			err := l.mustCheckOutOfBounds(tt.lo, tt.hi)
 			if tt.wpanic {
-				t.Errorf("%d: panic = %v, want %v", i, false, true)
+				t.Errorf("#%d: panic = %v, want %v", i, false, true)
 			}
 			if tt.wErrCompacted && err != ErrCompacted {
-				t.Errorf("%d: err = %v, want %v", i, err, ErrCompacted)
+				t.Errorf("#%d: err = %v, want %v", i, err, ErrCompacted)
 			}
 			if !tt.wErrCompacted && err != nil {
-				t.Errorf("%d: unexpected err %v", i, err)
+				t.Errorf("#%d: unexpected err %v", i, err)
 			}
 		}()
 	}
```
```diff
@@ -367,7 +367,7 @@ func newRaft(c *Config) *raft {
 	}
 	assertConfStatesEquivalent(r.logger, cs, r.switchToConfig(cfg, prs))
 
-	if !isHardStateEqual(hs, emptyState) {
+	if !IsEmptyHardState(hs) {
 		r.loadState(hs)
 	}
 	if c.Applied > 0 {
@@ -1099,7 +1099,7 @@ func stepLeader(r *raft, m pb.Message) error {
 		case ReadOnlyLeaseBased:
 			ri := r.raftLog.committed
 			if m.From == None || m.From == r.id { // from local member
-				r.readStates = append(r.readStates, ReadState{Index: r.raftLog.committed, RequestCtx: m.Entries[0].Data})
+				r.readStates = append(r.readStates, ReadState{Index: ri, RequestCtx: m.Entries[0].Data})
 			} else {
 				r.send(pb.Message{To: m.From, Type: pb.MsgReadIndexResp, Index: ri, Entries: m.Entries})
 			}
```
```diff
@@ -106,7 +106,7 @@ func TestStorageLastIndex(t *testing.T) {
 		t.Errorf("err = %v, want nil", err)
 	}
 	if last != 5 {
-		t.Errorf("term = %d, want %d", last, 5)
+		t.Errorf("last = %d, want %d", last, 5)
 	}
 
 	s.Append([]pb.Entry{{Index: 6, Term: 5}})
@@ -115,7 +115,7 @@ func TestStorageLastIndex(t *testing.T) {
 		t.Errorf("err = %v, want nil", err)
 	}
 	if last != 6 {
-		t.Errorf("last = %d, want %d", last, 5)
+		t.Errorf("last = %d, want %d", last, 6)
 	}
 }
```
```diff
@@ -140,12 +140,23 @@ main() {
   ./release/etcd-${RELEASE_VERSION}-$(go env GOOS)-amd64/etcd --version | grep -q "etcd Version: ${VERSION}" || true
   ./release/etcd-${RELEASE_VERSION}-$(go env GOOS)-amd64/etcdctl version | grep -q "etcdctl version: ${VERSION}" || true
 
+  # Generate SHA256SUM
+  echo -e "Generating sha256sum of release artifacts.\n"
+  ls ./release | grep -E '\.tar.gz$|\.zip$' | xargs shasum -a 256 > ./release/SHA256SUM
+  if [ -s ./release/SHA256SUM ]; then
+    cat ./release/SHA256SUM
+  else
+    echo "sha256sum is not valid. Aborting."
+    exit 1
+  fi
+
   # Upload artifacts.
   if [ "${NO_UPLOAD}" == 1 ]; then
     echo "Skipping artifact upload to gs://etcd. --no-upload flag is set."
   else
     read -p "Upload etcd ${RELEASE_VERSION} release artifacts to gs://etcd [y/N]? " confirm
     [[ "${confirm,,}" == "y" ]] || exit 1
+    gsutil -m cp ./release/SHA256SUM gs://etcd/${RELEASE_VERSION}/
     gsutil -m cp ./release/*.zip gs://etcd/${RELEASE_VERSION}/
     gsutil -m cp ./release/*.tar.gz gs://etcd/${RELEASE_VERSION}/
     gsutil -m acl ch -u allUsers:R -r gs://etcd/${RELEASE_VERSION}/
```
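For anyone consuming the release, the manifest can be verified with the same tool family; a hypothetical check, assuming the artifacts and the `SHA256SUM` file were downloaded into one directory:

```bash
cd release
shasum -a 256 -c SHA256SUM   # GNU coreutils equivalent: sha256sum -c SHA256SUM
```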
```diff
@@ -37,6 +37,8 @@ func TestV3MetricsInsecure(t *testing.T) {
 }
 
 func metricsTest(cx ctlCtx) {
+	cx.t.Skip()
+
 	if err := ctlV3Put(cx, "k", "v", ""); err != nil {
 		cx.t.Fatal(err)
 	}
@@ -44,9 +46,6 @@ func metricsTest(cx ctlCtx) {
 	if strings.HasSuffix(ver, "-pre") {
 		ver = strings.Replace(ver, "-pre", "", 1)
 	}
-	if strings.HasSuffix(ver, "-rc.1") {
-		ver = strings.Replace(ver, "-rc.1", "", 1)
-	}
 
 	i := 0
 	for _, test := range []struct {
```
```diff
@@ -26,7 +26,7 @@ import (
 var (
 	// MinClusterVersion is the min cluster version this etcd binary is compatible with.
 	MinClusterVersion = "3.0.0"
-	Version           = "3.4.0-rc.2"
+	Version           = "3.4.0-rc.4"
 	APIVersion        = "unknown"
 
 	// Git SHA Value will be set during build
```