functional: wait election timeout after member add
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>release-3.4
parent
bd235ab8f9
commit
448e0fc481
|
@ -33,6 +33,11 @@ import (
|
||||||
"google.golang.org/grpc/credentials"
|
"google.golang.org/grpc/credentials"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// ElectionTimeout returns an election timeout duration.
|
||||||
|
func (m *Member) ElectionTimeout() time.Duration {
|
||||||
|
return time.Duration(m.Etcd.ElectionTimeoutMs) * time.Millisecond
|
||||||
|
}
|
||||||
|
|
||||||
// DialEtcdGRPCServer creates a raw gRPC connection to an etcd member.
|
// DialEtcdGRPCServer creates a raw gRPC connection to an etcd member.
|
||||||
func (m *Member) DialEtcdGRPCServer(opts ...grpc.DialOption) (*grpc.ClientConn, error) {
|
func (m *Member) DialEtcdGRPCServer(opts ...grpc.DialOption) (*grpc.ClientConn, error) {
|
||||||
dialOpts := []grpc.DialOption{
|
dialOpts := []grpc.DialOption{
|
||||||
|
|
|
@ -158,11 +158,13 @@ func (c *fetchSnapshotCaseQuorum) Recover(clus *Cluster) error {
|
||||||
clus.lg.Info(
|
clus.lg.Info(
|
||||||
"restore snapshot and restart from snapshot request START",
|
"restore snapshot and restart from snapshot request START",
|
||||||
zap.String("target-endpoint", clus.Members[oldlead].EtcdClientEndpoint),
|
zap.String("target-endpoint", clus.Members[oldlead].EtcdClientEndpoint),
|
||||||
|
zap.Strings("initial-cluster", initClus),
|
||||||
)
|
)
|
||||||
err := clus.sendOp(oldlead, rpcpb.Operation_RESTORE_RESTART_FROM_SNAPSHOT)
|
err := clus.sendOp(oldlead, rpcpb.Operation_RESTORE_RESTART_FROM_SNAPSHOT)
|
||||||
clus.lg.Info(
|
clus.lg.Info(
|
||||||
"restore snapshot and restart from snapshot request END",
|
"restore snapshot and restart from snapshot request END",
|
||||||
zap.String("target-endpoint", clus.Members[oldlead].EtcdClientEndpoint),
|
zap.String("target-endpoint", clus.Members[oldlead].EtcdClientEndpoint),
|
||||||
|
zap.Strings("initial-cluster", initClus),
|
||||||
zap.Error(err),
|
zap.Error(err),
|
||||||
)
|
)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -178,7 +180,11 @@ func (c *fetchSnapshotCaseQuorum) Recover(clus *Cluster) error {
|
||||||
// 7. Add another member to establish 2-node cluster.
|
// 7. Add another member to establish 2-node cluster.
|
||||||
// 8. Add another member to establish 3-node cluster.
|
// 8. Add another member to establish 3-node cluster.
|
||||||
// 9. Add more if any.
|
// 9. Add more if any.
|
||||||
|
idxs := make([]int, 0, len(c.injected))
|
||||||
for idx := range c.injected {
|
for idx := range c.injected {
|
||||||
|
idxs = append(idxs, idx)
|
||||||
|
}
|
||||||
|
for i, idx := range idxs {
|
||||||
clus.lg.Info(
|
clus.lg.Info(
|
||||||
"member add request START",
|
"member add request START",
|
||||||
zap.String("target-endpoint", clus.Members[idx].EtcdClientEndpoint),
|
zap.String("target-endpoint", clus.Members[idx].EtcdClientEndpoint),
|
||||||
|
@ -197,10 +203,6 @@ func (c *fetchSnapshotCaseQuorum) Recover(clus *Cluster) error {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
// wait until membership reconfiguration entry gets applied
|
|
||||||
// TODO: test concurrent member add
|
|
||||||
time.Sleep(3 * time.Second)
|
|
||||||
|
|
||||||
// start the added(new) member with fresh data
|
// start the added(new) member with fresh data
|
||||||
clus.Members[idx].EtcdOnSnapshotRestore = clus.Members[idx].Etcd
|
clus.Members[idx].EtcdOnSnapshotRestore = clus.Members[idx].Etcd
|
||||||
clus.Members[idx].EtcdOnSnapshotRestore.InitialClusterState = "existing"
|
clus.Members[idx].EtcdOnSnapshotRestore.InitialClusterState = "existing"
|
||||||
|
@ -212,18 +214,38 @@ func (c *fetchSnapshotCaseQuorum) Recover(clus *Cluster) error {
|
||||||
clus.lg.Info(
|
clus.lg.Info(
|
||||||
"restart from snapshot request START",
|
"restart from snapshot request START",
|
||||||
zap.String("target-endpoint", clus.Members[idx].EtcdClientEndpoint),
|
zap.String("target-endpoint", clus.Members[idx].EtcdClientEndpoint),
|
||||||
|
zap.Strings("initial-cluster", initClus),
|
||||||
)
|
)
|
||||||
err = clus.sendOp(idx, rpcpb.Operation_RESTART_FROM_SNAPSHOT)
|
err = clus.sendOp(idx, rpcpb.Operation_RESTART_FROM_SNAPSHOT)
|
||||||
clus.lg.Info(
|
clus.lg.Info(
|
||||||
"restart from snapshot request END",
|
"restart from snapshot request END",
|
||||||
zap.String("target-endpoint", clus.Members[idx].EtcdClientEndpoint),
|
zap.String("target-endpoint", clus.Members[idx].EtcdClientEndpoint),
|
||||||
|
zap.Strings("initial-cluster", initClus),
|
||||||
zap.Error(err),
|
zap.Error(err),
|
||||||
)
|
)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
|
if i != len(c.injected)-1 {
|
||||||
|
// wait until membership reconfiguration entry gets applied
|
||||||
|
// TODO: test concurrent member add
|
||||||
|
dur := 5 * clus.Members[idx].ElectionTimeout()
|
||||||
|
clus.lg.Info(
|
||||||
|
"waiting after restart from snapshot request",
|
||||||
|
zap.Int("i", i),
|
||||||
|
zap.Int("idx", idx),
|
||||||
|
zap.Duration("sleep", dur),
|
||||||
|
)
|
||||||
|
time.Sleep(dur)
|
||||||
|
} else {
|
||||||
|
clus.lg.Info(
|
||||||
|
"restart from snapshot request ALL END",
|
||||||
|
zap.Int("i", i),
|
||||||
|
zap.Int("idx", idx),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue