Merge pull request #7008 from heyitsanthony/fix-dns

retry on resolution failure for advertised peer DNS check
release-3.1
Anthony Romano 2016-12-27 10:03:01 -08:00 committed by GitHub
commit 17873f7be8
5 changed files with 80 additions and 45 deletions

View File

@ -21,6 +21,8 @@ import (
"strings"
"time"
"golang.org/x/net/context"
"github.com/coreos/etcd/pkg/netutil"
"github.com/coreos/etcd/pkg/transport"
"github.com/coreos/etcd/pkg/types"
@ -62,7 +64,10 @@ type ServerConfig struct {
// VerifyBootstrap sanity-checks the initial config for bootstrap case
// and returns an error for things that should never happen.
func (c *ServerConfig) VerifyBootstrap() error {
if err := c.verifyLocalMember(true); err != nil {
if err := c.hasLocalMember(); err != nil {
return err
}
if err := c.advertiseMatchesCluster(); err != nil {
return err
}
if checkDuplicateURL(c.InitialPeerURLsMap) {
@ -77,10 +82,9 @@ func (c *ServerConfig) VerifyBootstrap() error {
// VerifyJoinExisting sanity-checks the initial config for join existing cluster
// case and returns an error for things that should never happen.
func (c *ServerConfig) VerifyJoinExisting() error {
// no need for strict checking since the member have announced its
// peer urls to the cluster before starting and do not have to set
// it in the configuration again.
if err := c.verifyLocalMember(false); err != nil {
// The member has announced its peer urls to the cluster before starting; no need to
// set the configuration again.
if err := c.hasLocalMember(); err != nil {
return err
}
if checkDuplicateURL(c.InitialPeerURLsMap) {
@ -92,25 +96,24 @@ func (c *ServerConfig) VerifyJoinExisting() error {
return nil
}
// verifyLocalMember verifies the configured member is in configured
// cluster. If strict is set, it also verifies the configured member
// has the same peer urls as configured advertised peer urls.
func (c *ServerConfig) verifyLocalMember(strict bool) error {
urls := c.InitialPeerURLsMap[c.Name]
// Make sure the cluster at least contains the local server.
if urls == nil {
// hasLocalMember checks that the cluster at least contains the local server.
func (c *ServerConfig) hasLocalMember() error {
if urls := c.InitialPeerURLsMap[c.Name]; urls == nil {
return fmt.Errorf("couldn't find local name %q in the initial cluster configuration", c.Name)
}
return nil
}
if strict {
// Advertised peer URLs must match those in the cluster peer list
apurls := c.PeerURLs.StringSlice()
sort.Strings(apurls)
urls.Sort()
if !netutil.URLStringsEqual(apurls, urls.StringSlice()) {
umap := map[string]types.URLs{c.Name: c.PeerURLs}
return fmt.Errorf("--initial-cluster must include %s given --initial-advertise-peer-urls=%s", types.URLsMap(umap).String(), strings.Join(apurls, ","))
}
// advertiseMatchesCluster confirms peer URLs match those in the cluster peer list.
func (c *ServerConfig) advertiseMatchesCluster() error {
urls, apurls := c.InitialPeerURLsMap[c.Name], c.PeerURLs.StringSlice()
urls.Sort()
sort.Strings(apurls)
ctx, cancel := context.WithTimeout(context.TODO(), 30*time.Second)
defer cancel()
if !netutil.URLStringsEqual(ctx, apurls, urls.StringSlice()) {
umap := map[string]types.URLs{c.Name: c.PeerURLs}
return fmt.Errorf("--initial-cluster must include %s given --initial-advertise-peer-urls=%s", types.URLsMap(umap).String(), strings.Join(apurls, ","))
}
return nil
}

View File

@ -137,7 +137,9 @@ func TestConfigVerifyLocalMember(t *testing.T) {
if tt.apurls != nil {
cfg.PeerURLs = mustNewURLs(t, tt.apurls)
}
err = cfg.verifyLocalMember(tt.strict)
if err = cfg.hasLocalMember(); err == nil && tt.strict {
err = cfg.advertiseMatchesCluster()
}
if (err == nil) && tt.shouldError {
t.Errorf("#%d: Got no error where one was expected", i)
}

View File

@ -24,6 +24,9 @@ import (
"sort"
"strings"
"sync"
"time"
"golang.org/x/net/context"
"github.com/coreos/etcd/mvcc/backend"
"github.com/coreos/etcd/pkg/netutil"
@ -484,8 +487,10 @@ func ValidateClusterAndAssignIDs(local *RaftCluster, existing *RaftCluster) erro
sort.Sort(MembersByPeerURLs(ems))
sort.Sort(MembersByPeerURLs(lms))
ctx, cancel := context.WithTimeout(context.TODO(), 30*time.Second)
defer cancel()
for i := range ems {
if !netutil.URLStringsEqual(ems[i].PeerURLs, lms[i].PeerURLs) {
if !netutil.URLStringsEqual(ctx, ems[i].PeerURLs, lms[i].PeerURLs) {
return fmt.Errorf("unmatched member while checking PeerURLs")
}
lms[i].ID = ems[i].ID

View File

@ -20,6 +20,9 @@ import (
"net/url"
"reflect"
"sort"
"time"
"golang.org/x/net/context"
"github.com/coreos/etcd/pkg/types"
"github.com/coreos/pkg/capnslog"
@ -32,10 +35,12 @@ var (
resolveTCPAddr = net.ResolveTCPAddr
)
const retryInterval = time.Second
// resolveTCPAddrs is a convenience wrapper for net.ResolveTCPAddr.
// resolveTCPAddrs return a new set of url.URLs, in which all DNS hostnames
// are resolved.
func resolveTCPAddrs(urls [][]url.URL) ([][]url.URL, error) {
func resolveTCPAddrs(ctx context.Context, urls [][]url.URL) ([][]url.URL, error) {
newurls := make([][]url.URL, 0)
for _, us := range urls {
nus := make([]url.URL, len(us))
@ -47,37 +52,52 @@ func resolveTCPAddrs(urls [][]url.URL) ([][]url.URL, error) {
nus[i] = *nu
}
for i, u := range nus {
host, _, err := net.SplitHostPort(u.Host)
h, err := resolveURL(ctx, u)
if err != nil {
plog.Errorf("could not parse url %s during tcp resolving", u.Host)
return nil, err
}
if host == "localhost" {
continue
if h != "" {
nus[i].Host = h
}
if net.ParseIP(host) != nil {
continue
}
tcpAddr, err := resolveTCPAddr("tcp", u.Host)
if err != nil {
plog.Errorf("could not resolve host %s", u.Host)
return nil, err
}
plog.Infof("resolving %s to %s", u.Host, tcpAddr.String())
nus[i].Host = tcpAddr.String()
}
newurls = append(newurls, nus)
}
return newurls, nil
}
func resolveURL(ctx context.Context, u url.URL) (string, error) {
for ctx.Err() == nil {
host, _, err := net.SplitHostPort(u.Host)
if err != nil {
plog.Errorf("could not parse url %s during tcp resolving", u.Host)
return "", err
}
if host == "localhost" || net.ParseIP(host) != nil {
return "", nil
}
tcpAddr, err := resolveTCPAddr("tcp", u.Host)
if err == nil {
plog.Infof("resolving %s to %s", u.Host, tcpAddr.String())
return tcpAddr.String(), nil
}
plog.Warningf("failed resolving host %s (%v); retrying in %v", u.Host, err, retryInterval)
select {
case <-ctx.Done():
plog.Errorf("could not resolve host %s", u.Host)
return "", err
case <-time.After(retryInterval):
}
}
return "", ctx.Err()
}
// urlsEqual checks equality of url.URLS between two arrays.
// This check pass even if an URL is in hostname and opposite is in IP address.
func urlsEqual(a []url.URL, b []url.URL) bool {
func urlsEqual(ctx context.Context, a []url.URL, b []url.URL) bool {
if len(a) != len(b) {
return false
}
urls, err := resolveTCPAddrs([][]url.URL{a, b})
urls, err := resolveTCPAddrs(ctx, [][]url.URL{a, b})
if err != nil {
return false
}
@ -93,7 +113,7 @@ func urlsEqual(a []url.URL, b []url.URL) bool {
return true
}
func URLStringsEqual(a []string, b []string) bool {
func URLStringsEqual(ctx context.Context, a []string, b []string) bool {
if len(a) != len(b) {
return false
}
@ -114,7 +134,7 @@ func URLStringsEqual(a []string, b []string) bool {
urlsB = append(urlsB, *u)
}
return urlsEqual(urlsA, urlsB)
return urlsEqual(ctx, urlsA, urlsB)
}
func IsNetworkTimeoutError(err error) bool {

View File

@ -21,6 +21,9 @@ import (
"reflect"
"strconv"
"testing"
"time"
"golang.org/x/net/context"
)
func TestResolveTCPAddrs(t *testing.T) {
@ -124,7 +127,9 @@ func TestResolveTCPAddrs(t *testing.T) {
}
return &net.TCPAddr{IP: net.ParseIP(tt.hostMap[host]), Port: i, Zone: ""}, nil
}
urls, err := resolveTCPAddrs(tt.urls)
ctx, cancel := context.WithTimeout(context.TODO(), time.Second)
urls, err := resolveTCPAddrs(ctx, tt.urls)
cancel()
if tt.hasError {
if err == nil {
t.Errorf("expected error")
@ -244,14 +249,14 @@ func TestURLsEqual(t *testing.T) {
}
for _, test := range tests {
result := urlsEqual(test.a, test.b)
result := urlsEqual(context.TODO(), test.a, test.b)
if result != test.expect {
t.Errorf("a:%v b:%v, expected %v but %v", test.a, test.b, test.expect, result)
}
}
}
func TestURLStringsEqual(t *testing.T) {
result := URLStringsEqual([]string{"http://127.0.0.1:8080"}, []string{"http://127.0.0.1:8080"})
result := URLStringsEqual(context.TODO(), []string{"http://127.0.0.1:8080"}, []string{"http://127.0.0.1:8080"})
if !result {
t.Errorf("unexpected result %v", result)
}