// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package integration

import (
	"context"
	"fmt"
	"math/rand"
	"reflect"
	"sort"
	"testing"
	"time"

	"go.etcd.io/etcd/clientv3"
	"go.etcd.io/etcd/etcdserver/api/v3rpc"
	"go.etcd.io/etcd/etcdserver/api/v3rpc/rpctypes"
	"go.etcd.io/etcd/integration"
	mvccpb "go.etcd.io/etcd/mvcc/mvccpb"
	"go.etcd.io/etcd/pkg/testutil"

	"google.golang.org/grpc/metadata"
)

type watcherTest func(*testing.T, *watchctx)

type watchctx struct {
	clus          *integration.ClusterV3
	w             clientv3.Watcher
	kv            clientv3.KV
	wclientMember int
	kvMember      int
	ch            clientv3.WatchChan
}

func runWatchTest(t *testing.T, f watcherTest) {
	defer testutil.AfterTest(t)

	clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 3})
	defer clus.Terminate(t)

	wclientMember := rand.Intn(3)
	w := clus.Client(wclientMember).Watcher
	// select a different client for KV operations so puts succeed if
	// a test knocks out the watcher client.
	kvMember := rand.Intn(3)
	for kvMember == wclientMember {
		kvMember = rand.Intn(3)
	}
	kv := clus.Client(kvMember).KV

	wctx := &watchctx{clus, w, kv, wclientMember, kvMember, nil}
	f(t, wctx)
}

// TestWatchMultiWatcher modifies multiple keys and observes the changes.
func TestWatchMultiWatcher(t *testing.T) {
	runWatchTest(t, testWatchMultiWatcher)
}

func testWatchMultiWatcher(t *testing.T, wctx *watchctx) {
	numKeyUpdates := 4
	keys := []string{"foo", "bar", "baz"}

	donec := make(chan struct{})
	readyc := make(chan struct{})
	for _, k := range keys {
		// key watcher
		go func(key string) {
			ch := wctx.w.Watch(context.TODO(), key)
			if ch == nil {
				t.Fatalf("expected watcher channel, got nil")
			}
			readyc <- struct{}{}
			for i := 0; i < numKeyUpdates; i++ {
				resp, ok := <-ch
				if !ok {
					t.Fatalf("watcher unexpectedly closed")
				}
				v := fmt.Sprintf("%s-%d", key, i)
				gotv := string(resp.Events[0].Kv.Value)
				if gotv != v {
					t.Errorf("#%d: got %s, wanted %s", i, gotv, v)
				}
			}
			donec <- struct{}{}
		}(k)
	}
	// prefix watcher on "b" (bar and baz)
	go func() {
		prefixc := wctx.w.Watch(context.TODO(), "b", clientv3.WithPrefix())
		if prefixc == nil {
			t.Fatalf("expected watcher channel, got nil")
		}
		readyc <- struct{}{}
		evs := []*clientv3.Event{}
		for i := 0; i < numKeyUpdates*2; i++ {
			resp, ok := <-prefixc
			if !ok {
				t.Fatalf("watcher unexpectedly closed")
			}
			evs = append(evs, resp.Events...)
		}

		// check response
		expected := []string{}
		bkeys := []string{"bar", "baz"}
		for _, k := range bkeys {
			for i := 0; i < numKeyUpdates; i++ {
				expected = append(expected, fmt.Sprintf("%s-%d", k, i))
			}
		}
		got := []string{}
		for _, ev := range evs {
			got = append(got, string(ev.Kv.Value))
		}
		sort.Strings(got)
		if !reflect.DeepEqual(expected, got) {
			t.Errorf("got %v, expected %v", got, expected)
		}

		// ensure no extra data
		select {
		case resp, ok := <-prefixc:
			if !ok {
				t.Fatalf("watcher unexpectedly closed")
			}
			t.Fatalf("unexpected event %+v", resp)
		case <-time.After(time.Second):
		}
		donec <- struct{}{}
	}()

	// wait for watcher bring up
	for i := 0; i < len(keys)+1; i++ {
		<-readyc
	}
	// generate events
	ctx := context.TODO()
	for i := 0; i < numKeyUpdates; i++ {
		for _, k := range keys {
			v := fmt.Sprintf("%s-%d", k, i)
			if _, err := wctx.kv.Put(ctx, k, v); err != nil {
				t.Fatal(err)
			}
		}
	}
	// wait for watcher shutdown
	for i := 0; i < len(keys)+1; i++ {
		<-donec
	}
}

// TestWatchRange tests watcher creates ranges
func TestWatchRange(t *testing.T) {
	runWatchTest(t, testWatchRange)
}

func testWatchRange(t *testing.T, wctx *watchctx) {
	if wctx.ch = wctx.w.Watch(context.TODO(), "a", clientv3.WithRange("c")); wctx.ch == nil {
		t.Fatalf("expected non-nil channel")
	}
	putAndWatch(t, wctx, "a", "a")
	putAndWatch(t, wctx, "b", "b")
	putAndWatch(t, wctx, "bar", "bar")
}

// TestWatchReconnRequest tests the send failure path when requesting a watcher.
func TestWatchReconnRequest(t *testing.T) {
	runWatchTest(t, testWatchReconnRequest)
}

func testWatchReconnRequest(t *testing.T, wctx *watchctx) {
	donec, stopc := make(chan struct{}), make(chan struct{}, 1)
	go func() {
		timer := time.After(2 * time.Second)
		defer close(donec)
		// take down watcher connection
		for {
			wctx.clus.Members[wctx.wclientMember].DropConnections()
			select {
			case <-timer:
				// spinning on close may live lock reconnection
				return
			case <-stopc:
				return
			default:
			}
		}
	}()
	// should reconnect when requesting watch
	if wctx.ch = wctx.w.Watch(context.TODO(), "a"); wctx.ch == nil {
		t.Fatalf("expected non-nil channel")
	}

	// wait for disconnections to stop
	stopc <- struct{}{}
	<-donec

	// spinning on dropping connections may trigger a leader election
	// due to resource starvation; l-read to ensure the cluster is stable
	ctx, cancel := context.WithTimeout(context.TODO(), 30*time.Second)
	if _, err := wctx.kv.Get(ctx, "_"); err != nil {
		t.Fatal(err)
	}
	cancel()

	// ensure watcher works
	putAndWatch(t, wctx, "a", "a")
}

// TestWatchReconnInit tests watcher resumes correctly if connection lost
// before any data was sent.
func TestWatchReconnInit(t *testing.T) {
	runWatchTest(t, testWatchReconnInit)
}

func testWatchReconnInit(t *testing.T, wctx *watchctx) {
	if wctx.ch = wctx.w.Watch(context.TODO(), "a"); wctx.ch == nil {
		t.Fatalf("expected non-nil channel")
	}
	wctx.clus.Members[wctx.wclientMember].DropConnections()
	// watcher should recover
	putAndWatch(t, wctx, "a", "a")
}

// TestWatchReconnRunning tests watcher resumes correctly if connection lost
// after data was sent.
func TestWatchReconnRunning(t *testing.T) {
	runWatchTest(t, testWatchReconnRunning)
}

func testWatchReconnRunning(t *testing.T, wctx *watchctx) {
	if wctx.ch = wctx.w.Watch(context.TODO(), "a"); wctx.ch == nil {
		t.Fatalf("expected non-nil channel")
	}
	putAndWatch(t, wctx, "a", "a")
	// take down watcher connection
	wctx.clus.Members[wctx.wclientMember].DropConnections()
	// watcher should recover
	putAndWatch(t, wctx, "a", "b")
}

// TestWatchCancelImmediate ensures a closed channel is returned
// if the context is cancelled.
func TestWatchCancelImmediate(t *testing.T) {
	runWatchTest(t, testWatchCancelImmediate)
}

func testWatchCancelImmediate(t *testing.T, wctx *watchctx) {
	ctx, cancel := context.WithCancel(context.Background())
	cancel()
	wch := wctx.w.Watch(ctx, "a")
	select {
	case wresp, ok := <-wch:
		if ok {
			t.Fatalf("read wch got %v; expected closed channel", wresp)
		}
	default:
		t.Fatalf("closed watcher channel should not block")
	}
}

// TestWatchCancelInit tests watcher closes correctly after no events.
func TestWatchCancelInit(t *testing.T) {
	runWatchTest(t, testWatchCancelInit)
}

func testWatchCancelInit(t *testing.T, wctx *watchctx) {
	ctx, cancel := context.WithCancel(context.Background())
	if wctx.ch = wctx.w.Watch(ctx, "a"); wctx.ch == nil {
		t.Fatalf("expected non-nil watcher channel")
	}
	cancel()
	select {
	case <-time.After(time.Second):
		t.Fatalf("took too long to cancel")
	case _, ok := <-wctx.ch:
		if ok {
			t.Fatalf("expected watcher channel to close")
		}
	}
}

// TestWatchCancelRunning tests watcher closes correctly after events.
func TestWatchCancelRunning(t *testing.T) {
	runWatchTest(t, testWatchCancelRunning)
}

func testWatchCancelRunning(t *testing.T, wctx *watchctx) {
	ctx, cancel := context.WithCancel(context.Background())
	if wctx.ch = wctx.w.Watch(ctx, "a"); wctx.ch == nil {
		t.Fatalf("expected non-nil watcher channel")
	}
	if _, err := wctx.kv.Put(ctx, "a", "a"); err != nil {
		t.Fatal(err)
	}
	cancel()
	select {
	case <-time.After(time.Second):
		t.Fatalf("took too long to cancel")
	case _, ok := <-wctx.ch:
		if !ok {
			// closed before getting put; OK
			break
		}
		// got the PUT; should close next
		select {
		case <-time.After(time.Second):
			t.Fatalf("took too long to close")
		case v, ok2 := <-wctx.ch:
			if ok2 {
				t.Fatalf("expected watcher channel to close, got %v", v)
			}
		}
	}
}

func putAndWatch(t *testing.T, wctx *watchctx, key, val string) {
	if _, err := wctx.kv.Put(context.TODO(), key, val); err != nil {
		t.Fatal(err)
	}
	select {
	case <-time.After(5 * time.Second):
		t.Fatalf("watch timed out")
	case v, ok := <-wctx.ch:
		if !ok {
			t.Fatalf("unexpected watch close")
		}
		if string(v.Events[0].Kv.Value) != val {
			t.Fatalf("bad value got %v, wanted %v", v.Events[0].Kv.Value, val)
		}
	}
}

func TestWatchResumeInitRev(t *testing.T) {
	defer testutil.AfterTest(t)
	clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1})
	defer clus.Terminate(t)

	cli := clus.Client(0)
	if _, err := cli.Put(context.TODO(), "b", "2"); err != nil {
		t.Fatal(err)
	}
	if _, err := cli.Put(context.TODO(), "a", "3"); err != nil {
		t.Fatal(err)
	}
	// if resume is broken, it'll pick up this key first instead of a=3
	if _, err := cli.Put(context.TODO(), "a", "4"); err != nil {
		t.Fatal(err)
	}

	wch := clus.Client(0).Watch(context.Background(), "a", clientv3.WithRev(1), clientv3.WithCreatedNotify())
	if resp, ok := <-wch; !ok || resp.Header.Revision != 4 {
		t.Fatalf("got (%v, %v), expected create notification rev=4", resp, ok)
	}
	// pause wch
	clus.Members[0].DropConnections()
	clus.Members[0].PauseConnections()

	select {
	case resp, ok := <-wch:
		t.Skipf("wch should block, got (%+v, %v); drop not fast enough", resp, ok)
	case <-time.After(100 * time.Millisecond):
	}

	// resume wch
	clus.Members[0].UnpauseConnections()

	select {
	case resp, ok := <-wch:
		if !ok {
			t.Fatal("unexpected watch close")
		}
		if len(resp.Events) == 0 {
			t.Fatal("expected event on watch")
		}
		if string(resp.Events[0].Kv.Value) != "3" {
			t.Fatalf("expected value=3, got event %+v", resp.Events[0])
		}
	case <-time.After(5 * time.Second):
		t.Fatal("watch timed out")
	}
}

// TestWatchResumeCompacted checks that the watcher gracefully closes in case
// that it tries to resume to a revision that's been compacted out of the store.
// Since the watcher's server restarts with stale data, the watcher will receive
// either a compaction error or all keys by staying in sync before the compaction
// is finally applied.
func TestWatchResumeCompacted(t *testing.T) {
	defer testutil.AfterTest(t)

	clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 3})
	defer clus.Terminate(t)

	// create a waiting watcher at rev 1
	w := clus.Client(0)
	wch := w.Watch(context.Background(), "foo", clientv3.WithRev(1))
	select {
	case w := <-wch:
		t.Errorf("unexpected message from wch %v", w)
	default:
	}
	clus.Members[0].Stop(t)

	ticker := time.After(time.Second * 10)
	for clus.WaitLeader(t) <= 0 {
		select {
		case <-ticker:
			t.Fatalf("failed to wait for new leader")
		default:
			time.Sleep(10 * time.Millisecond)
		}
	}

	// put some data and compact away
	numPuts := 5
	kv := clus.Client(1)
	for i := 0; i < numPuts; i++ {
		if _, err := kv.Put(context.TODO(), "foo", "bar"); err != nil {
			t.Fatal(err)
		}
	}
	if _, err := kv.Compact(context.TODO(), 3); err != nil {
		t.Fatal(err)
	}

	clus.Members[0].Restart(t)

	// since watch's server isn't guaranteed to be synced with the cluster when
	// the watch resumes, there is a window where the watch can stay synced and
	// read off all events; if the watcher misses the window, it will go out of
	// sync and get a compaction error.
	wRev := int64(2)
	for int(wRev) <= numPuts+1 {
		var wresp clientv3.WatchResponse
		var ok bool
		select {
		case wresp, ok = <-wch:
			if !ok {
				t.Fatalf("expected wresp, but got closed channel")
			}
		case <-time.After(5 * time.Second):
			t.Fatalf("compacted watch timed out")
		}
		for _, ev := range wresp.Events {
			if ev.Kv.ModRevision != wRev {
				t.Fatalf("expected modRev %v, got %+v", wRev, ev)
			}
			wRev++
		}
		if wresp.Err() == nil {
			continue
		}
		if wresp.Err() != rpctypes.ErrCompacted {
			t.Fatalf("wresp.Err() expected %v, got %+v", rpctypes.ErrCompacted, wresp.Err())
		}
		break
	}
	if int(wRev) > numPuts+1 {
		// got data faster than the compaction
		return
	}
	// received compaction error; ensure the channel closes
	select {
	case wresp, ok := <-wch:
		if ok {
			t.Fatalf("expected closed channel, but got %v", wresp)
		}
	case <-time.After(5 * time.Second):
		t.Fatalf("timed out waiting for channel close")
	}
}

// TestWatchCompactRevision ensures the CompactRevision error is given on a
// compaction event ahead of a watcher.
func TestWatchCompactRevision(t *testing.T) {
	defer testutil.AfterTest(t)

	clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1})
	defer clus.Terminate(t)

	// set some keys
	kv := clus.RandClient()
	for i := 0; i < 5; i++ {
		if _, err := kv.Put(context.TODO(), "foo", "bar"); err != nil {
			t.Fatal(err)
		}
	}

	w := clus.RandClient()

	if _, err := kv.Compact(context.TODO(), 4); err != nil {
		t.Fatal(err)
	}
	wch := w.Watch(context.Background(), "foo", clientv3.WithRev(2))

	// get compacted error message
	wresp, ok := <-wch
	if !ok {
		t.Fatalf("expected wresp, but got closed channel")
	}
	if wresp.Err() != rpctypes.ErrCompacted {
		t.Fatalf("wresp.Err() expected %v, but got %v", rpctypes.ErrCompacted, wresp.Err())
	}
	if !wresp.Canceled {
		t.Fatalf("wresp.Canceled expected true, got %+v", wresp)
	}

	// ensure the channel is closed
	if wresp, ok = <-wch; ok {
		t.Fatalf("expected closed channel, but got %v", wresp)
	}
}

func TestWatchWithProgressNotify(t *testing.T)        { testWatchWithProgressNotify(t, true) }
func TestWatchWithProgressNotifyNoEvent(t *testing.T) { testWatchWithProgressNotify(t, false) }

func testWatchWithProgressNotify(t *testing.T, watchOnPut bool) {
	defer testutil.AfterTest(t)

	// accelerate report interval so test terminates quickly
	oldpi := v3rpc.GetProgressReportInterval()
	// using atomics to avoid race warnings
	v3rpc.SetProgressReportInterval(3 * time.Second)
	pi := 3 * time.Second
	defer func() { v3rpc.SetProgressReportInterval(oldpi) }()

	clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 3})
	defer clus.Terminate(t)

	wc := clus.RandClient()

	opts := []clientv3.OpOption{clientv3.WithProgressNotify()}
	if watchOnPut {
		opts = append(opts, clientv3.WithPrefix())
	}
	rch := wc.Watch(context.Background(), "foo", opts...)

	select {
	case resp := <-rch: // wait for notification
		if len(resp.Events) != 0 {
			t.Fatalf("resp.Events expected none, got %+v", resp.Events)
		}
	case <-time.After(2 * pi):
		t.Fatalf("watch response expected in %v, but timed out", pi)
	}

	kvc := clus.RandClient()
	if _, err := kvc.Put(context.TODO(), "foox", "bar"); err != nil {
		t.Fatal(err)
	}

	select {
	case resp := <-rch:
		if resp.Header.Revision != 2 {
			t.Fatalf("resp.Header.Revision expected 2, got %d", resp.Header.Revision)
		}
		if watchOnPut { // wait for put if watch on the put key
			ev := []*clientv3.Event{{Type: clientv3.EventTypePut,
				Kv: &mvccpb.KeyValue{Key: []byte("foox"), Value: []byte("bar"), CreateRevision: 2, ModRevision: 2, Version: 1}}}
			if !reflect.DeepEqual(ev, resp.Events) {
				t.Fatalf("expected %+v, got %+v", ev, resp.Events)
			}
		} else if len(resp.Events) != 0 { // wait for notification otherwise
			t.Fatalf("expected no events, but got %+v", resp.Events)
		}
	case <-time.After(time.Duration(1.5 * float64(pi))):
		t.Fatalf("watch response expected in %v, but timed out", pi)
	}
}

func TestWatchRequestProgress(t *testing.T) {
	testCases := []struct {
		name     string
		watchers []string
	}{
		{"0-watcher", []string{}},
		{"1-watcher", []string{"/"}},
		{"2-watcher", []string{"/", "/"}},
	}

	for _, c := range testCases {
		t.Run(c.name, func(t *testing.T) {
			defer testutil.AfterTest(t)

			watchTimeout := 3 * time.Second

			clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 3})
			defer clus.Terminate(t)

			wc := clus.RandClient()

			var watchChans []clientv3.WatchChan

			for _, prefix := range c.watchers {
				watchChans = append(watchChans, wc.Watch(context.Background(), prefix, clientv3.WithPrefix()))
			}

			_, err := wc.Put(context.Background(), "/a", "1")
			if err != nil {
				t.Fatal(err)
			}

			for _, rch := range watchChans {
				select {
				case resp := <-rch: // wait for notification
					if len(resp.Events) != 1 {
						t.Fatalf("resp.Events expected 1, got %d", len(resp.Events))
					}
				case <-time.After(watchTimeout):
					t.Fatalf("watch response expected in %v, but timed out", watchTimeout)
				}
			}

			// put a value not being watched to increment revision
			_, err = wc.Put(context.Background(), "x", "1")
			if err != nil {
				t.Fatal(err)
			}

			err = wc.RequestProgress(context.Background())
			if err != nil {
				t.Fatal(err)
			}

			// verify all watch channels receive a progress notify
			for _, rch := range watchChans {
				select {
				case resp := <-rch:
					if !resp.IsProgressNotify() {
						t.Fatalf("expected resp.IsProgressNotify() == true")
					}
					if resp.Header.Revision != 3 {
						t.Fatalf("resp.Header.Revision expected 3, got %d", resp.Header.Revision)
					}
				case <-time.After(watchTimeout):
					t.Fatalf("progress response expected in %v, but timed out", watchTimeout)
				}
			}
		})
	}
}

func TestWatchEventType(t *testing.T) {
	cluster := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1})
	defer cluster.Terminate(t)

	client := cluster.RandClient()
	ctx := context.Background()
	watchChan := client.Watch(ctx, "/", clientv3.WithPrefix())

	if _, err := client.Put(ctx, "/toDelete", "foo"); err != nil {
		t.Fatalf("Put failed: %v", err)
	}
	if _, err := client.Put(ctx, "/toDelete", "bar"); err != nil {
		t.Fatalf("Put failed: %v", err)
	}
	if _, err := client.Delete(ctx, "/toDelete"); err != nil {
		t.Fatalf("Delete failed: %v", err)
	}
	lcr, err := client.Lease.Grant(ctx, 1)
	if err != nil {
		t.Fatalf("lease create failed: %v", err)
	}
	if _, err := client.Put(ctx, "/toExpire", "foo", clientv3.WithLease(lcr.ID)); err != nil {
		t.Fatalf("Put failed: %v", err)
	}

	tests := []struct {
		et       mvccpb.Event_EventType
		isCreate bool
		isModify bool
	}{{
		et:       clientv3.EventTypePut,
		isCreate: true,
	}, {
		et:       clientv3.EventTypePut,
		isModify: true,
	}, {
		et: clientv3.EventTypeDelete,
	}, {
		et:       clientv3.EventTypePut,
		isCreate: true,
	}, {
		et: clientv3.EventTypeDelete,
	}}

	var res []*clientv3.Event

	for {
		select {
		case wres := <-watchChan:
			res = append(res, wres.Events...)
		case <-time.After(10 * time.Second):
			t.Fatalf("Should receive %d events and then break out loop", len(tests))
		}
		if len(res) == len(tests) {
			break
		}
	}

	for i, tt := range tests {
		ev := res[i]
		if tt.et != ev.Type {
			t.Errorf("#%d: event type want=%s, get=%s", i, tt.et, ev.Type)
		}
		if tt.isCreate && !ev.IsCreate() {
			t.Errorf("#%d: event should be CreateEvent", i)
		}
		if tt.isModify && !ev.IsModify() {
			t.Errorf("#%d: event should be ModifyEvent", i)
		}
	}
}

func TestWatchErrConnClosed(t *testing.T) {
	defer testutil.AfterTest(t)

	clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1})
	defer clus.Terminate(t)

	cli := clus.Client(0)

	donec := make(chan struct{})
	go func() {
		defer close(donec)
		ch := cli.Watch(context.TODO(), "foo")

		if wr := <-ch; !isCanceled(wr.Err()) {
			t.Fatalf("expected context canceled, got %v", wr.Err())
		}
	}()

	if err := cli.ActiveConnection().Close(); err != nil {
		t.Fatal(err)
	}
	clus.TakeClient(0)

	select {
	case <-time.After(integration.RequestWaitTimeout):
		t.Fatal("wc.Watch took too long")
	case <-donec:
	}
}

func TestWatchAfterClose(t *testing.T) {
	defer testutil.AfterTest(t)

	clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1})
	defer clus.Terminate(t)

	cli := clus.Client(0)
	clus.TakeClient(0)
	if err := cli.Close(); err != nil {
		t.Fatal(err)
	}

	donec := make(chan struct{})
	go func() {
		cli.Watch(context.TODO(), "foo")
		if err := cli.Close(); err != nil && err != context.Canceled {
			t.Fatalf("expected %v, got %v", context.Canceled, err)
		}
		close(donec)
	}()
	select {
	case <-time.After(integration.RequestWaitTimeout):
		t.Fatal("wc.Watch took too long")
	case <-donec:
	}
}

// TestWatchWithRequireLeader checks the watch channel closes when no leader.
func TestWatchWithRequireLeader(t *testing.T) {
	defer testutil.AfterTest(t)

	clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 3})
	defer clus.Terminate(t)

	// Put a key for the non-require leader watch to read as an event.
	// The watchers will be on member[0]; put key through member[0] to
	// ensure that it receives the update so watching after killing quorum
	// is guaranteed to have the key.
	liveClient := clus.Client(0)
	if _, err := liveClient.Put(context.TODO(), "foo", "bar"); err != nil {
		t.Fatal(err)
	}

	clus.Members[1].Stop(t)
	clus.Members[2].Stop(t)
	clus.Client(1).Close()
	clus.Client(2).Close()
	clus.TakeClient(1)
	clus.TakeClient(2)

	// wait for election timeout, then member[0] will not have a leader.
	tickDuration := 10 * time.Millisecond
	// existing streams need three elections before they're torn down; wait until 5 elections cycle
	// so proxy tests receive a leader loss event on its existing watch before creating a new watch.
	time.Sleep(time.Duration(5*clus.Members[0].ElectionTicks) * tickDuration)

	chLeader := liveClient.Watch(clientv3.WithRequireLeader(context.TODO()), "foo", clientv3.WithRev(1))
	chNoLeader := liveClient.Watch(context.TODO(), "foo", clientv3.WithRev(1))

	select {
	case resp, ok := <-chLeader:
		if !ok {
			t.Fatalf("expected %v watch channel, got closed channel", rpctypes.ErrNoLeader)
		}
		if resp.Err() != rpctypes.ErrNoLeader {
			t.Fatalf("expected %v watch response error, got %+v", rpctypes.ErrNoLeader, resp)
		}
	case <-time.After(integration.RequestWaitTimeout):
		t.Fatal("watch without leader took too long to close")
	}

	select {
	case resp, ok := <-chLeader:
		if ok {
			t.Fatalf("expected closed channel, got response %v", resp)
		}
	case <-time.After(integration.RequestWaitTimeout):
		t.Fatal("waited too long for channel to close")
	}

	if _, ok := <-chNoLeader; !ok {
		t.Fatalf("expected response, got closed channel")
	}
}

// TestWatchWithFilter checks that watch filtering works.
func TestWatchWithFilter(t *testing.T) {
	cluster := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1})
	defer cluster.Terminate(t)

	client := cluster.RandClient()
	ctx := context.Background()

	wcNoPut := client.Watch(ctx, "a", clientv3.WithFilterPut())
	wcNoDel := client.Watch(ctx, "a", clientv3.WithFilterDelete())

	if _, err := client.Put(ctx, "a", "abc"); err != nil {
		t.Fatal(err)
	}
	if _, err := client.Delete(ctx, "a"); err != nil {
		t.Fatal(err)
	}

	npResp := <-wcNoPut
	if len(npResp.Events) != 1 || npResp.Events[0].Type != clientv3.EventTypeDelete {
		t.Fatalf("expected delete event, got %+v", npResp.Events)
	}
	ndResp := <-wcNoDel
	if len(ndResp.Events) != 1 || ndResp.Events[0].Type != clientv3.EventTypePut {
		t.Fatalf("expected put event, got %+v", ndResp.Events)
	}

	select {
	case resp := <-wcNoPut:
		t.Fatalf("unexpected event on filtered put (%+v)", resp)
	case resp := <-wcNoDel:
		t.Fatalf("unexpected event on filtered delete (%+v)", resp)
	case <-time.After(100 * time.Millisecond):
	}
}

// TestWatchWithCreatedNotification checks that WithCreatedNotify returns a
// Created watch response.
func TestWatchWithCreatedNotification(t *testing.T) {
	cluster := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1})
	defer cluster.Terminate(t)

	client := cluster.RandClient()

	ctx := context.Background()

	createC := client.Watch(ctx, "a", clientv3.WithCreatedNotify())

	resp := <-createC

	if !resp.Created {
		t.Fatalf("expected created event, got %v", resp)
	}
}

// TestWatchWithCreatedNotificationDropConn ensures that
// a watcher with created notify does not post duplicate
// created events from disconnect.
func TestWatchWithCreatedNotificationDropConn(t *testing.T) {
	cluster := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1})
	defer cluster.Terminate(t)

	client := cluster.RandClient()

	wch := client.Watch(context.Background(), "a", clientv3.WithCreatedNotify())

	resp := <-wch

	if !resp.Created {
		t.Fatalf("expected created event, got %v", resp)
	}

	cluster.Members[0].DropConnections()

	// check watch channel doesn't post another watch response.
	select {
	case wresp := <-wch:
		t.Fatalf("got unexpected watch response: %+v\n", wresp)
	case <-time.After(time.Second):
		// watcher may not reconnect by the time it hits the select,
		// so it wouldn't have a chance to filter out the second create event
	}
}

// TestWatchCancelOnServer ensures client watcher cancels propagate back to the server.
func TestWatchCancelOnServer(t *testing.T) {
	cluster := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1})
	defer cluster.Terminate(t)

	client := cluster.RandClient()
	numWatches := 10

	// The grpc proxy starts watches to detect leadership after the proxy server
	// returns as started; to avoid racing on the proxy's internal watches, wait
	// until require leader watches get create responses to ensure the leadership
	// watches have started.
	for {
		ctx, cancel := context.WithCancel(clientv3.WithRequireLeader(context.TODO()))
		ww := client.Watch(ctx, "a", clientv3.WithCreatedNotify())
		wresp := <-ww
		cancel()
		if wresp.Err() == nil {
			break
		}
	}

	cancels := make([]context.CancelFunc, numWatches)
	for i := 0; i < numWatches; i++ {
		// force separate streams in client
		md := metadata.Pairs("some-key", fmt.Sprintf("%d", i))
		mctx := metadata.NewOutgoingContext(context.Background(), md)
		ctx, cancel := context.WithCancel(mctx)
		cancels[i] = cancel
		w := client.Watch(ctx, fmt.Sprintf("%d", i), clientv3.WithCreatedNotify())
		<-w
	}

	// get max watches; proxy tests have leadership watches, so total may be >numWatches
	maxWatches, _ := cluster.Members[0].Metric("etcd_debugging_mvcc_watcher_total")

	// cancel all and wait for cancels to propagate to etcd server
	for i := 0; i < numWatches; i++ {
		cancels[i]()
	}
	time.Sleep(time.Second)

	minWatches, err := cluster.Members[0].Metric("etcd_debugging_mvcc_watcher_total")
	if err != nil {
		t.Fatal(err)
	}

	maxWatchV, minWatchV := 0, 0
	n, serr := fmt.Sscanf(maxWatches+" "+minWatches, "%d %d", &maxWatchV, &minWatchV)
	if n != 2 || serr != nil {
		t.Fatalf("expected n=2 and err=nil, got n=%d and err=%v", n, serr)
	}

	if maxWatchV-minWatchV < numWatches {
		t.Fatalf("expected %d canceled watchers, got %d", numWatches, maxWatchV-minWatchV)
	}
}

// TestWatchOverlapContextCancel stresses the watcher stream teardown path by
// creating/canceling watchers to ensure that new watchers are not taken down
// by a torn down watch stream. The sort of race that's being detected:
//     1. create w1 using a cancelable ctx with %v as "ctx"
//     2. cancel ctx
//     3. watcher client begins tearing down watcher grpc stream since no more watchers
//     3. start creating watcher w2 using a new "ctx" (not canceled), attaches to old grpc stream
//     4. watcher client finishes tearing down stream on "ctx"
//     5. w2 comes back canceled
func TestWatchOverlapContextCancel(t *testing.T) {
	f := func(clus *integration.ClusterV3) {}
	testWatchOverlapContextCancel(t, f)
}

func TestWatchOverlapDropConnContextCancel(t *testing.T) {
	f := func(clus *integration.ClusterV3) {
		clus.Members[0].DropConnections()
	}
	testWatchOverlapContextCancel(t, f)
}

func testWatchOverlapContextCancel(t *testing.T, f func(*integration.ClusterV3)) {
	defer testutil.AfterTest(t)
	clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1})
	defer clus.Terminate(t)

	n := 100
	ctxs, ctxc := make([]context.Context, 5), make([]chan struct{}, 5)
	for i := range ctxs {
		// make unique stream
		md := metadata.Pairs("some-key", fmt.Sprintf("%d", i))
		ctxs[i] = metadata.NewOutgoingContext(context.Background(), md)
		// limits the maximum number of outstanding watchers per stream
		ctxc[i] = make(chan struct{}, 2)
	}

	// issue concurrent watches on "abc" with cancel
	cli := clus.RandClient()
	if _, err := cli.Put(context.TODO(), "abc", "def"); err != nil {
		t.Fatal(err)
	}
	ch := make(chan struct{}, n)
	for i := 0; i < n; i++ {
		go func() {
			defer func() { ch <- struct{}{} }()
			idx := rand.Intn(len(ctxs))
			ctx, cancel := context.WithCancel(ctxs[idx])
			ctxc[idx] <- struct{}{}
			wch := cli.Watch(ctx, "abc", clientv3.WithRev(1))
			f(clus)
			select {
			case _, ok := <-wch:
				if !ok {
					t.Fatalf("unexpected closed channel %p", wch)
				}
			// may take a second or two to reestablish a watcher because of
			// grpc back off policies for disconnects
			case <-time.After(5 * time.Second):
				t.Errorf("timed out waiting for watch on %p", wch)
			}
			// randomize how cancel overlaps with watch creation
			if rand.Intn(2) == 0 {
				<-ctxc[idx]
				cancel()
			} else {
				cancel()
				<-ctxc[idx]
			}
		}()
	}
	// join on watches
	for i := 0; i < n; i++ {
		select {
		case <-ch:
		case <-time.After(5 * time.Second):
			t.Fatalf("timed out waiting for completed watch")
		}
	}
}

// TestWatchCancelAndCloseClient ensures that canceling a watcher then immediately
// closing the client does not return a client closing error.
func TestWatchCancelAndCloseClient(t *testing.T) {
	defer testutil.AfterTest(t)
	clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1})
	defer clus.Terminate(t)
	cli := clus.Client(0)
	ctx, cancel := context.WithCancel(context.Background())
	wch := cli.Watch(ctx, "abc")
	donec := make(chan struct{})
	go func() {
		defer close(donec)
		select {
		case wr, ok := <-wch:
			if ok {
				t.Fatalf("expected closed watch after cancel(), got resp=%+v err=%v", wr, wr.Err())
			}
		case <-time.After(5 * time.Second):
			t.Fatal("timed out waiting for closed channel")
		}
	}()
	cancel()
	if err := cli.Close(); err != nil {
		t.Fatal(err)
	}
	<-donec
	clus.TakeClient(0)
}

// TestWatchStressResumeClose establishes a bunch of watchers, disconnects
// to put them in resuming mode, cancels them so some resumes by cancel fail,
// then closes the watcher interface to ensure correct clean up.
func TestWatchStressResumeClose(t *testing.T) {
	defer testutil.AfterTest(t)
	clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1})
	defer clus.Terminate(t)
	cli := clus.Client(0)

	ctx, cancel := context.WithCancel(context.Background())
	// add more watches than can be resumed before the cancel
	wchs := make([]clientv3.WatchChan, 2000)
	for i := range wchs {
		wchs[i] = cli.Watch(ctx, "abc")
	}
	clus.Members[0].DropConnections()
	cancel()
	if err := cli.Close(); err != nil {
		t.Fatal(err)
	}
	clus.TakeClient(0)
}

// TestWatchCancelDisconnected ensures canceling a watcher works when
// its grpc stream is disconnected / reconnecting.
func TestWatchCancelDisconnected(t *testing.T) {
	defer testutil.AfterTest(t)
	clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1})
	defer clus.Terminate(t)
	cli := clus.Client(0)
	ctx, cancel := context.WithCancel(context.Background())
	// add more watches than can be resumed before the cancel
	wch := cli.Watch(ctx, "abc")
	clus.Members[0].Stop(t)
	cancel()
	select {
	case <-wch:
	case <-time.After(time.Second):
		t.Fatal("took too long to cancel disconnected watcher")
	}
}