Merge pull request #4043 from gyuho/storage_range_all_unsynced

storage: range all unsynced at once
release-2.3
Gyu-Ho Lee 2015-12-28 14:45:40 -07:00
commit 6c5dc28d0f
3 changed files with 86 additions and 359 deletions

@@ -189,65 +189,6 @@ func (s *store) TxnDeleteRange(txnID int64, key, end []byte) (n, rev int64, err
return n, rev, nil
}
// RangeHistory ranges the history from key to end starting from startRev.
// If `end` is nil, the request only observes the events on key.
// If `end` is not nil, it observes the events on key range [key, range_end).
// Limit limits the number of events returned.
// If startRev <= 0, RangeHistory returns events from the beginning of uncompacted history.
//
// If the required start rev is compacted, ErrCompacted will be returned.
// If the required start rev has not happened, ErrFutureRev will be returned.
//
// RangeHistory returns a slice of revision bytes and the key-values that satisfy
// the requirement (0 <= n <= limit).
// If not all of the history in the revision range has happened yet, it returns
// immediately with what is available.
// It also returns nextRev which indicates the start revision to use for the
// following RangeHistory call. The nextRev could be smaller than one past the
// current revision if the store has not progressed that far or the call hits
// the event limit.
//
// TODO: return byte slices instead of keyValues to avoid meaningless encode and decode.
// This also helps return raw (key, val) pairs directly, keeping the API consistent.
func (s *store) RangeHistory(key, end []byte, limit, startRev int64) (revbs [][]byte, kvs []storagepb.KeyValue, nextRev int64, err error) {
s.mu.Lock()
defer s.mu.Unlock()
if startRev > 0 && startRev <= s.compactMainRev {
return nil, nil, 0, ErrCompacted
}
if startRev > s.currentRev.main {
return nil, nil, 0, ErrFutureRev
}
revs := s.kvindex.RangeSince(key, end, startRev)
if len(revs) == 0 {
return nil, nil, s.currentRev.main + 1, nil
}
tx := s.b.BatchTx()
tx.Lock()
defer tx.Unlock()
// fetch events from the backend using revisions
for _, rev := range revs {
start, end := revBytesRange(rev)
ks, vs := tx.UnsafeRange(keyBucketName, start, end, 0)
if len(vs) != 1 {
log.Fatalf("storage: range cannot find rev (%d,%d)", rev.main, rev.sub)
}
var kv storagepb.KeyValue
if err := kv.Unmarshal(vs[0]); err != nil {
log.Fatalf("storage: cannot unmarshal event: %v", err)
}
revbs = append(revbs, ks[0])
kvs = append(kvs, kv)
if limit > 0 && len(kvs) >= int(limit) {
return revbs, kvs, rev.main + 1, nil
}
}
return revbs, kvs, s.currentRev.main + 1, nil
}
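For readers following the API removed above: a minimal, hypothetical sketch of how a caller could page through uncompacted history with RangeHistory, feeding the returned nextRev back in as the next start revision. The rangeAllHistory helper and its batch size are illustrative assumptions, not code from this change.

// rangeAllHistory is a hypothetical helper (not part of this change) that pages
// through uncompacted history for [key, end) in fixed-size batches, using the
// returned nextRev as the start revision of the following RangeHistory call.
func rangeAllHistory(s *store, key, end []byte) ([]storagepb.KeyValue, error) {
	const batch = 100
	var out []storagepb.KeyValue
	startRev := int64(1)
	for {
		_, kvs, nextRev, err := s.RangeHistory(key, end, batch, startRev)
		if err == ErrFutureRev {
			// startRev is already past the store's current revision: nothing left
			return out, nil
		}
		if err != nil {
			// e.g. ErrCompacted when startRev has been compacted away
			return nil, err
		}
		out = append(out, kvs...)
		if int64(len(kvs)) < batch {
			// the limit was not hit, so this call covered everything up to now
			return out, nil
		}
		startRev = nextRev
	}
}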
func (s *store) Compact(rev int64) error {
s.mu.Lock()
defer s.mu.Unlock()

@@ -262,73 +262,6 @@ func TestStoreDeleteRange(t *testing.T) {
}
}
func TestStoreRangeHistory(t *testing.T) {
key := newTestKeyBytes(revision{2, 0}, false)
kv := storagepb.KeyValue{
Key: []byte("foo"),
Value: []byte("bar"),
CreateRevision: 1,
ModRevision: 2,
Version: 1,
}
kvb, err := kv.Marshal()
if err != nil {
t.Fatal(err)
}
currev := revision{2, 0}
tests := []struct {
idxr indexRangeEventsResp
r rangeResp
}{
{
indexRangeEventsResp{[]revision{{2, 0}}},
rangeResp{[][]byte{key}, [][]byte{kvb}},
},
{
indexRangeEventsResp{[]revision{{2, 0}, {3, 0}}},
rangeResp{[][]byte{key}, [][]byte{kvb}},
},
}
for i, tt := range tests {
s := newFakeStore()
b := s.b.(*fakeBackend)
fi := s.kvindex.(*fakeIndex)
s.currentRev = currev
fi.indexRangeEventsRespc <- tt.idxr
b.tx.rangeRespc <- tt.r
keys, kvs, _, err := s.RangeHistory([]byte("foo"), []byte("goo"), 1, 1)
if err != nil {
t.Errorf("#%d: err = %v, want nil", i, err)
}
if w := [][]byte{key}; !reflect.DeepEqual(keys, w) {
t.Errorf("#%d: keys = %+v, want %+v", i, keys, w)
}
if w := []storagepb.KeyValue{kv}; !reflect.DeepEqual(kvs, w) {
t.Errorf("#%d: kvs = %+v, want %+v", i, kvs, w)
}
wact := []testutil.Action{
{"rangeEvents", []interface{}{[]byte("foo"), []byte("goo"), int64(1)}},
}
if g := fi.Action(); !reflect.DeepEqual(g, wact) {
t.Errorf("#%d: index action = %+v, want %+v", i, g, wact)
}
wstart, wend := revBytesRange(tt.idxr.revs[0])
wact = []testutil.Action{
{"range", []interface{}{keyBucketName, wstart, wend, int64(0)}},
}
if g := b.tx.Action(); !reflect.DeepEqual(g, wact) {
t.Errorf("#%d: tx action = %+v, want %+v", i, g, wact)
}
if s.currentRev != currev {
t.Errorf("#%d: current rev = %+v, want %+v", i, s.currentRev, currev)
}
}
}
func TestStoreCompact(t *testing.T) {
s := newFakeStore()
b := s.b.(*fakeBackend)
@@ -420,202 +353,6 @@ func TestStoreRestore(t *testing.T) {
}
}
// tests that the end parameter works as expected
func TestStoreRangeHistoryEnd(t *testing.T) {
s := newStore(tmpPath)
defer cleanup(s, tmpPath)
s.Put([]byte("foo"), []byte("bar"))
s.Put([]byte("foo1"), []byte("bar1"))
s.Put([]byte("foo2"), []byte("bar2"))
keys := [][]byte{
newTestKeyBytes(revision{1, 0}, false),
newTestKeyBytes(revision{2, 0}, false),
newTestKeyBytes(revision{3, 0}, false),
}
kvs := []storagepb.KeyValue{
{Key: []byte("foo"), Value: []byte("bar"), CreateRevision: 1, ModRevision: 1, Version: 1},
{Key: []byte("foo1"), Value: []byte("bar1"), CreateRevision: 2, ModRevision: 2, Version: 1},
{Key: []byte("foo2"), Value: []byte("bar2"), CreateRevision: 3, ModRevision: 3, Version: 1},
}
tests := []struct {
key, end []byte
wkeys [][]byte
wkvs []storagepb.KeyValue
}{
// get no keys
{
[]byte("doo"), []byte("foo"),
nil, nil,
},
// get no keys when key == end
{
[]byte("foo"), []byte("foo"),
nil, nil,
},
// get no keys when ranging single key
{
[]byte("doo"), nil,
nil, nil,
},
// get all keys
{
[]byte("foo"), []byte("foo3"),
keys, kvs,
},
// get partial keys
{
[]byte("foo"), []byte("foo1"),
keys[:1], kvs[:1],
},
// get single key
{
[]byte("foo"), nil,
keys[:1], kvs[:1],
},
}
for i, tt := range tests {
keys, kvs, rev, err := s.RangeHistory(tt.key, tt.end, 0, 1)
if err != nil {
t.Fatal(err)
}
if rev != 4 {
t.Errorf("#%d: rev = %d, want %d", i, rev, 4)
}
if !reflect.DeepEqual(keys, tt.wkeys) {
t.Errorf("#%d: actions = %+v, want %+v", i, keys, tt.wkeys)
}
if !reflect.DeepEqual(kvs, tt.wkvs) {
t.Errorf("#%d: kvs = %+v, want %+v", i, kvs, tt.wkvs)
}
}
}
func TestStoreRangeHistoryRev(t *testing.T) {
s := newStore(tmpPath)
defer cleanup(s, tmpPath)
s.Put([]byte("foo"), []byte("bar"))
s.DeleteRange([]byte("foo"), nil)
s.Put([]byte("foo"), []byte("bar"))
s.Put([]byte("unrelated"), []byte("unrelated"))
keys := [][]byte{
newTestKeyBytes(revision{1, 0}, false),
newTestKeyBytes(revision{2, 0}, true),
newTestKeyBytes(revision{3, 0}, false),
}
kvs := []storagepb.KeyValue{
{Key: []byte("foo"), Value: []byte("bar"), CreateRevision: 1, ModRevision: 1, Version: 1},
{Key: []byte("foo")},
{Key: []byte("foo"), Value: []byte("bar"), CreateRevision: 3, ModRevision: 3, Version: 1},
}
tests := []struct {
start int64
wkeys [][]byte
wkvs []storagepb.KeyValue
wnext int64
}{
{0, keys, kvs, 5},
{1, keys, kvs, 5},
{3, keys[2:], kvs[2:], 5},
}
for i, tt := range tests {
keys, kvs, next, err := s.RangeHistory([]byte("foo"), nil, 0, tt.start)
if err != nil {
t.Fatal(err)
}
if !reflect.DeepEqual(keys, tt.wkeys) {
t.Errorf("#%d: acts = %+v, want %+v", i, keys, tt.wkeys)
}
if !reflect.DeepEqual(kvs, tt.wkvs) {
t.Errorf("#%d: kvs = %+v, want %+v", i, kvs, tt.wkvs)
}
if next != tt.wnext {
t.Errorf("#%d: next = %d, want %d", i, next, tt.wnext)
}
}
}
func TestStoreRangeHistoryBad(t *testing.T) {
s := newStore(tmpPath)
defer cleanup(s, tmpPath)
s.Put([]byte("foo"), []byte("bar"))
s.Put([]byte("foo"), []byte("bar1"))
s.Put([]byte("foo"), []byte("bar2"))
if err := s.Compact(3); err != nil {
t.Fatalf("compact error (%v)", err)
}
tests := []struct {
rev int64
werr error
}{
{1, ErrCompacted},
{2, ErrCompacted},
{3, ErrCompacted},
{4, ErrFutureRev},
{10, ErrFutureRev},
}
for i, tt := range tests {
_, _, _, err := s.RangeHistory([]byte("foo"), nil, 0, tt.rev)
if err != tt.werr {
t.Errorf("#%d: error = %v, want %v", i, err, tt.werr)
}
}
}
func TestStoreRangeHistoryLimit(t *testing.T) {
s := newStore(tmpPath)
defer cleanup(s, tmpPath)
s.Put([]byte("foo"), []byte("bar"))
s.DeleteRange([]byte("foo"), nil)
s.Put([]byte("foo"), []byte("bar"))
keys := [][]byte{
newTestKeyBytes(revision{1, 0}, false),
newTestKeyBytes(revision{2, 0}, true),
newTestKeyBytes(revision{3, 0}, false),
}
kvs := []storagepb.KeyValue{
{Key: []byte("foo"), Value: []byte("bar"), CreateRevision: 1, ModRevision: 1, Version: 1},
{Key: []byte("foo")},
{Key: []byte("foo"), Value: []byte("bar"), CreateRevision: 3, ModRevision: 3, Version: 1},
}
tests := []struct {
limit int64
wkeys [][]byte
wkvs []storagepb.KeyValue
}{
// no limit
{-1, keys, kvs},
// no limit
{0, keys, kvs},
{1, keys[:1], kvs[:1]},
{2, keys[:2], kvs[:2]},
{3, keys, kvs},
{100, keys, kvs},
}
for i, tt := range tests {
keys, kvs, _, err := s.RangeHistory([]byte("foo"), nil, tt.limit, 1)
if err != nil {
t.Fatalf("#%d: range error (%v)", i, err)
}
if !reflect.DeepEqual(keys, tt.wkeys) {
t.Errorf("#%d: acts = %+v, want %+v", i, keys, tt.wkeys)
}
if !reflect.DeepEqual(kvs, tt.wkvs) {
t.Errorf("#%d: kvs = %+v, want %+v", i, kvs, tt.wkvs)
}
}
}
func TestRestoreContinueUnfinishedCompaction(t *testing.T) {
s0 := newStore(tmpPath)
defer os.Remove(tmpPath)

@@ -17,6 +17,7 @@ package storage
import (
"fmt"
"log"
"math"
"sync"
"time"
@@ -241,57 +242,105 @@ func (s *watchableStore) syncWatchingsLoop() {
}
}
// syncWatchings syncs the watchings in the unsynced map.
// syncWatchings periodically syncs unsynced watchings by iterating over all
// unsynced watchings to find the minimum revision within their key ranges,
// skipping any watching whose current revision is behind the compact revision
// of the store. It then uses this minimum revision to fetch all key-value
// pairs at once and sends those events to the watchings.
func (s *watchableStore) syncWatchings() {
_, curRev, _ := s.store.Range(nil, nil, 0, 0)
s.store.mu.Lock()
defer s.store.mu.Unlock()
if len(s.unsynced) == 0 {
return
}
// in order to find key-value pairs from unsynced watchings, we need to find
// the minimum revision index; these revisions can then be used to query the
// backend store for key-value pairs
minRev := int64(math.MaxInt64)
curRev := s.store.currentRev.main
compactionRev := s.store.compactMainRev
// TODO: change the unsynced struct type to match this
keyToUnsynced := make(map[string]map[*watching]struct{})
for w := range s.unsynced {
var end []byte
if w.prefix {
end = make([]byte, len(w.key))
copy(end, w.key)
end[len(w.key)-1]++
k := string(w.key)
if w.cur > curRev {
panic("watching current revision should not exceed current revision")
}
limit := cap(w.ch) - len(w.ch)
// the channel is full, try it in the next round
if limit == 0 {
continue
}
revbs, kvs, nextRev, err := s.store.RangeHistory(w.key, end, int64(limit), w.cur)
if err != nil {
// TODO: send error event to watching
if w.cur < compactionRev {
// TODO: return a compacted error to that watching instead of
// just removing it silently from unsynced.
delete(s.unsynced, w)
continue
}
// push events to the channel
for i, kv := range kvs {
var evt storagepb.Event_EventType
switch {
case isTombstone(revbs[i]):
evt = storagepb.DELETE
default:
evt = storagepb.PUT
}
w.ch <- storagepb.Event{
Type: evt,
Kv: &kv,
WatchID: w.id,
}
pendingEventsGauge.Inc()
if minRev >= w.cur {
minRev = w.cur
}
// switch to tracking future events if needed
if nextRev > curRev {
k := string(w.key)
if _, ok := keyToUnsynced[k]; !ok {
keyToUnsynced[k] = make(map[*watching]struct{})
}
keyToUnsynced[k][w] = struct{}{}
}
minBytes, maxBytes := newRevBytes(), newRevBytes()
revToBytes(revision{main: minRev}, minBytes)
revToBytes(revision{main: curRev + 1}, maxBytes)
// UnsafeRange returns keys and values. In boltdb, keys are revisions and
// values are the actual key-value pairs stored in the backend.
tx := s.store.b.BatchTx()
tx.Lock()
ks, vs := tx.UnsafeRange(keyBucketName, minBytes, maxBytes, 0)
tx.Unlock()
for i, v := range vs {
var kv storagepb.KeyValue
if err := kv.Unmarshal(v); err != nil {
log.Panicf("storage: cannot unmarshal event: %v", err)
}
k := string(kv.Key)
wm, ok := keyToUnsynced[k]
if !ok {
continue
}
var ev storagepb.Event
switch {
case isTombstone(ks[i]):
ev.Type = storagepb.DELETE
default:
ev.Type = storagepb.PUT
}
ev.Kv = &kv
for w := range wm {
ev.WatchID = w.id
select {
case w.ch <- ev:
pendingEventsGauge.Inc()
default:
// TODO: handle the full unsynced watchings.
// continue to process other watchings for now; the full ones
// will be processed next time and hopefully will not be full then.
continue
}
if err := unsafeAddWatching(&s.synced, k, w); err != nil {
log.Panicf("error unsafeAddWatching (%v) for key %s", err, k)
}
delete(s.unsynced, w)
continue
}
// put it back to try it in the next round
w.cur = nextRev
}
slowWatchingGauge.Set(float64(len(s.unsynced)))
}
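To make the batching strategy described above easier to follow in isolation, here is a schematic, hypothetical sketch with simplified stand-in types (watchingT, eventT, and the rangeBackend callback are illustrative, not identifiers from this package): one pass computes the minimum unsynced revision and groups watchings by key, a single backend range then covers [minRev, curRev+1), and events are fanned out per key.

// Schematic sketch of the "range all unsynced at once" idea; the types and the
// rangeBackend helper are illustrative stand-ins, not part of this package.
type watchingT struct {
	key string
	cur int64 // next revision this watching still needs
	ch  chan eventT
}

type eventT struct {
	key string
	rev int64
}

func syncAllAtOnce(unsynced map[*watchingT]struct{}, curRev, compactRev int64,
	rangeBackend func(minRev, maxRev int64) []eventT) {
	if len(unsynced) == 0 {
		return
	}
	// 1. one pass over the unsynced watchings: find the minimum revision still
	//    needed and group watchings by key, dropping those behind the compaction point.
	minRev := curRev + 1
	byKey := make(map[string][]*watchingT)
	for w := range unsynced {
		if w.cur < compactRev {
			delete(unsynced, w) // compacted away; a real implementation would report an error
			continue
		}
		if w.cur < minRev {
			minRev = w.cur
		}
		byKey[w.key] = append(byKey[w.key], w)
	}
	// 2. a single backend range over [minRev, curRev+1) instead of one range per watching.
	for _, ev := range rangeBackend(minRev, curRev+1) {
		// 3. fan the event out to every watching interested in this key.
		for _, w := range byKey[ev.key] {
			select {
			case w.ch <- ev:
			default: // channel full; leave it unsynced for the next round
			}
		}
	}
}

The real syncWatchings above additionally moves successfully synced watchings into the synced map, skips watchings with full channels so they are retried next round, and tracks the remaining count with slowWatchingGauge; the sketch only captures the single-range idea.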