etcd/mvcc/index.go

234 lines
5.3 KiB
Go
Raw Normal View History

2016-05-13 06:50:33 +03:00
// Copyright 2015 The etcd Authors
2015-09-15 23:54:11 +03:00
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
2016-04-25 22:32:58 +03:00
package mvcc
2015-05-15 03:53:41 +03:00
import (
"sort"
2015-05-15 03:53:41 +03:00
"sync"
2016-03-23 03:10:28 +03:00
"github.com/google/btree"
2015-05-15 03:53:41 +03:00
)
type index interface {
2015-08-20 18:39:07 +03:00
Get(key []byte, atRev int64) (rev, created revision, ver int64, err error)
Range(key, end []byte, atRev int64) ([][]byte, []revision)
Put(key []byte, rev revision)
Tombstone(key []byte, rev revision) error
RangeSince(key, end []byte, rev int64) []revision
2015-08-20 18:39:07 +03:00
Compact(rev int64) map[revision]struct{}
Keep(rev int64) map[revision]struct{}
Equal(b index) bool
Insert(ki *keyIndex)
KeyIndex(ki *keyIndex) *keyIndex
2015-05-22 18:11:43 +03:00
}
2015-05-15 03:53:41 +03:00
// treeIndex is an index whose entries (*keyIndex) live in a B-tree.
type treeIndex struct {
	// RWMutex is embedded; the methods that take it use it to guard tree.
	sync.RWMutex

	// tree holds one *keyIndex item per key.
	tree *btree.BTree
}
// newTreeIndex returns an empty treeIndex whose B-tree is created with
// degree 32.
func newTreeIndex() index {
	const degree = 32

	ti := new(treeIndex)
	ti.tree = btree.New(degree)
	return ti
}
2015-08-20 18:39:07 +03:00
func (ti *treeIndex) Put(key []byte, rev revision) {
2015-05-15 03:53:41 +03:00
keyi := &keyIndex{key: key}
ti.Lock()
defer ti.Unlock()
item := ti.tree.Get(keyi)
if item == nil {
2015-05-31 08:56:33 +03:00
keyi.put(rev.main, rev.sub)
2015-05-15 03:53:41 +03:00
ti.tree.ReplaceOrInsert(keyi)
return
}
okeyi := item.(*keyIndex)
2015-05-31 08:56:33 +03:00
okeyi.put(rev.main, rev.sub)
2015-05-15 03:53:41 +03:00
}
2015-08-20 18:39:07 +03:00
func (ti *treeIndex) Get(key []byte, atRev int64) (modified, created revision, ver int64, err error) {
2015-05-15 03:53:41 +03:00
keyi := &keyIndex{key: key}
ti.RLock()
defer ti.RUnlock()
if keyi = ti.keyIndex(keyi); keyi == nil {
2015-08-20 18:39:07 +03:00
return revision{}, revision{}, 0, ErrRevisionNotFound
2015-05-15 03:53:41 +03:00
}
2015-05-31 08:56:33 +03:00
return keyi.get(atRev)
2015-05-15 03:53:41 +03:00
}
// KeyIndex returns the entry stored in the tree that matches keyi, or nil
// if there is none. It holds the read lock for the duration of the lookup.
func (ti *treeIndex) KeyIndex(keyi *keyIndex) *keyIndex {
	ti.RLock()
	defer ti.RUnlock()

	found := ti.keyIndex(keyi)
	return found
}
// keyIndex is the lock-free lookup shared by Get and KeyIndex; it takes no
// lock itself. It returns nil when the tree has no item matching keyi.
func (ti *treeIndex) keyIndex(keyi *keyIndex) *keyIndex {
	item := ti.tree.Get(keyi)
	if item == nil {
		return nil
	}
	return item.(*keyIndex)
}
2015-08-20 18:39:07 +03:00
func (ti *treeIndex) Range(key, end []byte, atRev int64) (keys [][]byte, revs []revision) {
2015-05-22 18:11:43 +03:00
if end == nil {
2015-06-29 22:47:17 +03:00
rev, _, _, err := ti.Get(key, atRev)
2015-05-22 18:11:43 +03:00
if err != nil {
2015-05-31 08:56:33 +03:00
return nil, nil
2015-05-22 18:11:43 +03:00
}
2015-08-20 18:39:07 +03:00
return [][]byte{key}, []revision{rev}
2015-05-22 18:11:43 +03:00
}
keyi := &keyIndex{key: key}
endi := &keyIndex{key: end}
ti.RLock()
defer ti.RUnlock()
ti.tree.AscendGreaterOrEqual(keyi, func(item btree.Item) bool {
if len(endi.key) > 0 && !item.Less(endi) {
2015-05-22 18:11:43 +03:00
return false
}
curKeyi := item.(*keyIndex)
2015-06-29 22:47:17 +03:00
rev, _, _, err := curKeyi.get(atRev)
2015-05-22 18:11:43 +03:00
if err != nil {
return true
}
2015-05-31 08:56:33 +03:00
revs = append(revs, rev)
keys = append(keys, curKeyi.key)
2015-05-22 18:11:43 +03:00
return true
})
2015-05-31 08:56:33 +03:00
return keys, revs
2015-05-22 18:11:43 +03:00
}
2015-08-20 18:39:07 +03:00
func (ti *treeIndex) Tombstone(key []byte, rev revision) error {
2015-05-15 03:53:41 +03:00
keyi := &keyIndex{key: key}
ti.Lock()
defer ti.Unlock()
item := ti.tree.Get(keyi)
if item == nil {
2015-08-20 18:39:07 +03:00
return ErrRevisionNotFound
2015-05-15 03:53:41 +03:00
}
ki := item.(*keyIndex)
return ki.tombstone(rev.main, rev.sub)
2015-05-15 03:53:41 +03:00
}
// RangeSince returns all revisions at or after rev for the keys from key
// (inclusive) to end (exclusive). For a range scan the result is sorted in
// revision order.
//
// A nil end restricts the query to key alone and returns that entry's
// since(rev) result as is (nil when the key has no entry). A non-nil but
// empty end places no upper bound on the scan.
func (ti *treeIndex) RangeSince(key, end []byte, rev int64) []revision {
	ti.RLock()
	defer ti.RUnlock()

	start := &keyIndex{key: key}

	if end != nil {
		limit := &keyIndex{key: end}
		bounded := len(limit.key) > 0

		var collected []revision
		ti.tree.AscendGreaterOrEqual(start, func(item btree.Item) bool {
			if bounded && !item.Less(limit) {
				return false
			}
			collected = append(collected, item.(*keyIndex).since(rev)...)
			return true
		})
		// Revisions were gathered key by key; order them by revision.
		sort.Sort(revisions(collected))
		return collected
	}

	// Single-key query.
	item := ti.tree.Get(start)
	if item == nil {
		return nil
	}
	return item.(*keyIndex).since(rev)
}
2015-08-20 18:39:07 +03:00
func (ti *treeIndex) Compact(rev int64) map[revision]struct{} {
available := make(map[revision]struct{})
2016-04-11 09:16:56 +03:00
var emptyki []*keyIndex
2016-05-21 08:30:50 +03:00
plog.Printf("store.index: compact %d", rev)
2015-05-15 03:55:54 +03:00
// TODO: do not hold the lock for long time?
// This is probably OK. Compacting 10M keys takes O(10ms).
2015-05-15 03:53:41 +03:00
ti.Lock()
defer ti.Unlock()
2015-05-31 08:56:33 +03:00
ti.tree.Ascend(compactIndex(rev, available, &emptyki))
2015-05-15 03:53:41 +03:00
for _, ki := range emptyki {
item := ti.tree.Delete(ki)
if item == nil {
2016-05-21 08:30:50 +03:00
plog.Panic("store.index: unexpected delete failure during compaction")
2015-05-15 03:53:41 +03:00
}
}
return available
}
// Keep finds all revisions to be kept for a compaction at the given rev.
// It walks every entry under the read lock and lets each one add to the
// returned set through its keep method.
func (ti *treeIndex) Keep(rev int64) map[revision]struct{} {
	kept := make(map[revision]struct{})
	collect := func(item btree.Item) bool {
		item.(*keyIndex).keep(rev, kept)
		return true
	}

	ti.RLock()
	defer ti.RUnlock()

	ti.tree.Ascend(collect)
	return kept
}
2015-08-20 18:39:07 +03:00
func compactIndex(rev int64, available map[revision]struct{}, emptyki *[]*keyIndex) func(i btree.Item) bool {
2015-05-15 03:53:41 +03:00
return func(i btree.Item) bool {
keyi := i.(*keyIndex)
2015-05-31 08:56:33 +03:00
keyi.compact(rev, available)
2015-05-15 03:53:41 +03:00
if keyi.isEmpty() {
*emptyki = append(*emptyki, keyi)
}
return true
}
}
// Equal reports whether bi holds exactly the same entries as ti.
//
// Only another *treeIndex can compare equal; any other index
// implementation yields false. Two indexes are equal when their trees have
// the same number of items and every entry of ti has a matching entry in
// bi for which keyIndex.equal is true.
//
// No locks are taken here, so the caller must ensure that neither index is
// modified while the comparison runs.
func (ti *treeIndex) Equal(bi index) bool {
	b, ok := bi.(*treeIndex)
	if !ok {
		return false
	}
	if ti.tree.Len() != b.tree.Len() {
		return false
	}

	equal := true
	ti.tree.Ascend(func(item btree.Item) bool {
		// Same length does not imply same key set: Get returns nil when
		// b has no such key, and asserting on that nil would panic.
		other := b.tree.Get(item)
		if other == nil {
			equal = false
			return false
		}
		aki := item.(*keyIndex)
		bki := other.(*keyIndex)
		if !aki.equal(bki) {
			equal = false
			return false
		}
		return true
	})
	return equal
}
// Insert stores ki in the tree under the write lock, replacing any entry
// the tree already considers equal to it. The replaced item, if any, is
// discarded.
func (ti *treeIndex) Insert(ki *keyIndex) {
	ti.Lock()
	defer ti.Unlock()

	_ = ti.tree.ReplaceOrInsert(ki)
}