Merge pull request #2894 from xiang90/refactor_keyIndex

Storage initial compaction
release-2.1
Xiang Li 2015-06-05 12:38:11 -07:00
commit 976ac65c86
10 changed files with 629 additions and 454 deletions

View File

@ -13,7 +13,7 @@ type BatchTx interface {
Unlock() Unlock()
UnsafeCreateBucket(name []byte) UnsafeCreateBucket(name []byte)
UnsafePut(bucketName []byte, key []byte, value []byte) UnsafePut(bucketName []byte, key []byte, value []byte)
UnsafeRange(bucketName []byte, key, endKey []byte, limit int64) [][]byte UnsafeRange(bucketName []byte, key, endKey []byte, limit int64) (keys [][]byte, vals [][]byte)
UnsafeDelete(bucketName []byte, key []byte) UnsafeDelete(bucketName []byte, key []byte)
Commit() Commit()
} }
@ -49,28 +49,27 @@ func (t *batchTx) UnsafePut(bucketName []byte, key []byte, value []byte) {
} }
// before calling UnsafeRange, the caller MUST hold the lock on txn. // before calling UnsafeRange, the caller MUST hold the lock on txn.
func (t *batchTx) UnsafeRange(bucketName []byte, key, endKey []byte, limit int64) [][]byte { func (t *batchTx) UnsafeRange(bucketName []byte, key, endKey []byte, limit int64) (keys [][]byte, vs [][]byte) {
bucket := t.tx.Bucket(bucketName) bucket := t.tx.Bucket(bucketName)
if bucket == nil { if bucket == nil {
log.Fatalf("storage: bucket %s does not exist", string(bucketName)) log.Fatalf("storage: bucket %s does not exist", string(bucketName))
} }
var vs [][]byte
if len(endKey) == 0 { if len(endKey) == 0 {
if v := bucket.Get(key); v == nil { if v := bucket.Get(key); v == nil {
return vs return keys, vs
} else { } else {
return append(vs, v) return append(keys, key), append(vs, v)
} }
} }
c := bucket.Cursor() c := bucket.Cursor()
for ck, cv := c.Seek(key); ck != nil && bytes.Compare(ck, endKey) < 0; ck, cv = c.Next() { for ck, cv := c.Seek(key); ck != nil && bytes.Compare(ck, endKey) < 0; ck, cv = c.Next() {
vs = append(vs, cv) vs = append(vs, cv)
keys = append(keys, ck)
} }
return vs return keys, vs
} }
// before calling UnsafeDelete, the caller MUST hold the lock on txn. // before calling UnsafeDelete, the caller MUST hold the lock on txn.

View File

@ -8,16 +8,11 @@ import (
) )
type index interface { type index interface {
Get(key []byte, atIndex uint64) (index uint64, err error) Get(key []byte, atRev int64) (rev reversion, err error)
Range(key, end []byte, atIndex uint64) []kipair Range(key, end []byte, atRev int64) ([][]byte, []reversion)
Put(key []byte, index uint64) Put(key []byte, rev reversion)
Tombstone(key []byte, index uint64) error Tombstone(key []byte, rev reversion) error
Compact(index uint64) map[uint64]struct{} Compact(rev int64) map[reversion]struct{}
}
type kipair struct {
index uint64
key []byte
} }
type treeIndex struct { type treeIndex struct {
@ -31,47 +26,46 @@ func newTreeIndex() index {
} }
} }
func (ti *treeIndex) Put(key []byte, index uint64) { func (ti *treeIndex) Put(key []byte, rev reversion) {
keyi := &keyIndex{key: key} keyi := &keyIndex{key: key}
ti.Lock() ti.Lock()
defer ti.Unlock() defer ti.Unlock()
item := ti.tree.Get(keyi) item := ti.tree.Get(keyi)
if item == nil { if item == nil {
keyi.put(index) keyi.put(rev.main, rev.sub)
ti.tree.ReplaceOrInsert(keyi) ti.tree.ReplaceOrInsert(keyi)
return return
} }
okeyi := item.(*keyIndex) okeyi := item.(*keyIndex)
okeyi.put(index) okeyi.put(rev.main, rev.sub)
} }
func (ti *treeIndex) Get(key []byte, atIndex uint64) (index uint64, err error) { func (ti *treeIndex) Get(key []byte, atRev int64) (rev reversion, err error) {
keyi := &keyIndex{key: key} keyi := &keyIndex{key: key}
ti.RLock() ti.RLock()
defer ti.RUnlock() defer ti.RUnlock()
item := ti.tree.Get(keyi) item := ti.tree.Get(keyi)
if item == nil { if item == nil {
return 0, ErrIndexNotFound return reversion{}, ErrReversionNotFound
} }
keyi = item.(*keyIndex) keyi = item.(*keyIndex)
return keyi.get(atIndex) return keyi.get(atRev)
} }
func (ti *treeIndex) Range(key, end []byte, atIndex uint64) []kipair { func (ti *treeIndex) Range(key, end []byte, atRev int64) (keys [][]byte, revs []reversion) {
if end == nil { if end == nil {
index, err := ti.Get(key, atIndex) rev, err := ti.Get(key, atRev)
if err != nil { if err != nil {
return nil return nil, nil
} }
return []kipair{{key: key, index: index}} return [][]byte{key}, []reversion{rev}
} }
keyi := &keyIndex{key: key} keyi := &keyIndex{key: key}
endi := &keyIndex{key: end} endi := &keyIndex{key: end}
pairs := make([]kipair, 0)
ti.RLock() ti.RLock()
defer ti.RUnlock() defer ti.RUnlock()
@ -81,41 +75,42 @@ func (ti *treeIndex) Range(key, end []byte, atIndex uint64) []kipair {
return false return false
} }
curKeyi := item.(*keyIndex) curKeyi := item.(*keyIndex)
index, err := curKeyi.get(atIndex) rev, err := curKeyi.get(atRev)
if err != nil { if err != nil {
return true return true
} }
pairs = append(pairs, kipair{index, curKeyi.key}) revs = append(revs, rev)
keys = append(keys, curKeyi.key)
return true return true
}) })
return pairs return keys, revs
} }
func (ti *treeIndex) Tombstone(key []byte, index uint64) error { func (ti *treeIndex) Tombstone(key []byte, rev reversion) error {
keyi := &keyIndex{key: key} keyi := &keyIndex{key: key}
ti.Lock() ti.Lock()
defer ti.Unlock() defer ti.Unlock()
item := ti.tree.Get(keyi) item := ti.tree.Get(keyi)
if item == nil { if item == nil {
return ErrIndexNotFound return ErrReversionNotFound
} }
ki := item.(*keyIndex) ki := item.(*keyIndex)
ki.tombstone(index) ki.tombstone(rev.main, rev.sub)
return nil return nil
} }
func (ti *treeIndex) Compact(index uint64) map[uint64]struct{} { func (ti *treeIndex) Compact(rev int64) map[reversion]struct{} {
available := make(map[uint64]struct{}) available := make(map[reversion]struct{})
emptyki := make([]*keyIndex, 0) emptyki := make([]*keyIndex, 0)
log.Printf("store.index: compact %d", index) log.Printf("store.index: compact %d", rev)
// TODO: do not hold the lock for long time? // TODO: do not hold the lock for long time?
// This is probably OK. Compacting 10M keys takes O(10ms). // This is probably OK. Compacting 10M keys takes O(10ms).
ti.Lock() ti.Lock()
defer ti.Unlock() defer ti.Unlock()
ti.tree.Ascend(compactIndex(index, available, &emptyki)) ti.tree.Ascend(compactIndex(rev, available, &emptyki))
for _, ki := range emptyki { for _, ki := range emptyki {
item := ti.tree.Delete(ki) item := ti.tree.Delete(ki)
if item == nil { if item == nil {
@ -125,10 +120,10 @@ func (ti *treeIndex) Compact(index uint64) map[uint64]struct{} {
return available return available
} }
func compactIndex(index uint64, available map[uint64]struct{}, emptyki *[]*keyIndex) func(i btree.Item) bool { func compactIndex(rev int64, available map[reversion]struct{}, emptyki *[]*keyIndex) func(i btree.Item) bool {
return func(i btree.Item) bool { return func(i btree.Item) bool {
keyi := i.(*keyIndex) keyi := i.(*keyIndex)
keyi.compact(index, available) keyi.compact(rev, available)
if keyi.isEmpty() { if keyi.isEmpty() {
*emptyki = append(*emptyki, keyi) *emptyki = append(*emptyki, keyi)
} }

View File

@ -9,20 +9,20 @@ func TestIndexPutAndGet(t *testing.T) {
index := newTestTreeIndex() index := newTestTreeIndex()
tests := []T{ tests := []T{
{[]byte("foo"), 0, ErrIndexNotFound, 0}, {[]byte("foo"), 0, ErrReversionNotFound, 0},
{[]byte("foo"), 1, nil, 1}, {[]byte("foo"), 1, nil, 1},
{[]byte("foo"), 3, nil, 1}, {[]byte("foo"), 3, nil, 1},
{[]byte("foo"), 5, nil, 5}, {[]byte("foo"), 5, nil, 5},
{[]byte("foo"), 6, nil, 5}, {[]byte("foo"), 6, nil, 5},
{[]byte("foo1"), 0, ErrIndexNotFound, 0}, {[]byte("foo1"), 0, ErrReversionNotFound, 0},
{[]byte("foo1"), 1, ErrIndexNotFound, 0}, {[]byte("foo1"), 1, ErrReversionNotFound, 0},
{[]byte("foo1"), 2, nil, 2}, {[]byte("foo1"), 2, nil, 2},
{[]byte("foo1"), 5, nil, 2}, {[]byte("foo1"), 5, nil, 2},
{[]byte("foo1"), 6, nil, 6}, {[]byte("foo1"), 6, nil, 6},
{[]byte("foo2"), 0, ErrIndexNotFound, 0}, {[]byte("foo2"), 0, ErrReversionNotFound, 0},
{[]byte("foo2"), 1, ErrIndexNotFound, 0}, {[]byte("foo2"), 1, ErrReversionNotFound, 0},
{[]byte("foo2"), 3, nil, 3}, {[]byte("foo2"), 3, nil, 3},
{[]byte("foo2"), 4, nil, 4}, {[]byte("foo2"), 4, nil, 4},
{[]byte("foo2"), 6, nil, 4}, {[]byte("foo2"), 6, nil, 4},
@ -34,31 +34,26 @@ func TestContinuousCompact(t *testing.T) {
index := newTestTreeIndex() index := newTestTreeIndex()
tests := []T{ tests := []T{
{[]byte("foo"), 0, ErrIndexNotFound, 0}, {[]byte("foo"), 0, ErrReversionNotFound, 0},
{[]byte("foo"), 1, nil, 1}, {[]byte("foo"), 1, nil, 1},
{[]byte("foo"), 3, nil, 1}, {[]byte("foo"), 3, nil, 1},
{[]byte("foo"), 5, nil, 5}, {[]byte("foo"), 5, nil, 5},
{[]byte("foo"), 6, nil, 5}, {[]byte("foo"), 6, nil, 5},
{[]byte("foo1"), 0, ErrIndexNotFound, 0}, {[]byte("foo1"), 0, ErrReversionNotFound, 0},
{[]byte("foo1"), 1, ErrIndexNotFound, 0}, {[]byte("foo1"), 1, ErrReversionNotFound, 0},
{[]byte("foo1"), 2, nil, 2}, {[]byte("foo1"), 2, nil, 2},
{[]byte("foo1"), 5, nil, 2}, {[]byte("foo1"), 5, nil, 2},
{[]byte("foo1"), 6, nil, 6}, {[]byte("foo1"), 6, nil, 6},
{[]byte("foo2"), 0, ErrIndexNotFound, 0}, {[]byte("foo2"), 0, ErrReversionNotFound, 0},
{[]byte("foo2"), 1, ErrIndexNotFound, 0}, {[]byte("foo2"), 1, ErrReversionNotFound, 0},
{[]byte("foo2"), 3, nil, 3}, {[]byte("foo2"), 3, nil, 3},
{[]byte("foo2"), 4, nil, 4}, {[]byte("foo2"), 4, nil, 4},
{[]byte("foo2"), 6, nil, 4}, {[]byte("foo2"), 6, nil, 4},
} }
wa := map[uint64]struct{}{ wa := map[reversion]struct{}{
1: struct{}{}, reversion{main: 1}: struct{}{},
2: struct{}{},
3: struct{}{},
4: struct{}{},
5: struct{}{},
6: struct{}{},
} }
ga := index.Compact(1) ga := index.Compact(1)
if !reflect.DeepEqual(ga, wa) { if !reflect.DeepEqual(ga, wa) {
@ -66,72 +61,96 @@ func TestContinuousCompact(t *testing.T) {
} }
verify(t, index, tests) verify(t, index, tests)
wa = map[reversion]struct{}{
reversion{main: 1}: struct{}{},
reversion{main: 2}: struct{}{},
}
ga = index.Compact(2) ga = index.Compact(2)
if !reflect.DeepEqual(ga, wa) { if !reflect.DeepEqual(ga, wa) {
t.Errorf("a = %v, want %v", ga, wa) t.Errorf("a = %v, want %v", ga, wa)
} }
verify(t, index, tests) verify(t, index, tests)
wa = map[reversion]struct{}{
reversion{main: 1}: struct{}{},
reversion{main: 2}: struct{}{},
reversion{main: 3}: struct{}{},
}
ga = index.Compact(3) ga = index.Compact(3)
if !reflect.DeepEqual(ga, wa) { if !reflect.DeepEqual(ga, wa) {
t.Errorf("a = %v, want %v", ga, wa) t.Errorf("a = %v, want %v", ga, wa)
} }
verify(t, index, tests) verify(t, index, tests)
wa = map[reversion]struct{}{
reversion{main: 1}: struct{}{},
reversion{main: 2}: struct{}{},
reversion{main: 4}: struct{}{},
}
ga = index.Compact(4) ga = index.Compact(4)
delete(wa, 3) delete(wa, reversion{main: 3})
tests[12] = T{[]byte("foo2"), 3, ErrIndexNotFound, 0} tests[12] = T{[]byte("foo2"), 3, ErrReversionNotFound, 0}
if !reflect.DeepEqual(wa, ga) { if !reflect.DeepEqual(wa, ga) {
t.Errorf("a = %v, want %v", ga, wa) t.Errorf("a = %v, want %v", ga, wa)
} }
verify(t, index, tests) verify(t, index, tests)
wa = map[reversion]struct{}{
reversion{main: 2}: struct{}{},
reversion{main: 4}: struct{}{},
reversion{main: 5}: struct{}{},
}
ga = index.Compact(5) ga = index.Compact(5)
delete(wa, 1) delete(wa, reversion{main: 1})
if !reflect.DeepEqual(ga, wa) { if !reflect.DeepEqual(ga, wa) {
t.Errorf("a = %v, want %v", ga, wa) t.Errorf("a = %v, want %v", ga, wa)
} }
tests[1] = T{[]byte("foo"), 1, ErrIndexNotFound, 0} tests[1] = T{[]byte("foo"), 1, ErrReversionNotFound, 0}
tests[2] = T{[]byte("foo"), 3, ErrIndexNotFound, 0} tests[2] = T{[]byte("foo"), 3, ErrReversionNotFound, 0}
verify(t, index, tests) verify(t, index, tests)
wa = map[reversion]struct{}{
reversion{main: 4}: struct{}{},
reversion{main: 5}: struct{}{},
reversion{main: 6}: struct{}{},
}
ga = index.Compact(6) ga = index.Compact(6)
delete(wa, 2) delete(wa, reversion{main: 2})
if !reflect.DeepEqual(ga, wa) { if !reflect.DeepEqual(ga, wa) {
t.Errorf("a = %v, want %v", ga, wa) t.Errorf("a = %v, want %v", ga, wa)
} }
tests[7] = T{[]byte("foo1"), 2, ErrIndexNotFound, 0} tests[7] = T{[]byte("foo1"), 2, ErrReversionNotFound, 0}
tests[8] = T{[]byte("foo1"), 5, ErrIndexNotFound, 0} tests[8] = T{[]byte("foo1"), 5, ErrReversionNotFound, 0}
verify(t, index, tests) verify(t, index, tests)
} }
func verify(t *testing.T, index index, tests []T) { func verify(t *testing.T, index index, tests []T) {
for i, tt := range tests { for i, tt := range tests {
h, err := index.Get(tt.key, tt.index) h, err := index.Get(tt.key, tt.rev)
if err != tt.werr { if err != tt.werr {
t.Errorf("#%d: err = %v, want %v", i, err, tt.werr) t.Errorf("#%d: err = %v, want %v", i, err, tt.werr)
} }
if h != tt.windex { if h.main != tt.wrev {
t.Errorf("#%d: index = %d, want %d", i, h, tt.windex) t.Errorf("#%d: rev = %d, want %d", i, h.main, tt.wrev)
} }
} }
} }
type T struct { type T struct {
key []byte key []byte
index uint64 rev int64
werr error werr error
windex uint64 wrev int64
} }
func newTestTreeIndex() index { func newTestTreeIndex() index {
index := newTreeIndex() index := newTreeIndex()
index.Put([]byte("foo"), 1) index.Put([]byte("foo"), reversion{main: 1})
index.Put([]byte("foo1"), 2) index.Put([]byte("foo1"), reversion{main: 2})
index.Put([]byte("foo2"), 3) index.Put([]byte("foo2"), reversion{main: 3})
index.Put([]byte("foo2"), 4) index.Put([]byte("foo2"), reversion{main: 4})
index.Put([]byte("foo"), 5) index.Put([]byte("foo"), reversion{main: 5})
index.Put([]byte("foo1"), 6) index.Put([]byte("foo1"), reversion{main: 6})
return index return index
} }

View File

@ -3,33 +3,34 @@ package storage
import ( import (
"bytes" "bytes"
"errors" "errors"
"fmt"
"log" "log"
"github.com/coreos/etcd/Godeps/_workspace/src/github.com/google/btree" "github.com/coreos/etcd/Godeps/_workspace/src/github.com/google/btree"
) )
var ( var (
ErrIndexNotFound = errors.New("index: not found") ErrReversionNotFound = errors.New("stroage: reversion not found")
) )
// keyIndex stores the index of a key in the backend. // keyIndex stores the reversion of a key in the backend.
// Each keyIndex has at least one key generation. // Each keyIndex has at least one key generation.
// Each generation might have several key versions. // Each generation might have several key versions.
// Tombstone on a key appends a tombstone version at the end // Tombstone on a key appends a tombstone version at the end
// of the current generation and creates a new empty generation. // of the current generation and creates a new empty generation.
// Each version of a key has an index pointing to the backend. // Each version of a key has an index pointing to the backend.
// //
// For example: put(1);put(2);tombstone(3);put(4);tombstone(5) on key "foo" // For example: put(1.0);put(2.0);tombstone(3.0);put(4.0);tombstone(5.0) on key "foo"
// generate a keyIndex: // generate a keyIndex:
// key: "foo" // key: "foo"
// index: 5 // rev: 5
// generations: // generations:
// {empty} // {empty}
// {4, 5(t)} // {4.0, 5.0(t)}
// {1, 2, 3(t)} // {1.0, 2.0, 3.0(t)}
// //
// Compacting a keyIndex removes the versions smaller than or equal to the given // Compacting a keyIndex removes the versions smaller than or equal to the given
// index, except the largest one. If a generation becomes empty // rev, except the largest one. If a generation becomes empty
// during compaction, it will be removed. If all the generations get // during compaction, it will be removed. If all the generations get
// removed, the keyIndex should be removed. // removed, the keyIndex should be removed.
@ -37,115 +38,125 @@ var (
// compact(2) on the previous example // compact(2) on the previous example
// generations: // generations:
// {empty} // {empty}
// {4, 5(t)} // {4.0, 5.0(t)}
// {2, 3(t)} // {2.0, 3.0(t)}
// //
// compact(4) // compact(4)
// generations: // generations:
// {empty} // {empty}
// {4, 5(t)} // {4.0, 5.0(t)}
// //
// compact(5): // compact(5):
// generations: // generations:
// {empty} // {empty} -> key SHOULD be removed.
// {5(t)}
// //
// compact(6): // compact(6):
// generations: // generations:
// {empty} -> key SHOULD be removed. // {empty} -> key SHOULD be removed.
type keyIndex struct { type keyIndex struct {
key []byte key []byte
index uint64 rev int64
generations []generation generations []generation
} }
// put puts an index to the keyIndex. // put puts a reversion to the keyIndex.
func (ki *keyIndex) put(index uint64) { func (ki *keyIndex) put(rev int64, subrev int64) {
if index < ki.index { if rev < ki.rev {
log.Panicf("store.keyindex: put with unexpected smaller index [%d / %d]", index, ki.index) log.Panicf("store.keyindex: put with unexpected smaller reversion [%d / %d]", rev, ki.rev)
} }
if len(ki.generations) == 0 { if len(ki.generations) == 0 {
ki.generations = append(ki.generations, generation{}) ki.generations = append(ki.generations, generation{})
} }
g := &ki.generations[len(ki.generations)-1] g := &ki.generations[len(ki.generations)-1]
g.cont = append(g.cont, index) g.revs = append(g.revs, reversion{rev, subrev})
g.ver++ g.ver++
ki.index = index ki.rev = rev
} }
// tombstone puts an index, pointing to a tombstone, to the keyIndex. // tombstone puts a reversion, pointing to a tombstone, to the keyIndex.
// It also creates a new empty generation in the keyIndex. // It also creates a new empty generation in the keyIndex.
func (ki *keyIndex) tombstone(index uint64) { func (ki *keyIndex) tombstone(rev int64, subrev int64) {
if ki.isEmpty() { if ki.isEmpty() {
log.Panicf("store.keyindex: unexpected tombstone on empty keyIndex %s", string(ki.key)) log.Panicf("store.keyindex: unexpected tombstone on empty keyIndex %s", string(ki.key))
} }
ki.put(index) ki.put(rev, subrev)
ki.generations = append(ki.generations, generation{}) ki.generations = append(ki.generations, generation{})
} }
// get gets the index of the key that satisfies the given atIndex. // get gets the reversion of the key that satisfies the given atRev.
// Index must be lower than or equal to the given atIndex. // Rev must be lower than or equal to the given atRev.
func (ki *keyIndex) get(atIndex uint64) (index uint64, err error) { func (ki *keyIndex) get(atRev int64) (rev reversion, err error) {
if ki.isEmpty() { if ki.isEmpty() {
log.Panicf("store.keyindex: unexpected get on empty keyIndex %s", string(ki.key)) log.Panicf("store.keyindex: unexpected get on empty keyIndex %s", string(ki.key))
} }
g := ki.findGeneration(atIndex) g := ki.findGeneration(atRev)
if g.isEmpty() { if g.isEmpty() {
return 0, ErrIndexNotFound return reversion{}, ErrReversionNotFound
} }
f := func(index, ver uint64) bool { f := func(rev reversion) bool {
if index <= atIndex { if rev.main <= atRev {
return false return false
} }
return true return true
} }
_, n := g.walk(f) n := g.walk(f)
if n != -1 { if n != -1 {
return g.cont[n], nil return g.revs[n], nil
} }
return 0, ErrIndexNotFound
return reversion{}, ErrReversionNotFound
} }
// compact compacts a keyIndex by removing the versions with smaller or equal // compact compacts a keyIndex by removing the versions with smaller or equal
// index than the given atIndex except the largest one. // reversion than the given atRev except the largest one (If the largest one is
// a tombstone, it will not be kept).
// If a generation becomes empty during compaction, it will be removed. // If a generation becomes empty during compaction, it will be removed.
func (ki *keyIndex) compact(atIndex uint64, available map[uint64]struct{}) { func (ki *keyIndex) compact(atRev int64, available map[reversion]struct{}) {
if ki.isEmpty() { if ki.isEmpty() {
log.Panic("store.keyindex: unexpected compact on empty keyIndex %s", string(ki.key)) log.Panic("store.keyindex: unexpected compact on empty keyIndex %s", string(ki.key))
} }
// walk until reaching the first content that has an index smaller or equal to
// the atIndex. // walk until reaching the first reversion whose main revision is smaller than or equal to
// add all the reached indexes into available map. // the given atRev.
f := func(index, _ uint64) bool { // add it to the available map
available[index] = struct{}{} f := func(rev reversion) bool {
if index <= atIndex { if rev.main <= atRev {
available[rev] = struct{}{}
return false return false
} }
return true return true
} }
g := ki.findGeneration(atIndex) g := ki.findGeneration(atRev)
i := len(ki.generations) - 1 if g == nil {
for i >= 0 { return
}
i := 0
for i <= len(ki.generations)-1 {
wg := &ki.generations[i] wg := &ki.generations[i]
if wg == g { if wg == g {
break break
} }
wg.walk(f) i++
i--
} }
_, n := g.walk(f) if !g.isEmpty() {
n := g.walk(f)
// remove the previous contents. // remove the previous contents.
if n != -1 { if n != -1 {
g.cont = g.cont[n:] g.revs = g.revs[n:]
}
// remove any tombstone
if len(g.revs) == 1 && i != len(ki.generations)-1 {
delete(available, g.revs[0])
i++
}
} }
// remove the previous generations. // remove the previous generations.
ki.generations = ki.generations[i:] ki.generations = ki.generations[i:]
return return
} }
@ -155,51 +166,58 @@ func (ki *keyIndex) isEmpty() bool {
// findGeneration finds out the generation of the keyIndex that the // findGeneration finds out the generation of the keyIndex that the
// given index belongs to. // given index belongs to.
func (ki *keyIndex) findGeneration(index uint64) *generation { func (ki *keyIndex) findGeneration(rev int64) *generation {
g, youngerg := len(ki.generations)-1, len(ki.generations)-2 cg := len(ki.generations) - 1
// If the head index of a younger generation is smaller than for cg >= 0 {
// the given index, the index cannot be in the younger if len(ki.generations[cg].revs) == 0 {
// generation. cg--
for youngerg >= 0 && ki.generations[youngerg].cont != nil { continue
yg := ki.generations[youngerg]
if yg.cont[len(yg.cont)-1] < index {
break
} }
g-- g := ki.generations[cg]
youngerg-- if g.revs[0].main <= rev {
return &ki.generations[cg]
}
cg--
} }
if g < 0 { return nil
return nil
}
return &ki.generations[g]
} }
func (a *keyIndex) Less(b btree.Item) bool { func (a *keyIndex) Less(b btree.Item) bool {
return bytes.Compare(a.key, b.(*keyIndex).key) == -1 return bytes.Compare(a.key, b.(*keyIndex).key) == -1
} }
type generation struct { func (ki *keyIndex) String() string {
ver uint64 var s string
cont []uint64 for _, g := range ki.generations {
} s += g.String()
func (g *generation) isEmpty() bool { return len(g.cont) == 0 }
// walk walks through the (index, version) pairs in the generation in ascending order.
// It passes the (index, version) to the given function.
// walk returns until: 1. it finishs walking all pairs 2. the function returns false.
// walk returns the (index, version) pair at where it stopped. If it stopped after
// finishing walking, (0, -1) will be returned.
func (g *generation) walk(f func(index, ver uint64) bool) (uint64, int) {
ver := g.ver
l := len(g.cont)
for i := range g.cont {
ok := f(g.cont[l-i-1], ver)
if !ok {
return ver, l - i - 1
}
ver--
} }
return 0, -1 return s
}
type generation struct {
ver int64
revs []reversion
}
func (g *generation) isEmpty() bool { return g == nil || len(g.revs) == 0 }
// walk walks through the reversions in the generation from the last one to the first one.
// It passes each reversion to the given function.
// walk continues until: 1. it finishes walking all reversions 2. the function returns false.
// walk returns the position at which it stopped. If it stopped after
// finishing walking, -1 will be returned.
func (g *generation) walk(f func(rev reversion) bool) int {
l := len(g.revs)
for i := range g.revs {
ok := f(g.revs[l-i-1])
if !ok {
return l - i - 1
}
}
return -1
}
func (g *generation) String() string {
return fmt.Sprintf("g: ver[%d], revs %#v\n", g.ver, g.revs)
} }

View File

@ -7,23 +7,22 @@ import (
func TestKeyIndexGet(t *testing.T) { func TestKeyIndexGet(t *testing.T) {
// key: "foo" // key: "foo"
// index: 12 // rev: 12
// generations: // generations:
// {empty} // {empty}
// {8[1], 10[2], 12(t)[3]} // {8[1], 10[2], 12(t)[3]}
// {4[2], 6(t)[3]} // {4[2], 6(t)[3]}
ki := newTestKeyIndex() ki := newTestKeyIndex()
ki.compact(4, make(map[uint64]struct{})) ki.compact(4, make(map[reversion]struct{}))
tests := []struct { tests := []struct {
index uint64 rev int64
windex uint64 wrev int64
werr error werr error
}{ }{
// expected not exist on an index that is greater than the last tombstone {13, 12, nil},
{13, 0, ErrIndexNotFound}, {13, 12, nil},
{13, 0, ErrIndexNotFound},
// get on generation 2 // get on generation 2
{12, 12, nil}, {12, 12, nil},
@ -31,7 +30,7 @@ func TestKeyIndexGet(t *testing.T) {
{10, 10, nil}, {10, 10, nil},
{9, 8, nil}, {9, 8, nil},
{8, 8, nil}, {8, 8, nil},
{7, 0, ErrIndexNotFound}, {7, 6, nil},
// get on generation 1 // get on generation 1
{6, 6, nil}, {6, 6, nil},
@ -40,35 +39,35 @@ func TestKeyIndexGet(t *testing.T) {
} }
for i, tt := range tests { for i, tt := range tests {
index, err := ki.get(tt.index) rev, err := ki.get(tt.rev)
if err != tt.werr { if err != tt.werr {
t.Errorf("#%d: err = %v, want %v", i, err, tt.werr) t.Errorf("#%d: err = %v, want %v", i, err, tt.werr)
} }
if index != tt.windex { if rev.main != tt.wrev {
t.Errorf("#%d: index = %d, want %d", i, index, tt.index) t.Errorf("#%d: rev = %d, want %d", i, rev.main, tt.rev)
} }
} }
} }
func TestKeyIndexPut(t *testing.T) { func TestKeyIndexPut(t *testing.T) {
ki := &keyIndex{key: []byte("foo")} ki := &keyIndex{key: []byte("foo")}
ki.put(5) ki.put(5, 0)
wki := &keyIndex{ wki := &keyIndex{
key: []byte("foo"), key: []byte("foo"),
index: 5, rev: 5,
generations: []generation{{ver: 1, cont: []uint64{5}}}, generations: []generation{{ver: 1, revs: []reversion{{main: 5}}}},
} }
if !reflect.DeepEqual(ki, wki) { if !reflect.DeepEqual(ki, wki) {
t.Errorf("ki = %+v, want %+v", ki, wki) t.Errorf("ki = %+v, want %+v", ki, wki)
} }
ki.put(7) ki.put(7, 0)
wki = &keyIndex{ wki = &keyIndex{
key: []byte("foo"), key: []byte("foo"),
index: 7, rev: 7,
generations: []generation{{ver: 2, cont: []uint64{5, 7}}}, generations: []generation{{ver: 2, revs: []reversion{{main: 5}, {main: 7}}}},
} }
if !reflect.DeepEqual(ki, wki) { if !reflect.DeepEqual(ki, wki) {
t.Errorf("ki = %+v, want %+v", ki, wki) t.Errorf("ki = %+v, want %+v", ki, wki)
@ -77,27 +76,31 @@ func TestKeyIndexPut(t *testing.T) {
func TestKeyIndexTombstone(t *testing.T) { func TestKeyIndexTombstone(t *testing.T) {
ki := &keyIndex{key: []byte("foo")} ki := &keyIndex{key: []byte("foo")}
ki.put(5) ki.put(5, 0)
ki.tombstone(7) ki.tombstone(7, 0)
wki := &keyIndex{ wki := &keyIndex{
key: []byte("foo"), key: []byte("foo"),
index: 7, rev: 7,
generations: []generation{{ver: 2, cont: []uint64{5, 7}}, {}}, generations: []generation{{ver: 2, revs: []reversion{{main: 5}, {main: 7}}}, {}},
} }
if !reflect.DeepEqual(ki, wki) { if !reflect.DeepEqual(ki, wki) {
t.Errorf("ki = %+v, want %+v", ki, wki) t.Errorf("ki = %+v, want %+v", ki, wki)
} }
ki.put(8) ki.put(8, 0)
ki.put(9) ki.put(9, 0)
ki.tombstone(15) ki.tombstone(15, 0)
wki = &keyIndex{ wki = &keyIndex{
key: []byte("foo"), key: []byte("foo"),
index: 15, rev: 15,
generations: []generation{{ver: 2, cont: []uint64{5, 7}}, {ver: 3, cont: []uint64{8, 9, 15}}, {}}, generations: []generation{
{ver: 2, revs: []reversion{{main: 5}, {main: 7}}},
{ver: 3, revs: []reversion{{main: 8}, {main: 9}, {main: 15}}},
{},
},
} }
if !reflect.DeepEqual(ki, wki) { if !reflect.DeepEqual(ki, wki) {
t.Errorf("ki = %+v, want %+v", ki, wki) t.Errorf("ki = %+v, want %+v", ki, wki)
@ -106,221 +109,192 @@ func TestKeyIndexTombstone(t *testing.T) {
func TestKeyIndexCompact(t *testing.T) { func TestKeyIndexCompact(t *testing.T) {
tests := []struct { tests := []struct {
compact uint64 compact int64
wki *keyIndex wki *keyIndex
wam map[uint64]struct{} wam map[reversion]struct{}
}{ }{
{ {
1, 1,
&keyIndex{ &keyIndex{
key: []byte("foo"), key: []byte("foo"),
index: 12, rev: 12,
generations: []generation{ generations: []generation{
{ver: 3, cont: []uint64{2, 4, 6}}, {ver: 3, revs: []reversion{{main: 2}, {main: 4}, {main: 6}}},
{ver: 3, cont: []uint64{8, 10, 12}}, {ver: 3, revs: []reversion{{main: 8}, {main: 10}, {main: 12}}},
{}, {},
}, },
}, },
map[uint64]struct{}{ map[reversion]struct{}{},
2: struct{}{}, 4: struct{}{}, 6: struct{}{},
8: struct{}{}, 10: struct{}{}, 12: struct{}{},
},
}, },
{ {
2, 2,
&keyIndex{ &keyIndex{
key: []byte("foo"), key: []byte("foo"),
index: 12, rev: 12,
generations: []generation{ generations: []generation{
{ver: 3, cont: []uint64{2, 4, 6}}, {ver: 3, revs: []reversion{{main: 2}, {main: 4}, {main: 6}}},
{ver: 3, cont: []uint64{8, 10, 12}}, {ver: 3, revs: []reversion{{main: 8}, {main: 10}, {main: 12}}},
{}, {},
}, },
}, },
map[uint64]struct{}{ map[reversion]struct{}{
2: struct{}{}, 4: struct{}{}, 6: struct{}{}, reversion{main: 2}: struct{}{},
8: struct{}{}, 10: struct{}{}, 12: struct{}{},
}, },
}, },
{ {
3, 3,
&keyIndex{ &keyIndex{
key: []byte("foo"), key: []byte("foo"),
index: 12, rev: 12,
generations: []generation{ generations: []generation{
{ver: 3, cont: []uint64{2, 4, 6}}, {ver: 3, revs: []reversion{{main: 2}, {main: 4}, {main: 6}}},
{ver: 3, cont: []uint64{8, 10, 12}}, {ver: 3, revs: []reversion{{main: 8}, {main: 10}, {main: 12}}},
{}, {},
}, },
}, },
map[uint64]struct{}{ map[reversion]struct{}{
2: struct{}{}, 4: struct{}{}, 6: struct{}{}, reversion{main: 2}: struct{}{},
8: struct{}{}, 10: struct{}{}, 12: struct{}{},
}, },
}, },
{ {
4, 4,
&keyIndex{ &keyIndex{
key: []byte("foo"), key: []byte("foo"),
index: 12, rev: 12,
generations: []generation{ generations: []generation{
{ver: 3, cont: []uint64{4, 6}}, {ver: 3, revs: []reversion{{main: 4}, {main: 6}}},
{ver: 3, cont: []uint64{8, 10, 12}}, {ver: 3, revs: []reversion{{main: 8}, {main: 10}, {main: 12}}},
{}, {},
}, },
}, },
map[uint64]struct{}{ map[reversion]struct{}{
4: struct{}{}, 6: struct{}{}, reversion{main: 4}: struct{}{},
8: struct{}{}, 10: struct{}{}, 12: struct{}{},
}, },
}, },
{ {
5, 5,
&keyIndex{ &keyIndex{
key: []byte("foo"), key: []byte("foo"),
index: 12, rev: 12,
generations: []generation{ generations: []generation{
{ver: 3, cont: []uint64{4, 6}}, {ver: 3, revs: []reversion{{main: 4}, {main: 6}}},
{ver: 3, cont: []uint64{8, 10, 12}}, {ver: 3, revs: []reversion{{main: 8}, {main: 10}, {main: 12}}},
{}, {},
}, },
}, },
map[uint64]struct{}{ map[reversion]struct{}{
4: struct{}{}, 6: struct{}{}, reversion{main: 4}: struct{}{},
8: struct{}{}, 10: struct{}{}, 12: struct{}{},
}, },
}, },
{ {
6, 6,
&keyIndex{ &keyIndex{
key: []byte("foo"), key: []byte("foo"),
index: 12, rev: 12,
generations: []generation{ generations: []generation{
{ver: 3, cont: []uint64{6}}, {ver: 3, revs: []reversion{{main: 8}, {main: 10}, {main: 12}}},
{ver: 3, cont: []uint64{8, 10, 12}},
{}, {},
}, },
}, },
map[uint64]struct{}{ map[reversion]struct{}{},
6: struct{}{},
8: struct{}{}, 10: struct{}{}, 12: struct{}{},
},
}, },
{ {
7, 7,
&keyIndex{ &keyIndex{
key: []byte("foo"), key: []byte("foo"),
index: 12, rev: 12,
generations: []generation{ generations: []generation{
{ver: 3, cont: []uint64{8, 10, 12}}, {ver: 3, revs: []reversion{{main: 8}, {main: 10}, {main: 12}}},
{}, {},
}, },
}, },
map[uint64]struct{}{ map[reversion]struct{}{},
8: struct{}{}, 10: struct{}{}, 12: struct{}{},
},
}, },
{ {
8, 8,
&keyIndex{ &keyIndex{
key: []byte("foo"), key: []byte("foo"),
index: 12, rev: 12,
generations: []generation{ generations: []generation{
{ver: 3, cont: []uint64{8, 10, 12}}, {ver: 3, revs: []reversion{{main: 8}, {main: 10}, {main: 12}}},
{}, {},
}, },
}, },
map[uint64]struct{}{ map[reversion]struct{}{
8: struct{}{}, 10: struct{}{}, 12: struct{}{}, reversion{main: 8}: struct{}{},
}, },
}, },
{ {
9, 9,
&keyIndex{ &keyIndex{
key: []byte("foo"), key: []byte("foo"),
index: 12, rev: 12,
generations: []generation{ generations: []generation{
{ver: 3, cont: []uint64{8, 10, 12}}, {ver: 3, revs: []reversion{{main: 8}, {main: 10}, {main: 12}}},
{}, {},
}, },
}, },
map[uint64]struct{}{ map[reversion]struct{}{
8: struct{}{}, 10: struct{}{}, 12: struct{}{}, reversion{main: 8}: struct{}{},
}, },
}, },
{ {
10, 10,
&keyIndex{ &keyIndex{
key: []byte("foo"), key: []byte("foo"),
index: 12, rev: 12,
generations: []generation{ generations: []generation{
{ver: 3, cont: []uint64{10, 12}}, {ver: 3, revs: []reversion{{main: 10}, {main: 12}}},
{}, {},
}, },
}, },
map[uint64]struct{}{ map[reversion]struct{}{
10: struct{}{}, 12: struct{}{}, reversion{main: 10}: struct{}{},
}, },
}, },
{ {
11, 11,
&keyIndex{ &keyIndex{
key: []byte("foo"), key: []byte("foo"),
index: 12, rev: 12,
generations: []generation{ generations: []generation{
{ver: 3, cont: []uint64{10, 12}}, {ver: 3, revs: []reversion{{main: 10}, {main: 12}}},
{}, {},
}, },
}, },
map[uint64]struct{}{ map[reversion]struct{}{
10: struct{}{}, 12: struct{}{}, reversion{main: 10}: struct{}{},
}, },
}, },
{ {
12, 12,
&keyIndex{ &keyIndex{
key: []byte("foo"), key: []byte("foo"),
index: 12, rev: 12,
generations: []generation{ generations: []generation{{}},
{ver: 3, cont: []uint64{12}},
{},
},
}, },
map[uint64]struct{}{ map[reversion]struct{}{},
12: struct{}{},
},
},
{
13,
&keyIndex{
key: []byte("foo"),
index: 12,
generations: []generation{
{},
},
},
map[uint64]struct{}{},
}, },
} }
// Continous Compaction // Continous Compaction
ki := newTestKeyIndex() ki := newTestKeyIndex()
for i, tt := range tests { for i, tt := range tests {
am := make(map[uint64]struct{}) am := make(map[reversion]struct{})
ki.compact(tt.compact, am) ki.compact(tt.compact, am)
if !reflect.DeepEqual(ki, tt.wki) { if !reflect.DeepEqual(ki, tt.wki) {
t.Errorf("#%d: ki = %+v, want %+v", i, ki, tt.wki) t.Errorf("#%d: ki = %+v, want %+v", i, ki, tt.wki)
} }
if !reflect.DeepEqual(am, tt.wam) { if !reflect.DeepEqual(am, tt.wam) {
t.Errorf("#%d: am = %+v, want %+v", am, tt.wam) t.Errorf("#%d: am = %+v, want %+v", i, am, tt.wam)
} }
} }
// Jump Compaction // Jump Compaction
for i, tt := range tests { for i, tt := range tests {
if (i%2 == 0 && i < 6) && (i%2 == 1 && i > 6) { if (i%2 == 0 && i < 6) && (i%2 == 1 && i > 6) {
am := make(map[uint64]struct{}) am := make(map[reversion]struct{})
ki.compact(tt.compact, am) ki.compact(tt.compact, am)
if !reflect.DeepEqual(ki, tt.wki) { if !reflect.DeepEqual(ki, tt.wki) {
t.Errorf("#%d: ki = %+v, want %+v", i, ki, tt.wki) t.Errorf("#%d: ki = %+v, want %+v", i, ki, tt.wki)
@ -334,31 +308,31 @@ func TestKeyIndexCompact(t *testing.T) {
// OnceCompaction // OnceCompaction
for i, tt := range tests { for i, tt := range tests {
ki := newTestKeyIndex() ki := newTestKeyIndex()
am := make(map[uint64]struct{}) am := make(map[reversion]struct{})
ki.compact(tt.compact, am) ki.compact(tt.compact, am)
if !reflect.DeepEqual(ki, tt.wki) { if !reflect.DeepEqual(ki, tt.wki) {
t.Errorf("#%d: ki = %+v, want %+v", i, ki, tt.wki) t.Errorf("#%d: ki = %+v, want %+v", i, ki, tt.wki)
} }
if !reflect.DeepEqual(am, tt.wam) { if !reflect.DeepEqual(am, tt.wam) {
t.Errorf("#%d: am = %+v, want %+v", am, tt.wam) t.Errorf("#%d: am = %+v, want %+v", i, am, tt.wam)
} }
} }
} }
func newTestKeyIndex() *keyIndex { func newTestKeyIndex() *keyIndex {
// key: "foo" // key: "foo"
// index: 12 // rev: 12
// generations: // generations:
// {empty} // {empty}
// {8[1], 10[2], 12(t)[3]} // {8[1], 10[2], 12(t)[3]}
// {2[1], 4[2], 6(t)[3]} // {2[1], 4[2], 6(t)[3]}
ki := &keyIndex{key: []byte("foo")} ki := &keyIndex{key: []byte("foo")}
ki.put(2) ki.put(2, 0)
ki.put(4) ki.put(4, 0)
ki.tombstone(6) ki.tombstone(6, 0)
ki.put(8) ki.put(8, 0)
ki.put(10) ki.put(10, 0)
ki.tombstone(12) ki.tombstone(12, 0)
return ki return ki
} }

View File

@ -3,33 +3,36 @@ package storage
import "github.com/coreos/etcd/storage/storagepb" import "github.com/coreos/etcd/storage/storagepb"
type KV interface { type KV interface {
// Range gets the keys in the range at rangeIndex. // Range gets the keys in the range at rangeRev.
// If rangeIndex <=0, range gets the keys at currentIndex. // If rangeRev <=0, range gets the keys at currentRev.
// If `end` is nil, the request returns the key. // If `end` is nil, the request returns the key.
// If `end` is not nil, it gets the keys in range [key, range_end). // If `end` is not nil, it gets the keys in range [key, range_end).
// Limit limits the number of keys returned. // Limit limits the number of keys returned.
Range(key, end []byte, limit, rangeIndex int64) (kvs []storagepb.KeyValue, index int64) // If the required rev is compacted, ErrCompacted will be returned.
Range(key, end []byte, limit, rangeRev int64) (kvs []storagepb.KeyValue, rev int64, err error)
// Put puts the given key,value into the store. // Put puts the given key,value into the store.
// A put also increases the index of the store, and generates one event in the event history. // A put also increases the rev of the store, and generates one event in the event history.
Put(key, value []byte) (index int64) Put(key, value []byte) (rev int64)
// DeleteRange deletes the given range from the store. // DeleteRange deletes the given range from the store.
// A deleteRange increases the index of the store if any key in the range exists. // A deleteRange increases the rev of the store if any key in the range exists.
// The number of key deleted will be returned. // The number of key deleted will be returned.
// It also generates one event for each key delete in the event history. // It also generates one event for each key delete in the event history.
// if the `end` is nil, deleteRange deletes the key. // if the `end` is nil, deleteRange deletes the key.
// if the `end` is not nil, deleteRange deletes the keys in range [key, range_end). // if the `end` is not nil, deleteRange deletes the keys in range [key, range_end).
DeleteRange(key, end []byte) (n, index int64) DeleteRange(key, end []byte) (n, rev int64)
// TnxBegin begins a tnx. Only Tnx prefixed operation can be executed, others will be blocked // TnxBegin begins a tnx. Only Tnx prefixed operation can be executed, others will be blocked
// until tnx ends. Only one on-going tnx is allowed. // until tnx ends. Only one on-going tnx is allowed.
// TnxBegin returns an int64 tnx ID. // TnxBegin returns an int64 tnx ID.
// All tnx prefixed operations with same tnx ID will be done with the same index. // All tnx prefixed operations with same tnx ID will be done with the same rev.
TnxBegin() int64 TnxBegin() int64
// TnxEnd ends the on-going tnx with tnx ID. If the on-going tnx ID is not matched, error is returned. // TnxEnd ends the on-going tnx with tnx ID. If the on-going tnx ID is not matched, error is returned.
TnxEnd(tnxID int64) error TnxEnd(tnxID int64) error
TnxRange(tnxID int64, key, end []byte, limit, rangeIndex int64) (kvs []storagepb.KeyValue, index int64, err error) TnxRange(tnxID int64, key, end []byte, limit, rangeRev int64) (kvs []storagepb.KeyValue, rev int64, err error)
TnxPut(tnxID int64, key, value []byte) (index int64, err error) TnxPut(tnxID int64, key, value []byte) (rev int64, err error)
TnxDeleteRange(tnxID int64, key, end []byte) (n, index int64, err error) TnxDeleteRange(tnxID int64, key, end []byte) (n, rev int64, err error)
Compact(rev int64) error
} }

View File

@ -1,8 +1,6 @@
package storage package storage
import ( import (
"bytes"
"encoding/binary"
"errors" "errors"
"log" "log"
"math/rand" "math/rand"
@ -18,7 +16,11 @@ var (
batchInterval = 100 * time.Millisecond batchInterval = 100 * time.Millisecond
keyBucketName = []byte("key") keyBucketName = []byte("key")
scheduledCompactKeyName = []byte("scheduledCompactRev")
finishedCompactKeyName = []byte("finishedCompactRev")
ErrTnxIDMismatch = errors.New("storage: tnx id mismatch") ErrTnxIDMismatch = errors.New("storage: tnx id mismatch")
ErrCompacted = errors.New("storage: required reversion has been compacted")
) )
type store struct { type store struct {
@ -27,8 +29,9 @@ type store struct {
b backend.Backend b backend.Backend
kvindex index kvindex index
currentIndex uint64 currentRev reversion
subIndex uint32 // tracks next subIndex to put into backend // the main reversion of the last compaction
compactMainRev int64
tmu sync.Mutex // protect the tnxID field tmu sync.Mutex // protect the tnxID field
tnxID int64 // tracks the current tnxID to verify tnx operations tnxID int64 // tracks the current tnxID to verify tnx operations
@ -36,9 +39,10 @@ type store struct {
func newStore(path string) KV { func newStore(path string) KV {
s := &store{ s := &store{
b: backend.New(path, batchInterval, batchLimit), b: backend.New(path, batchInterval, batchLimit),
kvindex: newTreeIndex(), kvindex: newTreeIndex(),
currentIndex: 0, currentRev: reversion{},
compactMainRev: -1,
} }
tx := s.b.BatchTx() tx := s.b.BatchTx()
@ -52,31 +56,31 @@ func newStore(path string) KV {
func (s *store) Put(key, value []byte) int64 { func (s *store) Put(key, value []byte) int64 {
id := s.TnxBegin() id := s.TnxBegin()
s.put(key, value, s.currentIndex+1) s.put(key, value, s.currentRev.main+1)
s.TnxEnd(id) s.TnxEnd(id)
return int64(s.currentIndex) return int64(s.currentRev.main)
} }
func (s *store) Range(key, end []byte, limit, rangeIndex int64) (kvs []storagepb.KeyValue, index int64) { func (s *store) Range(key, end []byte, limit, rangeRev int64) (kvs []storagepb.KeyValue, rev int64, err error) {
id := s.TnxBegin() id := s.TnxBegin()
kvs, index = s.rangeKeys(key, end, limit, rangeIndex) kvs, rev, err = s.rangeKeys(key, end, limit, rangeRev)
s.TnxEnd(id) s.TnxEnd(id)
return kvs, index return kvs, rev, err
} }
func (s *store) DeleteRange(key, end []byte) (n, index int64) { func (s *store) DeleteRange(key, end []byte) (n, rev int64) {
id := s.TnxBegin() id := s.TnxBegin()
n = s.deleteRange(key, end, s.currentIndex+1) n = s.deleteRange(key, end, s.currentRev.main+1)
s.TnxEnd(id) s.TnxEnd(id)
return n, int64(s.currentIndex) return n, int64(s.currentRev.main)
} }
func (s *store) TnxBegin() int64 { func (s *store) TnxBegin() int64 {
s.mu.Lock() s.mu.Lock()
s.subIndex = 0 s.currentRev.sub = 0
s.tmu.Lock() s.tmu.Lock()
defer s.tmu.Unlock() defer s.tmu.Unlock()
@ -91,111 +95,119 @@ func (s *store) TnxEnd(tnxID int64) error {
return ErrTnxIDMismatch return ErrTnxIDMismatch
} }
if s.subIndex != 0 { if s.currentRev.sub != 0 {
s.currentIndex += 1 s.currentRev.main += 1
} }
s.subIndex = 0 s.currentRev.sub = 0
s.mu.Unlock() s.mu.Unlock()
return nil return nil
} }
func (s *store) TnxRange(tnxID int64, key, end []byte, limit, rangeIndex int64) (kvs []storagepb.KeyValue, index int64, err error) { func (s *store) TnxRange(tnxID int64, key, end []byte, limit, rangeRev int64) (kvs []storagepb.KeyValue, rev int64, err error) {
s.tmu.Lock() s.tmu.Lock()
defer s.tmu.Unlock() defer s.tmu.Unlock()
if tnxID != s.tnxID { if tnxID != s.tnxID {
return nil, 0, ErrTnxIDMismatch return nil, 0, ErrTnxIDMismatch
} }
kvs, index = s.rangeKeys(key, end, limit, rangeIndex) return s.rangeKeys(key, end, limit, rangeRev)
return kvs, index, nil
} }
func (s *store) TnxPut(tnxID int64, key, value []byte) (index int64, err error) { func (s *store) TnxPut(tnxID int64, key, value []byte) (rev int64, err error) {
s.tmu.Lock() s.tmu.Lock()
defer s.tmu.Unlock() defer s.tmu.Unlock()
if tnxID != s.tnxID { if tnxID != s.tnxID {
return 0, ErrTnxIDMismatch return 0, ErrTnxIDMismatch
} }
s.put(key, value, s.currentIndex+1) s.put(key, value, s.currentRev.main+1)
return int64(s.currentIndex + 1), nil return int64(s.currentRev.main + 1), nil
} }
func (s *store) TnxDeleteRange(tnxID int64, key, end []byte) (n, index int64, err error) { func (s *store) TnxDeleteRange(tnxID int64, key, end []byte) (n, rev int64, err error) {
s.tmu.Lock() s.tmu.Lock()
defer s.tmu.Unlock() defer s.tmu.Unlock()
if tnxID != s.tnxID { if tnxID != s.tnxID {
return 0, 0, ErrTnxIDMismatch return 0, 0, ErrTnxIDMismatch
} }
n = s.deleteRange(key, end, s.currentIndex+1) n = s.deleteRange(key, end, s.currentRev.main+1)
if n != 0 || s.subIndex != 0 { if n != 0 || s.currentRev.sub != 0 {
index = int64(s.currentIndex + 1) rev = int64(s.currentRev.main + 1)
} }
return n, index, nil return n, rev, nil
}
func (s *store) Compact(rev int64) error {
s.mu.Lock()
defer s.mu.Unlock()
if rev <= s.compactMainRev {
return ErrCompacted
}
s.compactMainRev = rev
rbytes := make([]byte, 8+1+8)
revToBytes(reversion{main: rev}, rbytes)
tx := s.b.BatchTx()
tx.Lock()
tx.UnsafePut(keyBucketName, scheduledCompactKeyName, rbytes)
tx.Unlock()
keep := s.kvindex.Compact(rev)
go s.scheduleCompaction(rev, keep)
return nil
} }
// range is a keyword in Go, add Keys suffix. // range is a keyword in Go, add Keys suffix.
func (s *store) rangeKeys(key, end []byte, limit, rangeIndex int64) (kvs []storagepb.KeyValue, index int64) { func (s *store) rangeKeys(key, end []byte, limit, rangeRev int64) (kvs []storagepb.KeyValue, rev int64, err error) {
if rangeIndex <= 0 { if rangeRev <= 0 {
index = int64(s.currentIndex) rev = int64(s.currentRev.main)
if s.subIndex > 0 { if s.currentRev.sub > 0 {
index += 1 rev += 1
} }
} else { } else {
index = rangeIndex rev = rangeRev
}
if rev <= s.compactMainRev {
return nil, 0, ErrCompacted
} }
pairs := s.kvindex.Range(key, end, uint64(index)) _, revpairs := s.kvindex.Range(key, end, int64(rev))
if len(pairs) == 0 { if len(revpairs) == 0 {
return nil, index return nil, rev, nil
} }
if limit > 0 && len(pairs) > int(limit) { if limit > 0 && len(revpairs) > int(limit) {
pairs = pairs[:limit] revpairs = revpairs[:limit]
} }
tx := s.b.BatchTx() tx := s.b.BatchTx()
tx.Lock() tx.Lock()
defer tx.Unlock() defer tx.Unlock()
for _, revpair := range revpairs {
revbytes := make([]byte, 8+1+8)
revToBytes(revpair, revbytes)
for _, pair := range pairs { _, vs := tx.UnsafeRange(keyBucketName, revbytes, nil, 0)
ibytes := make([]byte, 8) if len(vs) != 1 {
endbytes := make([]byte, 8) log.Fatalf("storage: range cannot find rev (%d,%d)", revpair.main, revpair.sub)
binary.BigEndian.PutUint64(ibytes, pair.index)
binary.BigEndian.PutUint64(endbytes, pair.index+1)
found := false
var kv *storagepb.KeyValue
vs := tx.UnsafeRange(keyBucketName, ibytes, endbytes, 0)
for _, v := range vs {
var e storagepb.Event
err := e.Unmarshal(v)
if err != nil {
log.Fatalf("storage: range cannot unmarshal event: %v", err)
}
if bytes.Equal(e.Kv.Key, pair.key) {
if e.Type == storagepb.PUT {
kv = &e.Kv
} else {
kv = nil
}
found = true
}
} }
if !found { e := &storagepb.Event{}
log.Fatalf("storage: range cannot find key %s at index %d", string(pair.key), pair.index) if err := e.Unmarshal(vs[0]); err != nil {
log.Fatalf("storage: cannot unmarshal event: %v", err)
} }
if kv != nil { if e.Type == storagepb.PUT {
kvs = append(kvs, *kv) kvs = append(kvs, e.Kv)
} }
} }
return kvs, index return kvs, rev, nil
} }
func (s *store) put(key, value []byte, index uint64) { func (s *store) put(key, value []byte, rev int64) {
ibytes := make([]byte, 8+1+4) ibytes := make([]byte, 8+1+8)
indexToBytes(index, s.subIndex, ibytes) revToBytes(reversion{main: rev, sub: s.currentRev.sub}, ibytes)
event := storagepb.Event{ event := storagepb.Event{
Type: storagepb.PUT, Type: storagepb.PUT,
@ -214,24 +226,24 @@ func (s *store) put(key, value []byte, index uint64) {
tx.Lock() tx.Lock()
defer tx.Unlock() defer tx.Unlock()
tx.UnsafePut(keyBucketName, ibytes, d) tx.UnsafePut(keyBucketName, ibytes, d)
s.kvindex.Put(key, index) s.kvindex.Put(key, reversion{main: rev, sub: s.currentRev.sub})
s.subIndex += 1 s.currentRev.sub += 1
} }
func (s *store) deleteRange(key, end []byte, index uint64) int64 { func (s *store) deleteRange(key, end []byte, rev int64) int64 {
var n int64 var n int64
rindex := index rrev := rev
if s.subIndex > 0 { if s.currentRev.sub > 0 {
rindex += 1 rrev += 1
} }
pairs := s.kvindex.Range(key, end, rindex) keys, _ := s.kvindex.Range(key, end, rrev)
if len(pairs) == 0 { if len(keys) == 0 {
return 0 return 0
} }
for _, pair := range pairs { for _, key := range keys {
ok := s.delete(pair.key, index) ok := s.delete(key, rev)
if ok { if ok {
n++ n++
} }
@ -239,19 +251,39 @@ func (s *store) deleteRange(key, end []byte, index uint64) int64 {
return n return n
} }
func (s *store) delete(key []byte, index uint64) bool { func (s *store) delete(key []byte, mainrev int64) bool {
gindex := index grev := mainrev
if s.subIndex > 0 { if s.currentRev.sub > 0 {
gindex += 1 grev += 1
} }
_, err := s.kvindex.Get(key, gindex) rev, err := s.kvindex.Get(key, grev)
if err != nil { if err != nil {
// key not exist // key not exist
return false return false
} }
ibytes := make([]byte, 8+1+4) tx := s.b.BatchTx()
indexToBytes(index, s.subIndex, ibytes) tx.Lock()
defer tx.Unlock()
revbytes := make([]byte, 8+1+8)
revToBytes(rev, revbytes)
_, vs := tx.UnsafeRange(keyBucketName, revbytes, nil, 0)
if len(vs) != 1 {
log.Fatalf("storage: delete cannot find rev (%d,%d)", rev.main, rev.sub)
}
e := &storagepb.Event{}
if err := e.Unmarshal(vs[0]); err != nil {
log.Fatalf("storage: cannot unmarshal event: %v", err)
}
if e.Type == storagepb.DELETE {
return false
}
ibytes := make([]byte, 8+1+8)
revToBytes(reversion{main: mainrev, sub: s.currentRev.sub}, ibytes)
event := storagepb.Event{ event := storagepb.Event{
Type: storagepb.DELETE, Type: storagepb.DELETE,
@ -265,20 +297,11 @@ func (s *store) delete(key []byte, index uint64) bool {
log.Fatalf("storage: cannot marshal event: %v", err) log.Fatalf("storage: cannot marshal event: %v", err)
} }
tx := s.b.BatchTx()
tx.Lock()
defer tx.Unlock()
tx.UnsafePut(keyBucketName, ibytes, d) tx.UnsafePut(keyBucketName, ibytes, d)
err = s.kvindex.Tombstone(key, index) err = s.kvindex.Tombstone(key, reversion{main: mainrev, sub: s.currentRev.sub})
if err != nil { if err != nil {
log.Fatalf("storage: cannot tombstone an existing key (%s): %v", string(key), err) log.Fatalf("storage: cannot tombstone an existing key (%s): %v", string(key), err)
} }
s.subIndex += 1 s.currentRev.sub += 1
return true return true
} }
func indexToBytes(index uint64, subindex uint32, bytes []byte) {
binary.BigEndian.PutUint64(bytes, index)
bytes[8] = '_'
binary.BigEndian.PutUint32(bytes[9:], subindex)
}

View File

@ -0,0 +1,42 @@
package storage
import (
"encoding/binary"
"time"
)
// scheduleCompaction removes from the key bucket every stored revision up to
// and including compactMainRev that is not listed in keep.
//
// The bucket is walked in batches. Each batch is processed while holding the
// backend batch-transaction lock; the lock is released and the function sleeps
// for 100ms before asking for the next batch. When a scan returns no keys, the
// compacted revision is recorded under finishedCompactKeyName and the function
// returns.
//
// compactMainRev is the main revision being compacted; keep is the set of
// revisions at or below it that must survive.
func (s *store) scheduleCompaction(compactMainRev int64, keep map[reversion]struct{}) {
	// Exclusive upper bound of the scan: the 8-byte big-endian encoding of the
	// first main revision that is NOT being compacted.
	upper := make([]byte, 8)
	binary.BigEndian.PutUint64(upper, uint64(compactMainRev+1))

	const perBatch = int64(10000)

	// cursor is the inclusive lower bound of the next scan. It starts as all
	// zero bytes and is advanced past the last key seen after each batch.
	cursor := make([]byte, 8+1+8)

	for {
		tx := s.b.BatchTx()
		tx.Lock()

		found, _ := tx.UnsafeRange(keyBucketName, cursor, upper, perBatch)
		if len(found) == 0 {
			// Nothing left in range: persist the finished marker and stop.
			done := make([]byte, 8+1+8)
			revToBytes(reversion{main: compactMainRev}, done)
			tx.UnsafePut(keyBucketName, finishedCompactKeyName, done)
			tx.Unlock()
			return
		}

		// newest ends up holding the revision decoded from the last key of
		// this batch; it is used below to position the next scan.
		var newest reversion
		for i := range found {
			newest = bytesToRev(found[i])
			if _, kept := keep[newest]; kept {
				continue
			}
			tx.UnsafeDelete(keyBucketName, found[i])
		}

		// Resume right after the last revision handled in this batch.
		revToBytes(reversion{main: newest.main, sub: newest.sub + 1}, cursor)
		tx.Unlock()

		time.Sleep(100 * time.Millisecond)
	}
}

View File

@ -1,6 +1,7 @@
package storage package storage
import ( import (
"bytes"
"crypto/rand" "crypto/rand"
"os" "os"
"testing" "testing"
@ -16,9 +17,9 @@ func TestRange(t *testing.T) {
tests := []struct { tests := []struct {
key, end []byte key, end []byte
index int64 rev int64
windex int64 wrev int64
// TODO: change this to the actual kv // TODO: change this to the actual kv
wN int64 wN int64
}{ }{
@ -41,12 +42,15 @@ func TestRange(t *testing.T) {
} }
for i, tt := range tests { for i, tt := range tests {
kvs, index := s.Range(tt.key, tt.end, 0, tt.index) kvs, rev, err := s.Range(tt.key, tt.end, 0, tt.rev)
if err != nil {
t.Fatal(err)
}
if len(kvs) != int(tt.wN) { if len(kvs) != int(tt.wN) {
t.Errorf("#%d: len(kvs) = %d, want %d", i, len(kvs), tt.wN) t.Errorf("#%d: len(kvs) = %d, want %d", i, len(kvs), tt.wN)
} }
if index != tt.windex { if rev != tt.wrev {
t.Errorf("#%d: index = %d, wang %d", i, tt.index, tt.windex) t.Errorf("#%d: rev = %d, want %d", i, tt.rev, tt.wrev)
} }
} }
} }
@ -55,8 +59,8 @@ func TestSimpleDeleteRange(t *testing.T) {
tests := []struct { tests := []struct {
key, end []byte key, end []byte
windex int64 wrev int64
wN int64 wN int64
}{ }{
{ {
[]byte("foo"), []byte("foo1"), []byte("foo"), []byte("foo1"),
@ -83,12 +87,12 @@ func TestSimpleDeleteRange(t *testing.T) {
s.Put([]byte("foo1"), []byte("bar1")) s.Put([]byte("foo1"), []byte("bar1"))
s.Put([]byte("foo2"), []byte("bar2")) s.Put([]byte("foo2"), []byte("bar2"))
n, index := s.DeleteRange(tt.key, tt.end) n, rev := s.DeleteRange(tt.key, tt.end)
if n != tt.wN { if n != tt.wN {
t.Errorf("#%d: n = %d, want %d", i, n, tt.wN) t.Errorf("#%d: n = %d, want %d", i, n, tt.wN)
} }
if index != tt.windex { if rev != tt.wrev {
t.Errorf("#%d: index = %d, wang %d", i, index, tt.windex) t.Errorf("#%d: rev = %d, wang %d", i, rev, tt.wrev)
} }
os.Remove("test") os.Remove("test")
@ -104,49 +108,61 @@ func TestRangeInSequence(t *testing.T) {
s.Put([]byte("foo2"), []byte("bar2")) s.Put([]byte("foo2"), []byte("bar2"))
// remove foo // remove foo
n, index := s.DeleteRange([]byte("foo"), nil) n, rev := s.DeleteRange([]byte("foo"), nil)
if n != 1 || index != 4 { if n != 1 || rev != 4 {
t.Fatalf("n = %d, index = %d, want (%d, %d)", n, index, 1, 4) t.Fatalf("n = %d, index = %d, want (%d, %d)", n, rev, 1, 4)
} }
// before removal foo // before removal foo
kvs, index := s.Range([]byte("foo"), []byte("foo3"), 0, 3) kvs, rev, err := s.Range([]byte("foo"), []byte("foo3"), 0, 3)
if err != nil {
t.Fatal(err)
}
if len(kvs) != 3 { if len(kvs) != 3 {
t.Fatalf("len(kvs) = %d, want %d", len(kvs), 3) t.Fatalf("len(kvs) = %d, want %d", len(kvs), 3)
} }
// after removal foo // after removal foo
kvs, index = s.Range([]byte("foo"), []byte("foo3"), 0, 4) kvs, rev, err = s.Range([]byte("foo"), []byte("foo3"), 0, 4)
if err != nil {
t.Fatal(err)
}
if len(kvs) != 2 { if len(kvs) != 2 {
t.Fatalf("len(kvs) = %d, want %d", len(kvs), 2) t.Fatalf("len(kvs) = %d, want %d", len(kvs), 2)
} }
// remove again -> expect nothing // remove again -> expect nothing
n, index = s.DeleteRange([]byte("foo"), nil) n, rev = s.DeleteRange([]byte("foo"), nil)
if n != 0 || index != 4 { if n != 0 || rev != 4 {
t.Fatalf("n = %d, index = %d, want (%d, %d)", n, index, 0, 4) t.Fatalf("n = %d, rev = %d, want (%d, %d)", n, rev, 0, 4)
} }
// remove foo1 // remove foo1
n, index = s.DeleteRange([]byte("foo"), []byte("foo2")) n, rev = s.DeleteRange([]byte("foo"), []byte("foo2"))
if n != 1 || index != 5 { if n != 1 || rev != 5 {
t.Fatalf("n = %d, index = %d, want (%d, %d)", n, index, 1, 5) t.Fatalf("n = %d, rev = %d, want (%d, %d)", n, rev, 1, 5)
} }
// after removal foo1 // after removal foo1
kvs, index = s.Range([]byte("foo"), []byte("foo3"), 0, 5) kvs, rev, err = s.Range([]byte("foo"), []byte("foo3"), 0, 5)
if err != nil {
t.Fatal(err)
}
if len(kvs) != 1 { if len(kvs) != 1 {
t.Fatalf("len(kvs) = %d, want %d", len(kvs), 1) t.Fatalf("len(kvs) = %d, want %d", len(kvs), 1)
} }
// remove foo2 // remove foo2
n, index = s.DeleteRange([]byte("foo2"), []byte("foo3")) n, rev = s.DeleteRange([]byte("foo2"), []byte("foo3"))
if n != 1 || index != 6 { if n != 1 || rev != 6 {
t.Fatalf("n = %d, index = %d, want (%d, %d)", n, index, 1, 6) t.Fatalf("n = %d, rev = %d, want (%d, %d)", n, rev, 1, 6)
} }
// after removal foo2 // after removal foo2
kvs, index = s.Range([]byte("foo"), []byte("foo3"), 0, 6) kvs, rev, err = s.Range([]byte("foo"), []byte("foo3"), 0, 6)
if err != nil {
t.Fatal(err)
}
if len(kvs) != 0 { if len(kvs) != 0 {
t.Fatalf("len(kvs) = %d, want %d", len(kvs), 0) t.Fatalf("len(kvs) = %d, want %d", len(kvs), 0)
} }
@ -163,15 +179,15 @@ func TestOneTnx(t *testing.T) {
s.TnxPut(id, []byte("foo2"), []byte("bar2")) s.TnxPut(id, []byte("foo2"), []byte("bar2"))
// remove foo // remove foo
n, index, err := s.TnxDeleteRange(id, []byte("foo"), nil) n, rev, err := s.TnxDeleteRange(id, []byte("foo"), nil)
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
if n != 1 || index != 1 { if n != 1 || rev != 1 {
t.Fatalf("n = %d, index = %d, want (%d, %d)", n, index, 1, 1) t.Fatalf("n = %d, rev = %d, want (%d, %d)", n, rev, 1, 1)
} }
kvs, index, err := s.TnxRange(id, []byte("foo"), []byte("foo3"), 0, 0) kvs, rev, err := s.TnxRange(id, []byte("foo"), []byte("foo3"), 0, 0)
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
@ -180,25 +196,25 @@ func TestOneTnx(t *testing.T) {
} }
// remove again -> expect nothing // remove again -> expect nothing
n, index, err = s.TnxDeleteRange(id, []byte("foo"), nil) n, rev, err = s.TnxDeleteRange(id, []byte("foo"), nil)
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
if n != 0 || index != 1 { if n != 0 || rev != 1 {
t.Fatalf("n = %d, index = %d, want (%d, %d)", n, index, 0, 1) t.Fatalf("n = %d, rev = %d, want (%d, %d)", n, rev, 0, 1)
} }
// remove foo1 // remove foo1
n, index, err = s.TnxDeleteRange(id, []byte("foo"), []byte("foo2")) n, rev, err = s.TnxDeleteRange(id, []byte("foo"), []byte("foo2"))
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
if n != 1 || index != 1 { if n != 1 || rev != 1 {
t.Fatalf("n = %d, index = %d, want (%d, %d)", n, index, 1, 1) t.Fatalf("n = %d, rev = %d, want (%d, %d)", n, rev, 1, 1)
} }
// after removal foo1 // after removal foo1
kvs, index, err = s.TnxRange(id, []byte("foo"), []byte("foo3"), 0, 0) kvs, rev, err = s.TnxRange(id, []byte("foo"), []byte("foo3"), 0, 0)
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
@ -207,16 +223,16 @@ func TestOneTnx(t *testing.T) {
} }
// remove foo2 // remove foo2
n, index, err = s.TnxDeleteRange(id, []byte("foo2"), []byte("foo3")) n, rev, err = s.TnxDeleteRange(id, []byte("foo2"), []byte("foo3"))
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
if n != 1 || index != 1 { if n != 1 || rev != 1 {
t.Fatalf("n = %d, index = %d, want (%d, %d)", n, index, 1, 1) t.Fatalf("n = %d, rev = %d, want (%d, %d)", n, rev, 1, 1)
} }
// after removal foo2 // after removal foo2
kvs, index, err = s.TnxRange(id, []byte("foo"), []byte("foo3"), 0, 0) kvs, rev, err = s.TnxRange(id, []byte("foo"), []byte("foo3"), 0, 0)
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
@ -230,12 +246,77 @@ func TestOneTnx(t *testing.T) {
} }
// After tnx // After tnx
kvs, index := s.Range([]byte("foo"), []byte("foo3"), 0, 1) kvs, rev, err := s.Range([]byte("foo"), []byte("foo3"), 0, 1)
if err != nil {
t.Fatal(err)
}
if len(kvs) != 0 { if len(kvs) != 0 {
t.Fatalf("len(kvs) = %d, want %d", len(kvs), 0) t.Fatalf("len(kvs) = %d, want %d", len(kvs), 0)
} }
if index != 1 { if rev != 1 {
t.Fatalf("index = %d, want %d", index, 1) t.Fatalf("rev = %d, want %d", rev, 1)
}
}
func TestCompaction(t *testing.T) {
s := newStore("test")
defer os.Remove("test")
s.Put([]byte("foo"), []byte("bar"))
s.Put([]byte("foo1"), []byte("bar1"))
s.Put([]byte("foo2"), []byte("bar2"))
s.Put([]byte("foo"), []byte("bar11"))
s.Put([]byte("foo1"), []byte("bar12"))
s.Put([]byte("foo2"), []byte("bar13"))
s.Put([]byte("foo1"), []byte("bar14"))
s.DeleteRange([]byte("foo"), []byte("foo200"))
s.Put([]byte("foo4"), []byte("bar4"))
err := s.Compact(4)
if err != nil {
t.Errorf("unexpect compact error %v", err)
}
err = s.Compact(4)
if err != ErrCompacted {
t.Errorf("err = %v, want %v", err, ErrCompacted)
}
_, _, err = s.Range([]byte("foo"), nil, 0, 4)
if err != ErrCompacted {
t.Errorf("err = %v, want %v", err, ErrCompacted)
}
// compact should not compact the last value of foo
kvs, rev, err := s.Range([]byte("foo"), nil, 0, 5)
if err != nil {
t.Errorf("unexpected range error %v", err)
}
if !bytes.Equal(kvs[0].Value, []byte("bar11")) {
t.Errorf("value = %s, want %s", string(kvs[0].Value), "bar11")
}
if rev != 5 {
t.Errorf("rev = %d, want %d", rev, 5)
}
// compact everything
err = s.Compact(8)
if err != nil {
t.Errorf("unexpect compact error %v", err)
}
kvs, rev, err = s.Range([]byte("foo"), []byte("fop"), 0, 0)
if err != nil {
t.Errorf("unexpected range error %v", err)
}
if len(kvs) != 1 {
t.Errorf("len(kvs) = %d, want %d", len(kvs), 1)
}
if !bytes.Equal(kvs[0].Value, []byte("bar4")) {
t.Errorf("value = %s, want %s", string(kvs[0].Value), "bar4")
}
if rev != 9 {
t.Errorf("rev = %d, want %d", rev, 9)
} }
} }

21
storage/reversion.go Normal file
View File

@ -0,0 +1,21 @@
package storage
import "encoding/binary"
// reversion identifies a single stored change by a pair of counters.
//
// main is the primary counter and sub is a secondary counter ordering changes
// that share the same main value. Both are signed 64-bit integers and are
// serialized by revToBytes / parsed by bytesToRev.
type reversion struct {
	main, sub int64
}
// revToBytes serializes rev into the caller-supplied buffer.
//
// Layout written into bytes:
//
//	[0:8]  rev.main as a big-endian uint64
//	[8]    the separator byte '_'
//	[9:17] rev.sub as a big-endian uint64
//
// bytes must be at least 8+1+8 long; a shorter buffer causes a runtime panic.
func revToBytes(rev reversion, bytes []byte) {
	order := binary.BigEndian
	const separator = '_'

	order.PutUint64(bytes, uint64(rev.main))
	bytes[8] = separator
	order.PutUint64(bytes[9:], uint64(rev.sub))
}
// bytesToRev parses a buffer in the layout produced by revToBytes and returns
// the reversion it encodes.
//
// The first 8 bytes are read as the big-endian main counter and the 8 bytes
// starting at offset 9 as the big-endian sub counter. The byte at offset 8
// (the separator) is not inspected. A buffer too short to hold both counters
// causes a runtime panic.
func bytesToRev(bytes []byte) reversion {
	mainPart := binary.BigEndian.Uint64(bytes[0:8])
	subPart := binary.BigEndian.Uint64(bytes[9:])

	return reversion{main: int64(mainPart), sub: int64(subPart)}
}