From ef3db821be5fbc862cd8e3c94cff6133ba7663de Mon Sep 17 00:00:00 2001 From: Oliver Tonnhofer Date: Tue, 6 Aug 2013 16:14:29 +0200 Subject: [PATCH] refactored IdRefs marshaling into cache/binary --- cache/binary/diff.go | 88 +++++++++++++++++++ cache/binary/diff_test.go | 55 ++++++++++++ cache/diff.go | 173 ++++++++++---------------------------- cache/diff_test.go | 97 ++++++--------------- element/element.go | 5 ++ 5 files changed, 217 insertions(+), 201 deletions(-) create mode 100644 cache/binary/diff.go create mode 100644 cache/binary/diff_test.go diff --git a/cache/binary/diff.go b/cache/binary/diff.go new file mode 100644 index 0000000..07fb158 --- /dev/null +++ b/cache/binary/diff.go @@ -0,0 +1,88 @@ +package binary + +import ( + "bytes" + "encoding/binary" + + "goposm/element" +) + +func MarshalIdRefsBunch(idRefs []element.IdRefs) []byte { + buf := make([]byte, len(idRefs)*(4+1+6)+binary.MaxVarintLen64) + + lastRef := int64(0) + lastId := int64(0) + nextPos := 0 + + nextPos += binary.PutUvarint(buf[nextPos:], uint64(len(idRefs))) + + for _, idRef := range idRefs { + if len(buf)-nextPos < binary.MaxVarintLen64 { + tmp := make([]byte, len(buf)*2) + copy(tmp, buf) + buf = tmp + } + nextPos += binary.PutVarint(buf[nextPos:], idRef.Id-lastId) + lastId = idRef.Id + } + for _, idRef := range idRefs { + if len(buf)-nextPos < binary.MaxVarintLen64 { + tmp := make([]byte, len(buf)*2) + copy(tmp, buf) + buf = tmp + } + nextPos += binary.PutUvarint(buf[nextPos:], uint64(len(idRef.Refs))) + } + for _, idRef := range idRefs { + for _, ref := range idRef.Refs { + if len(buf)-nextPos < binary.MaxVarintLen64 { + tmp := make([]byte, len(buf)*2) + copy(tmp, buf) + buf = tmp + } + nextPos += binary.PutVarint(buf[nextPos:], ref-lastRef) + lastRef = ref + } + } + return buf[:nextPos] +} + +func UnmarshalIdRefsBunch(buf []byte) []element.IdRefs { + r := bytes.NewBuffer(buf) + n, err := binary.ReadUvarint(r) + if err != nil { + return nil + } + + idRefs := make([]element.IdRefs, n) 
+ + last := int64(0) + for i := 0; uint64(i) < n; i++ { + idRefs[i].Id, err = binary.ReadVarint(r) + if err != nil { + panic(err) + } + idRefs[i].Id += last + last = idRefs[i].Id + } + var numRefs uint64 + for i := 0; uint64(i) < n; i++ { + numRefs, err = binary.ReadUvarint(r) + if err != nil { + panic(err) + } + idRefs[i].Refs = make([]int64, numRefs) + } + last = 0 + for idIdx := 0; uint64(idIdx) < n; idIdx++ { + for refIdx := 0; refIdx < len(idRefs[idIdx].Refs); refIdx++ { + idRefs[idIdx].Refs[refIdx], err = binary.ReadVarint(r) + if err != nil { + panic(err) + } + idRefs[idIdx].Refs[refIdx] += last + last = idRefs[idIdx].Refs[refIdx] + } + } + return idRefs +} diff --git a/cache/binary/diff_test.go b/cache/binary/diff_test.go new file mode 100644 index 0000000..43f9a80 --- /dev/null +++ b/cache/binary/diff_test.go @@ -0,0 +1,55 @@ +package binary + +import ( + "testing" + + "goposm/element" +) + +func TestMarshalBunch(t *testing.T) { + bunch := []element.IdRefs{ + {123923123, []int64{1213123}}, + {123923133, []int64{1231237}}, + {123924123, []int64{912412210, 912412213}}, + {123924129, []int64{812412213}}, + {123924130, []int64{91241213}}, + {123924132, []int64{912412210, 9124213, 212412210}}, + } + + buf := MarshalIdRefsBunch(bunch) + newBunch := UnmarshalIdRefsBunch(buf) + + t.Log(len(buf), float64(len(buf))/6.0) + + if len(newBunch) != 6 { + t.Fatal(newBunch) + } + if newBunch[0].Id != 123923123 || newBunch[0].Refs[0] != 1213123 { + t.Fatal(newBunch[0]) + } + if newBunch[1].Id != 123923133 || newBunch[1].Refs[0] != 1231237 { + t.Fatal(newBunch[1]) + } + if newBunch[2].Id != 123924123 || newBunch[2].Refs[0] != 912412210 || newBunch[2].Refs[1] != 912412213 { + t.Fatal(newBunch[2]) + } + if newBunch[5].Id != 123924132 || newBunch[5].Refs[2] != 212412210 { + t.Fatal(newBunch[5]) + } +} + +func BenchmarkMarshalBunch(b *testing.B) { + bunch := []element.IdRefs{ + {123923123, []int64{1213123}}, + {123923133, []int64{1231237}}, + {123924123, []int64{912412210, 912412213}}, 
+ {123924129, []int64{812412213}}, + {123924130, []int64{91241213}}, + {123924132, []int64{912412210, 9124213, 212412210}}, + } + + for i := 0; i < b.N; i++ { + buf := MarshalIdRefsBunch(bunch) + UnmarshalIdRefsBunch(buf) + } +} diff --git a/cache/diff.go b/cache/diff.go index 323b22b..6cbf91e 100644 --- a/cache/diff.go +++ b/cache/diff.go @@ -1,16 +1,16 @@ package cache import ( - "bytes" - "encoding/binary" "github.com/jmhodges/levigo" - "goposm/element" "log" "os" "path/filepath" "runtime" "sort" "sync" + + "goposm/cache/binary" + "goposm/element" ) type byInt64 []int64 @@ -117,15 +117,10 @@ type idRef struct { ref int64 } -type idRefs struct { - id int64 - refs []int64 -} - // idRefBunch stores multiple IdRefs type idRefBunch struct { id int64 // the bunch id - idRefs []idRefs + idRefs []element.IdRefs } // idRefBunches can hold multiple idRefBunch @@ -133,10 +128,10 @@ type idRefBunches map[int64]idRefBunch func (bunches *idRefBunches) add(bunchId, id, ref int64) { idRefs := bunches.getCreate(bunchId, id) - idRefs.refs = insertRefs(idRefs.refs, ref) + idRefs.Refs = insertRefs(idRefs.Refs, ref) } -func (bunches *idRefBunches) getCreate(bunchId, id int64) *idRefs { +func (bunches *idRefBunches) getCreate(bunchId, id int64) *element.IdRefs { bunch, ok := (*bunches)[bunchId] if !ok { bunch = idRefBunch{id: bunchId} @@ -147,35 +142,35 @@ func (bunches *idRefBunches) getCreate(bunchId, id int64) *idRefs { return result } -func (bunch *idRefBunch) get(id int64) *idRefs { - var result *idRefs +func (bunch *idRefBunch) get(id int64) *element.IdRefs { + var result *element.IdRefs i := sort.Search(len(bunch.idRefs), func(i int) bool { - return bunch.idRefs[i].id >= id + return bunch.idRefs[i].Id >= id }) - if i < len(bunch.idRefs) && bunch.idRefs[i].id == id { + if i < len(bunch.idRefs) && bunch.idRefs[i].Id == id { result = &bunch.idRefs[i] } return result } -func (bunch *idRefBunch) getCreate(id int64) *idRefs { - var result *idRefs +func (bunch *idRefBunch) 
getCreate(id int64) *element.IdRefs { + var result *element.IdRefs i := sort.Search(len(bunch.idRefs), func(i int) bool { - return bunch.idRefs[i].id >= id + return bunch.idRefs[i].Id >= id }) - if i < len(bunch.idRefs) && bunch.idRefs[i].id >= id { - if bunch.idRefs[i].id == id { + if i < len(bunch.idRefs) && bunch.idRefs[i].Id >= id { + if bunch.idRefs[i].Id == id { result = &bunch.idRefs[i] } else { - bunch.idRefs = append(bunch.idRefs, idRefs{}) + bunch.idRefs = append(bunch.idRefs, element.IdRefs{}) copy(bunch.idRefs[i+1:], bunch.idRefs[i:]) - bunch.idRefs[i] = idRefs{id: id} + bunch.idRefs[i] = element.IdRefs{Id: id} result = &bunch.idRefs[i] } } else { - bunch.idRefs = append(bunch.idRefs, idRefs{id: id}) + bunch.idRefs = append(bunch.idRefs, element.IdRefs{Id: id}) result = &bunch.idRefs[len(bunch.idRefs)-1] } @@ -359,7 +354,7 @@ func (index *bunchRefCache) writeRefs(idRefs idRefBunches) error { return index.db.Write(index.wo, batch) } -func mergeBunch(bunch, newBunch []idRefs) []idRefs { +func mergeBunch(bunch, newBunch []element.IdRefs) []element.IdRefs { lastIdx := 0 NextIdRef: @@ -367,23 +362,23 @@ NextIdRef: for _, newIdRefs := range newBunch { // search place in bunch for i := lastIdx; i < len(bunch); i++ { - if bunch[i].id == newIdRefs.id { + if bunch[i].Id == newIdRefs.Id { // id already present - if len(newIdRefs.refs) == 0 { + if len(newIdRefs.Refs) == 0 { // no new refs -> delete bunch = append(bunch[:i], bunch[i+1:]...) 
} else { // otherwise add refs - for _, r := range newIdRefs.refs { - bunch[i].refs = insertRefs(bunch[i].refs, r) + for _, r := range newIdRefs.Refs { + bunch[i].Refs = insertRefs(bunch[i].Refs, r) } } lastIdx = i continue NextIdRef } - if bunch[i].id > newIdRefs.id { + if bunch[i].Id > newIdRefs.Id { // insert before - if len(newIdRefs.refs) > 0 { - bunch = append(bunch, idRefs{}) + if len(newIdRefs.Refs) > 0 { + bunch = append(bunch, element.IdRefs{}) copy(bunch[i+1:], bunch[i:]) bunch[i] = newIdRefs } @@ -392,7 +387,7 @@ NextIdRef: } } // insert at the end - if len(newIdRefs.refs) > 0 { + if len(newIdRefs.Refs) > 0 { bunch = append(bunch, newIdRefs) lastIdx = len(bunch) - 1 } @@ -400,16 +395,16 @@ NextIdRef: return bunch } -func (index *bunchRefCache) loadMergeMarshal(keyBuf []byte, newBunch []idRefs) []byte { +func (index *bunchRefCache) loadMergeMarshal(keyBuf []byte, newBunch []element.IdRefs) []byte { data, err := index.db.Get(index.ro, keyBuf) if err != nil { panic(err) } - var bunch []idRefs + var bunch []element.IdRefs if data != nil { - bunch = unmarshalBunch(data) + bunch = binary.UnmarshalIdRefsBunch(data) } if bunch == nil { @@ -418,7 +413,7 @@ func (index *bunchRefCache) loadMergeMarshal(keyBuf []byte, newBunch []idRefs) [ bunch = mergeBunch(bunch, newBunch) } - data = marshalBunch(bunch) + data = binary.MarshalIdRefsBunch(bunch) return data } @@ -434,9 +429,9 @@ func (index *bunchRefCache) Get(id int64) []int64 { } if data != nil { - for _, idRef := range unmarshalBunch(data) { - if idRef.id == id { - return idRef.refs + for _, idRef := range binary.UnmarshalIdRefsBunch(data) { + if idRef.Id == id { + return idRef.Refs } } } @@ -451,16 +446,16 @@ func (index *bunchRefCache) Add(id, ref int64) error { return err } - var idRefs []idRefs + var idRefs []element.IdRefs if data != nil { - idRefs = unmarshalBunch(data) + idRefs = binary.UnmarshalIdRefsBunch(data) } idRefBunch := idRefBunch{index.getBunchId(id), idRefs} idRef := idRefBunch.getCreate(id) - 
idRef.refs = insertRefs(idRef.refs, ref) + idRef.Refs = insertRefs(idRef.Refs, ref) - data = marshalBunch(idRefBunch.idRefs) + data = binary.MarshalIdRefsBunch(idRefBunch.idRefs) return index.db.Put(index.wo, keyBuf, data) } @@ -478,12 +473,12 @@ func (index *bunchRefCache) DeleteRef(id, ref int64) error { } if data != nil { - idRefs := unmarshalBunch(data) + idRefs := binary.UnmarshalIdRefsBunch(data) idRefBunch := idRefBunch{index.getBunchId(id), idRefs} idRef := idRefBunch.get(id) if idRef != nil { - idRef.refs = deleteRefs(idRef.refs, ref) - data := marshalBunch(idRefs) + idRef.Refs = deleteRefs(idRef.Refs, ref) + data := binary.MarshalIdRefsBunch(idRefs) return index.db.Put(index.wo, keyBuf, data) } } @@ -503,12 +498,12 @@ func (index *bunchRefCache) Delete(id int64) error { } if data != nil { - idRefs := unmarshalBunch(data) + idRefs := binary.UnmarshalIdRefsBunch(data) idRefBunch := idRefBunch{index.getBunchId(id), idRefs} idRef := idRefBunch.get(id) if idRef != nil { - idRef.refs = []int64{} - data := marshalBunch(idRefs) + idRef.Refs = []int64{} + data := binary.MarshalIdRefsBunch(idRefs) return index.db.Put(index.wo, keyBuf, data) } } @@ -545,83 +540,3 @@ func (index *WaysRefIndex) AddFromMembers(relId int64, members []element.Member) } } } - -func marshalBunch(idRefs []idRefs) []byte { - buf := make([]byte, len(idRefs)*(4+1+6)+binary.MaxVarintLen64) - - lastRef := int64(0) - lastId := int64(0) - nextPos := 0 - - nextPos += binary.PutUvarint(buf[nextPos:], uint64(len(idRefs))) - - for _, idRef := range idRefs { - if len(buf)-nextPos < binary.MaxVarintLen64 { - tmp := make([]byte, len(buf)*2) - copy(tmp, buf) - buf = tmp - } - nextPos += binary.PutVarint(buf[nextPos:], idRef.id-lastId) - lastId = idRef.id - } - for _, idRef := range idRefs { - if len(buf)-nextPos < binary.MaxVarintLen64 { - tmp := make([]byte, len(buf)*2) - copy(tmp, buf) - buf = tmp - } - nextPos += binary.PutUvarint(buf[nextPos:], uint64(len(idRef.refs))) - } - for _, idRef := range 
idRefs { - for _, ref := range idRef.refs { - if len(buf)-nextPos < binary.MaxVarintLen64 { - tmp := make([]byte, len(buf)*2) - copy(tmp, buf) - buf = tmp - } - nextPos += binary.PutVarint(buf[nextPos:], ref-lastRef) - lastRef = ref - } - } - return buf[:nextPos] -} - -func unmarshalBunch(buf []byte) []idRefs { - r := bytes.NewBuffer(buf) - n, err := binary.ReadUvarint(r) - if err != nil { - return nil - } - - idRefs := make([]idRefs, n) - - last := int64(0) - for i := 0; uint64(i) < n; i++ { - idRefs[i].id, err = binary.ReadVarint(r) - if err != nil { - panic(err) - } - idRefs[i].id += last - last = idRefs[i].id - } - var numRefs uint64 - for i := 0; uint64(i) < n; i++ { - numRefs, err = binary.ReadUvarint(r) - if err != nil { - panic(err) - } - idRefs[i].refs = make([]int64, numRefs) - } - last = 0 - for idIdx := 0; uint64(idIdx) < n; idIdx++ { - for refIdx := 0; refIdx < len(idRefs[idIdx].refs); refIdx++ { - idRefs[idIdx].refs[refIdx], err = binary.ReadVarint(r) - if err != nil { - panic(err) - } - idRefs[idIdx].refs[refIdx] += last - last = idRefs[idIdx].refs[refIdx] - } - } - return idRefs -} diff --git a/cache/diff_test.go b/cache/diff_test.go index 9da40fe..d79279d 100644 --- a/cache/diff_test.go +++ b/cache/diff_test.go @@ -1,10 +1,11 @@ package cache import ( - "goposm/element" "io/ioutil" "os" "testing" + + "goposm/element" ) func TestInsertRefs(t *testing.T) { @@ -114,54 +115,6 @@ func TestWriteDiff(t *testing.T) { } } -func TestmarshalBunch(t *testing.T) { - bunch := []idRefs{ - {123923123, []int64{1213123}}, - {123923133, []int64{1231237}}, - {123924123, []int64{912412210, 912412213}}, - {123924129, []int64{812412213}}, - {123924130, []int64{91241213}}, - {123924132, []int64{912412210, 9124213, 212412210}}, - } - - buf := marshalBunch(bunch) - newBunch := unmarshalBunch(buf) - - t.Log(len(buf), float64(len(buf))/6.0) - - if len(newBunch) != 6 { - t.Fatal(newBunch) - } - if newBunch[0].id != 123923123 || newBunch[0].refs[0] != 1213123 { - 
t.Fatal(newBunch[0]) - } - if newBunch[1].id != 123923133 || newBunch[1].refs[0] != 1231237 { - t.Fatal(newBunch[1]) - } - if newBunch[2].id != 123924123 || newBunch[2].refs[0] != 912412210 || newBunch[2].refs[1] != 912412213 { - t.Fatal(newBunch[2]) - } - if newBunch[5].id != 123924132 || newBunch[5].refs[2] != 212412210 { - t.Fatal(newBunch[5]) - } -} - -func BenchmarkMarshalBunch(b *testing.B) { - bunch := []idRefs{ - {123923123, []int64{1213123}}, - {123923133, []int64{1231237}}, - {123924123, []int64{912412210, 912412213}}, - {123924129, []int64{812412213}}, - {123924130, []int64{91241213}}, - {123924132, []int64{912412210, 9124213, 212412210}}, - } - - for i := 0; i < b.N; i++ { - buf := marshalBunch(bunch) - unmarshalBunch(buf) - } -} - func BenchmarkWriteDiff(b *testing.B) { b.StopTimer() cache_dir, _ := ioutil.TempDir("", "goposm_test") @@ -185,35 +138,35 @@ func BenchmarkWriteDiff(b *testing.B) { } func TestMergeIdRefs(t *testing.T) { - bunch := []idRefs{} + bunch := []element.IdRefs{} - bunch = mergeBunch(bunch, []idRefs{idRefs{50, []int64{1}}}) - if b := bunch[0]; b.id != 50 || b.refs[0] != 1 { + bunch = mergeBunch(bunch, []element.IdRefs{element.IdRefs{50, []int64{1}}}) + if b := bunch[0]; b.Id != 50 || b.Refs[0] != 1 { t.Fatal(bunch) } // before - bunch = mergeBunch(bunch, []idRefs{idRefs{40, []int64{3}}}) - if b := bunch[0]; b.id != 40 || b.refs[0] != 3 { + bunch = mergeBunch(bunch, []element.IdRefs{element.IdRefs{40, []int64{3}}}) + if b := bunch[0]; b.Id != 40 || b.Refs[0] != 3 { t.Fatal(bunch) } // after - bunch = mergeBunch(bunch, []idRefs{idRefs{70, []int64{4}}}) - if b := bunch[2]; b.id != 70 || b.refs[0] != 4 { + bunch = mergeBunch(bunch, []element.IdRefs{element.IdRefs{70, []int64{4}}}) + if b := bunch[2]; b.Id != 70 || b.Refs[0] != 4 { t.Fatal(bunch) } // in between - bunch = mergeBunch(bunch, []idRefs{idRefs{60, []int64{5}}}) - if b := bunch[2]; b.id != 60 || b.refs[0] != 5 { + bunch = mergeBunch(bunch, []element.IdRefs{element.IdRefs{60, 
[]int64{5}}}) + if b := bunch[2]; b.Id != 60 || b.Refs[0] != 5 { t.Fatal(bunch) } // same (50:1 already inserted) - bunch = mergeBunch(bunch, []idRefs{idRefs{50, []int64{0, 5}}}) - if b := bunch[1]; b.id != 50 || len(b.refs) != 3 || - b.refs[0] != 0 || b.refs[1] != 1 || b.refs[2] != 5 { + bunch = mergeBunch(bunch, []element.IdRefs{element.IdRefs{50, []int64{0, 5}}}) + if b := bunch[1]; b.Id != 50 || len(b.Refs) != 3 || + b.Refs[0] != 0 || b.Refs[1] != 1 || b.Refs[2] != 5 { t.Fatal(bunch) } @@ -222,15 +175,15 @@ func TestMergeIdRefs(t *testing.T) { } // remove multiple - bunch = mergeBunch(bunch, []idRefs{idRefs{40, []int64{}}, idRefs{60, []int64{}}}) - if bunch[0].id != 50 || bunch[1].id != 70 || len(bunch) != 2 { + bunch = mergeBunch(bunch, []element.IdRefs{element.IdRefs{40, []int64{}}, element.IdRefs{60, []int64{}}}) + if bunch[0].Id != 50 || bunch[1].Id != 70 || len(bunch) != 2 { t.Fatal(bunch) } // add multiple - bunch = mergeBunch(bunch, []idRefs{idRefs{40, []int64{1}}, idRefs{60, []int64{1}}, idRefs{80, []int64{1}}}) - if len(bunch) != 5 || bunch[0].id != 40 || - bunch[2].id != 60 || bunch[4].id != 80 { + bunch = mergeBunch(bunch, []element.IdRefs{element.IdRefs{40, []int64{1}}, element.IdRefs{60, []int64{1}}, element.IdRefs{80, []int64{1}}}) + if len(bunch) != 5 || bunch[0].Id != 40 || + bunch[2].Id != 60 || bunch[4].Id != 80 { t.Fatal(bunch) } @@ -240,37 +193,37 @@ func TestIdRefBunches(t *testing.T) { bunches := make(idRefBunches) bunches.add(1, 100, 999) - if r := bunches[1].idRefs[0]; r.id != 100 || r.refs[0] != 999 { + if r := bunches[1].idRefs[0]; r.Id != 100 || r.Refs[0] != 999 { t.Fatal(bunches) } // before bunches.add(1, 99, 888) - if r := bunches[1].idRefs[0]; r.id != 99 || r.refs[0] != 888 { + if r := bunches[1].idRefs[0]; r.Id != 99 || r.Refs[0] != 888 { t.Fatal(bunches) } // after bunches.add(1, 102, 777) - if r := bunches[1].idRefs[2]; r.id != 102 || r.refs[0] != 777 { + if r := bunches[1].idRefs[2]; r.Id != 102 || r.Refs[0] != 777 { 
t.Fatal(bunches) } // in between bunches.add(1, 101, 666) - if r := bunches[1].idRefs[2]; r.id != 101 || r.refs[0] != 666 { + if r := bunches[1].idRefs[2]; r.Id != 101 || r.Refs[0] != 666 { t.Fatal(bunches) } // same id bunches.add(1, 100, 998) - if r := bunches[1].idRefs[1]; r.id != 100 || r.refs[0] != 998 || r.refs[1] != 999 { + if r := bunches[1].idRefs[1]; r.Id != 100 || r.Refs[0] != 998 || r.Refs[1] != 999 { t.Fatal(bunches) } // duplicate with same id and same ref bunches.add(1, 100, 998) - if r := bunches[1].idRefs[1]; r.id != 100 || r.refs[0] != 998 || r.refs[1] != 999 { + if r := bunches[1].idRefs[1]; r.Id != 100 || r.Refs[0] != 998 || r.Refs[1] != 999 { t.Fatal(bunches) } diff --git a/element/element.go b/element/element.go index 33b13d3..915bb29 100644 --- a/element/element.go +++ b/element/element.go @@ -58,3 +58,8 @@ type Relation struct { OSMElem Members []Member `json:"members"` } + +type IdRefs struct { + Id int64 + Refs []int64 +}