diff --git a/cache/delta.go b/cache/delta.go index 5c8ed37..a851f7c 100644 --- a/cache/delta.go +++ b/cache/delta.go @@ -6,10 +6,17 @@ import ( bin "encoding/binary" "goposm/binary" "goposm/element" + "sort" "sync" ) -func packNodes(nodes map[int64]element.Node) *DeltaCoords { +type Nodes []element.Node + +func (s Nodes) Len() int { return len(s) } +func (s Nodes) Swap(i, j int) { s[i], s[j] = s[j], s[i] } +func (s Nodes) Less(i, j int) bool { return s[i].Id < s[j].Id } + +func packNodes(nodes []element.Node) *DeltaCoords { var lastLon, lastLat int64 var lon, lat int64 var lastId int64 @@ -18,14 +25,14 @@ func packNodes(nodes map[int64]element.Node) *DeltaCoords { lats := make([]int64, len(nodes)) i := 0 - for id, nd := range nodes { + for _, nd := range nodes { lon = int64(binary.CoordToInt(nd.Long)) lat = int64(binary.CoordToInt(nd.Lat)) - ids[i] = id - lastId + ids[i] = nd.Id - lastId lons[i] = lon - lastLon lats[i] = lat - lastLat - lastId = id + lastId = nd.Id lastLon = lon lastLat = lat i++ @@ -33,8 +40,8 @@ func packNodes(nodes map[int64]element.Node) *DeltaCoords { return &DeltaCoords{Ids: ids, Lats: lats, Lons: lons} } -func unpackNodes(deltaCoords *DeltaCoords) map[int64]element.Node { - nodes := make(map[int64]element.Node, len(deltaCoords.Ids)) +func unpackNodes(deltaCoords *DeltaCoords) []element.Node { + nodes := make([]element.Node, len(deltaCoords.Ids)) var lastLon, lastLat int64 var lon, lat int64 @@ -44,7 +51,7 @@ func unpackNodes(deltaCoords *DeltaCoords) map[int64]element.Node { id = lastId + deltaCoords.Ids[i] lon = lastLon + deltaCoords.Lats[i] lat = lastLat + deltaCoords.Lons[i] - nodes[id] = element.Node{ + nodes[i] = element.Node{ OSMElem: element.OSMElem{Id: int64(id)}, Long: binary.IntToCoord(uint32(lon)), Lat: binary.IntToCoord(uint32(lat)), @@ -60,7 +67,7 @@ func unpackNodes(deltaCoords *DeltaCoords) map[int64]element.Node { type CoordsBunch struct { sync.Mutex id int64 - coords map[int64]element.Node + coords []element.Node elem *list.Element needsWrite bool } @@ -98,18 +105,28 @@ func (self *DeltaCoordsCache) GetCoord(id int64) (element.Node, bool) { bunchId := getBunchId(id) bunch := self.getBunch(bunchId) defer bunch.Unlock() - node, ok := bunch.coords[id] - if !ok { - return element.Node{}, false + idx := sort.Search(len(bunch.coords), func(i int) bool { + return bunch.coords[i].Id >= id + }) + if idx < len(bunch.coords) && bunch.coords[idx].Id == id { + return bunch.coords[idx], true } - return node, true + return element.Node{}, false } -func (self *DeltaCoordsCache) FillWay(way *element.Way) { - way.Nodes = make([]element.Node, len(way.Refs)) - for i, id := range way.Refs { - way.Nodes[i], _ = self.GetCoord(id) +func (self *DeltaCoordsCache) FillWay(way *element.Way) bool { + if way == nil { + return false } + way.Nodes = make([]element.Node, len(way.Refs)) + var ok bool + for i, id := range way.Refs { + way.Nodes[i], ok = self.GetCoord(id) + if !ok { + return false + } + } + return true } func (self *DeltaCoordsCache) PutCoords(nodes []element.Node) { @@ -120,9 +137,8 @@ func (self *DeltaCoordsCache) PutCoords(nodes []element.Node) { bunchId := getBunchId(node.Id) if bunchId != currentBunchId { bunch := self.getBunch(currentBunchId) - for _, nd := range nodes[start : i-1] { - bunch.coords[nd.Id] = nd - } + bunch.coords = append(bunch.coords, nodes[start:i-1]...) + sort.Sort(Nodes(bunch.coords)) currentBunchId = bunchId start = int64(i) bunch.needsWrite = true @@ -130,14 +146,13 @@ func (self *DeltaCoordsCache) PutCoords(nodes []element.Node) { } } bunch := self.getBunch(currentBunchId) - for _, nd := range nodes[start:] { - bunch.coords[nd.Id] = nd - } + bunch.coords = append(bunch.coords, nodes[start:]...) + sort.Sort(Nodes(bunch.coords)) bunch.needsWrite = true bunch.Unlock() } -func (p *DeltaCoordsCache) putCoordsPacked(bunchId int64, nodes map[int64]element.Node) { +func (p *DeltaCoordsCache) putCoordsPacked(bunchId int64, nodes []element.Node) { if len(nodes) == 0 { return } @@ -152,7 +167,7 @@ func (p *DeltaCoordsCache) putCoordsPacked(bunchId int64, nodes map[int64]elemen p.db.Put(p.wo, keyBuf, data) } -func (p *DeltaCoordsCache) getCoordsPacked(bunchId int64) map[int64]element.Node { +func (p *DeltaCoordsCache) getCoordsPacked(bunchId int64) []element.Node { keyBuf := make([]byte, 8) bin.PutVarint(keyBuf, bunchId) @@ -160,6 +175,9 @@ func (p *DeltaCoordsCache) getCoordsPacked(bunchId int64) map[int64]element.Node if err != nil { panic(err) } + if data == nil { + return make([]element.Node, 0) + } deltaCoords := &DeltaCoords{} err = proto.Unmarshal(data, deltaCoords) if err != nil { @@ -171,7 +189,7 @@ func (p *DeltaCoordsCache) getCoordsPacked(bunchId int64) map[int64]element.Node } func getBunchId(nodeId int64) int64 { - return nodeId / (1024 * 32) + return nodeId / (1024 * 8) } func (self *DeltaCoordsCache) getBunch(bunchId int64) *CoordsBunch { @@ -181,11 +199,7 @@ func (self *DeltaCoordsCache) getBunch(bunchId int64) *CoordsBunch { if !ok { elem := self.lruList.PushFront(bunchId) nodes := self.getCoordsPacked(bunchId) - if nodes == nil { - bunch = &CoordsBunch{elem: elem} - } else { - bunch = &CoordsBunch{id: bunchId, coords: nodes, elem: elem} - } + bunch = &CoordsBunch{id: bunchId, coords: nodes, elem: elem} self.table[bunchId] = bunch } else { self.lruList.MoveToFront(bunch.elem) @@ -200,6 +214,7 @@ func (self *DeltaCoordsCache) CheckCapacity() { elem := self.lruList.Back() bunchId := self.lruList.Remove(elem).(int64) bunch := self.table[bunchId] + bunch.elem = nil if bunch.needsWrite { self.putCoordsPacked(bunchId, bunch.coords) } diff --git a/cache/index.go b/cache/index.go new file mode 100644 index 0000000..3437c69 --- /dev/null +++ b/cache/index.go @@ -0,0 +1,65 @@ +package cache + +import ( + "code.google.com/p/goprotobuf/proto" + bin "encoding/binary" + "sync" +) + +type RefIndex struct { + Cache + mu sync.Mutex +} + +func NewRefIndex(path string) (*RefIndex, error) { + index := RefIndex{} + err := index.open(path) + if err != nil { + return nil, err + } + return &index, nil +} + +func (index *RefIndex) Add(id, ref int64) error { + keyBuf := make([]byte, 8) + bin.PutVarint(keyBuf, id) + data, err := index.db.Get(index.ro, keyBuf) + if err != nil { + panic(err) + } + refs := &Refs{} + if data != nil { + err = proto.Unmarshal(data, refs) + if err != nil { + panic(err) + } + } + + if refs.Ids == nil { + refs.Ids = make([]int64, 0, 1) + } + // TODO change to delta encoding + // TODO check for duplicates + refs.Ids = append(refs.Ids, ref) + + data, err = proto.Marshal(refs) + if err != nil { + panic(err) + } + err = index.db.Put(index.wo, keyBuf, data) + return err +} + +func (index *RefIndex) Get(id int64) []int64 { + keyBuf := make([]byte, 8) + bin.PutVarint(keyBuf, id) + data, err := index.db.Get(index.ro, keyBuf) + refs := &Refs{} + if data != nil { + err = proto.Unmarshal(data, refs) + if err != nil { + panic(err) + } + } + return refs.Ids +} diff --git a/cache/internal.pb.go b/cache/internal.pb.go index c410b68..f754f9e 100644 --- a/cache/internal.pb.go +++ b/cache/internal.pb.go @@ -1,5 +1,5 @@ // Code generated by protoc-gen-go. -// source: internal.proto +// source: cache/internal.proto // DO NOT EDIT! package cache @@ -45,5 +45,21 @@ func (m *DeltaCoords) GetLons() []int64 { return nil } +type Refs struct { + Ids []int64 `protobuf:"zigzag64,1,rep,packed,name=ids" json:"ids,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *Refs) Reset() { *m = Refs{} } +func (m *Refs) String() string { return proto.CompactTextString(m) } +func (*Refs) ProtoMessage() {} + +func (m *Refs) GetIds() []int64 { + if m != nil { + return m.Ids + } + return nil +} + func init() { } diff --git a/cache/internal.proto b/cache/internal.proto index c67b37c..ea79d9c 100644 --- a/cache/internal.proto +++ b/cache/internal.proto @@ -1,4 +1,4 @@ -package imposm.cache.internal; +package cache; message DeltaCoords { repeated sint64 ids = 1 [packed = true]; @@ -6,3 +6,6 @@ message DeltaCoords { repeated sint64 lons = 3 [packed = true]; } +message Refs { + repeated sint64 ids = 1 [packed = true]; +} diff --git a/parser.go b/parser.go index 4d82636..da2dbdf 100644 --- a/parser.go +++ b/parser.go @@ -26,8 +26,15 @@ func parse(cache *cache.OSMCache, filename string) { waitParser.Add(1) go func() { for pos := range positions { - parser.ParseBlock(pos, coords, nodes, ways, relations) + parser.ParseBlock( + pos, + coords, + nodes, + ways, + relations, + ) } + //runtime.GC() waitParser.Done() }() } @@ -92,34 +99,52 @@ func parse(cache *cache.OSMCache, filename string) { } func main() { - f, err := os.Create("/tmp/goposm.pprof") - if err != nil { - log.Fatal(err) + if false { + f, err := os.Create("/tmp/goposm.pprof") + if err != nil { + log.Fatal(err) + } + pprof.StartCPUProfile(f) + defer pprof.StopCPUProfile() } - pprof.StartCPUProfile(f) - defer pprof.StopCPUProfile() + log.SetFlags(log.LstdFlags | log.Llongfile) - runtime.GOMAXPROCS(runtime.NumCPU()) + //runtime.GOMAXPROCS(runtime.NumCPU()) flag.Parse() - cache, err := cache.NewOSMCache("/tmp/goposm") + osmCache, err := cache.NewOSMCache("/tmp/goposm") if err != nil { log.Fatal(err) } - defer cache.Close() + defer osmCache.Close() - parse(cache, flag.Arg(0)) + parse(osmCache, flag.Arg(0)) + fmt.Println("foo") - rel := cache.Relations.Iter() - for r := range rel { - fmt.Println(r) - } + //rel := osmCache.Relations.Iter() + //for r := range rel { + //fmt.Println(r) + //} - way := cache.Ways.Iter() + way := osmCache.Ways.Iter() i := 0 + refCache, err := cache.NewRefIndex("/tmp/refindex") + if err != nil { + log.Fatal(err) + } for w := range way { i += 1 - cache.Coords.FillWay(w) - //fmt.Println(i) + ok := osmCache.Coords.FillWay(w) + if !ok { + continue + } + if true { + for _, node := range w.Nodes { + refCache.Add(node.Id, w.Id) + } + } + if i%1000 == 0 { + fmt.Println(i) + } } fmt.Println(i) //parser.PBFStats(os.Args[1]) diff --git a/parser/parser.test b/parser/parser.test deleted file mode 100755 index 0fd350f..0000000 Binary files a/parser/parser.test and /dev/null differ