From 1ad4f7931cfe2ca116434c6c54ec7ae52992463a Mon Sep 17 00:00:00 2001 From: Oliver Tonnhofer Date: Tue, 2 Apr 2013 22:07:27 +0200 Subject: [PATCH] hackahoy! parsing of nodes/ways/relations; marshaling --- binary/serialize.go | 64 +++++++++++++------ cache/db.go | 51 ++++++++++++++- cache/db_test.go | 122 +++++++++++++++++++++++++++++++++++- element/element.go | 31 +++++++--- parser.go | 2 +- parser/pbf.go | 147 ++++++++++++++++++++++++++++++++++---------- 6 files changed, 352 insertions(+), 65 deletions(-) diff --git a/binary/serialize.go b/binary/serialize.go index 9d6b7f3..dcba134 100644 --- a/binary/serialize.go +++ b/binary/serialize.go @@ -1,29 +1,21 @@ package binary import ( + "bytes" "code.google.com/p/goprotobuf/proto" + bin "encoding/binary" "goposm/element" "goposm/model" ) -// struct MarshalError { -// msg string -// } +const COORD_FACTOR float64 = 11930464.7083 // ((2<<31)-1)/360.0 -func tagsFromArray(arr []string) *element.Tags { - result := make(element.Tags) - for i := 0; i < len(arr); i += 2 { - result[arr[i]] = arr[i+1] - } - return &result +func coordToInt(coord float64) uint32 { + return uint32((coord + 180.0) * COORD_FACTOR) } -func tagsAsArray(tags *element.Tags) []string { - result := make([]string, 0, 2*len(*tags)) - for key, val := range *tags { - result = append(result, key, val) - } - return result +func intToCoord(coord uint32) float64 { + return float64((float64(coord) / COORD_FACTOR) - 180.0) } func Marshal(elem interface{}) ([]byte, error) { @@ -37,12 +29,46 @@ func Marshal(elem interface{}) ([]byte, error) { return []byte{}, nil } +func MarshalCoord(node *element.Node) ([]byte, error) { + data := make([]byte, 8) + + buf := bytes.NewBuffer(data) + err := bin.Write(buf, bin.LittleEndian, coordToInt(node.Long)) + if err != nil { + return nil, err + } + err = bin.Write(buf, bin.LittleEndian, coordToInt(node.Lat)) + if err != nil { + return nil, err + } + return data, nil +} + +func UnmarshalCoord(id int64, data []byte) (node *element.Node, err error) { + var long, lat uint32 + buf := bytes.NewBuffer(data) + err = bin.Read(buf, bin.LittleEndian, &long) + if err != nil { + return nil, err + } + err = bin.Read(buf, bin.LittleEndian, &lat) + if err != nil { + return nil, err + } + + node = &element.Node{} + node.Id = id + node.Long = intToCoord(long) + node.Lat = intToCoord(lat) + return node, nil +} + func MarshalNode(node *element.Node) ([]byte, error) { pbfNode := &model.Node{} nodeId := node.Id pbfNode.Id = &nodeId pbfNode.FromWgsCoord(node.Long, node.Lat) - pbfNode.Tags = tagsAsArray(&node.Tags) + pbfNode.Tags = node.TagsAsArray() return proto.Marshal(pbfNode) } @@ -56,7 +82,7 @@ func UnmarshalNode(data []byte) (node *element.Node, err error) { node = &element.Node{} node.Id = *pbfNode.Id node.Long, node.Lat = pbfNode.WgsCoord() - node.Tags = *tagsFromArray(pbfNode.Tags) + node.TagsFromArray(pbfNode.Tags) return node, nil } @@ -64,7 +90,7 @@ func MarshalWay(way *element.Way) ([]byte, error) { pbfWay := &model.Way{} pbfWay.Id = &way.Id pbfWay.Nodes = way.Nodes - pbfWay.Tags = tagsAsArray(&way.Tags) + pbfWay.Tags = way.TagsAsArray() return proto.Marshal(pbfWay) } @@ -78,6 +104,6 @@ func UnmarshalWay(data []byte) (way *element.Way, err error) { way = &element.Way{} way.Id = *pbfWay.Id way.Nodes = pbfWay.Nodes - way.Tags = *tagsFromArray(pbfWay.Tags) + way.TagsFromArray(pbfWay.Tags) return way, nil } diff --git a/cache/db.go b/cache/db.go index b82792c..84737ee 100644 --- a/cache/db.go +++ b/cache/db.go @@ -16,6 +16,7 @@ type Cache struct { func NewCache(path string) *Cache { result := &Cache{} opts := levigo.NewOptions() + opts.SetCache(levigo.NewLRUCache(1024 * 1024 * 50)) opts.SetCreateIfMissing(true) db, err := levigo.Open(path, opts) if err != nil { @@ -28,6 +29,30 @@ func NewCache(path string) *Cache { } func (p *Cache) PutCoord(node *element.Node) { + keyBuf := make([]byte, 8) + bin.PutVarint(keyBuf, int64(node.Id)) + data, err := binary.MarshalCoord(node) + if err != nil { + panic(err) + } + p.db.Put(p.wo, keyBuf, data) +} + +func (p *Cache) GetCoord(id int64) *element.Node { + keyBuf := make([]byte, 8) + bin.PutVarint(keyBuf, int64(id)) + data, err := p.db.Get(p.ro, keyBuf) + if err != nil { + panic(err) + } + node, err := binary.UnmarshalCoord(id, data) + if err != nil { + panic(err) + } + return node +} + +func (p *Cache) PutNode(node *element.Node) { keyBuf := make([]byte, 8) bin.PutVarint(keyBuf, int64(node.Id)) data, err := binary.MarshalNode(node) @@ -37,7 +62,7 @@ func (p *Cache) PutCoord(node *element.Node) { p.db.Put(p.wo, keyBuf, data) } -func (p *Cache) GetCoord(id element.OSMID) *element.Node { +func (p *Cache) GetNode(id int64) *element.Node { keyBuf := make([]byte, 8) bin.PutVarint(keyBuf, int64(id)) data, err := p.db.Get(p.ro, keyBuf) @@ -51,6 +76,30 @@ func (p *Cache) GetCoord(id element.OSMID) *element.Node { return node } +func (p *Cache) PutWay(way *element.Way) { + keyBuf := make([]byte, 8) + bin.PutVarint(keyBuf, int64(way.Id)) + data, err := binary.MarshalWay(way) + if err != nil { + panic(err) + } + p.db.Put(p.wo, keyBuf, data) +} + +func (p *Cache) GetWay(id int64) *element.Way { + keyBuf := make([]byte, 8) + bin.PutVarint(keyBuf, int64(id)) + data, err := p.db.Get(p.ro, keyBuf) + if err != nil { + panic(err) + } + way, err := binary.UnmarshalWay(data) + if err != nil { + panic(err) + } + return way +} + func (p *Cache) Close() { p.db.Close() } diff --git a/cache/db_test.go b/cache/db_test.go index f9ab6cc..a69aaa4 100644 --- a/cache/db_test.go +++ b/cache/db_test.go @@ -19,7 +19,7 @@ func TestCreateCache(t *testing.T) { } } -func TestReadWriteNode(t *testing.T) { +func TestReadWriteCoord(t *testing.T) { cache_dir, _ := ioutil.TempDir("", "goposm_test") defer os.RemoveAll(cache_dir) @@ -32,9 +32,127 @@ func TestReadWriteNode(t *testing.T) { cache = NewCache(cache_dir) defer cache.Close() - data := cache.GetCoord(element.OSMID(1)) + data := cache.GetCoord(1) if data.Id != 1 { t.Errorf("unexpected result of GetNode(1): %v", data) } } + +func TestReadWriteNode(t *testing.T) { + cache_dir, _ := ioutil.TempDir("", "goposm_test") + defer os.RemoveAll(cache_dir) + + cache := NewCache(cache_dir) + node := &element.Node{} + node.Id = 1 + cache.PutNode(node) + cache.Close() + + cache = NewCache(cache_dir) + defer cache.Close() + + data := cache.GetNode(1) + + if data.Id != 1 { + t.Errorf("unexpected result of GetNode(1): %v", data) + } +} + +func TestReadWriteWay(t *testing.T) { + cache_dir, _ := ioutil.TempDir("", "goposm_test") + defer os.RemoveAll(cache_dir) + + cache := NewCache(cache_dir) + way := &element.Way{} + way.Id = 1 + cache.PutWay(way) + cache.Close() + + cache = NewCache(cache_dir) + defer cache.Close() + + data := cache.GetWay(1) + + if data.Id != 1 { + t.Errorf("unexpected result of GetWay(1): %v", data) + } +} + +func BenchmarkWriteWay(b *testing.B) { + b.StopTimer() + cache_dir, _ := ioutil.TempDir("", "goposm_test") + defer os.RemoveAll(cache_dir) + + cache := NewCache(cache_dir) + defer cache.Close() + + b.StartTimer() + way := &element.Way{} + for i := 0; i < b.N; i++ { + way.Id = int64(i) + cache.PutWay(way) + } +} + +func BenchmarkReadWay(b *testing.B) { + b.StopTimer() + cache_dir, _ := ioutil.TempDir("", "goposm_test") + defer os.RemoveAll(cache_dir) + + cache := NewCache(cache_dir) + defer cache.Close() + + way := &element.Way{} + for i := 0; i < b.N; i++ { + way.Id = int64(i) + cache.PutWay(way) + } + + b.StartTimer() + for i := int64(0); i < int64(b.N); i++ { + if cache.GetWay(i).Id != i { + b.Fail() + } + } + +} + +func BenchmarkWriteCoord(b *testing.B) { + b.StopTimer() + cache_dir, _ := ioutil.TempDir("", "goposm_test") + defer os.RemoveAll(cache_dir) + + cache := NewCache(cache_dir) + defer cache.Close() + + b.StartTimer() + node := &element.Node{} + for i := 0; i < b.N; i++ { + node.Id = int64(i) + cache.PutCoord(node) + } +} + +func BenchmarkReadCoord(b *testing.B) { + b.StopTimer() + cache_dir, _ := ioutil.TempDir("", "goposm_test") + defer os.RemoveAll(cache_dir) + + cache := NewCache(cache_dir) + defer cache.Close() + + node := &element.Node{} + for i := 0; i < b.N; i++ { + node.Id = int64(i) + cache.PutCoord(node) + } + + b.StartTimer() + for i := int64(0); i < int64(b.N); i++ { + if cache.GetCoord(i).Id != i { + b.Fail() + } + } + +} diff --git a/element/element.go b/element/element.go index 31309e2..daf4c94 100644 --- a/element/element.go +++ b/element/element.go @@ -8,24 +8,22 @@ type OSMElem struct { } type Node struct { - Id int64 - Tags Tags + OSMElem Lat float64 Long float64 } type Way struct { - Id int64 - Tags Tags + OSMElem Nodes []int64 } type MemberType int const ( - NODE MemberType = iota - WAY - RELATION + NODE MemberType = 0 + WAY = 1 + RELATION = 2 ) type Member struct { @@ -35,7 +33,22 @@ type Member struct { } type Relation struct { - Id int64 - Tags Tags + OSMElem Members []Member } + +func (elem *OSMElem) TagsFromArray(arr []string) { + result := make(Tags) + for i := 0; i < len(arr); i += 2 { + result[arr[i]] = arr[i+1] + } + elem.Tags = result +} + +func (elem *OSMElem) TagsAsArray() []string { + result := make([]string, 0, 2*len(elem.Tags)) + for key, val := range elem.Tags { + result = append(result, key, val) + } + return result +} diff --git a/parser.go b/parser.go index 8c35d0e..0fb96fe 100644 --- a/parser.go +++ b/parser.go @@ -7,6 +7,6 @@ import ( ) func main() { - parser.BlockPositions(os.Args[1]) + parser.PBFStats(os.Args[1]) fmt.Println("done") } diff --git a/parser/pbf.go b/parser/pbf.go index 7a7bcc1..dd9f274 100644 --- a/parser/pbf.go +++ b/parser/pbf.go @@ -2,7 +2,6 @@ package parser import ( "fmt" - "goposm/binary" "goposm/element" "log" "os" @@ -30,6 +29,10 @@ func Open(filename string) (f *PBF, err error) { return f, nil } +func (pbf *PBF) Close() error { + return pbf.file.Close() +} + func (pbf *PBF) NextDataPosition() (offset int64, size int32) { header := pbf.nextBlobHeader() size = header.GetDatasize() @@ -44,25 +47,6 @@ func (pbf *PBF) NextDataPosition() (offset int64, size int32) { return } -func DenseNodeTags(stringtable []string, keyvals []int32) (tags map[string]string, nextPos int) { - tags = make(map[string]string) - nextPos = 0 - for { - keyId := keyvals[nextPos] - nextPos += 1 - if keyId == 0 { - return - } - key := stringtable[keyId] - valId := keyvals[nextPos] - nextPos += 1 - val := stringtable[valId] - - tags[key] = val - } - return -} - const COORD_FACTOR float64 = 11930464.7083 // ((2<<31)-1)/360.0 func coordToInt(coord float64) uint32 { @@ -76,7 +60,7 @@ func intToCoord(coord uint32) float64 { func ReadDenseNodes( dense *osmpbf.DenseNodes, block *osmpbf.PrimitiveBlock, - stringtable *StringTable) (nodes []element.Node) { + stringtable StringTable) (nodes []element.Node) { var lastId int64 var lastLon, lastLat int64 @@ -91,7 +75,7 @@ func ReadDenseNodes( lastId += dense.Id[i] lastLon += dense.Lon[i] lastLat += dense.Lat[i] - nodes[i].Id = element.OSMID(lastId) + nodes[i].Id = lastId nodes[i].Long = (coordScale * float64(lonOffset+(granularity*lastLon))) nodes[i].Lat = (coordScale * float64(latOffset+(granularity*lastLat))) if dense.KeysVals[lastKeyValPos] != 0 { @@ -103,7 +87,7 @@ func ReadDenseNodes( return nodes } -func ParseDenseNodeTags(stringtable *StringTable, keysVals *[]int32, pos *int) map[string]string { +func ParseDenseNodeTags(stringtable StringTable, keysVals *[]int32, pos *int) map[string]string { result := make(map[string]string) for { if *pos >= len(*keysVals) { @@ -116,22 +100,120 @@ func ParseDenseNodeTags(stringtable *StringTable, keysVals *[]int32, pos *int) m } val := (*keysVals)[*pos] *pos += 1 - result[(*stringtable)[key]] = (*stringtable)[val] + result[stringtable[key]] = stringtable[val] + } + return result +} +func ParseTags(stringtable StringTable, keys []uint32, vals []uint32) map[string]string { + tags := make(map[string]string) + for i := 0; i < len(keys); i++ { + key := stringtable[keys[i]] + val := stringtable[vals[i]] + tags[key] = val + } + return tags +} + +func ReadNodes( + nodes []*osmpbf.Node, + block *osmpbf.PrimitiveBlock, + stringtable StringTable) []element.Node { + + result := make([]element.Node, len(nodes)) + granularity := int64(block.GetGranularity()) + latOffset := block.GetLatOffset() + lonOffset := block.GetLonOffset() + coordScale := 0.000000001 + + for i := range nodes { + id := *nodes[i].Id + lon := *nodes[i].Lon + lat := *nodes[i].Lat + result[i].Id = id + result[i].Long = (coordScale * float64(lonOffset+(granularity*lon))) + result[i].Lat = (coordScale * float64(latOffset+(granularity*lat))) + result[i].Tags = ParseTags(stringtable, nodes[i].Keys, nodes[i].Vals) + } + return result +} + +func ParseDeltaRefs(refs []int64) []int64 { + result := make([]int64, len(refs)) + var lastRef int64 + + for i, refDelta := range refs { + lastRef += refDelta + result[i] = lastRef + } + return result +} + +func ReadWays( + ways []*osmpbf.Way, + block *osmpbf.PrimitiveBlock, + stringtable StringTable) []element.Way { + + result := make([]element.Way, len(ways)) + + for i := range ways { + id := *ways[i].Id + result[i].Id = id + result[i].Tags = ParseTags(stringtable, ways[i].Keys, ways[i].Vals) + result[i].Nodes = ParseDeltaRefs(ways[i].Refs) + } + return result +} + +func ParseRelationMembers(rel *osmpbf.Relation, stringtable StringTable) []element.Member { + result := make([]element.Member, len(rel.Memids)) + + var lastId int64 + for i := range rel.Memids { + lastId += rel.Memids[i] + result[i].Id = lastId + result[i].Role = stringtable[rel.RolesSid[i]] + result[i].Type = element.MemberType(rel.Types[i]) + } + return result +} + +func ReadRelations( + relations []*osmpbf.Relation, + block *osmpbf.PrimitiveBlock, + stringtable StringTable) []element.Relation { + + result := make([]element.Relation, len(relations)) + + for i := range relations { + id := *relations[i].Id + result[i].Id = id + result[i].Tags = ParseTags(stringtable, relations[i].Keys, relations[i].Vals) + result[i].Members = ParseRelationMembers(relations[i], stringtable) } return result } type StringTable []string -func NewStringTable(source *osmpbf.StringTable) *StringTable { +func NewStringTable(source *osmpbf.StringTable) StringTable { result := make(StringTable, len(source.S)) for i, bytes := range source.S { result[i] = string(bytes) } - return &result + return result } -func BlockPositions(filename string) { +func PBFBlockPositions(filename string) chan BlockPosition { + pbf, err := Open(filename) + if err != nil { + log.Fatal(err) + } + defer pbf.Close() + + return pbf.BlockPositions() +} + +func PBFStats(filename string) { pbf, err := Open(filename) if err != nil { log.Fatal(err) @@ -146,17 +228,16 @@ func BlockPositions(filename string) { for _, group := range block.Primitivegroup { dense := group.GetDense() if dense != nil { - nodes := ReadDenseNodes(dense, block, stringtable) - lon, lat := nodes[0].Long, nodes[0].Lat - data, _ := binary.Marshal(nodes[0]) - fmt.Printf("len: %d", len(data)) - fmt.Printf("%v", data) - fmt.Printf("%12d %10.8f %10.8f\n", nodes[0].Id, lon, lat) + _ = ReadDenseNodes(dense, block, stringtable) nodesCounter += len(dense.Id) } + _ = ReadNodes(group.Nodes, block, stringtable) nodesCounter += len(group.Nodes) waysCounter += len(group.Ways) + _ = ReadWays(group.Ways, block, stringtable) relationsCounter += len(group.Relations) + _ = ReadRelations(group.Relations, block, stringtable) + } } fmt.Printf("nodes: %v\tways: %v\trelations:%v\n", nodesCounter, waysCounter, relationsCounter)