escape commonKey/tagCodePoint runes in key (closes #122)

master
Oliver Tonnhofer 2016-10-10 09:31:11 +02:00
parent c312bd2229
commit 4a472f563a
2 changed files with 119 additions and 23 deletions

71
cache/binary/tags.go vendored
View File

@ -15,8 +15,9 @@ package binary
// etc.) are converted to a single ASCII control char (0x01-0x1f)
import (
"github.com/omniscale/imposm3/element"
"unicode/utf8"
"github.com/omniscale/imposm3/element"
)
type codepoint rune
@ -38,6 +39,8 @@ const maxCodePoint = codepoint('\uF8FF')
var nextCodePoint = codepoint('\uE000')
const escapeRune = '\ufffd' // unicode replacement char
func addTagCodePoint(key, value string) {
if nextCodePoint > maxCodePoint {
panic("all codepoints used!")
@ -71,20 +74,33 @@ func tagsFromArray(arr []string) element.Tags {
}
result := make(element.Tags)
for i := 0; i < len(arr); i += 1 {
if r, size := utf8.DecodeRuneInString(arr[i]); size >= 3 &&
codepoint(r) >= minCodePoint &&
codepoint(r) < nextCodePoint {
tag, ok := codePointToTag[codepoint(r)]
if !ok {
panic("missing tag for codepoint")
if r, size := utf8.DecodeRuneInString(arr[i]); size >= 3 {
if r == escapeRune {
// remove escape rune
result[arr[i][size:]] = arr[i+1]
i++
continue
} else if codepoint(r) >= minCodePoint &&
codepoint(r) < nextCodePoint {
tag, ok := codePointToTag[codepoint(r)]
if !ok {
panic("missing tag for codepoint")
}
result[tag.Key] = tag.Value
continue
}
result[tag.Key] = tag.Value
} else if len(arr[i]) > 0 && arr[i][0] < 32 {
result[codePointToCommonKey[arr[i][0]]] = arr[i][1:]
} else {
result[arr[i]] = arr[i+1]
i++
continue
}
if len(arr) <= i+1 {
// notify users affected by #112
// TODO remove check in the future to avoid misleading message
// if a similar issue shows up
panic("Internal cache corrupt, see: https://github.com/omniscale/imposm3/issues/122")
}
result[arr[i]] = arr[i+1]
i++
}
return result
}
@ -95,21 +111,32 @@ func tagsAsArray(tags element.Tags) []string {
}
result := make([]string, 0, 2*len(tags))
for key, val := range tags {
if valMap, ok := tagsToCodePoint[key]; ok {
if codePoint, ok := valMap[val]; ok {
result = append(result, string(codePoint))
continue
}
}
if codepoint, ok := commonKeys[key]; ok {
result = append(result, string(codepoint)+val)
continue
}
result = append(result, key, val)
result = appendTag(result, key, val)
}
return result
}
func appendTag(arr []string, key, val string) []string {
if valMap, ok := tagsToCodePoint[key]; ok {
if codePoint, ok := valMap[val]; ok {
return append(arr, string(codePoint))
}
}
if codepoint, ok := commonKeys[key]; ok {
return append(arr, string(codepoint)+val)
}
// escape first char/rune if it is a commonKey/tagCodePoint
if key[0] < 32 {
key = string(escapeRune) + key
} else if r, size := utf8.DecodeRuneInString(key); size >= 3 &&
(codepoint(r) >= minCodePoint &&
codepoint(r) <= maxCodePoint) ||
(r == escapeRune) {
key = string(escapeRune) + key
}
return append(arr, key, val)
}
func init() {
//
// DO NOT EDIT, REMOVE, REORDER ANY OF THE FOLLOWING LINES!

View File

@ -1,9 +1,11 @@
package binary
import (
"github.com/omniscale/imposm3/element"
"reflect"
"sort"
"testing"
"github.com/omniscale/imposm3/element"
)
func TestTagsAsAndFromArray(t *testing.T) {
@ -34,6 +36,73 @@ func TestTagsAsAndFromArray(t *testing.T) {
}
}
func TestTagsArrayEncoding(t *testing.T) {
for i, check := range []struct {
// Test tags with string array of key/value instead of
// element.Tags to have fixed order
tags []string
expected []string
}{
{
[]string{"name", "foo", "highway", "residential", "oneway", "yes"},
[]string{
"\x01foo",
"\ue001",
"\ue008",
},
},
{ // ascii control characters are escaped
[]string{"name", "\tfoo", "\tfoo", "bar"},
[]string{
"\x01\tfoo",
"\ufffd\tfoo", "bar",
},
},
{ // private unicode characters are escaped
[]string{"\ue008foo", "bar", "\ue008", "baz"},
[]string{
"\ufffd\ue008foo", "bar",
"\ufffd\ue008", "baz",
},
},
{ // replacement characters (our escape token) are escaped as well
[]string{"\ufffd\ue008foo", "bar", "\ufffd\ufffd\ufffd\ue008foo", "bar"},
[]string{
"\ufffd\ufffd\ue008foo", "bar",
"\ufffd\ufffd\ufffd\ufffd\ue008foo", "bar",
},
},
} {
var actual []string
for i := 0; i < len(check.tags); i += 2 {
actual = appendTag(actual, check.tags[i], check.tags[i+1])
}
if len(check.expected) != len(actual) {
t.Errorf("case %d: unexpected tag array %#v != %#v", i, actual, check.expected)
continue
}
actualTags := tagsFromArray(actual)
expectedTags := make(element.Tags)
for i := 0; i < len(check.tags); i += 2 {
expectedTags[check.tags[i]] = check.tags[i+1]
}
if !reflect.DeepEqual(actualTags, expectedTags) {
t.Errorf("case %d: unexpected tags %#v != %#v", i, actualTags, expectedTags)
}
}
}
func TestTagsArrayIssue122Panic(t *testing.T) {
tagsFromArray([]string{"foo", "bar"})
defer func() {
p := recover()
if p == nil {
t.Fatal("did not panic")
}
}()
tagsFromArray([]string{"foo"})
}
func TestCodePoints(t *testing.T) {
// codepoints should never change, so check a few for sanity
if c := tagsToCodePoint["building"]["yes"]; c != codepoint('\ue000') {