escape commonKey/tagCodePoint runes in key (closes #122)
parent
c312bd2229
commit
4a472f563a
|
@ -15,8 +15,9 @@ package binary
|
|||
// etc.) are converted to a single ASCII control char (0x01-0x1f)
|
||||
|
||||
import (
|
||||
"github.com/omniscale/imposm3/element"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/omniscale/imposm3/element"
|
||||
)
|
||||
|
||||
type codepoint rune
|
||||
|
@ -38,6 +39,8 @@ const maxCodePoint = codepoint('\uF8FF')
|
|||
|
||||
var nextCodePoint = codepoint('\uE000')
|
||||
|
||||
const escapeRune = '\ufffd' // unicode replacement char
|
||||
|
||||
func addTagCodePoint(key, value string) {
|
||||
if nextCodePoint > maxCodePoint {
|
||||
panic("all codepoints used!")
|
||||
|
@ -71,20 +74,33 @@ func tagsFromArray(arr []string) element.Tags {
|
|||
}
|
||||
result := make(element.Tags)
|
||||
for i := 0; i < len(arr); i += 1 {
|
||||
if r, size := utf8.DecodeRuneInString(arr[i]); size >= 3 &&
|
||||
codepoint(r) >= minCodePoint &&
|
||||
codepoint(r) < nextCodePoint {
|
||||
tag, ok := codePointToTag[codepoint(r)]
|
||||
if !ok {
|
||||
panic("missing tag for codepoint")
|
||||
if r, size := utf8.DecodeRuneInString(arr[i]); size >= 3 {
|
||||
if r == escapeRune {
|
||||
// remove escape rune
|
||||
result[arr[i][size:]] = arr[i+1]
|
||||
i++
|
||||
continue
|
||||
} else if codepoint(r) >= minCodePoint &&
|
||||
codepoint(r) < nextCodePoint {
|
||||
tag, ok := codePointToTag[codepoint(r)]
|
||||
if !ok {
|
||||
panic("missing tag for codepoint")
|
||||
}
|
||||
result[tag.Key] = tag.Value
|
||||
continue
|
||||
}
|
||||
result[tag.Key] = tag.Value
|
||||
} else if len(arr[i]) > 0 && arr[i][0] < 32 {
|
||||
result[codePointToCommonKey[arr[i][0]]] = arr[i][1:]
|
||||
} else {
|
||||
result[arr[i]] = arr[i+1]
|
||||
i++
|
||||
continue
|
||||
}
|
||||
if len(arr) <= i+1 {
|
||||
// notify users affected by #112
|
||||
// TODO remove check in the future to avoid misleading message
|
||||
// if a similar issue shows up
|
||||
panic("Internal cache corrupt, see: https://github.com/omniscale/imposm3/issues/122")
|
||||
}
|
||||
result[arr[i]] = arr[i+1]
|
||||
i++
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
@ -95,21 +111,32 @@ func tagsAsArray(tags element.Tags) []string {
|
|||
}
|
||||
result := make([]string, 0, 2*len(tags))
|
||||
for key, val := range tags {
|
||||
if valMap, ok := tagsToCodePoint[key]; ok {
|
||||
if codePoint, ok := valMap[val]; ok {
|
||||
result = append(result, string(codePoint))
|
||||
continue
|
||||
}
|
||||
}
|
||||
if codepoint, ok := commonKeys[key]; ok {
|
||||
result = append(result, string(codepoint)+val)
|
||||
continue
|
||||
}
|
||||
result = append(result, key, val)
|
||||
result = appendTag(result, key, val)
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
func appendTag(arr []string, key, val string) []string {
|
||||
if valMap, ok := tagsToCodePoint[key]; ok {
|
||||
if codePoint, ok := valMap[val]; ok {
|
||||
return append(arr, string(codePoint))
|
||||
}
|
||||
}
|
||||
if codepoint, ok := commonKeys[key]; ok {
|
||||
return append(arr, string(codepoint)+val)
|
||||
}
|
||||
// escape first char/rune if it is a commonKey/tagCodePoint
|
||||
if key[0] < 32 {
|
||||
key = string(escapeRune) + key
|
||||
} else if r, size := utf8.DecodeRuneInString(key); size >= 3 &&
|
||||
(codepoint(r) >= minCodePoint &&
|
||||
codepoint(r) <= maxCodePoint) ||
|
||||
(r == escapeRune) {
|
||||
key = string(escapeRune) + key
|
||||
}
|
||||
return append(arr, key, val)
|
||||
}
|
||||
|
||||
func init() {
|
||||
//
|
||||
// DO NOT EDIT, REMOVE, REORDER ANY OF THE FOLLOWING LINES!
|
||||
|
|
|
@ -1,9 +1,11 @@
|
|||
package binary
|
||||
|
||||
import (
|
||||
"github.com/omniscale/imposm3/element"
|
||||
"reflect"
|
||||
"sort"
|
||||
"testing"
|
||||
|
||||
"github.com/omniscale/imposm3/element"
|
||||
)
|
||||
|
||||
func TestTagsAsAndFromArray(t *testing.T) {
|
||||
|
@ -34,6 +36,73 @@ func TestTagsAsAndFromArray(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestTagsArrayEncoding(t *testing.T) {
|
||||
for i, check := range []struct {
|
||||
// Test tags with string array of key/value instead of
|
||||
// element.Tags to have fixed order
|
||||
tags []string
|
||||
expected []string
|
||||
}{
|
||||
{
|
||||
[]string{"name", "foo", "highway", "residential", "oneway", "yes"},
|
||||
[]string{
|
||||
"\x01foo",
|
||||
"\ue001",
|
||||
"\ue008",
|
||||
},
|
||||
},
|
||||
{ // ascii control characters are escaped
|
||||
[]string{"name", "\tfoo", "\tfoo", "bar"},
|
||||
[]string{
|
||||
"\x01\tfoo",
|
||||
"\ufffd\tfoo", "bar",
|
||||
},
|
||||
},
|
||||
{ // private unicode characters are escaped
|
||||
[]string{"\ue008foo", "bar", "\ue008", "baz"},
|
||||
[]string{
|
||||
"\ufffd\ue008foo", "bar",
|
||||
"\ufffd\ue008", "baz",
|
||||
},
|
||||
},
|
||||
{ // replacement characters (our escape token) are escaped as well
|
||||
[]string{"\ufffd\ue008foo", "bar", "\ufffd\ufffd\ufffd\ue008foo", "bar"},
|
||||
[]string{
|
||||
"\ufffd\ufffd\ue008foo", "bar",
|
||||
"\ufffd\ufffd\ufffd\ufffd\ue008foo", "bar",
|
||||
},
|
||||
},
|
||||
} {
|
||||
var actual []string
|
||||
for i := 0; i < len(check.tags); i += 2 {
|
||||
actual = appendTag(actual, check.tags[i], check.tags[i+1])
|
||||
}
|
||||
if len(check.expected) != len(actual) {
|
||||
t.Errorf("case %d: unexpected tag array %#v != %#v", i, actual, check.expected)
|
||||
continue
|
||||
}
|
||||
actualTags := tagsFromArray(actual)
|
||||
expectedTags := make(element.Tags)
|
||||
for i := 0; i < len(check.tags); i += 2 {
|
||||
expectedTags[check.tags[i]] = check.tags[i+1]
|
||||
}
|
||||
if !reflect.DeepEqual(actualTags, expectedTags) {
|
||||
t.Errorf("case %d: unexpected tags %#v != %#v", i, actualTags, expectedTags)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestTagsArrayIssue122Panic(t *testing.T) {
|
||||
tagsFromArray([]string{"foo", "bar"})
|
||||
defer func() {
|
||||
p := recover()
|
||||
if p == nil {
|
||||
t.Fatal("did not panic")
|
||||
}
|
||||
}()
|
||||
tagsFromArray([]string{"foo"})
|
||||
}
|
||||
|
||||
func TestCodePoints(t *testing.T) {
|
||||
// codepoints should never change, so check a few for sanity
|
||||
if c := tagsToCodePoint["building"]["yes"]; c != codepoint('\ue000') {
|
||||
|
|
Loading…
Reference in New Issue