escape commonKey/tagCodePoint runes in key (closes #122)
parent
c312bd2229
commit
4a472f563a
|
@ -15,8 +15,9 @@ package binary
|
||||||
// etc.) are converted to a single ASCII control char (0x01-0x1f)
|
// etc.) are converted to a single ASCII control char (0x01-0x1f)
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"github.com/omniscale/imposm3/element"
|
|
||||||
"unicode/utf8"
|
"unicode/utf8"
|
||||||
|
|
||||||
|
"github.com/omniscale/imposm3/element"
|
||||||
)
|
)
|
||||||
|
|
||||||
type codepoint rune
|
type codepoint rune
|
||||||
|
@ -38,6 +39,8 @@ const maxCodePoint = codepoint('\uF8FF')
|
||||||
|
|
||||||
var nextCodePoint = codepoint('\uE000')
|
var nextCodePoint = codepoint('\uE000')
|
||||||
|
|
||||||
|
const escapeRune = '\ufffd' // unicode replacement char
|
||||||
|
|
||||||
func addTagCodePoint(key, value string) {
|
func addTagCodePoint(key, value string) {
|
||||||
if nextCodePoint > maxCodePoint {
|
if nextCodePoint > maxCodePoint {
|
||||||
panic("all codepoints used!")
|
panic("all codepoints used!")
|
||||||
|
@ -71,20 +74,33 @@ func tagsFromArray(arr []string) element.Tags {
|
||||||
}
|
}
|
||||||
result := make(element.Tags)
|
result := make(element.Tags)
|
||||||
for i := 0; i < len(arr); i += 1 {
|
for i := 0; i < len(arr); i += 1 {
|
||||||
if r, size := utf8.DecodeRuneInString(arr[i]); size >= 3 &&
|
if r, size := utf8.DecodeRuneInString(arr[i]); size >= 3 {
|
||||||
codepoint(r) >= minCodePoint &&
|
if r == escapeRune {
|
||||||
codepoint(r) < nextCodePoint {
|
// remove escape rune
|
||||||
tag, ok := codePointToTag[codepoint(r)]
|
result[arr[i][size:]] = arr[i+1]
|
||||||
if !ok {
|
i++
|
||||||
panic("missing tag for codepoint")
|
continue
|
||||||
|
} else if codepoint(r) >= minCodePoint &&
|
||||||
|
codepoint(r) < nextCodePoint {
|
||||||
|
tag, ok := codePointToTag[codepoint(r)]
|
||||||
|
if !ok {
|
||||||
|
panic("missing tag for codepoint")
|
||||||
|
}
|
||||||
|
result[tag.Key] = tag.Value
|
||||||
|
continue
|
||||||
}
|
}
|
||||||
result[tag.Key] = tag.Value
|
|
||||||
} else if len(arr[i]) > 0 && arr[i][0] < 32 {
|
} else if len(arr[i]) > 0 && arr[i][0] < 32 {
|
||||||
result[codePointToCommonKey[arr[i][0]]] = arr[i][1:]
|
result[codePointToCommonKey[arr[i][0]]] = arr[i][1:]
|
||||||
} else {
|
continue
|
||||||
result[arr[i]] = arr[i+1]
|
|
||||||
i++
|
|
||||||
}
|
}
|
||||||
|
if len(arr) <= i+1 {
|
||||||
|
// notify users affected by #112
|
||||||
|
// TODO remove check in the future to avoid misleading message
|
||||||
|
// if a similar issue shows up
|
||||||
|
panic("Internal cache corrupt, see: https://github.com/omniscale/imposm3/issues/122")
|
||||||
|
}
|
||||||
|
result[arr[i]] = arr[i+1]
|
||||||
|
i++
|
||||||
}
|
}
|
||||||
return result
|
return result
|
||||||
}
|
}
|
||||||
|
@ -95,21 +111,32 @@ func tagsAsArray(tags element.Tags) []string {
|
||||||
}
|
}
|
||||||
result := make([]string, 0, 2*len(tags))
|
result := make([]string, 0, 2*len(tags))
|
||||||
for key, val := range tags {
|
for key, val := range tags {
|
||||||
if valMap, ok := tagsToCodePoint[key]; ok {
|
result = appendTag(result, key, val)
|
||||||
if codePoint, ok := valMap[val]; ok {
|
|
||||||
result = append(result, string(codePoint))
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if codepoint, ok := commonKeys[key]; ok {
|
|
||||||
result = append(result, string(codepoint)+val)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
result = append(result, key, val)
|
|
||||||
}
|
}
|
||||||
return result
|
return result
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func appendTag(arr []string, key, val string) []string {
|
||||||
|
if valMap, ok := tagsToCodePoint[key]; ok {
|
||||||
|
if codePoint, ok := valMap[val]; ok {
|
||||||
|
return append(arr, string(codePoint))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if codepoint, ok := commonKeys[key]; ok {
|
||||||
|
return append(arr, string(codepoint)+val)
|
||||||
|
}
|
||||||
|
// escape first char/rune if it is a commonKey/tagCodePoint
|
||||||
|
if key[0] < 32 {
|
||||||
|
key = string(escapeRune) + key
|
||||||
|
} else if r, size := utf8.DecodeRuneInString(key); size >= 3 &&
|
||||||
|
(codepoint(r) >= minCodePoint &&
|
||||||
|
codepoint(r) <= maxCodePoint) ||
|
||||||
|
(r == escapeRune) {
|
||||||
|
key = string(escapeRune) + key
|
||||||
|
}
|
||||||
|
return append(arr, key, val)
|
||||||
|
}
|
||||||
|
|
||||||
func init() {
|
func init() {
|
||||||
//
|
//
|
||||||
// DO NOT EDIT, REMOVE, REORDER ANY OF THE FOLLOWING LINES!
|
// DO NOT EDIT, REMOVE, REORDER ANY OF THE FOLLOWING LINES!
|
||||||
|
|
|
@ -1,9 +1,11 @@
|
||||||
package binary
|
package binary
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"github.com/omniscale/imposm3/element"
|
"reflect"
|
||||||
"sort"
|
"sort"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
|
"github.com/omniscale/imposm3/element"
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestTagsAsAndFromArray(t *testing.T) {
|
func TestTagsAsAndFromArray(t *testing.T) {
|
||||||
|
@ -34,6 +36,73 @@ func TestTagsAsAndFromArray(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestTagsArrayEncoding(t *testing.T) {
|
||||||
|
for i, check := range []struct {
|
||||||
|
// Test tags with string array of key/value instead of
|
||||||
|
// element.Tags to have fixed order
|
||||||
|
tags []string
|
||||||
|
expected []string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
[]string{"name", "foo", "highway", "residential", "oneway", "yes"},
|
||||||
|
[]string{
|
||||||
|
"\x01foo",
|
||||||
|
"\ue001",
|
||||||
|
"\ue008",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{ // ascii control characters are escaped
|
||||||
|
[]string{"name", "\tfoo", "\tfoo", "bar"},
|
||||||
|
[]string{
|
||||||
|
"\x01\tfoo",
|
||||||
|
"\ufffd\tfoo", "bar",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{ // private unicode characters are escaped
|
||||||
|
[]string{"\ue008foo", "bar", "\ue008", "baz"},
|
||||||
|
[]string{
|
||||||
|
"\ufffd\ue008foo", "bar",
|
||||||
|
"\ufffd\ue008", "baz",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{ // replacement characters (our escape token) are escaped as well
|
||||||
|
[]string{"\ufffd\ue008foo", "bar", "\ufffd\ufffd\ufffd\ue008foo", "bar"},
|
||||||
|
[]string{
|
||||||
|
"\ufffd\ufffd\ue008foo", "bar",
|
||||||
|
"\ufffd\ufffd\ufffd\ufffd\ue008foo", "bar",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
} {
|
||||||
|
var actual []string
|
||||||
|
for i := 0; i < len(check.tags); i += 2 {
|
||||||
|
actual = appendTag(actual, check.tags[i], check.tags[i+1])
|
||||||
|
}
|
||||||
|
if len(check.expected) != len(actual) {
|
||||||
|
t.Errorf("case %d: unexpected tag array %#v != %#v", i, actual, check.expected)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
actualTags := tagsFromArray(actual)
|
||||||
|
expectedTags := make(element.Tags)
|
||||||
|
for i := 0; i < len(check.tags); i += 2 {
|
||||||
|
expectedTags[check.tags[i]] = check.tags[i+1]
|
||||||
|
}
|
||||||
|
if !reflect.DeepEqual(actualTags, expectedTags) {
|
||||||
|
t.Errorf("case %d: unexpected tags %#v != %#v", i, actualTags, expectedTags)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestTagsArrayIssue122Panic(t *testing.T) {
|
||||||
|
tagsFromArray([]string{"foo", "bar"})
|
||||||
|
defer func() {
|
||||||
|
p := recover()
|
||||||
|
if p == nil {
|
||||||
|
t.Fatal("did not panic")
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
tagsFromArray([]string{"foo"})
|
||||||
|
}
|
||||||
|
|
||||||
func TestCodePoints(t *testing.T) {
|
func TestCodePoints(t *testing.T) {
|
||||||
// codepoints should never change, so check a few for sanity
|
// codepoints should never change, so check a few for sanity
|
||||||
if c := tagsToCodePoint["building"]["yes"]; c != codepoint('\ue000') {
|
if c := tagsToCodePoint["building"]["yes"]; c != codepoint('\ue000') {
|
||||||
|
|
Loading…
Reference in New Issue