commit
261aa31dc5
1
go.mod
1
go.mod
|
@ -42,6 +42,7 @@ require (
|
|||
go.uber.org/zap v1.15.0
|
||||
golang.org/x/crypto v0.0.0-20191002192127-34f69633bfdc
|
||||
golang.org/x/net v0.0.0-20190813141303-74dc4d7220e7
|
||||
golang.org/x/text v0.3.3 // indirect
|
||||
golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2
|
||||
google.golang.org/grpc v1.26.0
|
||||
gopkg.in/cheggaaa/pb.v1 v1.0.25
|
||||
|
|
2
go.sum
2
go.sum
|
@ -203,6 +203,8 @@ golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5 h1:LfCXLvNmTYH9kEmVgqbnsWfru
|
|||
golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg=
|
||||
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||
golang.org/x/text v0.3.3 h1:cokOdA+Jmi5PJGXLlLllQSgYigAEfHXJAERHVMaCc2k=
|
||||
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||
golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2 h1:+DCIGbF/swA92ohVg0//6X2IVY3KZs6p9mix0ziNYJM=
|
||||
golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
|
||||
golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||
|
|
|
@ -78,8 +78,8 @@ type SpanningTransformer interface {
|
|||
// considering the error err.
|
||||
//
|
||||
// A nil error means that all input bytes are known to be identical to the
|
||||
// output produced by the Transformer. A nil error can be be returned
|
||||
// regardless of whether atEOF is true. If err is nil, then then n must
|
||||
// output produced by the Transformer. A nil error can be returned
|
||||
// regardless of whether atEOF is true. If err is nil, then n must
|
||||
// equal len(src); the converse is not necessarily true.
|
||||
//
|
||||
// ErrEndOfSpan means that the Transformer output may differ from the
|
||||
|
@ -493,7 +493,7 @@ func (c *chain) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err erro
|
|||
return dstL.n, srcL.p, err
|
||||
}
|
||||
|
||||
// Deprecated: use runes.Remove instead.
|
||||
// Deprecated: Use runes.Remove instead.
|
||||
func RemoveFunc(f func(r rune) bool) Transformer {
|
||||
return removeF(f)
|
||||
}
|
||||
|
@ -648,7 +648,8 @@ func String(t Transformer, s string) (result string, n int, err error) {
|
|||
// Transform the remaining input, growing dst and src buffers as necessary.
|
||||
for {
|
||||
n := copy(src, s[pSrc:])
|
||||
nDst, nSrc, err := t.Transform(dst[pDst:], src[:n], pSrc+n == len(s))
|
||||
atEOF := pSrc+n == len(s)
|
||||
nDst, nSrc, err := t.Transform(dst[pDst:], src[:n], atEOF)
|
||||
pDst += nDst
|
||||
pSrc += nSrc
|
||||
|
||||
|
@ -659,6 +660,9 @@ func String(t Transformer, s string) (result string, n int, err error) {
|
|||
dst = grow(dst, pDst)
|
||||
}
|
||||
} else if err == ErrShortSrc {
|
||||
if atEOF {
|
||||
return string(dst[:pDst]), pSrc, err
|
||||
}
|
||||
if nSrc == 0 {
|
||||
src = grow(src, 0)
|
||||
}
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
|
||||
// Package bidi contains functionality for bidirectional text support.
|
||||
//
|
||||
// See http://www.unicode.org/reports/tr9.
|
||||
// See https://www.unicode.org/reports/tr9.
|
||||
//
|
||||
// NOTE: UNDER CONSTRUCTION. This API may change in backwards incompatible ways
|
||||
// and without notice.
|
||||
|
|
|
@ -12,7 +12,7 @@ import (
|
|||
|
||||
// This file contains a port of the reference implementation of the
|
||||
// Bidi Parentheses Algorithm:
|
||||
// http://www.unicode.org/Public/PROGRAMS/BidiReferenceJava/BidiPBAReference.java
|
||||
// https://www.unicode.org/Public/PROGRAMS/BidiReferenceJava/BidiPBAReference.java
|
||||
//
|
||||
// The implementation in this file covers definitions BD14-BD16 and rule N0
|
||||
// of UAX#9.
|
||||
|
@ -246,7 +246,7 @@ func (p *bracketPairer) getStrongTypeN0(index int) Class {
|
|||
// assuming the given embedding direction.
|
||||
//
|
||||
// It returns ON if no strong type is found. If a single strong type is found,
|
||||
// it returns this this type. Otherwise it returns the embedding direction.
|
||||
// it returns this type. Otherwise it returns the embedding direction.
|
||||
//
|
||||
// TODO: use separate type for "strong" directionality.
|
||||
func (p *bracketPairer) classifyPairContent(loc bracketPair, dirEmbed Class) Class {
|
||||
|
|
|
@ -7,7 +7,7 @@ package bidi
|
|||
import "log"
|
||||
|
||||
// This implementation is a port based on the reference implementation found at:
|
||||
// http://www.unicode.org/Public/PROGRAMS/BidiReferenceJava/
|
||||
// https://www.unicode.org/Public/PROGRAMS/BidiReferenceJava/
|
||||
//
|
||||
// described in Unicode Bidirectional Algorithm (UAX #9).
|
||||
//
|
||||
|
@ -480,15 +480,15 @@ func (s *isolatingRunSequence) resolveWeakTypes() {
|
|||
|
||||
// Rule W1.
|
||||
// Changes all NSMs.
|
||||
preceedingCharacterType := s.sos
|
||||
precedingCharacterType := s.sos
|
||||
for i, t := range s.types {
|
||||
if t == NSM {
|
||||
s.types[i] = preceedingCharacterType
|
||||
s.types[i] = precedingCharacterType
|
||||
} else {
|
||||
if t.in(LRI, RLI, FSI, PDI) {
|
||||
preceedingCharacterType = ON
|
||||
precedingCharacterType = ON
|
||||
}
|
||||
preceedingCharacterType = t
|
||||
precedingCharacterType = t
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
|
||||
|
||||
// +build go1.10
|
||||
// +build go1.10,!go1.13
|
||||
|
||||
package bidi
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -407,7 +407,7 @@ func decomposeHangul(buf []byte, r rune) int {
|
|||
|
||||
// decomposeHangul algorithmically decomposes a Hangul rune into
|
||||
// its Jamo components.
|
||||
// See http://unicode.org/reports/tr15/#Hangul for details on decomposing Hangul.
|
||||
// See https://unicode.org/reports/tr15/#Hangul for details on decomposing Hangul.
|
||||
func (rb *reorderBuffer) decomposeHangul(r rune) {
|
||||
r -= hangulBase
|
||||
x := r % jamoTCount
|
||||
|
@ -420,7 +420,7 @@ func (rb *reorderBuffer) decomposeHangul(r rune) {
|
|||
}
|
||||
|
||||
// combineHangul algorithmically combines Jamo character components into Hangul.
|
||||
// See http://unicode.org/reports/tr15/#Hangul for details on combining Hangul.
|
||||
// See https://unicode.org/reports/tr15/#Hangul for details on combining Hangul.
|
||||
func (rb *reorderBuffer) combineHangul(s, i, k int) {
|
||||
b := rb.rune[:]
|
||||
bn := rb.nrune
|
||||
|
@ -461,6 +461,10 @@ func (rb *reorderBuffer) combineHangul(s, i, k int) {
|
|||
// It should only be used to recompose a single segment, as it will not
|
||||
// handle alternations between Hangul and non-Hangul characters correctly.
|
||||
func (rb *reorderBuffer) compose() {
|
||||
// Lazily load the map used by the combine func below, but do
|
||||
// it outside of the loop.
|
||||
recompMapOnce.Do(buildRecompMap)
|
||||
|
||||
// UAX #15, section X5 , including Corrigendum #5
|
||||
// "In any character sequence beginning with starter S, a character C is
|
||||
// blocked from S if and only if there is some character B between S
|
||||
|
|
|
@ -4,6 +4,8 @@
|
|||
|
||||
package norm
|
||||
|
||||
import "encoding/binary"
|
||||
|
||||
// This file contains Form-specific logic and wrappers for data in tables.go.
|
||||
|
||||
// Rune info is stored in a separate trie per composing form. A composing form
|
||||
|
@ -178,6 +180,17 @@ func (p Properties) TrailCCC() uint8 {
|
|||
return ccc[p.tccc]
|
||||
}
|
||||
|
||||
func buildRecompMap() {
|
||||
recompMap = make(map[uint32]rune, len(recompMapPacked)/8)
|
||||
var buf [8]byte
|
||||
for i := 0; i < len(recompMapPacked); i += 8 {
|
||||
copy(buf[:], recompMapPacked[i:i+8])
|
||||
key := binary.BigEndian.Uint32(buf[:4])
|
||||
val := binary.BigEndian.Uint32(buf[4:])
|
||||
recompMap[key] = rune(val)
|
||||
}
|
||||
}
|
||||
|
||||
// Recomposition
|
||||
// We use 32-bit keys instead of 64-bit for the two codepoint keys.
|
||||
// This clips off the bits of three entries, but we know this will not
|
||||
|
@ -186,8 +199,14 @@ func (p Properties) TrailCCC() uint8 {
|
|||
// Note that the recomposition map for NFC and NFKC are identical.
|
||||
|
||||
// combine returns the combined rune or 0 if it doesn't exist.
|
||||
//
|
||||
// The caller is responsible for calling
|
||||
// recompMapOnce.Do(buildRecompMap) sometime before this is called.
|
||||
func combine(a, b rune) rune {
|
||||
key := uint32(uint16(a))<<16 + uint32(uint16(b))
|
||||
if recompMap == nil {
|
||||
panic("caller error") // see func comment
|
||||
}
|
||||
return recompMap[key]
|
||||
}
|
||||
|
||||
|
|
|
@ -128,8 +128,9 @@ func (i *Iter) Next() []byte {
|
|||
func nextASCIIBytes(i *Iter) []byte {
|
||||
p := i.p + 1
|
||||
if p >= i.rb.nsrc {
|
||||
p0 := i.p
|
||||
i.setDone()
|
||||
return i.rb.src.bytes[i.p:p]
|
||||
return i.rb.src.bytes[p0:p]
|
||||
}
|
||||
if i.rb.src.bytes[p] < utf8.RuneSelf {
|
||||
p0 := i.p
|
||||
|
|
|
@ -29,8 +29,8 @@ import (
|
|||
// proceed independently on both sides:
|
||||
// f(x) == append(f(x[0:n]), f(x[n:])...)
|
||||
//
|
||||
// References: http://unicode.org/reports/tr15/ and
|
||||
// http://unicode.org/notes/tn5/.
|
||||
// References: https://unicode.org/reports/tr15/ and
|
||||
// https://unicode.org/notes/tn5/.
|
||||
type Form int
|
||||
|
||||
const (
|
||||
|
|
|
@ -60,8 +60,8 @@ func (w *normWriter) Close() error {
|
|||
}
|
||||
|
||||
// Writer returns a new writer that implements Write(b)
|
||||
// by writing f(b) to w. The returned writer may use an
|
||||
// an internal buffer to maintain state across Write calls.
|
||||
// by writing f(b) to w. The returned writer may use an
|
||||
// internal buffer to maintain state across Write calls.
|
||||
// Calling its Close method writes any buffered data to w.
|
||||
func (f Form) Writer(w io.Writer) io.WriteCloser {
|
||||
wr := &normWriter{rb: reorderBuffer{}, w: w}
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -18,7 +18,6 @@ func (Form) Reset() {}
|
|||
// Users should either catch ErrShortDst and allow dst to grow or have dst be at
|
||||
// least of size MaxTransformChunkSize to be guaranteed of progress.
|
||||
func (f Form) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
n := 0
|
||||
// Cap the maximum number of src bytes to check.
|
||||
b := src
|
||||
eof := atEOF
|
||||
|
@ -27,13 +26,14 @@ func (f Form) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error)
|
|||
eof = false
|
||||
b = b[:ns]
|
||||
}
|
||||
i, ok := formTable[f].quickSpan(inputBytes(b), n, len(b), eof)
|
||||
n += copy(dst[n:], b[n:i])
|
||||
i, ok := formTable[f].quickSpan(inputBytes(b), 0, len(b), eof)
|
||||
n := copy(dst, b[:i])
|
||||
if !ok {
|
||||
nDst, nSrc, err = f.transform(dst[n:], src[n:], atEOF)
|
||||
return nDst + n, nSrc + n, err
|
||||
}
|
||||
if n < len(src) && !atEOF {
|
||||
|
||||
if err == nil && n < len(src) && !atEOF {
|
||||
err = transform.ErrShortSrc
|
||||
}
|
||||
return n, n, err
|
||||
|
@ -79,7 +79,7 @@ func (f Form) transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error)
|
|||
nSrc += n
|
||||
nDst += n
|
||||
if ok {
|
||||
if n < rb.nsrc && !atEOF {
|
||||
if err == nil && n < rb.nsrc && !atEOF {
|
||||
err = transform.ErrShortSrc
|
||||
}
|
||||
return nDst, nSrc, err
|
||||
|
|
|
@ -164,7 +164,8 @@ golang.org/x/net/trace
|
|||
# golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5
|
||||
golang.org/x/sys/unix
|
||||
golang.org/x/sys/windows
|
||||
# golang.org/x/text v0.3.0
|
||||
# golang.org/x/text v0.3.3
|
||||
## explicit
|
||||
golang.org/x/text/secure/bidirule
|
||||
golang.org/x/text/transform
|
||||
golang.org/x/text/unicode/bidi
|
||||
|
|
Loading…
Reference in New Issue