35 lines
1008 B
JavaScript
35 lines
1008 B
JavaScript
/**
 * @flow
 */
|
|
|
|
/**
 * Tests whether a UTF-16 code unit is a surrogate, either high or low.
 *
 * Any number outside [0xD800, 0xDFFF] yields `false`, including `NaN`
 * (which is what `String.prototype.charCodeAt` returns for an
 * out-of-range index).
 *
 * @param {number} codeUnit A UTF-16 code unit, normally in range [0, 0xFFFF]
 * @return {boolean} Whether the code unit is in the surrogate (hi/low) range
 */
function inSurrogateRange(codeUnit) {
  return 0xD800 <= codeUnit && codeUnit <= 0xDFFF;
}
|
|
|
|
|
|
/**
 * Return the length, in UTF-16 code units, of the original Unicode character
 * at the given position in the string. The result is 1 for any non-surrogate
 * character in the BMP ([U+0000..U+D7FF] and [U+E000..U+FFFF]), and 2 when the
 * code unit at `pos` is a hi/low surrogate ([U+D800..U+DFFF]), since
 * surrogates represent non-BMP characters ([U+10000..U+10FFFF]).
 *
 * Only the single code unit at `pos` is inspected: there is no check that a
 * surrogate is actually part of a well-formed pair, so a lone surrogate (high
 * or low) also reports 2.
 *
 * Examples:
 * - '\u0020' => 1
 * - '\u3020' => 1
 * - '\uD835' => 2
 * - '\uD835\uDDEF' => 2
 * - '\uDDEF' => 2
 *
 * @param {string} str Non-empty string
 * @param {number} pos Position in the string to look for one code unit
 * @return {number} Number 1 or 2
 */
function utf16Length(str, pos) {
  // Explicit branch instead of `1 + <boolean>`, so the result does not rely
  // on implicit boolean-to-number coercion.
  return inSurrogateRange(str.charCodeAt(pos)) ? 2 : 1;
}
|