/**
 * @flow
 */

/**
 * Tests whether a numeric code-unit value falls inside the UTF-16 surrogate
 * block, i.e. between 0xD800 and 0xDFFF inclusive (both the high and the low
 * surrogate halves).
 *
 * @param {number} codeUnit Numeric value of a single UTF-16 code unit
 * @return {boolean} True when the value is a high or low surrogate
 */
function inSurrogateRange(codeUnit) {
  return codeUnit >= 0xD800 && codeUnit <= 0xDFFF;
}

/**
 * Reports how many UTF-16 code units the character touching the given
 * position occupies. The decision is made from the single code unit found at
 * `pos`: a non-surrogate unit counts as 1, while any surrogate unit (high or
 * low half of a pair encoding a non-BMP character) counts as 2.
 *
 * Examples:
 * - '\u0020' => 1
 * - '\u3020' => 1
 * - '\uD835' => 2
 * - '\uD835\uDDEF' => 2
 * - '\uDDEF' => 2
 *
 * Note that only the unit at `pos` is inspected; the neighbouring unit is not
 * checked, so a lone (unpaired) surrogate is still reported as 2.
 *
 * @param {string} str Non-empty string
 * @param {number} pos Position in the string of the code unit to inspect
 * @return {number} Number 1 or 2
 */
function utf16Length(str, pos) {
  var unit = str.charCodeAt(pos);
  return inSurrogateRange(unit) ? 2 : 1;
}