fix(markdown): handle punctuation variants (#3254)
* fix(markdown): handle punctuation variants
* docs: add comment
master
parent
ece764a049
commit
d08df0b221
|
@ -49,6 +49,7 @@
|
|||
"strip-bom": "3.0.0",
|
||||
"typescript": "2.5.3",
|
||||
"typescript-eslint-parser": "git://github.com/eslint/typescript-eslint-parser.git#9c71a627da36e97da52ed2731d58509c952b67ae",
|
||||
"unicode-regex": "1.0.1",
|
||||
"unified": "6.1.5"
|
||||
},
|
||||
"devDependencies": {
|
||||
|
|
|
@ -11,13 +11,7 @@ const fill = docBuilders.fill;
|
|||
const align = docBuilders.align;
|
||||
const docPrinter = require("./doc-printer");
|
||||
const printDocToString = docPrinter.printDocToString;
|
||||
const asciiPunctuationPattern = util.asciiPunctuationPattern;
|
||||
const getCjkRegex = require("cjk-regex");
|
||||
|
||||
const punctuationPattern = `(?:${[
|
||||
getCjkRegex.punctuations().source,
|
||||
`[${asciiPunctuationPattern}]`
|
||||
].join("|")})`;
|
||||
const punctuationCharRange = util.punctuationCharRange;
|
||||
|
||||
const SINGLE_LINE_NODE_TYPES = [
|
||||
"heading",
|
||||
|
@ -87,7 +81,10 @@ function genericPrint(path, options, print) {
|
|||
.replace(/[*]/g, "\\*") // escape all `*`
|
||||
.replace(
|
||||
new RegExp(
|
||||
`(^|${punctuationPattern})(_+)|(_+)(${punctuationPattern}|$)`,
|
||||
[
|
||||
`(^|[${punctuationCharRange}])(_+)`,
|
||||
`(_+)([${punctuationCharRange}]|$)`
|
||||
].join("|"),
|
||||
"g"
|
||||
),
|
||||
(_, text1, underscore1, underscore2, text2) =>
|
||||
|
@ -118,14 +115,14 @@ function genericPrint(path, options, print) {
|
|||
prevNode.type === "sentence" &&
|
||||
prevNode.children.length > 0 &&
|
||||
prevNode.children[prevNode.children.length - 1].type === "word" &&
|
||||
new RegExp(`[^${asciiPunctuationPattern}]$`).test(
|
||||
new RegExp(`[^${punctuationCharRange}]$`).test(
|
||||
prevNode.children[prevNode.children.length - 1].value
|
||||
)) ||
|
||||
(nextNode &&
|
||||
nextNode.type === "sentence" &&
|
||||
nextNode.children.length > 0 &&
|
||||
nextNode.children[0].type === "word" &&
|
||||
new RegExp(`^[^${asciiPunctuationPattern}]`).test(
|
||||
new RegExp(`^[^${punctuationCharRange}]`).test(
|
||||
nextNode.children[0].value
|
||||
));
|
||||
const style =
|
||||
|
|
35
src/util.js
35
src/util.js
|
@ -3,19 +3,27 @@
|
|||
const stringWidth = require("string-width");
|
||||
const emojiRegex = require("emoji-regex")();
|
||||
const escapeStringRegexp = require("escape-string-regexp");
|
||||
|
||||
const getCjkRegex = require("cjk-regex");
|
||||
const cjkRegex = getCjkRegex();
|
||||
const getUnicodeRegex = require("unicode-regex");
|
||||
|
||||
// the `g` flag is dangerous in RegExp#test()
|
||||
// https://stackoverflow.com/a/21373261
|
||||
const cjkPunctuationRegex = new RegExp(getCjkRegex.punctuations().source, "");
|
||||
const cjkPattern = getCjkRegex().source;
|
||||
|
||||
// http://spec.commonmark.org/0.25/#ascii-punctuation-character
|
||||
const asciiPunctuationPattern = escapeStringRegexp(
|
||||
const asciiPunctuationCharRange = escapeStringRegexp(
|
||||
"!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
|
||||
);
|
||||
|
||||
// http://spec.commonmark.org/0.25/#punctuation-character
|
||||
const punctuationCharRange = `${asciiPunctuationCharRange}${getUnicodeRegex([
|
||||
"Pc",
|
||||
"Pd",
|
||||
"Pe",
|
||||
"Pf",
|
||||
"Pi",
|
||||
"Po",
|
||||
"Ps"
|
||||
]).source.slice(1, -1)}`; // remove bracket expression `[` and `]`
|
||||
|
||||
function isExportDeclaration(node) {
|
||||
if (node) {
|
||||
switch (node.type) {
|
||||
|
@ -672,10 +680,7 @@ function splitText(text) {
|
|||
const nodes = [];
|
||||
|
||||
text
|
||||
.replace(
|
||||
new RegExp(`(${cjkRegex.source})\n(${cjkRegex.source})`, "g"),
|
||||
"$1$2"
|
||||
)
|
||||
.replace(new RegExp(`(${cjkPattern})\n(${cjkPattern})`, "g"), "$1$2")
|
||||
// `\s` but exclude full-width whitespace (`\u3000`)
|
||||
.split(/([^\S\u3000]+)/)
|
||||
.forEach((token, index, tokens) => {
|
||||
|
@ -692,7 +697,7 @@ function splitText(text) {
|
|||
}
|
||||
|
||||
token
|
||||
.split(new RegExp(`(${cjkRegex.source})`))
|
||||
.split(new RegExp(`(${cjkPattern})`))
|
||||
.forEach((innerToken, innerIndex, innerTokens) => {
|
||||
if (
|
||||
(innerIndex === 0 || innerIndex === innerTokens.length - 1) &&
|
||||
|
@ -714,7 +719,7 @@ function splitText(text) {
|
|||
}
|
||||
|
||||
// CJK character
|
||||
const kind = cjkPunctuationRegex.test(innerToken)
|
||||
const kind = new RegExp(`[${punctuationCharRange}]`).test(innerToken)
|
||||
? KIND_CJK_PUNCTUATION
|
||||
: KIND_CJK_CHARACTER;
|
||||
appendNode({ type: "word", value: innerToken, kind });
|
||||
|
@ -729,10 +734,10 @@ function splitText(text) {
|
|||
if (
|
||||
(lastNode.kind === KIND_NON_CJK &&
|
||||
node.kind === KIND_CJK_CHARACTER &&
|
||||
!new RegExp(`[${asciiPunctuationPattern}]$`).test(lastNode.value)) ||
|
||||
!new RegExp(`[${punctuationCharRange}]$`).test(lastNode.value)) ||
|
||||
(lastNode.kind === KIND_CJK_CHARACTER &&
|
||||
node.kind === KIND_NON_CJK &&
|
||||
!new RegExp(`^[${asciiPunctuationPattern}]`).test(node.value))
|
||||
!new RegExp(`^[${punctuationCharRange}]`).test(node.value))
|
||||
) {
|
||||
nodes.push({ type: "whitespace", value: " " });
|
||||
} else if (
|
||||
|
@ -766,7 +771,7 @@ function getStringWidth(text) {
|
|||
}
|
||||
|
||||
module.exports = {
|
||||
asciiPunctuationPattern,
|
||||
punctuationCharRange,
|
||||
getStringWidth,
|
||||
splitText,
|
||||
mapDoc,
|
||||
|
|
|
@ -9,6 +9,8 @@ exports[`cjk.md 1`] = `
|
|||
|
||||
This ia an english paragraph with a CJK quote "中文".
|
||||
|
||||
This ia an english paragraph with a CJK quote “中文“.
|
||||
|
||||
扩展运算符(spread)是三个点(\`...\`)。
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
這是一段很長很長很長很長很長很長很長很長很長很長很長很長很長很長很長很長很長很長
|
||||
|
@ -30,6 +32,8 @@ English 混合著中文的一段 Paragraph!
|
|||
|
||||
This ia an english paragraph with a CJK quote "中文".
|
||||
|
||||
This ia an english paragraph with a CJK quote “中文“.
|
||||
|
||||
扩展运算符(spread)是三个点(\`...\`)。
|
||||
|
||||
`;
|
||||
|
@ -43,6 +47,8 @@ exports[`cjk.md 2`] = `
|
|||
|
||||
This ia an english paragraph with a CJK quote "中文".
|
||||
|
||||
This ia an english paragraph with a CJK quote “中文“.
|
||||
|
||||
扩展运算符(spread)是三个点(\`...\`)。
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
這是一段很長很長很長很長很長很長很長很長很長很長很長很長很長很長很長很長很長很長很長的段落
|
||||
|
@ -53,6 +59,8 @@ This ia an english paragraph with a CJK quote "中文".
|
|||
|
||||
This ia an english paragraph with a CJK quote "中文".
|
||||
|
||||
This ia an english paragraph with a CJK quote “中文“.
|
||||
|
||||
扩展运算符(spread)是三个点(\`...\`)。
|
||||
|
||||
`;
|
||||
|
|
|
@ -6,4 +6,6 @@
|
|||
|
||||
This ia an english paragraph with a CJK quote "中文".
|
||||
|
||||
This ia an english paragraph with a CJK quote “中文“.
|
||||
|
||||
扩展运算符(spread)是三个点(`...`)。
|
||||
|
|
|
@ -4279,6 +4279,10 @@ unherit@^1.0.4:
|
|||
inherits "^2.0.1"
|
||||
xtend "^4.0.1"
|
||||
|
||||
unicode-regex@1.0.1:
|
||||
version "1.0.1"
|
||||
resolved "https://registry.yarnpkg.com/unicode-regex/-/unicode-regex-1.0.1.tgz#f819e050191d5b9561a339a58dd3b9095ed94b35"
|
||||
|
||||
unified@6.1.5:
|
||||
version "6.1.5"
|
||||
resolved "https://registry.yarnpkg.com/unified/-/unified-6.1.5.tgz#716937872621a63135e62ced2f3ac6a063c6fb87"
|
||||
|
|
Loading…
Reference in New Issue