refactor(markdown): expose `hasPunctuation` to AST for better debugging (#3272)

* refactor(markdown): extract `punctuationRegex`
* refactor: expose `has(Leading|Trailing)Punctuation` to AST for better debugging
* refactor: tweak
* refactor: tweak
* fix: no regression
* test: fix typo
2017-11-16 13:59:01 +08:00 · 2017-11-16 13:59:01 +08:00 · 378cfee508
parent 2332c5c8bf
commit 378cfee508
4 changed files with 133 additions and 20 deletions
--- a/src/printer-markdown.js
+++ b/src/printer-markdown.js
@ -11,7 +11,6 @@ const fill = docBuilders.fill;
 const align = docBuilders.align;
 const docPrinter = require("./doc-printer");
 const printDocToString = docPrinter.printDocToString;
-const punctuationCharRange = util.punctuationCharRange;

 const SINGLE_LINE_NODE_TYPES = [
  "heading",
@ -82,8 +81,8 @@ function genericPrint(path, options, print) {
        .replace(
          new RegExp(
            [
-              `(^|[${punctuationCharRange}])(_+)`,
-              `(_+)([${punctuationCharRange}]|$)`
+              `(^|[${util.punctuationCharRange}])(_+)`,
+              `(_+)([${util.punctuationCharRange}]|$)`
            ].join("|"),
            "g"
          ),
@ -114,17 +113,13 @@ function genericPrint(path, options, print) {
        (prevNode &&
          prevNode.type === "sentence" &&
          prevNode.children.length > 0 &&
-          prevNode.children[prevNode.children.length - 1].type === "word" &&
-          new RegExp(`[^${punctuationCharRange}]$`).test(
-            prevNode.children[prevNode.children.length - 1].value
-          )) ||
+          util.getLast(prevNode.children).type === "word" &&
+          !util.getLast(prevNode.children).hasTrailingPunctuation) ||
        (nextNode &&
          nextNode.type === "sentence" &&
          nextNode.children.length > 0 &&
          nextNode.children[0].type === "word" &&
-          new RegExp(`^[^${punctuationCharRange}]`).test(
-            nextNode.children[0].value
-          ));
+          !nextNode.children[0].hasLeadingPunctuation);
      const style =
        hasPrevOrNextWord || getAncestorNode(path, "emphasis") ? "*" : "_";
      return concat([style, printChildren(path, options, print), style]);
@ -211,7 +206,7 @@ function genericPrint(path, options, print) {
    case "html": {
      const parentNode = path.getParentNode();
      return parentNode.type === "root" &&
-        parentNode.children[parentNode.children.length - 1] === node
+        util.getLast(parentNode.children) === node
        ? node.value.trimRight()
        : node.value;
    }
@ -618,7 +613,7 @@ function printTitle(title) {

 function normalizeParts(parts) {
  return parts.reduce((current, part) => {
-    const lastPart = current[current.length - 1];
+    const lastPart = util.getLast(current);

    if (typeof lastPart === "string" && typeof part === "string") {
      current.splice(-1, 1, lastPart + part);
--- a/src/util.js
+++ b/src/util.js
@ -24,6 +24,8 @@ const punctuationCharRange = `${asciiPunctuationCharRange}${getUnicodeRegex([
  "Ps"
 ]).source.slice(1, -1)}`; // remove bracket expression `[` and `]`

+const punctuationRegex = new RegExp(`[${punctuationCharRange}]`);
+
 function isExportDeclaration(node) {
  if (node) {
    switch (node.type) {
@ -712,32 +714,49 @@ function splitText(text) {
              appendNode({
                type: "word",
                value: innerToken,
-                kind: KIND_NON_CJK
+                kind: KIND_NON_CJK,
+                hasLeadingPunctuation: punctuationRegex.test(innerToken[0]),
+                hasTrailingPunctuation: punctuationRegex.test(
+                  getLast(innerToken)
+                )
              });
            }
            return;
          }

          // CJK character
-          const kind = new RegExp(`[${punctuationCharRange}]`).test(innerToken)
-            ? KIND_CJK_PUNCTUATION
-            : KIND_CJK_CHARACTER;
-          appendNode({ type: "word", value: innerToken, kind });
+          appendNode(
+            punctuationRegex.test(innerToken)
+              ? {
+                  type: "word",
+                  value: innerToken,
+                  kind: KIND_CJK_PUNCTUATION,
+                  hasLeadingPunctuation: true,
+                  hasTrailingPunctuation: true
+                }
+              : {
+                  type: "word",
+                  value: innerToken,
+                  kind: KIND_CJK_CHARACTER,
+                  hasLeadingPunctuation: false,
+                  hasTrailingPunctuation: false
+                }
+          );
        });
    });

  return nodes;

  function appendNode(node) {
-    const lastNode = nodes[nodes.length - 1];
+    const lastNode = getLast(nodes);
    if (lastNode && lastNode.type === "word") {
      if (
        (lastNode.kind === KIND_NON_CJK &&
          node.kind === KIND_CJK_CHARACTER &&
-          !new RegExp(`[${punctuationCharRange}]$`).test(lastNode.value)) ||
+          !lastNode.hasTrailingPunctuation) ||
        (lastNode.kind === KIND_CJK_CHARACTER &&
          node.kind === KIND_NON_CJK &&
-          !new RegExp(`^[${punctuationCharRange}]`).test(node.value))
+          !node.hasLeadingPunctuation)
      ) {
        nodes.push({ type: "whitespace", value: " " });
      } else if (
@ -771,6 +790,7 @@ function getStringWidth(text) {
 }

 module.exports = {
+  punctuationRegex,
  punctuationCharRange,
  getStringWidth,
  splitText,
--- a/tests/markdown_emphasis/snapshots/jsfmt.spec.js.snap
+++ b/tests/markdown_emphasis/snapshots/jsfmt.spec.js.snap
@ -18,6 +18,73 @@ _bug_?

 `;

+exports[`special.md 1`] = `
+0*1*2
+
+!*1*2
+
+0*1*!
+
+!*1*!
+
+0*1*2
+
+！*1*2
+
+0*1*！
+
+！*1*！
+
+0_1_2
+
+!_1_2
+
+0_1_! <!-- remark-misparsing, should be formatted as \`0_1\\_!\` -->
+
+!_1_!
+
+0_1_2
+
+！_1_2
+
+0_1_！ <!-- remark-misparsing, should be formatted as \`0_1\\_！\` -->
+
+！_1_！
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+0*1*2
+
+!*1*2
+
+0*1*!
+
+!_1_!
+
+0*1*2
+
+！*1*2
+
+0*1*！
+
+！_1_！
+
+0_1_2
+
+!\\_1_2
+
+0*1*! <!-- remark-misparsing, should be formatted as \`0_1\\_!\` -->
+
+!_1_!
+
+0_1_2
+
+！\\_1_2
+
+0*1*！ <!-- remark-misparsing, should be formatted as \`0_1\\_！\` -->
+
+！_1_！
+
+`;
+
 exports[`underscore.md 1`] = `
 _123_
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--- a/tests/markdown_emphasis/special.md
+++ b/tests/markdown_emphasis/special.md
@ -0,0 +1,31 @@
+0*1*2
+
+!*1*2
+
+0*1*!
+
+!*1*!
+
+0*1*2
+
+！*1*2
+
+0*1*！
+
+！*1*！
+
+0_1_2
+
+!_1_2
+
+0_1_! <!-- remark-misparsing, should be formatted as `0_1\_!` -->
+
+!_1_!
+
+0_1_2
+
+！_1_2
+
+0_1_！ <!-- remark-misparsing, should be formatted as `0_1\_！` -->
+
+！_1_！