From 50c9115632e8b90088e514bc63090e07e9b49b4e Mon Sep 17 00:00:00 2001 From: Ika Date: Mon, 3 Sep 2018 23:27:50 +0800 Subject: [PATCH] feat: add printer.preprocess (#5041) Sometimes we need to transform the AST to make it easier to print, but this is currently done in the parser (markdown and yaml), which makes the output AST harder to use for external users (custom parsers). Adding `printer.preprocess` solves this issue. It also lets us move the trailing newline for JSON from `ast-to-doc.js` to `language-js`. --- docs/plugins.md | 6 + src/language-js/preprocess.js | 17 ++ src/language-js/printer-estree-json.js | 4 + src/language-js/printer-estree.js | 4 + src/language-markdown/mdx.js | 2 +- src/language-markdown/parser-markdown.js | 284 +--------------------- src/language-markdown/preprocess.js | 277 +++++++++++++++++++++ src/language-markdown/printer-markdown.js | 16 +- src/language-markdown/utils.js | 16 ++ src/language-yaml/parser-yaml.js | 28 +-- src/language-yaml/printer-yaml.js | 31 ++- src/main/ast-to-doc.js | 17 +- 12 files changed, 378 insertions(+), 324 deletions(-) create mode 100644 src/language-js/preprocess.js create mode 100644 src/language-markdown/preprocess.js diff --git a/docs/plugins.md b/docs/plugins.md index e9d58100..0e522c97 100644 --- a/docs/plugins.md +++ b/docs/plugins.md @@ -182,6 +182,12 @@ A plugin can implement how a pragma comment is inserted in the resulting code wh function insertPragma(text: string): string; ``` +_(Optional)_ The preprocess function can process the ast from parser before passing into `print` function. + +```ts +function preprocess(ast: AST, options: object): AST; +``` + ### `options` `options` is an object containing the custom options your plugin supports. 
diff --git a/src/language-js/preprocess.js b/src/language-js/preprocess.js new file mode 100644 index 00000000..3548cd85 --- /dev/null +++ b/src/language-js/preprocess.js @@ -0,0 +1,17 @@ +"use strict"; + +function preprocess(ast, options) { + switch (options.parser) { + case "json": + case "json5": + case "json-stringify": + return Object.assign({}, ast, { + type: "JsonRoot", + node: Object.assign({}, ast, { comments: [] }) + }); + default: + return ast; + } +} + +module.exports = preprocess; diff --git a/src/language-js/printer-estree-json.js b/src/language-js/printer-estree-json.js index 55f58a41..5f82f15d 100644 --- a/src/language-js/printer-estree-json.js +++ b/src/language-js/printer-estree-json.js @@ -1,10 +1,13 @@ "use strict"; const { concat, hardline, indent, join } = require("../doc").builders; +const preprocess = require("./preprocess"); function genericPrint(path, options, print) { const node = path.getValue(); switch (node.type) { + case "JsonRoot": + return concat([path.call(print, "node"), hardline]); case "ArrayExpression": return node.elements.length === 0 ? 
"[]" @@ -71,6 +74,7 @@ function clean(node, newNode /*, parent*/) { } module.exports = { + preprocess, print: genericPrint, massageAstNode: clean }; diff --git a/src/language-js/printer-estree.js b/src/language-js/printer-estree.js index 8008512a..d0fb83d1 100644 --- a/src/language-js/printer-estree.js +++ b/src/language-js/printer-estree.js @@ -34,6 +34,7 @@ const clean = require("./clean"); const insertPragma = require("./pragma").insertPragma; const handleComments = require("./comments"); const pathNeedsParens = require("./needs-parens"); +const preprocess = require("./preprocess"); const { builders: { @@ -342,6 +343,8 @@ function printPathNoParens(path, options, print, args) { let parts = []; switch (n.type) { + case "JsonRoot": + return concat([path.call(print, "node"), hardline]); case "File": // Print @babel/parser's InterpreterDirective here so that // leading comments on the `Program` node get printed after the hashbang. @@ -5979,6 +5982,7 @@ function rawText(node) { } module.exports = { + preprocess, print: genericPrint, embed, insertPragma, diff --git a/src/language-markdown/mdx.js b/src/language-markdown/mdx.js index 988e7bec..b231145f 100644 --- a/src/language-markdown/mdx.js +++ b/src/language-markdown/mdx.js @@ -38,7 +38,7 @@ const tokenizeEsSyntax = (eat, value) => { if (isExport(subvalue) || isImport(subvalue)) { return eat(subvalue)({ - type: "importExport", + type: isExport(subvalue) ? 
"export" : "import", value: subvalue }); } diff --git a/src/language-markdown/parser-markdown.js b/src/language-markdown/parser-markdown.js index 499e78c8..a9fd710d 100644 --- a/src/language-markdown/parser-markdown.js +++ b/src/language-markdown/parser-markdown.js @@ -4,12 +4,9 @@ const remarkParse = require("remark-parse"); const unified = require("unified"); const pragma = require("./pragma"); const parseFrontMatter = require("../utils/front-matter"); -const { getOrderedListItemInfo, splitText } = require("./utils"); +const { mapAst } = require("./utils"); const mdx = require("./mdx"); -// 0x0 ~ 0x10ffff -const isSingleCharRegex = /^([\u0000-\uffff]|[\ud800-\udbff][\udc00-\udfff])$/; - /** * based on [MDAST](https://github.com/syntax-tree/mdast) with following modifications: * @@ -25,7 +22,7 @@ const isSingleCharRegex = /^([\u0000-\uffff]|[\ud800-\udbff][\udc00-\udfff])$/; * interface InlineCode { children: Array } */ function createParse({ isMDX }) { - return (text, parsers, opts) => { + return text => { const processor = unified() .use( remarkParse, @@ -40,40 +37,18 @@ function createParse({ isMDX }) { .use(frontMatter) .use(isMDX ? mdx.esSyntax : identity) .use(liquid) - .use(restoreUnescapedCharacter(text)) - .use(mergeContinuousTexts) - .use(transformInlineCode) - .use(transformIndentedCodeblockAndMarkItsParentList(text)) - .use(markAlignedList(text, opts)) - .use(splitTextIntoSentences(opts)) - .use(isMDX ? htmlToJsx : identity) - .use(isMDX ? mergeContinuousImportExport : identity); + .use(isMDX ? 
htmlToJsx : identity); return processor.runSync(processor.parse(text)); }; } -function map(ast, handler) { - return (function preorder(node, index, parentStack) { - parentStack = parentStack || []; - - const newNode = Object.assign({}, handler(node, index, parentStack)); - if (newNode.children) { - newNode.children = newNode.children.map((child, index) => { - return preorder(child, index, [newNode].concat(parentStack)); - }); - } - - return newNode; - })(ast, null, null); -} - function identity(x) { return x; } function htmlToJsx() { return ast => - map(ast, (node, index, [parent]) => { + mapAst(ast, (node, index, [parent]) => { if ( node.type !== "html" || /^$/.test(node.value) || @@ -87,114 +62,6 @@ function htmlToJsx() { }); } -function mergeContinuousImportExport() { - return mergeChildren( - (prevNode, node) => - prevNode.type === "importExport" && node.type === "importExport", - (prevNode, node) => ({ - type: "importExport", - value: prevNode.value + "\n\n" + node.value, - position: { - start: prevNode.position.start, - end: node.position.end - } - }) - ); -} - -function transformInlineCode() { - return ast => - map(ast, node => { - if (node.type !== "inlineCode") { - return node; - } - - return Object.assign({}, node, { - value: node.value.replace(/\s+/g, " ") - }); - }); -} - -function restoreUnescapedCharacter(originalText) { - return () => ast => - map(ast, node => { - return node.type !== "text" - ? node - : Object.assign({}, node, { - value: - node.value !== "*" && - node.value !== "_" && // handle these two cases in printer - isSingleCharRegex.test(node.value) && - node.position.end.offset - node.position.start.offset !== - node.value.length - ? 
originalText.slice( - node.position.start.offset, - node.position.end.offset - ) - : node.value - }); - }); -} - -function mergeChildren(shouldMerge, mergeNode) { - return ast => - map(ast, node => { - if (!node.children) { - return node; - } - const children = node.children.reduce((current, child) => { - const lastChild = current[current.length - 1]; - if (lastChild && shouldMerge(lastChild, child)) { - current.splice(-1, 1, mergeNode(lastChild, child)); - } else { - current.push(child); - } - return current; - }, []); - return Object.assign({}, node, { children }); - }); -} - -function mergeContinuousTexts() { - return mergeChildren( - (prevNode, node) => prevNode.type === "text" && node.type === "text", - (prevNode, node) => ({ - type: "text", - value: prevNode.value + node.value, - position: { - start: prevNode.position.start, - end: node.position.end - } - }) - ); -} - -function splitTextIntoSentences(options) { - return () => ast => - map(ast, (node, index, [parentNode]) => { - if (node.type !== "text") { - return node; - } - - let value = node.value; - - if (parentNode.type === "paragraph") { - if (index === 0) { - value = value.trimLeft(); - } - if (index === parentNode.children.length - 1) { - value = value.trimRight(); - } - } - - return { - type: "sentence", - position: node.position, - children: splitText(value, options) - }; - }); -} - function frontMatter() { const proto = this.Parser.prototype; proto.blockMethods = ["frontMatter"].concat(proto.blockMethods); @@ -231,149 +98,6 @@ function liquid() { }; } -function transformIndentedCodeblockAndMarkItsParentList(originalText) { - return () => ast => - map(ast, (node, index, parentStack) => { - if (node.type === "code") { - // the first char may point to `\n`, e.g. 
`\n\t\tbar`, just ignore it - const isIndented = /^\n?( {4,}|\t)/.test( - originalText.slice( - node.position.start.offset, - node.position.end.offset - ) - ); - - node.isIndented = isIndented; - - if (isIndented) { - for (let i = 0; i < parentStack.length; i++) { - const parent = parentStack[i]; - - // no need to check checked items - if (parent.hasIndentedCodeblock) { - break; - } - - if (parent.type === "list") { - parent.hasIndentedCodeblock = true; - } - } - } - } - return node; - }); -} - -function markAlignedList(originalText, options) { - return () => ast => - map(ast, (node, index, parentStack) => { - if (node.type === "list" && node.children.length !== 0) { - // if one of its parents is not aligned, it's not possible to be aligned in sub-lists - for (let i = 0; i < parentStack.length; i++) { - const parent = parentStack[i]; - if (parent.type === "list" && !parent.isAligned) { - node.isAligned = false; - return node; - } - } - - node.isAligned = isAligned(node); - } - - return node; - }); - - function getListItemStart(listItem) { - return listItem.children.length === 0 - ? -1 - : listItem.children[0].position.start.column - 1; - } - - function isAligned(list) { - if (!list.ordered) { - /** - * - 123 - * - 123 - */ - return true; - } - - const [firstItem, secondItem] = list.children; - - const firstInfo = getOrderedListItemInfo(firstItem, originalText); - - if (firstInfo.leadingSpaces.length > 1) { - /** - * 1. 123 - * - * 1. 123 - * 1. 123 - */ - return true; - } - - const firstStart = getListItemStart(firstItem); - - if (firstStart === -1) { - /** - * 1. - * - * 1. - * 1. - */ - return false; - } - - if (list.children.length === 1) { - /** - * aligned: - * - * 11. 123 - * - * not aligned: - * - * 1. 123 - */ - return firstStart % options.tabWidth === 0; - } - - const secondStart = getListItemStart(secondItem); - - if (firstStart !== secondStart) { - /** - * 11. 123 - * 1. 123 - * - * 1. 123 - * 11. 
123 - */ - return false; - } - - if (firstStart % options.tabWidth === 0) { - /** - * 11. 123 - * 12. 123 - */ - return true; - } - - /** - * aligned: - * - * 11. 123 - * 1. 123 - * - * not aligned: - * - * 1. 123 - * 2. 123 - */ - const secondInfo = getOrderedListItemInfo(secondItem, originalText); - return secondInfo.leadingSpaces.length > 1; - } -} - const baseParser = { astFormat: "mdast", hasPragma: pragma.hasPragma, diff --git a/src/language-markdown/preprocess.js b/src/language-markdown/preprocess.js new file mode 100644 index 00000000..64ca75af --- /dev/null +++ b/src/language-markdown/preprocess.js @@ -0,0 +1,277 @@ +"use strict"; + +const { getOrderedListItemInfo, mapAst, splitText } = require("./utils"); + +// 0x0 ~ 0x10ffff +const isSingleCharRegex = /^([\u0000-\uffff]|[\ud800-\udbff][\udc00-\udfff])$/; + +function preprocess(ast, options) { + ast = restoreUnescapedCharacter(ast, options); + ast = mergeContinuousTexts(ast); + ast = transformInlineCode(ast); + ast = transformIndentedCodeblockAndMarkItsParentList(ast, options); + ast = markAlignedList(ast, options); + ast = splitTextIntoSentences(ast, options); + ast = transformImportExport(ast); + ast = mergeContinuousImportExport(ast); + return ast; +} + +function transformImportExport(ast) { + return mapAst(ast, node => { + if (node.type !== "import" && node.type !== "export") { + return node; + } + + return Object.assign({}, node, { type: "importExport" }); + }); +} + +function transformInlineCode(ast) { + return mapAst(ast, node => { + if (node.type !== "inlineCode") { + return node; + } + + return Object.assign({}, node, { + value: node.value.replace(/\s+/g, " ") + }); + }); +} + +function restoreUnescapedCharacter(ast, options) { + return mapAst(ast, node => { + return node.type !== "text" + ? 
node + : Object.assign({}, node, { + value: + node.value !== "*" && + node.value !== "_" && // handle these two cases in printer + isSingleCharRegex.test(node.value) && + node.position.end.offset - node.position.start.offset !== + node.value.length + ? options.originalText.slice( + node.position.start.offset, + node.position.end.offset + ) + : node.value + }); + }); +} + +function mergeContinuousImportExport(ast) { + return mergeChildren( + ast, + (prevNode, node) => + prevNode.type === "importExport" && node.type === "importExport", + (prevNode, node) => ({ + type: "importExport", + value: prevNode.value + "\n\n" + node.value, + position: { + start: prevNode.position.start, + end: node.position.end + } + }) + ); +} + +function mergeChildren(ast, shouldMerge, mergeNode) { + return mapAst(ast, node => { + if (!node.children) { + return node; + } + const children = node.children.reduce((current, child) => { + const lastChild = current[current.length - 1]; + if (lastChild && shouldMerge(lastChild, child)) { + current.splice(-1, 1, mergeNode(lastChild, child)); + } else { + current.push(child); + } + return current; + }, []); + return Object.assign({}, node, { children }); + }); +} + +function mergeContinuousTexts(ast) { + return mergeChildren( + ast, + (prevNode, node) => prevNode.type === "text" && node.type === "text", + (prevNode, node) => ({ + type: "text", + value: prevNode.value + node.value, + position: { + start: prevNode.position.start, + end: node.position.end + } + }) + ); +} + +function splitTextIntoSentences(ast, options) { + return mapAst(ast, (node, index, [parentNode]) => { + if (node.type !== "text") { + return node; + } + + let value = node.value; + + if (parentNode.type === "paragraph") { + if (index === 0) { + value = value.trimLeft(); + } + if (index === parentNode.children.length - 1) { + value = value.trimRight(); + } + } + + return { + type: "sentence", + position: node.position, + children: splitText(value, options) + }; + }); +} + +function 
transformIndentedCodeblockAndMarkItsParentList(ast, options) { + return mapAst(ast, (node, index, parentStack) => { + if (node.type === "code") { + // the first char may point to `\n`, e.g. `\n\t\tbar`, just ignore it + const isIndented = /^\n?( {4,}|\t)/.test( + options.originalText.slice( + node.position.start.offset, + node.position.end.offset + ) + ); + + node.isIndented = isIndented; + + if (isIndented) { + for (let i = 0; i < parentStack.length; i++) { + const parent = parentStack[i]; + + // no need to check checked items + if (parent.hasIndentedCodeblock) { + break; + } + + if (parent.type === "list") { + parent.hasIndentedCodeblock = true; + } + } + } + } + return node; + }); +} + +function markAlignedList(ast, options) { + return mapAst(ast, (node, index, parentStack) => { + if (node.type === "list" && node.children.length !== 0) { + // if one of its parents is not aligned, it's not possible to be aligned in sub-lists + for (let i = 0; i < parentStack.length; i++) { + const parent = parentStack[i]; + if (parent.type === "list" && !parent.isAligned) { + node.isAligned = false; + return node; + } + } + + node.isAligned = isAligned(node); + } + + return node; + }); + + function getListItemStart(listItem) { + return listItem.children.length === 0 + ? -1 + : listItem.children[0].position.start.column - 1; + } + + function isAligned(list) { + if (!list.ordered) { + /** + * - 123 + * - 123 + */ + return true; + } + + const [firstItem, secondItem] = list.children; + + const firstInfo = getOrderedListItemInfo(firstItem, options.originalText); + + if (firstInfo.leadingSpaces.length > 1) { + /** + * 1. 123 + * + * 1. 123 + * 1. 123 + */ + return true; + } + + const firstStart = getListItemStart(firstItem); + + if (firstStart === -1) { + /** + * 1. + * + * 1. + * 1. + */ + return false; + } + + if (list.children.length === 1) { + /** + * aligned: + * + * 11. 123 + * + * not aligned: + * + * 1. 
123 + */ + return firstStart % options.tabWidth === 0; + } + + const secondStart = getListItemStart(secondItem); + + if (firstStart !== secondStart) { + /** + * 11. 123 + * 1. 123 + * + * 1. 123 + * 11. 123 + */ + return false; + } + + if (firstStart % options.tabWidth === 0) { + /** + * 11. 123 + * 12. 123 + */ + return true; + } + + /** + * aligned: + * + * 11. 123 + * 1. 123 + * + * not aligned: + * + * 1. 123 + * 2. 123 + */ + const secondInfo = getOrderedListItemInfo(secondItem, options.originalText); + return secondInfo.leadingSpaces.length > 1; + } +} + +module.exports = preprocess; diff --git a/src/language-markdown/printer-markdown.js b/src/language-markdown/printer-markdown.js index 2af1374f..821498ac 100644 --- a/src/language-markdown/printer-markdown.js +++ b/src/language-markdown/printer-markdown.js @@ -3,6 +3,7 @@ const privateUtil = require("../common/util"); const embed = require("./embed"); const pragma = require("./pragma"); +const preprocess = require("./preprocess"); const { builders: { concat, @@ -882,7 +883,8 @@ function clean(ast, newObj, parent) { if ( ast.type === "code" || ast.type === "yaml" || - ast.type === "importExport" || + ast.type === "import" || + ast.type === "export" || ast.type === "jsx" ) { delete newObj.value; @@ -892,10 +894,15 @@ function clean(ast, newObj, parent) { delete newObj.isAligned; } - // for whitespace: "\n" and " " are considered the same - if (ast.type === "whitespace" && ast.value === "\n") { - newObj.value = " "; + // texts can be splitted or merged + if (ast.type === "text") { + return null; } + + if (ast.type === "inlineCode") { + newObj.value = ast.value.replace(/[ \t\n]+/g, " "); + } + // for insert pragma if ( parent && @@ -924,6 +931,7 @@ function hasPrettierIgnore(path) { } module.exports = { + preprocess, print: genericPrint, embed, massageAstNode: clean, diff --git a/src/language-markdown/utils.js b/src/language-markdown/utils.js index 7f3dbfb6..83736b60 100644 --- a/src/language-markdown/utils.js 
+++ b/src/language-markdown/utils.js @@ -173,7 +173,23 @@ function getFencedCodeBlockValue(node, originalText) { } } +function mapAst(ast, handler) { + return (function preorder(node, index, parentStack) { + parentStack = parentStack || []; + + const newNode = Object.assign({}, handler(node, index, parentStack)); + if (newNode.children) { + newNode.children = newNode.children.map((child, index) => { + return preorder(child, index, [newNode].concat(parentStack)); + }); + } + + return newNode; + })(ast, null, null); +} + module.exports = { + mapAst, splitText, punctuationPattern, getFencedCodeBlockValue, diff --git a/src/language-yaml/parser-yaml.js b/src/language-yaml/parser-yaml.js index 1a5313e5..cbcd5d02 100644 --- a/src/language-yaml/parser-yaml.js +++ b/src/language-yaml/parser-yaml.js @@ -2,36 +2,10 @@ const createError = require("../common/parser-create-error"); const { hasPragma } = require("./pragma"); -const { defineShortcut, mapNode } = require("./utils"); - -function defineShortcuts(node) { - switch (node.type) { - case "document": - defineShortcut(node, "head", () => node.children[0]); - defineShortcut(node, "body", () => node.children[1]); - break; - case "documentBody": - case "sequenceItem": - case "flowSequenceItem": - case "mappingKey": - case "mappingValue": - defineShortcut(node, "content", () => node.children[0]); - break; - case "mappingItem": - case "flowMappingItem": - defineShortcut(node, "key", () => node.children[0]); - defineShortcut(node, "value", () => node.children[1]); - break; - } - return node; -} function parse(text) { try { - const root = mapNode( - require("yaml-unist-parser").parse(text), - defineShortcuts - ); + const root = require("yaml-unist-parser").parse(text); /** * suppress `comment not printed` error diff --git a/src/language-yaml/printer-yaml.js b/src/language-yaml/printer-yaml.js index 420e6945..91e06370 100644 --- a/src/language-yaml/printer-yaml.js +++ b/src/language-yaml/printer-yaml.js @@ -16,7 +16,9 @@ const { 
isLastDescendantNode, isNextLineEmpty, isNode, - isEmptyNode + isEmptyNode, + defineShortcut, + mapNode } = require("./utils"); const docBuilders = require("../doc").builders; const { @@ -37,6 +39,32 @@ const { softline } = docBuilders; +function preprocess(ast) { + return mapNode(ast, defineShortcuts); +} + +function defineShortcuts(node) { + switch (node.type) { + case "document": + defineShortcut(node, "head", () => node.children[0]); + defineShortcut(node, "body", () => node.children[1]); + break; + case "documentBody": + case "sequenceItem": + case "flowSequenceItem": + case "mappingKey": + case "mappingValue": + defineShortcut(node, "content", () => node.children[0]); + break; + case "mappingItem": + case "flowMappingItem": + defineShortcut(node, "key", () => node.children[0]); + defineShortcut(node, "value", () => node.children[1]); + break; + } + return node; +} + function genericPrint(path, options, print) { const node = path.getValue(); const parentNode = path.getParentNode(); @@ -676,6 +704,7 @@ function clean(node, newNode /*, parent */) { } module.exports = { + preprocess, print: genericPrint, massageAstNode: clean, insertPragma diff --git a/src/main/ast-to-doc.js b/src/main/ast-to-doc.js index 9607f3c1..70ce71e7 100644 --- a/src/main/ast-to-doc.js +++ b/src/main/ast-to-doc.js @@ -12,10 +12,13 @@ const hardline = docBuilders.hardline; const addAlignmentToDoc = docBuilders.addAlignmentToDoc; const docUtils = doc.utils; -function printAstToDoc(ast, options, addAlignmentSize) { - addAlignmentSize = addAlignmentSize || 0; - +function printAstToDoc(ast, options, addAlignmentSize = 0) { const printer = options.printer; + + if (printer.preprocess) { + ast = printer.preprocess(ast, options); + } + const cache = new Map(); function printGenerically(path, args) { @@ -59,14 +62,6 @@ function printAstToDoc(ast, options, addAlignmentSize) { } docUtils.propagateBreaks(doc); - if ( - options.parser === "json" || - options.parser === "json5" || - options.parser === 
"json-stringify" - ) { - doc = concat([doc, hardline]); - } - return doc; }