diff --git a/docs/plugins.md b/docs/plugins.md index e9d58100..0e522c97 100644 --- a/docs/plugins.md +++ b/docs/plugins.md @@ -182,6 +182,12 @@ A plugin can implement how a pragma comment is inserted in the resulting code wh function insertPragma(text: string): string; ``` +_(Optional)_ The `preprocess` function can process the AST from the parser before passing it into the `print` function. + +```ts +function preprocess(ast: AST, options: object): AST; +``` + ### `options` `options` is an object containing the custom options your plugin supports. diff --git a/src/language-js/preprocess.js b/src/language-js/preprocess.js new file mode 100644 index 00000000..3548cd85 --- /dev/null +++ b/src/language-js/preprocess.js @@ -0,0 +1,17 @@ +"use strict"; + +function preprocess(ast, options) { + switch (options.parser) { + case "json": + case "json5": + case "json-stringify": + return Object.assign({}, ast, { + type: "JsonRoot", + node: Object.assign({}, ast, { comments: [] }) + }); + default: + return ast; + } +} + +module.exports = preprocess; diff --git a/src/language-js/printer-estree-json.js b/src/language-js/printer-estree-json.js index 55f58a41..5f82f15d 100644 --- a/src/language-js/printer-estree-json.js +++ b/src/language-js/printer-estree-json.js @@ -1,10 +1,13 @@ "use strict"; const { concat, hardline, indent, join } = require("../doc").builders; +const preprocess = require("./preprocess"); function genericPrint(path, options, print) { const node = path.getValue(); switch (node.type) { + case "JsonRoot": + return concat([path.call(print, "node"), hardline]); case "ArrayExpression": return node.elements.length === 0 ? 
"[]" @@ -71,6 +74,7 @@ function clean(node, newNode /*, parent*/) { } module.exports = { + preprocess, print: genericPrint, massageAstNode: clean }; diff --git a/src/language-js/printer-estree.js b/src/language-js/printer-estree.js index 8008512a..d0fb83d1 100644 --- a/src/language-js/printer-estree.js +++ b/src/language-js/printer-estree.js @@ -34,6 +34,7 @@ const clean = require("./clean"); const insertPragma = require("./pragma").insertPragma; const handleComments = require("./comments"); const pathNeedsParens = require("./needs-parens"); +const preprocess = require("./preprocess"); const { builders: { @@ -342,6 +343,8 @@ function printPathNoParens(path, options, print, args) { let parts = []; switch (n.type) { + case "JsonRoot": + return concat([path.call(print, "node"), hardline]); case "File": // Print @babel/parser's InterpreterDirective here so that // leading comments on the `Program` node get printed after the hashbang. @@ -5979,6 +5982,7 @@ function rawText(node) { } module.exports = { + preprocess, print: genericPrint, embed, insertPragma, diff --git a/src/language-markdown/mdx.js b/src/language-markdown/mdx.js index 988e7bec..b231145f 100644 --- a/src/language-markdown/mdx.js +++ b/src/language-markdown/mdx.js @@ -38,7 +38,7 @@ const tokenizeEsSyntax = (eat, value) => { if (isExport(subvalue) || isImport(subvalue)) { return eat(subvalue)({ - type: "importExport", + type: isExport(subvalue) ? 
"export" : "import", value: subvalue }); } diff --git a/src/language-markdown/parser-markdown.js b/src/language-markdown/parser-markdown.js index 499e78c8..a9fd710d 100644 --- a/src/language-markdown/parser-markdown.js +++ b/src/language-markdown/parser-markdown.js @@ -4,12 +4,9 @@ const remarkParse = require("remark-parse"); const unified = require("unified"); const pragma = require("./pragma"); const parseFrontMatter = require("../utils/front-matter"); -const { getOrderedListItemInfo, splitText } = require("./utils"); +const { mapAst } = require("./utils"); const mdx = require("./mdx"); -// 0x0 ~ 0x10ffff -const isSingleCharRegex = /^([\u0000-\uffff]|[\ud800-\udbff][\udc00-\udfff])$/; - /** * based on [MDAST](https://github.com/syntax-tree/mdast) with following modifications: * @@ -25,7 +22,7 @@ const isSingleCharRegex = /^([\u0000-\uffff]|[\ud800-\udbff][\udc00-\udfff])$/; * interface InlineCode { children: Array } */ function createParse({ isMDX }) { - return (text, parsers, opts) => { + return text => { const processor = unified() .use( remarkParse, @@ -40,40 +37,18 @@ function createParse({ isMDX }) { .use(frontMatter) .use(isMDX ? mdx.esSyntax : identity) .use(liquid) - .use(restoreUnescapedCharacter(text)) - .use(mergeContinuousTexts) - .use(transformInlineCode) - .use(transformIndentedCodeblockAndMarkItsParentList(text)) - .use(markAlignedList(text, opts)) - .use(splitTextIntoSentences(opts)) - .use(isMDX ? htmlToJsx : identity) - .use(isMDX ? mergeContinuousImportExport : identity); + .use(isMDX ? 
htmlToJsx : identity); return processor.runSync(processor.parse(text)); }; } -function map(ast, handler) { - return (function preorder(node, index, parentStack) { - parentStack = parentStack || []; - - const newNode = Object.assign({}, handler(node, index, parentStack)); - if (newNode.children) { - newNode.children = newNode.children.map((child, index) => { - return preorder(child, index, [newNode].concat(parentStack)); - }); - } - - return newNode; - })(ast, null, null); -} - function identity(x) { return x; } function htmlToJsx() { return ast => - map(ast, (node, index, [parent]) => { + mapAst(ast, (node, index, [parent]) => { if ( node.type !== "html" || /^$/.test(node.value) || @@ -87,114 +62,6 @@ function htmlToJsx() { }); } -function mergeContinuousImportExport() { - return mergeChildren( - (prevNode, node) => - prevNode.type === "importExport" && node.type === "importExport", - (prevNode, node) => ({ - type: "importExport", - value: prevNode.value + "\n\n" + node.value, - position: { - start: prevNode.position.start, - end: node.position.end - } - }) - ); -} - -function transformInlineCode() { - return ast => - map(ast, node => { - if (node.type !== "inlineCode") { - return node; - } - - return Object.assign({}, node, { - value: node.value.replace(/\s+/g, " ") - }); - }); -} - -function restoreUnescapedCharacter(originalText) { - return () => ast => - map(ast, node => { - return node.type !== "text" - ? node - : Object.assign({}, node, { - value: - node.value !== "*" && - node.value !== "_" && // handle these two cases in printer - isSingleCharRegex.test(node.value) && - node.position.end.offset - node.position.start.offset !== - node.value.length - ? 
originalText.slice( - node.position.start.offset, - node.position.end.offset - ) - : node.value - }); - }); -} - -function mergeChildren(shouldMerge, mergeNode) { - return ast => - map(ast, node => { - if (!node.children) { - return node; - } - const children = node.children.reduce((current, child) => { - const lastChild = current[current.length - 1]; - if (lastChild && shouldMerge(lastChild, child)) { - current.splice(-1, 1, mergeNode(lastChild, child)); - } else { - current.push(child); - } - return current; - }, []); - return Object.assign({}, node, { children }); - }); -} - -function mergeContinuousTexts() { - return mergeChildren( - (prevNode, node) => prevNode.type === "text" && node.type === "text", - (prevNode, node) => ({ - type: "text", - value: prevNode.value + node.value, - position: { - start: prevNode.position.start, - end: node.position.end - } - }) - ); -} - -function splitTextIntoSentences(options) { - return () => ast => - map(ast, (node, index, [parentNode]) => { - if (node.type !== "text") { - return node; - } - - let value = node.value; - - if (parentNode.type === "paragraph") { - if (index === 0) { - value = value.trimLeft(); - } - if (index === parentNode.children.length - 1) { - value = value.trimRight(); - } - } - - return { - type: "sentence", - position: node.position, - children: splitText(value, options) - }; - }); -} - function frontMatter() { const proto = this.Parser.prototype; proto.blockMethods = ["frontMatter"].concat(proto.blockMethods); @@ -231,149 +98,6 @@ function liquid() { }; } -function transformIndentedCodeblockAndMarkItsParentList(originalText) { - return () => ast => - map(ast, (node, index, parentStack) => { - if (node.type === "code") { - // the first char may point to `\n`, e.g. 
`\n\t\tbar`, just ignore it - const isIndented = /^\n?( {4,}|\t)/.test( - originalText.slice( - node.position.start.offset, - node.position.end.offset - ) - ); - - node.isIndented = isIndented; - - if (isIndented) { - for (let i = 0; i < parentStack.length; i++) { - const parent = parentStack[i]; - - // no need to check checked items - if (parent.hasIndentedCodeblock) { - break; - } - - if (parent.type === "list") { - parent.hasIndentedCodeblock = true; - } - } - } - } - return node; - }); -} - -function markAlignedList(originalText, options) { - return () => ast => - map(ast, (node, index, parentStack) => { - if (node.type === "list" && node.children.length !== 0) { - // if one of its parents is not aligned, it's not possible to be aligned in sub-lists - for (let i = 0; i < parentStack.length; i++) { - const parent = parentStack[i]; - if (parent.type === "list" && !parent.isAligned) { - node.isAligned = false; - return node; - } - } - - node.isAligned = isAligned(node); - } - - return node; - }); - - function getListItemStart(listItem) { - return listItem.children.length === 0 - ? -1 - : listItem.children[0].position.start.column - 1; - } - - function isAligned(list) { - if (!list.ordered) { - /** - * - 123 - * - 123 - */ - return true; - } - - const [firstItem, secondItem] = list.children; - - const firstInfo = getOrderedListItemInfo(firstItem, originalText); - - if (firstInfo.leadingSpaces.length > 1) { - /** - * 1. 123 - * - * 1. 123 - * 1. 123 - */ - return true; - } - - const firstStart = getListItemStart(firstItem); - - if (firstStart === -1) { - /** - * 1. - * - * 1. - * 1. - */ - return false; - } - - if (list.children.length === 1) { - /** - * aligned: - * - * 11. 123 - * - * not aligned: - * - * 1. 123 - */ - return firstStart % options.tabWidth === 0; - } - - const secondStart = getListItemStart(secondItem); - - if (firstStart !== secondStart) { - /** - * 11. 123 - * 1. 123 - * - * 1. 123 - * 11. 
123 - */ - return false; - } - - if (firstStart % options.tabWidth === 0) { - /** - * 11. 123 - * 12. 123 - */ - return true; - } - - /** - * aligned: - * - * 11. 123 - * 1. 123 - * - * not aligned: - * - * 1. 123 - * 2. 123 - */ - const secondInfo = getOrderedListItemInfo(secondItem, originalText); - return secondInfo.leadingSpaces.length > 1; - } -} - const baseParser = { astFormat: "mdast", hasPragma: pragma.hasPragma, diff --git a/src/language-markdown/preprocess.js b/src/language-markdown/preprocess.js new file mode 100644 index 00000000..64ca75af --- /dev/null +++ b/src/language-markdown/preprocess.js @@ -0,0 +1,277 @@ +"use strict"; + +const { getOrderedListItemInfo, mapAst, splitText } = require("./utils"); + +// 0x0 ~ 0x10ffff +const isSingleCharRegex = /^([\u0000-\uffff]|[\ud800-\udbff][\udc00-\udfff])$/; + +function preprocess(ast, options) { + ast = restoreUnescapedCharacter(ast, options); + ast = mergeContinuousTexts(ast); + ast = transformInlineCode(ast); + ast = transformIndentedCodeblockAndMarkItsParentList(ast, options); + ast = markAlignedList(ast, options); + ast = splitTextIntoSentences(ast, options); + ast = transformImportExport(ast); + ast = mergeContinuousImportExport(ast); + return ast; +} + +function transformImportExport(ast) { + return mapAst(ast, node => { + if (node.type !== "import" && node.type !== "export") { + return node; + } + + return Object.assign({}, node, { type: "importExport" }); + }); +} + +function transformInlineCode(ast) { + return mapAst(ast, node => { + if (node.type !== "inlineCode") { + return node; + } + + return Object.assign({}, node, { + value: node.value.replace(/\s+/g, " ") + }); + }); +} + +function restoreUnescapedCharacter(ast, options) { + return mapAst(ast, node => { + return node.type !== "text" + ? 
node + : Object.assign({}, node, { + value: + node.value !== "*" && + node.value !== "_" && // handle these two cases in printer + isSingleCharRegex.test(node.value) && + node.position.end.offset - node.position.start.offset !== + node.value.length + ? options.originalText.slice( + node.position.start.offset, + node.position.end.offset + ) + : node.value + }); + }); +} + +function mergeContinuousImportExport(ast) { + return mergeChildren( + ast, + (prevNode, node) => + prevNode.type === "importExport" && node.type === "importExport", + (prevNode, node) => ({ + type: "importExport", + value: prevNode.value + "\n\n" + node.value, + position: { + start: prevNode.position.start, + end: node.position.end + } + }) + ); +} + +function mergeChildren(ast, shouldMerge, mergeNode) { + return mapAst(ast, node => { + if (!node.children) { + return node; + } + const children = node.children.reduce((current, child) => { + const lastChild = current[current.length - 1]; + if (lastChild && shouldMerge(lastChild, child)) { + current.splice(-1, 1, mergeNode(lastChild, child)); + } else { + current.push(child); + } + return current; + }, []); + return Object.assign({}, node, { children }); + }); +} + +function mergeContinuousTexts(ast) { + return mergeChildren( + ast, + (prevNode, node) => prevNode.type === "text" && node.type === "text", + (prevNode, node) => ({ + type: "text", + value: prevNode.value + node.value, + position: { + start: prevNode.position.start, + end: node.position.end + } + }) + ); +} + +function splitTextIntoSentences(ast, options) { + return mapAst(ast, (node, index, [parentNode]) => { + if (node.type !== "text") { + return node; + } + + let value = node.value; + + if (parentNode.type === "paragraph") { + if (index === 0) { + value = value.trimLeft(); + } + if (index === parentNode.children.length - 1) { + value = value.trimRight(); + } + } + + return { + type: "sentence", + position: node.position, + children: splitText(value, options) + }; + }); +} + +function 
transformIndentedCodeblockAndMarkItsParentList(ast, options) { + return mapAst(ast, (node, index, parentStack) => { + if (node.type === "code") { + // the first char may point to `\n`, e.g. `\n\t\tbar`, just ignore it + const isIndented = /^\n?( {4,}|\t)/.test( + options.originalText.slice( + node.position.start.offset, + node.position.end.offset + ) + ); + + node.isIndented = isIndented; + + if (isIndented) { + for (let i = 0; i < parentStack.length; i++) { + const parent = parentStack[i]; + + // no need to check checked items + if (parent.hasIndentedCodeblock) { + break; + } + + if (parent.type === "list") { + parent.hasIndentedCodeblock = true; + } + } + } + } + return node; + }); +} + +function markAlignedList(ast, options) { + return mapAst(ast, (node, index, parentStack) => { + if (node.type === "list" && node.children.length !== 0) { + // if one of its parents is not aligned, it's not possible to be aligned in sub-lists + for (let i = 0; i < parentStack.length; i++) { + const parent = parentStack[i]; + if (parent.type === "list" && !parent.isAligned) { + node.isAligned = false; + return node; + } + } + + node.isAligned = isAligned(node); + } + + return node; + }); + + function getListItemStart(listItem) { + return listItem.children.length === 0 + ? -1 + : listItem.children[0].position.start.column - 1; + } + + function isAligned(list) { + if (!list.ordered) { + /** + * - 123 + * - 123 + */ + return true; + } + + const [firstItem, secondItem] = list.children; + + const firstInfo = getOrderedListItemInfo(firstItem, options.originalText); + + if (firstInfo.leadingSpaces.length > 1) { + /** + * 1. 123 + * + * 1. 123 + * 1. 123 + */ + return true; + } + + const firstStart = getListItemStart(firstItem); + + if (firstStart === -1) { + /** + * 1. + * + * 1. + * 1. + */ + return false; + } + + if (list.children.length === 1) { + /** + * aligned: + * + * 11. 123 + * + * not aligned: + * + * 1. 
123 + */ + return firstStart % options.tabWidth === 0; + } + + const secondStart = getListItemStart(secondItem); + + if (firstStart !== secondStart) { + /** + * 11. 123 + * 1. 123 + * + * 1. 123 + * 11. 123 + */ + return false; + } + + if (firstStart % options.tabWidth === 0) { + /** + * 11. 123 + * 12. 123 + */ + return true; + } + + /** + * aligned: + * + * 11. 123 + * 1. 123 + * + * not aligned: + * + * 1. 123 + * 2. 123 + */ + const secondInfo = getOrderedListItemInfo(secondItem, options.originalText); + return secondInfo.leadingSpaces.length > 1; + } +} + +module.exports = preprocess; diff --git a/src/language-markdown/printer-markdown.js b/src/language-markdown/printer-markdown.js index 2af1374f..821498ac 100644 --- a/src/language-markdown/printer-markdown.js +++ b/src/language-markdown/printer-markdown.js @@ -3,6 +3,7 @@ const privateUtil = require("../common/util"); const embed = require("./embed"); const pragma = require("./pragma"); +const preprocess = require("./preprocess"); const { builders: { concat, @@ -882,7 +883,8 @@ function clean(ast, newObj, parent) { if ( ast.type === "code" || ast.type === "yaml" || - ast.type === "importExport" || + ast.type === "import" || + ast.type === "export" || ast.type === "jsx" ) { delete newObj.value; @@ -892,10 +894,15 @@ function clean(ast, newObj, parent) { delete newObj.isAligned; } - // for whitespace: "\n" and " " are considered the same - if (ast.type === "whitespace" && ast.value === "\n") { - newObj.value = " "; + // texts can be split or merged + if (ast.type === "text") { + return null; } + + if (ast.type === "inlineCode") { + newObj.value = ast.value.replace(/[ \t\n]+/g, " "); + } + // for insert pragma if ( parent && @@ -924,6 +931,7 @@ function hasPrettierIgnore(path) { } module.exports = { + preprocess, print: genericPrint, embed, massageAstNode: clean, diff --git a/src/language-markdown/utils.js b/src/language-markdown/utils.js index 7f3dbfb6..83736b60 100644 --- a/src/language-markdown/utils.js 
+++ b/src/language-markdown/utils.js @@ -173,7 +173,23 @@ function getFencedCodeBlockValue(node, originalText) { } } +function mapAst(ast, handler) { + return (function preorder(node, index, parentStack) { + parentStack = parentStack || []; + + const newNode = Object.assign({}, handler(node, index, parentStack)); + if (newNode.children) { + newNode.children = newNode.children.map((child, index) => { + return preorder(child, index, [newNode].concat(parentStack)); + }); + } + + return newNode; + })(ast, null, null); +} + module.exports = { + mapAst, splitText, punctuationPattern, getFencedCodeBlockValue, diff --git a/src/language-yaml/parser-yaml.js b/src/language-yaml/parser-yaml.js index 1a5313e5..cbcd5d02 100644 --- a/src/language-yaml/parser-yaml.js +++ b/src/language-yaml/parser-yaml.js @@ -2,36 +2,10 @@ const createError = require("../common/parser-create-error"); const { hasPragma } = require("./pragma"); -const { defineShortcut, mapNode } = require("./utils"); - -function defineShortcuts(node) { - switch (node.type) { - case "document": - defineShortcut(node, "head", () => node.children[0]); - defineShortcut(node, "body", () => node.children[1]); - break; - case "documentBody": - case "sequenceItem": - case "flowSequenceItem": - case "mappingKey": - case "mappingValue": - defineShortcut(node, "content", () => node.children[0]); - break; - case "mappingItem": - case "flowMappingItem": - defineShortcut(node, "key", () => node.children[0]); - defineShortcut(node, "value", () => node.children[1]); - break; - } - return node; -} function parse(text) { try { - const root = mapNode( - require("yaml-unist-parser").parse(text), - defineShortcuts - ); + const root = require("yaml-unist-parser").parse(text); /** * suppress `comment not printed` error diff --git a/src/language-yaml/printer-yaml.js b/src/language-yaml/printer-yaml.js index 420e6945..91e06370 100644 --- a/src/language-yaml/printer-yaml.js +++ b/src/language-yaml/printer-yaml.js @@ -16,7 +16,9 @@ const { 
isLastDescendantNode, isNextLineEmpty, isNode, - isEmptyNode + isEmptyNode, + defineShortcut, + mapNode } = require("./utils"); const docBuilders = require("../doc").builders; const { @@ -37,6 +39,32 @@ const { softline } = docBuilders; +function preprocess(ast) { + return mapNode(ast, defineShortcuts); +} + +function defineShortcuts(node) { + switch (node.type) { + case "document": + defineShortcut(node, "head", () => node.children[0]); + defineShortcut(node, "body", () => node.children[1]); + break; + case "documentBody": + case "sequenceItem": + case "flowSequenceItem": + case "mappingKey": + case "mappingValue": + defineShortcut(node, "content", () => node.children[0]); + break; + case "mappingItem": + case "flowMappingItem": + defineShortcut(node, "key", () => node.children[0]); + defineShortcut(node, "value", () => node.children[1]); + break; + } + return node; +} + function genericPrint(path, options, print) { const node = path.getValue(); const parentNode = path.getParentNode(); @@ -676,6 +704,7 @@ function clean(node, newNode /*, parent */) { } module.exports = { + preprocess, print: genericPrint, massageAstNode: clean, insertPragma diff --git a/src/main/ast-to-doc.js b/src/main/ast-to-doc.js index 9607f3c1..70ce71e7 100644 --- a/src/main/ast-to-doc.js +++ b/src/main/ast-to-doc.js @@ -12,10 +12,13 @@ const hardline = docBuilders.hardline; const addAlignmentToDoc = docBuilders.addAlignmentToDoc; const docUtils = doc.utils; -function printAstToDoc(ast, options, addAlignmentSize) { - addAlignmentSize = addAlignmentSize || 0; - +function printAstToDoc(ast, options, addAlignmentSize = 0) { const printer = options.printer; + + if (printer.preprocess) { + ast = printer.preprocess(ast, options); + } + const cache = new Map(); function printGenerically(path, args) { @@ -59,14 +62,6 @@ function printAstToDoc(ast, options, addAlignmentSize) { } docUtils.propagateBreaks(doc); - if ( - options.parser === "json" || - options.parser === "json5" || - options.parser === 
"json-stringify" - ) { - doc = concat([doc, hardline]); - } - return doc; }