feat: add printer.preprocess (#5041)

Sometimes we need to transform the AST to make it easier to print, but this is currently done in the parsers (markdown and yaml), which makes the output AST harder to use for external users (e.g. custom parsers). Adding `printer.preprocess` solves this issue.

This also lets us move the trailing-newline handling for JSON from `ast-to-doc.js` to `language-js`.
master
Ika 2018-09-03 23:27:50 +08:00 committed by GitHub
parent 6288704c41
commit 50c9115632
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 378 additions and 324 deletions

View File

@ -182,6 +182,12 @@ A plugin can implement how a pragma comment is inserted in the resulting code wh
function insertPragma(text: string): string; function insertPragma(text: string): string;
``` ```
_(Optional)_ The `preprocess` function can process the AST returned by the parser before it is passed into the `print` function.
```ts
function preprocess(ast: AST, options: object): AST;
```
### `options` ### `options`
`options` is an object containing the custom options your plugin supports. `options` is an object containing the custom options your plugin supports.

View File

@ -0,0 +1,17 @@
"use strict";
/**
 * Prepare a parsed AST for printing.
 *
 * For the JSON-family parsers ("json", "json5", "json-stringify") the AST is
 * wrapped in a synthetic "JsonRoot" node: the wrapper is a shallow copy of the
 * AST with its `type` replaced, and its `node` property holds a second shallow
 * copy of the AST whose `comments` are reset to an empty array. For every
 * other parser the AST is returned as-is.
 *
 * @param {object} ast - AST produced by the parser.
 * @param {object} options - Printing options; only `options.parser` is read.
 * @returns {object} The "JsonRoot" wrapper for JSON parsers, otherwise `ast`.
 */
function preprocess(ast, options) {
  const parser = options.parser;
  const isJsonParser =
    parser === "json" || parser === "json5" || parser === "json-stringify";

  if (!isJsonParser) {
    return ast;
  }

  // The inner copy gets an empty comment list; the wrapper keeps the
  // original `comments` property inherited from `ast`.
  const innerNode = Object.assign({}, ast, { comments: [] });
  return Object.assign({}, ast, { type: "JsonRoot", node: innerNode });
}
module.exports = preprocess;

View File

@ -1,10 +1,13 @@
"use strict"; "use strict";
const { concat, hardline, indent, join } = require("../doc").builders; const { concat, hardline, indent, join } = require("../doc").builders;
const preprocess = require("./preprocess");
function genericPrint(path, options, print) { function genericPrint(path, options, print) {
const node = path.getValue(); const node = path.getValue();
switch (node.type) { switch (node.type) {
case "JsonRoot":
return concat([path.call(print, "node"), hardline]);
case "ArrayExpression": case "ArrayExpression":
return node.elements.length === 0 return node.elements.length === 0
? "[]" ? "[]"
@ -71,6 +74,7 @@ function clean(node, newNode /*, parent*/) {
} }
module.exports = { module.exports = {
preprocess,
print: genericPrint, print: genericPrint,
massageAstNode: clean massageAstNode: clean
}; };

View File

@ -34,6 +34,7 @@ const clean = require("./clean");
const insertPragma = require("./pragma").insertPragma; const insertPragma = require("./pragma").insertPragma;
const handleComments = require("./comments"); const handleComments = require("./comments");
const pathNeedsParens = require("./needs-parens"); const pathNeedsParens = require("./needs-parens");
const preprocess = require("./preprocess");
const { const {
builders: { builders: {
@ -342,6 +343,8 @@ function printPathNoParens(path, options, print, args) {
let parts = []; let parts = [];
switch (n.type) { switch (n.type) {
case "JsonRoot":
return concat([path.call(print, "node"), hardline]);
case "File": case "File":
// Print @babel/parser's InterpreterDirective here so that // Print @babel/parser's InterpreterDirective here so that
// leading comments on the `Program` node get printed after the hashbang. // leading comments on the `Program` node get printed after the hashbang.
@ -5979,6 +5982,7 @@ function rawText(node) {
} }
module.exports = { module.exports = {
preprocess,
print: genericPrint, print: genericPrint,
embed, embed,
insertPragma, insertPragma,

View File

@ -38,7 +38,7 @@ const tokenizeEsSyntax = (eat, value) => {
if (isExport(subvalue) || isImport(subvalue)) { if (isExport(subvalue) || isImport(subvalue)) {
return eat(subvalue)({ return eat(subvalue)({
type: "importExport", type: isExport(subvalue) ? "export" : "import",
value: subvalue value: subvalue
}); });
} }

View File

@ -4,12 +4,9 @@ const remarkParse = require("remark-parse");
const unified = require("unified"); const unified = require("unified");
const pragma = require("./pragma"); const pragma = require("./pragma");
const parseFrontMatter = require("../utils/front-matter"); const parseFrontMatter = require("../utils/front-matter");
const { getOrderedListItemInfo, splitText } = require("./utils"); const { mapAst } = require("./utils");
const mdx = require("./mdx"); const mdx = require("./mdx");
// 0x0 ~ 0x10ffff
const isSingleCharRegex = /^([\u0000-\uffff]|[\ud800-\udbff][\udc00-\udfff])$/;
/** /**
* based on [MDAST](https://github.com/syntax-tree/mdast) with following modifications: * based on [MDAST](https://github.com/syntax-tree/mdast) with following modifications:
* *
@ -25,7 +22,7 @@ const isSingleCharRegex = /^([\u0000-\uffff]|[\ud800-\udbff][\udc00-\udfff])$/;
* interface InlineCode { children: Array<Sentence> } * interface InlineCode { children: Array<Sentence> }
*/ */
function createParse({ isMDX }) { function createParse({ isMDX }) {
return (text, parsers, opts) => { return text => {
const processor = unified() const processor = unified()
.use( .use(
remarkParse, remarkParse,
@ -40,40 +37,18 @@ function createParse({ isMDX }) {
.use(frontMatter) .use(frontMatter)
.use(isMDX ? mdx.esSyntax : identity) .use(isMDX ? mdx.esSyntax : identity)
.use(liquid) .use(liquid)
.use(restoreUnescapedCharacter(text)) .use(isMDX ? htmlToJsx : identity);
.use(mergeContinuousTexts)
.use(transformInlineCode)
.use(transformIndentedCodeblockAndMarkItsParentList(text))
.use(markAlignedList(text, opts))
.use(splitTextIntoSentences(opts))
.use(isMDX ? htmlToJsx : identity)
.use(isMDX ? mergeContinuousImportExport : identity);
return processor.runSync(processor.parse(text)); return processor.runSync(processor.parse(text));
}; };
} }
function map(ast, handler) {
return (function preorder(node, index, parentStack) {
parentStack = parentStack || [];
const newNode = Object.assign({}, handler(node, index, parentStack));
if (newNode.children) {
newNode.children = newNode.children.map((child, index) => {
return preorder(child, index, [newNode].concat(parentStack));
});
}
return newNode;
})(ast, null, null);
}
function identity(x) { function identity(x) {
return x; return x;
} }
function htmlToJsx() { function htmlToJsx() {
return ast => return ast =>
map(ast, (node, index, [parent]) => { mapAst(ast, (node, index, [parent]) => {
if ( if (
node.type !== "html" || node.type !== "html" ||
/^<!--[\s\S]*-->$/.test(node.value) || /^<!--[\s\S]*-->$/.test(node.value) ||
@ -87,114 +62,6 @@ function htmlToJsx() {
}); });
} }
function mergeContinuousImportExport() {
return mergeChildren(
(prevNode, node) =>
prevNode.type === "importExport" && node.type === "importExport",
(prevNode, node) => ({
type: "importExport",
value: prevNode.value + "\n\n" + node.value,
position: {
start: prevNode.position.start,
end: node.position.end
}
})
);
}
function transformInlineCode() {
return ast =>
map(ast, node => {
if (node.type !== "inlineCode") {
return node;
}
return Object.assign({}, node, {
value: node.value.replace(/\s+/g, " ")
});
});
}
function restoreUnescapedCharacter(originalText) {
return () => ast =>
map(ast, node => {
return node.type !== "text"
? node
: Object.assign({}, node, {
value:
node.value !== "*" &&
node.value !== "_" && // handle these two cases in printer
isSingleCharRegex.test(node.value) &&
node.position.end.offset - node.position.start.offset !==
node.value.length
? originalText.slice(
node.position.start.offset,
node.position.end.offset
)
: node.value
});
});
}
function mergeChildren(shouldMerge, mergeNode) {
return ast =>
map(ast, node => {
if (!node.children) {
return node;
}
const children = node.children.reduce((current, child) => {
const lastChild = current[current.length - 1];
if (lastChild && shouldMerge(lastChild, child)) {
current.splice(-1, 1, mergeNode(lastChild, child));
} else {
current.push(child);
}
return current;
}, []);
return Object.assign({}, node, { children });
});
}
function mergeContinuousTexts() {
return mergeChildren(
(prevNode, node) => prevNode.type === "text" && node.type === "text",
(prevNode, node) => ({
type: "text",
value: prevNode.value + node.value,
position: {
start: prevNode.position.start,
end: node.position.end
}
})
);
}
function splitTextIntoSentences(options) {
return () => ast =>
map(ast, (node, index, [parentNode]) => {
if (node.type !== "text") {
return node;
}
let value = node.value;
if (parentNode.type === "paragraph") {
if (index === 0) {
value = value.trimLeft();
}
if (index === parentNode.children.length - 1) {
value = value.trimRight();
}
}
return {
type: "sentence",
position: node.position,
children: splitText(value, options)
};
});
}
function frontMatter() { function frontMatter() {
const proto = this.Parser.prototype; const proto = this.Parser.prototype;
proto.blockMethods = ["frontMatter"].concat(proto.blockMethods); proto.blockMethods = ["frontMatter"].concat(proto.blockMethods);
@ -231,149 +98,6 @@ function liquid() {
}; };
} }
function transformIndentedCodeblockAndMarkItsParentList(originalText) {
return () => ast =>
map(ast, (node, index, parentStack) => {
if (node.type === "code") {
// the first char may point to `\n`, e.g. `\n\t\tbar`, just ignore it
const isIndented = /^\n?( {4,}|\t)/.test(
originalText.slice(
node.position.start.offset,
node.position.end.offset
)
);
node.isIndented = isIndented;
if (isIndented) {
for (let i = 0; i < parentStack.length; i++) {
const parent = parentStack[i];
// no need to check checked items
if (parent.hasIndentedCodeblock) {
break;
}
if (parent.type === "list") {
parent.hasIndentedCodeblock = true;
}
}
}
}
return node;
});
}
function markAlignedList(originalText, options) {
return () => ast =>
map(ast, (node, index, parentStack) => {
if (node.type === "list" && node.children.length !== 0) {
// if one of its parents is not aligned, it's not possible to be aligned in sub-lists
for (let i = 0; i < parentStack.length; i++) {
const parent = parentStack[i];
if (parent.type === "list" && !parent.isAligned) {
node.isAligned = false;
return node;
}
}
node.isAligned = isAligned(node);
}
return node;
});
function getListItemStart(listItem) {
return listItem.children.length === 0
? -1
: listItem.children[0].position.start.column - 1;
}
function isAligned(list) {
if (!list.ordered) {
/**
* - 123
* - 123
*/
return true;
}
const [firstItem, secondItem] = list.children;
const firstInfo = getOrderedListItemInfo(firstItem, originalText);
if (firstInfo.leadingSpaces.length > 1) {
/**
* 1. 123
*
* 1. 123
* 1. 123
*/
return true;
}
const firstStart = getListItemStart(firstItem);
if (firstStart === -1) {
/**
* 1.
*
* 1.
* 1.
*/
return false;
}
if (list.children.length === 1) {
/**
* aligned:
*
* 11. 123
*
* not aligned:
*
* 1. 123
*/
return firstStart % options.tabWidth === 0;
}
const secondStart = getListItemStart(secondItem);
if (firstStart !== secondStart) {
/**
* 11. 123
* 1. 123
*
* 1. 123
* 11. 123
*/
return false;
}
if (firstStart % options.tabWidth === 0) {
/**
* 11. 123
* 12. 123
*/
return true;
}
/**
* aligned:
*
* 11. 123
* 1. 123
*
* not aligned:
*
* 1. 123
* 2. 123
*/
const secondInfo = getOrderedListItemInfo(secondItem, originalText);
return secondInfo.leadingSpaces.length > 1;
}
}
const baseParser = { const baseParser = {
astFormat: "mdast", astFormat: "mdast",
hasPragma: pragma.hasPragma, hasPragma: pragma.hasPragma,

View File

@ -0,0 +1,277 @@
"use strict";
const { getOrderedListItemInfo, mapAst, splitText } = require("./utils");
// 0x0 ~ 0x10ffff
const isSingleCharRegex = /^([\u0000-\uffff]|[\ud800-\udbff][\udc00-\udfff])$/;
/**
 * Run the markdown AST through every printer-side transformation, in order.
 *
 * Each stage receives the tree returned by the previous stage; the order is
 * significant (e.g. texts are merged before they are split into sentences,
 * and import/export nodes are renamed before adjacent ones are merged).
 *
 * @param {object} ast - AST produced by the markdown parser.
 * @param {object} options - Printing options, forwarded to the stages that
 *   need them.
 * @returns {object} The transformed AST.
 */
function preprocess(ast, options) {
  const unescaped = restoreUnescapedCharacter(ast, options);
  const textsMerged = mergeContinuousTexts(unescaped);
  const inlineCodeNormalized = transformInlineCode(textsMerged);
  const codeblocksMarked = transformIndentedCodeblockAndMarkItsParentList(
    inlineCodeNormalized,
    options
  );
  const listsMarked = markAlignedList(codeblocksMarked, options);
  const sentencesSplit = splitTextIntoSentences(listsMarked, options);
  const importExportRenamed = transformImportExport(sentencesSplit);
  return mergeContinuousImportExport(importExportRenamed);
}
/**
 * Rename every "import" and "export" node to the unified "importExport" type.
 *
 * @param {object} ast - Markdown AST.
 * @returns {object} A mapped copy of the AST with the renamed nodes.
 */
function transformImportExport(ast) {
  const isImportOrExport = node =>
    node.type === "import" || node.type === "export";

  return mapAst(
    ast,
    node =>
      isImportOrExport(node)
        ? Object.assign({}, node, { type: "importExport" })
        : node
  );
}
/**
 * Collapse every run of whitespace inside "inlineCode" values into a single
 * space. All other nodes pass through unchanged.
 *
 * @param {object} ast - Markdown AST.
 * @returns {object} A mapped copy of the AST with normalized inline code.
 */
function transformInlineCode(ast) {
  const collapseWhitespace = node => {
    if (node.type === "inlineCode") {
      const value = node.value.replace(/\s+/g, " ");
      return Object.assign({}, node, { value });
    }
    return node;
  };

  return mapAst(ast, collapseWhitespace);
}
/**
 * Restore the original source text of single-character "text" nodes whose
 * source span is not the same length as their value.
 *
 * `*` and `_` are deliberately excluded because the printer handles those two
 * cases itself.
 *
 * @param {object} ast - Markdown AST.
 * @param {object} options - Printing options; `options.originalText` is
 *   sliced using the node's position offsets.
 * @returns {object} A mapped copy of the AST with restored text values.
 */
function restoreUnescapedCharacter(ast, options) {
  return mapAst(ast, node => {
    if (node.type !== "text") {
      return node;
    }

    const text = node.value;
    // Evaluated left to right, so `node.position` is only touched for
    // single-character values other than `*` and `_`.
    const shouldRestore =
      text !== "*" &&
      text !== "_" && // handle these two cases in printer
      isSingleCharRegex.test(text) &&
      node.position.end.offset - node.position.start.offset !== text.length;

    const value = shouldRestore
      ? options.originalText.slice(
          node.position.start.offset,
          node.position.end.offset
        )
      : text;

    return Object.assign({}, node, { value });
  });
}
/**
 * Merge adjacent "importExport" siblings into a single node.
 *
 * The merged value joins both values with a blank line between them, and the
 * merged position runs from the first node's start to the second node's end.
 *
 * @param {object} ast - Markdown AST.
 * @returns {object} The AST returned by `mergeChildren`.
 */
function mergeContinuousImportExport(ast) {
  const areBothImportExport = (previous, current) =>
    previous.type === "importExport" && current.type === "importExport";

  const combine = (previous, current) => {
    const value = previous.value + "\n\n" + current.value;
    const position = {
      start: previous.position.start,
      end: current.position.end
    };
    return { type: "importExport", value, position };
  };

  return mergeChildren(ast, areBothImportExport, combine);
}
/**
 * Walk the AST and, within each node's `children`, fold neighbouring children
 * together whenever `shouldMerge` approves the pair.
 *
 * Merging is cumulative: the result of one merge is compared against the next
 * sibling, so a run of three or more mergeable nodes becomes one node.
 *
 * @param {object} ast - AST to process.
 * @param {function(object, object): boolean} shouldMerge - Receives the last
 *   kept child and the next child; return true to merge them.
 * @param {function(object, object): object} mergeNode - Builds the node that
 *   replaces the pair.
 * @returns {object} A mapped copy of the AST with merged children.
 */
function mergeChildren(ast, shouldMerge, mergeNode) {
  return mapAst(ast, node => {
    if (!node.children) {
      return node;
    }

    const merged = [];
    node.children.forEach(child => {
      const previous = merged[merged.length - 1];
      if (previous && shouldMerge(previous, child)) {
        // Replace the last kept child with the merged pair.
        merged[merged.length - 1] = mergeNode(previous, child);
        return;
      }
      merged.push(child);
    });

    return Object.assign({}, node, { children: merged });
  });
}
/**
 * Merge adjacent "text" siblings into a single "text" node.
 *
 * The merged value is the plain concatenation of both values, and the merged
 * position runs from the first node's start to the second node's end.
 *
 * @param {object} ast - Markdown AST.
 * @returns {object} The AST returned by `mergeChildren`.
 */
function mergeContinuousTexts(ast) {
  const areBothTexts = (previous, current) =>
    previous.type === "text" && current.type === "text";

  const concatenate = (previous, current) => {
    const value = previous.value + current.value;
    const position = {
      start: previous.position.start,
      end: current.position.end
    };
    return { type: "text", value, position };
  };

  return mergeChildren(ast, areBothTexts, concatenate);
}
/**
 * Replace every "text" node with a "sentence" node whose children come from
 * `splitText`.
 *
 * When the text sits directly inside a paragraph, leading whitespace is
 * removed from the paragraph's first child and trailing whitespace from its
 * last child before splitting.
 *
 * @param {object} ast - Markdown AST.
 * @param {object} options - Printing options, forwarded to `splitText`.
 * @returns {object} A mapped copy of the AST with texts turned into sentences.
 */
function splitTextIntoSentences(ast, options) {
  return mapAst(ast, (node, index, [parent]) => {
    if (node.type !== "text") {
      return node;
    }

    let text = node.value;

    if (parent.type === "paragraph") {
      const isFirstChild = index === 0;
      const isLastChild = index === parent.children.length - 1;

      if (isFirstChild) {
        text = text.trimLeft();
      }
      if (isLastChild) {
        text = text.trimRight();
      }
    }

    const words = splitText(text, options);
    return { type: "sentence", position: node.position, children: words };
  });
}
/**
 * Flag every "code" node with `isIndented` and mark the lists that contain an
 * indented code block with `hasIndentedCodeblock`.
 *
 * A code block counts as indented when its source text starts (after an
 * optional newline) with at least four spaces or a tab.
 *
 * The incoming `ast` is left untouched: the code node is returned as an
 * annotated copy rather than being written to in place, so this step is safe
 * to run directly on a parser's output.
 *
 * @param {object} ast - Markdown AST.
 * @param {object} options - Printing options; `options.originalText` is
 *   sliced using the node's position offsets.
 * @returns {object} A mapped copy of the AST carrying the new flags.
 */
function transformIndentedCodeblockAndMarkItsParentList(ast, options) {
  return mapAst(ast, (node, index, parentStack) => {
    if (node.type !== "code") {
      return node;
    }

    // the first char may point to `\n`, e.g. `\n\t\tbar`, just ignore it
    const isIndented = /^\n?( {4,}|\t)/.test(
      options.originalText.slice(
        node.position.start.offset,
        node.position.end.offset
      )
    );

    if (isIndented) {
      // `parentStack` is ordered nearest parent first. Its entries are the
      // output copies handed over by `mapAst`, not nodes of the input tree,
      // so flagging them here is how the mark reaches the result.
      for (const parent of parentStack) {
        // no need to check checked items
        if (parent.hasIndentedCodeblock) {
          break;
        }
        if (parent.type === "list") {
          parent.hasIndentedCodeblock = true;
        }
      }
    }

    return Object.assign({}, node, { isIndented });
  });
}
/**
 * Annotate every non-empty "list" node with a boolean `isAligned` flag.
 *
 * A list nested inside a list that is not aligned is never aligned itself;
 * otherwise the decision is made by the local `isAligned` helper below.
 *
 * The incoming `ast` is left untouched: list nodes are returned as annotated
 * copies rather than being written to in place, so this step is safe to run
 * directly on a parser's output.
 *
 * @param {object} ast - Markdown AST.
 * @param {object} options - Printing options; `options.originalText` and
 *   `options.tabWidth` are read.
 * @returns {object} A mapped copy of the AST carrying the `isAligned` flags.
 */
function markAlignedList(ast, options) {
  return mapAst(ast, (node, index, parentStack) => {
    if (node.type !== "list" || node.children.length === 0) {
      return node;
    }

    // if one of its parents is not aligned, it's not possible to be aligned in sub-lists
    // (the entries of `parentStack` are the output copies, so they already
    // carry the `isAligned` flag computed for them)
    const hasUnalignedParentList = parentStack.some(
      parent => parent.type === "list" && !parent.isAligned
    );

    return Object.assign({}, node, {
      isAligned: !hasUnalignedParentList && isAligned(node)
    });
  });

  /**
   * Zero-based column where the first child of a list item starts, or -1 when
   * the item has no children.
   */
  function getListItemStart(listItem) {
    return listItem.children.length === 0
      ? -1
      : listItem.children[0].position.start.column - 1;
  }

  /**
   * Decide whether a non-empty list is "aligned", based on its first (and, if
   * present, second) item.
   *
   * NOTE(review): `getOrderedListItemInfo` is defined elsewhere; only its
   * `leadingSpaces` string is used here — presumably the whitespace between
   * the list marker and the content. Confirm in ./utils.
   */
  function isAligned(list) {
    if (!list.ordered) {
      /**
       * - 123
       * - 123
       */
      return true;
    }

    const [firstItem, secondItem] = list.children;

    const firstInfo = getOrderedListItemInfo(firstItem, options.originalText);

    if (firstInfo.leadingSpaces.length > 1) {
      /**
       * 1.   123
       *
       * 1.   123
       * 1. 123
       */
      return true;
    }

    const firstStart = getListItemStart(firstItem);

    if (firstStart === -1) {
      /**
       * 1.
       *
       * 1.
       * 1.
       */
      return false;
    }

    if (list.children.length === 1) {
      /**
       * aligned:
       *
       * 11. 123
       *
       * not aligned:
       *
       * 1. 123
       */
      return firstStart % options.tabWidth === 0;
    }

    const secondStart = getListItemStart(secondItem);

    if (firstStart !== secondStart) {
      /**
       * 11. 123
       * 1. 123
       *
       * 1. 123
       * 11. 123
       */
      return false;
    }

    if (firstStart % options.tabWidth === 0) {
      /**
       * 11. 123
       * 12. 123
       */
      return true;
    }

    /**
     * aligned:
     *
     * 11. 123
     * 1.  123
     *
     * not aligned:
     *
     * 1. 123
     * 2. 123
     */
    const secondInfo = getOrderedListItemInfo(secondItem, options.originalText);
    return secondInfo.leadingSpaces.length > 1;
  }
}
module.exports = preprocess;

View File

@ -3,6 +3,7 @@
const privateUtil = require("../common/util"); const privateUtil = require("../common/util");
const embed = require("./embed"); const embed = require("./embed");
const pragma = require("./pragma"); const pragma = require("./pragma");
const preprocess = require("./preprocess");
const { const {
builders: { builders: {
concat, concat,
@ -882,7 +883,8 @@ function clean(ast, newObj, parent) {
if ( if (
ast.type === "code" || ast.type === "code" ||
ast.type === "yaml" || ast.type === "yaml" ||
ast.type === "importExport" || ast.type === "import" ||
ast.type === "export" ||
ast.type === "jsx" ast.type === "jsx"
) { ) {
delete newObj.value; delete newObj.value;
@ -892,10 +894,15 @@ function clean(ast, newObj, parent) {
delete newObj.isAligned; delete newObj.isAligned;
} }
// for whitespace: "\n" and " " are considered the same // texts can be splitted or merged
if (ast.type === "whitespace" && ast.value === "\n") { if (ast.type === "text") {
newObj.value = " "; return null;
} }
if (ast.type === "inlineCode") {
newObj.value = ast.value.replace(/[ \t\n]+/g, " ");
}
// for insert pragma // for insert pragma
if ( if (
parent && parent &&
@ -924,6 +931,7 @@ function hasPrettierIgnore(path) {
} }
module.exports = { module.exports = {
preprocess,
print: genericPrint, print: genericPrint,
embed, embed,
massageAstNode: clean, massageAstNode: clean,

View File

@ -173,7 +173,23 @@ function getFencedCodeBlockValue(node, originalText) {
} }
} }
/**
 * Build a transformed copy of an AST using a pre-order traversal.
 *
 * For each node, `handler(node, index, parentStack)` is called first and its
 * result is shallow-copied; if that copy has `children`, each child is then
 * mapped recursively and the copy's `children` array is replaced.
 *
 * @param {object} ast - Root node of the tree.
 * @param {function(object, ?number, object[]): object} handler - Returns the
 *   replacement for a node. `index` is the node's position among its siblings
 *   (`null` for the root). `parentStack` lists the already-copied ancestors,
 *   nearest parent first (empty for the root).
 * @returns {object} The copied root node.
 */
function mapAst(ast, handler) {
  function visit(current, position, ancestors) {
    const copy = Object.assign({}, handler(current, position, ancestors));

    if (copy.children) {
      copy.children = copy.children.map((child, childIndex) =>
        visit(child, childIndex, [copy].concat(ancestors))
      );
    }

    return copy;
  }

  return visit(ast, null, []);
}
module.exports = { module.exports = {
mapAst,
splitText, splitText,
punctuationPattern, punctuationPattern,
getFencedCodeBlockValue, getFencedCodeBlockValue,

View File

@ -2,36 +2,10 @@
const createError = require("../common/parser-create-error"); const createError = require("../common/parser-create-error");
const { hasPragma } = require("./pragma"); const { hasPragma } = require("./pragma");
const { defineShortcut, mapNode } = require("./utils");
function defineShortcuts(node) {
switch (node.type) {
case "document":
defineShortcut(node, "head", () => node.children[0]);
defineShortcut(node, "body", () => node.children[1]);
break;
case "documentBody":
case "sequenceItem":
case "flowSequenceItem":
case "mappingKey":
case "mappingValue":
defineShortcut(node, "content", () => node.children[0]);
break;
case "mappingItem":
case "flowMappingItem":
defineShortcut(node, "key", () => node.children[0]);
defineShortcut(node, "value", () => node.children[1]);
break;
}
return node;
}
function parse(text) { function parse(text) {
try { try {
const root = mapNode( const root = require("yaml-unist-parser").parse(text);
require("yaml-unist-parser").parse(text),
defineShortcuts
);
/** /**
* suppress `comment not printed` error * suppress `comment not printed` error

View File

@ -16,7 +16,9 @@ const {
isLastDescendantNode, isLastDescendantNode,
isNextLineEmpty, isNextLineEmpty,
isNode, isNode,
isEmptyNode isEmptyNode,
defineShortcut,
mapNode
} = require("./utils"); } = require("./utils");
const docBuilders = require("../doc").builders; const docBuilders = require("../doc").builders;
const { const {
@ -37,6 +39,32 @@ const {
softline softline
} = docBuilders; } = docBuilders;
/**
 * Prepare a parsed YAML AST for printing by running `defineShortcuts` over it
 * through `mapNode`.
 *
 * NOTE(review): `mapNode` is defined in ./utils; presumably it applies the
 * callback to every node of the tree — confirm there.
 *
 * @param {object} ast - AST produced by the YAML parser.
 * @returns {object} Whatever `mapNode` returns for the given AST.
 */
function preprocess(ast) {
  const astWithShortcuts = mapNode(ast, defineShortcuts);
  return astWithShortcuts;
}
/**
 * Register named shortcuts to a node's positional children, depending on the
 * node type:
 *
 * - "document": `head` -> children[0], `body` -> children[1]
 * - "documentBody", "sequenceItem", "flowSequenceItem", "mappingKey",
 *   "mappingValue": `content` -> children[0]
 * - "mappingItem", "flowMappingItem": `key` -> children[0],
 *   `value` -> children[1]
 *
 * Every other node type is left alone.
 *
 * NOTE(review): `defineShortcut` is defined in ./utils; it is handed a getter
 * function, so presumably the shortcut is resolved lazily — confirm there.
 *
 * @param {object} node - AST node to decorate.
 * @returns {object} The same `node`.
 */
function defineShortcuts(node) {
  // Getter factory: the child is looked up when the getter is invoked.
  const childAt = position => () => node.children[position];
  const type = node.type;

  if (type === "document") {
    defineShortcut(node, "head", childAt(0));
    defineShortcut(node, "body", childAt(1));
  } else if (
    type === "documentBody" ||
    type === "sequenceItem" ||
    type === "flowSequenceItem" ||
    type === "mappingKey" ||
    type === "mappingValue"
  ) {
    defineShortcut(node, "content", childAt(0));
  } else if (type === "mappingItem" || type === "flowMappingItem") {
    defineShortcut(node, "key", childAt(0));
    defineShortcut(node, "value", childAt(1));
  }

  return node;
}
function genericPrint(path, options, print) { function genericPrint(path, options, print) {
const node = path.getValue(); const node = path.getValue();
const parentNode = path.getParentNode(); const parentNode = path.getParentNode();
@ -676,6 +704,7 @@ function clean(node, newNode /*, parent */) {
} }
module.exports = { module.exports = {
preprocess,
print: genericPrint, print: genericPrint,
massageAstNode: clean, massageAstNode: clean,
insertPragma insertPragma

View File

@ -12,10 +12,13 @@ const hardline = docBuilders.hardline;
const addAlignmentToDoc = docBuilders.addAlignmentToDoc; const addAlignmentToDoc = docBuilders.addAlignmentToDoc;
const docUtils = doc.utils; const docUtils = doc.utils;
function printAstToDoc(ast, options, addAlignmentSize) { function printAstToDoc(ast, options, addAlignmentSize = 0) {
addAlignmentSize = addAlignmentSize || 0;
const printer = options.printer; const printer = options.printer;
if (printer.preprocess) {
ast = printer.preprocess(ast, options);
}
const cache = new Map(); const cache = new Map();
function printGenerically(path, args) { function printGenerically(path, args) {
@ -59,14 +62,6 @@ function printAstToDoc(ast, options, addAlignmentSize) {
} }
docUtils.propagateBreaks(doc); docUtils.propagateBreaks(doc);
if (
options.parser === "json" ||
options.parser === "json5" ||
options.parser === "json-stringify"
) {
doc = concat([doc, hardline]);
}
return doc; return doc;
} }