feat: add printer.preprocess (#5041)

Sometimes we need to transform the AST to make it easier to print, but this is currently done in the parser (markdown and yaml), which makes the output AST harder to use for external users (custom parsers). Adding `printer.preprocess` solves this issue.

And also this way we could move the trailing newline for json from `ast-to-doc.js` to `language-js`.
master
Ika 2018-09-03 23:27:50 +08:00 committed by GitHub
parent 6288704c41
commit 50c9115632
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 378 additions and 324 deletions

View File

@ -182,6 +182,12 @@ A plugin can implement how a pragma comment is inserted in the resulting code wh
function insertPragma(text: string): string;
```
_(Optional)_ The `preprocess` function can transform the AST returned by the parser before it is passed to the `print` function.
```ts
function preprocess(ast: AST, options: object): AST;
```
### `options`
`options` is an object containing the custom options your plugin supports.

View File

@ -0,0 +1,17 @@
"use strict";
/**
 * Prepare a parsed AST for printing.
 *
 * For the JSON-family parsers ("json", "json5", "json-stringify") the AST is
 * wrapped in a synthetic "JsonRoot" node: the wrapper is a shallow copy of the
 * original root (so it keeps the root's own properties, including `comments`)
 * whose `node` property holds another shallow copy with `comments` emptied.
 * For every other parser the AST is returned untouched.
 *
 * @param {object} ast - root node produced by the parser
 * @param {object} options - formatting options; only `options.parser` is read
 * @returns {object} the wrapped root for JSON parsers, otherwise `ast` itself
 */
function preprocess(ast, options) {
  const jsonParsers = ["json", "json5", "json-stringify"];
  if (jsonParsers.indexOf(options.parser) === -1) {
    return ast;
  }

  const innerNode = Object.assign({}, ast, { comments: [] });
  return Object.assign({}, ast, { type: "JsonRoot", node: innerNode });
}
module.exports = preprocess;

View File

@ -1,10 +1,13 @@
"use strict";
const { concat, hardline, indent, join } = require("../doc").builders;
const preprocess = require("./preprocess");
function genericPrint(path, options, print) {
const node = path.getValue();
switch (node.type) {
case "JsonRoot":
return concat([path.call(print, "node"), hardline]);
case "ArrayExpression":
return node.elements.length === 0
? "[]"
@ -71,6 +74,7 @@ function clean(node, newNode /*, parent*/) {
}
module.exports = {
preprocess,
print: genericPrint,
massageAstNode: clean
};

View File

@ -34,6 +34,7 @@ const clean = require("./clean");
const insertPragma = require("./pragma").insertPragma;
const handleComments = require("./comments");
const pathNeedsParens = require("./needs-parens");
const preprocess = require("./preprocess");
const {
builders: {
@ -342,6 +343,8 @@ function printPathNoParens(path, options, print, args) {
let parts = [];
switch (n.type) {
case "JsonRoot":
return concat([path.call(print, "node"), hardline]);
case "File":
// Print @babel/parser's InterpreterDirective here so that
// leading comments on the `Program` node get printed after the hashbang.
@ -5979,6 +5982,7 @@ function rawText(node) {
}
module.exports = {
preprocess,
print: genericPrint,
embed,
insertPragma,

View File

@ -38,7 +38,7 @@ const tokenizeEsSyntax = (eat, value) => {
if (isExport(subvalue) || isImport(subvalue)) {
return eat(subvalue)({
type: "importExport",
type: isExport(subvalue) ? "export" : "import",
value: subvalue
});
}

View File

@ -4,12 +4,9 @@ const remarkParse = require("remark-parse");
const unified = require("unified");
const pragma = require("./pragma");
const parseFrontMatter = require("../utils/front-matter");
const { getOrderedListItemInfo, splitText } = require("./utils");
const { mapAst } = require("./utils");
const mdx = require("./mdx");
// 0x0 ~ 0x10ffff
const isSingleCharRegex = /^([\u0000-\uffff]|[\ud800-\udbff][\udc00-\udfff])$/;
/**
* based on [MDAST](https://github.com/syntax-tree/mdast) with following modifications:
*
@ -25,7 +22,7 @@ const isSingleCharRegex = /^([\u0000-\uffff]|[\ud800-\udbff][\udc00-\udfff])$/;
* interface InlineCode { children: Array<Sentence> }
*/
function createParse({ isMDX }) {
return (text, parsers, opts) => {
return text => {
const processor = unified()
.use(
remarkParse,
@ -40,40 +37,18 @@ function createParse({ isMDX }) {
.use(frontMatter)
.use(isMDX ? mdx.esSyntax : identity)
.use(liquid)
.use(restoreUnescapedCharacter(text))
.use(mergeContinuousTexts)
.use(transformInlineCode)
.use(transformIndentedCodeblockAndMarkItsParentList(text))
.use(markAlignedList(text, opts))
.use(splitTextIntoSentences(opts))
.use(isMDX ? htmlToJsx : identity)
.use(isMDX ? mergeContinuousImportExport : identity);
.use(isMDX ? htmlToJsx : identity);
return processor.runSync(processor.parse(text));
};
}
/**
 * Build a transformed copy of a tree in pre-order.
 *
 * `handler(node, index, parentStack)` is invoked for every node; whatever it
 * returns is shallow-copied and, when the copy has `children`, each child is
 * processed in turn. `parentStack` lists the already-copied ancestors, nearest
 * first. The root is visited with `index === null` and an empty stack.
 *
 * @param {object} ast - root node of the tree
 * @param {function} handler - per-node transform
 * @returns {object} the root of the new tree
 */
function map(ast, handler) {
  function visit(current, position, ancestors) {
    const stack = ancestors || [];
    const copy = Object.assign({}, handler(current, position, stack));

    if (copy.children) {
      copy.children = copy.children.map((child, childPosition) =>
        visit(child, childPosition, [copy].concat(stack))
      );
    }

    return copy;
  }

  return visit(ast, null, null);
}
/**
 * Return the argument unchanged.
 *
 * @param {*} x - any value
 * @returns {*} the same value
 */
function identity(x) {
return x;
}
function htmlToJsx() {
return ast =>
map(ast, (node, index, [parent]) => {
mapAst(ast, (node, index, [parent]) => {
if (
node.type !== "html" ||
/^<!--[\s\S]*-->$/.test(node.value) ||
@ -87,114 +62,6 @@ function htmlToJsx() {
});
}
/**
 * Create a transform that fuses adjacent "importExport" siblings into one
 * node. The merged value joins both values with a blank line, and the merged
 * position spans from the first node's start to the second node's end.
 *
 * @returns {*} whatever `mergeChildren` returns for these two callbacks
 */
function mergeContinuousImportExport() {
  const isImportExport = candidate => candidate.type === "importExport";

  const areBothImportExport = (left, right) =>
    isImportExport(left) && isImportExport(right);

  const joinPair = (left, right) => ({
    type: "importExport",
    value: left.value + "\n\n" + right.value,
    position: {
      start: left.position.start,
      end: right.position.end
    }
  });

  return mergeChildren(areBothImportExport, joinPair);
}
/**
 * Create a transform that collapses every run of whitespace inside
 * "inlineCode" node values to a single space. Other nodes are passed
 * through as-is.
 *
 * @returns {function} transform taking an AST and returning the mapped AST
 */
function transformInlineCode() {
  const collapseWhitespace = node =>
    node.type === "inlineCode"
      ? Object.assign({}, node, { value: node.value.replace(/\s+/g, " ") })
      : node;

  return ast => map(ast, collapseWhitespace);
}
/**
 * Create a plugin that puts back the source spelling of single-character
 * "text" nodes whose source span is longer or shorter than the value itself.
 * "*" and "_" are deliberately left alone (they are handled in the printer).
 *
 * @param {string} originalText - the text the AST was parsed from
 * @returns {function} plugin factory yielding an AST-to-AST transform
 */
function restoreUnescapedCharacter(originalText) {
  const restore = node => {
    if (node.type !== "text") {
      return node;
    }

    // Only look at the position once the cheaper checks have passed.
    const spanDiffersFromValue =
      node.value !== "*" &&
      node.value !== "_" && // handle these two cases in printer
      isSingleCharRegex.test(node.value) &&
      node.position.end.offset - node.position.start.offset !==
        node.value.length;

    const value = spanDiffersFromValue
      ? originalText.slice(
          node.position.start.offset,
          node.position.end.offset
        )
      : node.value;

    return Object.assign({}, node, { value });
  };

  return () => ast => map(ast, restore);
}
/**
 * Create a transform that walks the tree and, inside every node that has
 * `children`, folds neighbouring children together.
 *
 * @param {function} shouldMerge - `(previous, child) => boolean`; asked for
 *   each child against the last child kept so far
 * @param {function} mergeNode - `(previous, child) => node`; its result
 *   replaces the last kept child
 * @returns {function} transform taking an AST and returning the mapped AST
 */
function mergeChildren(shouldMerge, mergeNode) {
  const mergeSiblings = node => {
    if (!node.children) {
      return node;
    }

    const kept = [];
    node.children.forEach(child => {
      const previous = kept[kept.length - 1];
      if (previous && shouldMerge(previous, child)) {
        kept[kept.length - 1] = mergeNode(previous, child);
      } else {
        kept.push(child);
      }
    });

    return Object.assign({}, node, { children: kept });
  };

  return ast => map(ast, mergeSiblings);
}
/**
 * Create a transform that fuses adjacent "text" siblings into one node. The
 * merged value is the plain concatenation of both values, and the merged
 * position spans from the first node's start to the second node's end.
 *
 * @returns {*} whatever `mergeChildren` returns for these two callbacks
 */
function mergeContinuousTexts() {
  const areBothText = (left, right) =>
    left.type === "text" && right.type === "text";

  const joinPair = (left, right) => ({
    type: "text",
    value: left.value + right.value,
    position: {
      start: left.position.start,
      end: right.position.end
    }
  });

  return mergeChildren(areBothText, joinPair);
}
/**
 * Create a plugin that replaces every "text" node with a "sentence" node
 * whose children come from `splitText(value, options)`.
 *
 * When the text sits directly in a paragraph, leading whitespace is removed
 * from the paragraph's first child and trailing whitespace from its last.
 *
 * @param {object} options - forwarded to `splitText`
 * @returns {function} plugin factory yielding an AST-to-AST transform
 */
function splitTextIntoSentences(options) {
  const toSentence = (node, index, [parent]) => {
    if (node.type !== "text") {
      return node;
    }

    const inParagraph = parent.type === "paragraph";
    const isFirst = inParagraph && index === 0;
    const isLast = inParagraph && index === parent.children.length - 1;

    const withoutLeading = isFirst ? node.value.trimLeft() : node.value;
    const text = isLast ? withoutLeading.trimRight() : withoutLeading;

    return {
      type: "sentence",
      position: node.position,
      children: splitText(text, options)
    };
  };

  return () => ast => map(ast, toSentence);
}
function frontMatter() {
const proto = this.Parser.prototype;
proto.blockMethods = ["frontMatter"].concat(proto.blockMethods);
@ -231,149 +98,6 @@ function liquid() {
};
}
/**
 * Create a plugin that flags "code" nodes with `isIndented` (true when the
 * node's source text starts with four or more spaces or a tab) and, for
 * indented code, sets `hasIndentedCodeblock` on enclosing "list" ancestors.
 *
 * @param {string} originalText - the text the AST was parsed from
 * @returns {function} plugin factory yielding an AST-to-AST transform
 */
function transformIndentedCodeblockAndMarkItsParentList(originalText) {
  const markCode = (node, index, parentStack) => {
    if (node.type !== "code") {
      return node;
    }

    const source = originalText.slice(
      node.position.start.offset,
      node.position.end.offset
    );
    // the first char may point to `\n`, e.g. `\n\t\tbar`, just ignore it
    node.isIndented = /^\n?( {4,}|\t)/.test(source);

    if (!node.isIndented) {
      return node;
    }

    for (const ancestor of parentStack) {
      // an ancestor that is already flagged means everything above it was
      // handled on an earlier visit
      if (ancestor.hasIndentedCodeblock) {
        break;
      }
      if (ancestor.type === "list") {
        ancestor.hasIndentedCodeblock = true;
      }
    }

    return node;
  };

  return () => ast => map(ast, markCode);
}
/**
 * Create a plugin that sets `isAligned` on every non-empty "list" node.
 *
 * A list nested (at any depth) inside a list that is not aligned is marked
 * not aligned without further checks; otherwise the result of `isAligned`
 * below is stored.
 *
 * @param {string} originalText - the text the AST was parsed from; passed to
 *   `getOrderedListItemInfo`
 * @param {object} options - only `options.tabWidth` is read
 * @returns {function} plugin factory yielding an AST-to-AST transform
 */
function markAlignedList(originalText, options) {
return () => ast =>
map(ast, (node, index, parentStack) => {
if (node.type === "list" && node.children.length !== 0) {
// if one of its parents is not aligned, it's not possible to be aligned in sub-lists
for (let i = 0; i < parentStack.length; i++) {
const parent = parentStack[i];
if (parent.type === "list" && !parent.isAligned) {
node.isAligned = false;
return node;
}
}
node.isAligned = isAligned(node);
}
return node;
});
// Zero-based start column of the item's first child, or -1 when the item has no children.
function getListItemStart(listItem) {
return listItem.children.length === 0
? -1
: listItem.children[0].position.start.column - 1;
}
// Decide alignment from the first one or two items of the list.
// The checks below are order-dependent: each early return assumes the
// previous conditions did not match.
// NOTE(review): `leadingSpaces` presumably holds the whitespace between the
// list marker and the item content - confirm against getOrderedListItemInfo.
function isAligned(list) {
if (!list.ordered) {
/**
* - 123
* - 123
*/
return true;
}
const [firstItem, secondItem] = list.children;
const firstInfo = getOrderedListItemInfo(firstItem, originalText);
if (firstInfo.leadingSpaces.length > 1) {
/**
* 1. 123
*
* 1. 123
* 1. 123
*/
return true;
}
const firstStart = getListItemStart(firstItem);
if (firstStart === -1) {
/**
* 1.
*
* 1.
* 1.
*/
return false;
}
if (list.children.length === 1) {
/**
* aligned:
*
* 11. 123
*
* not aligned:
*
* 1. 123
*/
return firstStart % options.tabWidth === 0;
}
const secondStart = getListItemStart(secondItem);
if (firstStart !== secondStart) {
/**
* 11. 123
* 1. 123
*
* 1. 123
* 11. 123
*/
return false;
}
if (firstStart % options.tabWidth === 0) {
/**
* 11. 123
* 12. 123
*/
return true;
}
/**
* aligned:
*
* 11. 123
* 1. 123
*
* not aligned:
*
* 1. 123
* 2. 123
*/
const secondInfo = getOrderedListItemInfo(secondItem, originalText);
return secondInfo.leadingSpaces.length > 1;
}
}
const baseParser = {
astFormat: "mdast",
hasPragma: pragma.hasPragma,

View File

@ -0,0 +1,277 @@
"use strict";
const { getOrderedListItemInfo, mapAst, splitText } = require("./utils");
// 0x0 ~ 0x10ffff
const isSingleCharRegex = /^([\u0000-\uffff]|[\ud800-\udbff][\udc00-\udfff])$/;
/**
 * Run the markdown AST through every pre-print transform, in order, each
 * step receiving the previous step's result.
 *
 * @param {object} ast - root node produced by the parser
 * @param {object} options - formatting options, handed to the steps that
 *   need them
 * @returns {object} the transformed AST
 */
function preprocess(ast, options) {
  const steps = [
    tree => restoreUnescapedCharacter(tree, options),
    tree => mergeContinuousTexts(tree),
    tree => transformInlineCode(tree),
    tree => transformIndentedCodeblockAndMarkItsParentList(tree, options),
    tree => markAlignedList(tree, options),
    tree => splitTextIntoSentences(tree, options),
    tree => transformImportExport(tree),
    tree => mergeContinuousImportExport(tree)
  ];

  return steps.reduce((tree, step) => step(tree), ast);
}
/**
 * Retype every "import" and "export" node as "importExport", keeping all
 * other properties. Nodes of any other type are passed through as-is.
 *
 * @param {object} ast - root node
 * @returns {object} the mapped AST
 */
function transformImportExport(ast) {
  const retype = node => {
    const isImportOrExport = node.type === "import" || node.type === "export";
    return isImportOrExport
      ? Object.assign({}, node, { type: "importExport" })
      : node;
  };

  return mapAst(ast, retype);
}
/**
 * Collapse every run of whitespace inside "inlineCode" node values to a
 * single space. Other nodes are passed through as-is.
 *
 * @param {object} ast - root node
 * @returns {object} the mapped AST
 */
function transformInlineCode(ast) {
  const collapseWhitespace = node =>
    node.type === "inlineCode"
      ? Object.assign({}, node, { value: node.value.replace(/\s+/g, " ") })
      : node;

  return mapAst(ast, collapseWhitespace);
}
/**
 * Put back the source spelling of single-character "text" nodes whose source
 * span is longer or shorter than the value itself. "*" and "_" are
 * deliberately left alone (they are handled in the printer). Every text node
 * is returned as a fresh shallow copy.
 *
 * @param {object} ast - root node
 * @param {object} options - `options.originalText` is the parsed source
 * @returns {object} the mapped AST
 */
function restoreUnescapedCharacter(ast, options) {
  const restore = node => {
    if (node.type !== "text") {
      return node;
    }

    // Only look at the position once the cheaper checks have passed.
    const spanDiffersFromValue =
      node.value !== "*" &&
      node.value !== "_" && // handle these two cases in printer
      isSingleCharRegex.test(node.value) &&
      node.position.end.offset - node.position.start.offset !==
        node.value.length;

    const value = spanDiffersFromValue
      ? options.originalText.slice(
          node.position.start.offset,
          node.position.end.offset
        )
      : node.value;

    return Object.assign({}, node, { value });
  };

  return mapAst(ast, restore);
}
/**
 * Fuse adjacent "importExport" siblings into one node. The merged value
 * joins both values with a blank line, and the merged position spans from
 * the first node's start to the second node's end.
 *
 * @param {object} ast - root node
 * @returns {object} the AST returned by `mergeChildren`
 */
function mergeContinuousImportExport(ast) {
  const isImportExport = candidate => candidate.type === "importExport";

  const areBothImportExport = (left, right) =>
    isImportExport(left) && isImportExport(right);

  const joinPair = (left, right) => ({
    type: "importExport",
    value: left.value + "\n\n" + right.value,
    position: {
      start: left.position.start,
      end: right.position.end
    }
  });

  return mergeChildren(ast, areBothImportExport, joinPair);
}
/**
 * Walk the tree and, inside every node that has `children`, fold
 * neighbouring children together.
 *
 * @param {object} ast - root node
 * @param {function} shouldMerge - `(previous, child) => boolean`; asked for
 *   each child against the last child kept so far
 * @param {function} mergeNode - `(previous, child) => node`; its result
 *   replaces the last kept child
 * @returns {object} the mapped AST
 */
function mergeChildren(ast, shouldMerge, mergeNode) {
  const mergeSiblings = node => {
    if (!node.children) {
      return node;
    }

    const kept = [];
    node.children.forEach(child => {
      const previous = kept[kept.length - 1];
      if (previous && shouldMerge(previous, child)) {
        kept[kept.length - 1] = mergeNode(previous, child);
      } else {
        kept.push(child);
      }
    });

    return Object.assign({}, node, { children: kept });
  };

  return mapAst(ast, mergeSiblings);
}
/**
 * Fuse adjacent "text" siblings into one node. The merged value is the plain
 * concatenation of both values, and the merged position spans from the first
 * node's start to the second node's end.
 *
 * @param {object} ast - root node
 * @returns {object} the AST returned by `mergeChildren`
 */
function mergeContinuousTexts(ast) {
  const areBothText = (left, right) =>
    left.type === "text" && right.type === "text";

  const joinPair = (left, right) => ({
    type: "text",
    value: left.value + right.value,
    position: {
      start: left.position.start,
      end: right.position.end
    }
  });

  return mergeChildren(ast, areBothText, joinPair);
}
/**
 * Replace every "text" node with a "sentence" node whose children come from
 * `splitText(value, options)`.
 *
 * When the text sits directly in a paragraph, leading whitespace is removed
 * from the paragraph's first child and trailing whitespace from its last.
 *
 * @param {object} ast - root node
 * @param {object} options - forwarded to `splitText`
 * @returns {object} the mapped AST
 */
function splitTextIntoSentences(ast, options) {
  const toSentence = (node, index, [parent]) => {
    if (node.type !== "text") {
      return node;
    }

    const inParagraph = parent.type === "paragraph";
    const isFirst = inParagraph && index === 0;
    const isLast = inParagraph && index === parent.children.length - 1;

    const withoutLeading = isFirst ? node.value.trimLeft() : node.value;
    const text = isLast ? withoutLeading.trimRight() : withoutLeading;

    return {
      type: "sentence",
      position: node.position,
      children: splitText(text, options)
    };
  };

  return mapAst(ast, toSentence);
}
/**
 * Flag "code" nodes with `isIndented` (true when the node's source text
 * starts with four or more spaces or a tab) and, for indented code, set
 * `hasIndentedCodeblock` on enclosing "list" ancestors.
 *
 * @param {object} ast - root node
 * @param {object} options - `options.originalText` is the parsed source
 * @returns {object} the mapped AST
 */
function transformIndentedCodeblockAndMarkItsParentList(ast, options) {
  const markCode = (node, index, parentStack) => {
    if (node.type !== "code") {
      return node;
    }

    const source = options.originalText.slice(
      node.position.start.offset,
      node.position.end.offset
    );
    // the first char may point to `\n`, e.g. `\n\t\tbar`, just ignore it
    node.isIndented = /^\n?( {4,}|\t)/.test(source);

    if (!node.isIndented) {
      return node;
    }

    for (const ancestor of parentStack) {
      // an ancestor that is already flagged means everything above it was
      // handled on an earlier visit
      if (ancestor.hasIndentedCodeblock) {
        break;
      }
      if (ancestor.type === "list") {
        ancestor.hasIndentedCodeblock = true;
      }
    }

    return node;
  };

  return mapAst(ast, markCode);
}
/**
 * Set `isAligned` on every non-empty "list" node of the AST.
 *
 * A list nested (at any depth) inside a list that is not aligned is marked
 * not aligned without further checks; otherwise the result of `isAligned`
 * below is stored.
 *
 * @param {object} ast - root node
 * @param {object} options - `options.originalText` is passed to
 *   `getOrderedListItemInfo`; `options.tabWidth` is used for column checks
 * @returns {object} the mapped AST
 */
function markAlignedList(ast, options) {
return mapAst(ast, (node, index, parentStack) => {
if (node.type === "list" && node.children.length !== 0) {
// if one of its parents is not aligned, it's not possible to be aligned in sub-lists
for (let i = 0; i < parentStack.length; i++) {
const parent = parentStack[i];
if (parent.type === "list" && !parent.isAligned) {
node.isAligned = false;
return node;
}
}
node.isAligned = isAligned(node);
}
return node;
});
// Zero-based start column of the item's first child, or -1 when the item has no children.
function getListItemStart(listItem) {
return listItem.children.length === 0
? -1
: listItem.children[0].position.start.column - 1;
}
// Decide alignment from the first one or two items of the list.
// The checks below are order-dependent: each early return assumes the
// previous conditions did not match.
// NOTE(review): `leadingSpaces` presumably holds the whitespace between the
// list marker and the item content - confirm against getOrderedListItemInfo.
function isAligned(list) {
if (!list.ordered) {
/**
* - 123
* - 123
*/
return true;
}
const [firstItem, secondItem] = list.children;
const firstInfo = getOrderedListItemInfo(firstItem, options.originalText);
if (firstInfo.leadingSpaces.length > 1) {
/**
* 1. 123
*
* 1. 123
* 1. 123
*/
return true;
}
const firstStart = getListItemStart(firstItem);
if (firstStart === -1) {
/**
* 1.
*
* 1.
* 1.
*/
return false;
}
if (list.children.length === 1) {
/**
* aligned:
*
* 11. 123
*
* not aligned:
*
* 1. 123
*/
return firstStart % options.tabWidth === 0;
}
const secondStart = getListItemStart(secondItem);
if (firstStart !== secondStart) {
/**
* 11. 123
* 1. 123
*
* 1. 123
* 11. 123
*/
return false;
}
if (firstStart % options.tabWidth === 0) {
/**
* 11. 123
* 12. 123
*/
return true;
}
/**
* aligned:
*
* 11. 123
* 1. 123
*
* not aligned:
*
* 1. 123
* 2. 123
*/
const secondInfo = getOrderedListItemInfo(secondItem, options.originalText);
return secondInfo.leadingSpaces.length > 1;
}
}
module.exports = preprocess;

View File

@ -3,6 +3,7 @@
const privateUtil = require("../common/util");
const embed = require("./embed");
const pragma = require("./pragma");
const preprocess = require("./preprocess");
const {
builders: {
concat,
@ -882,7 +883,8 @@ function clean(ast, newObj, parent) {
if (
ast.type === "code" ||
ast.type === "yaml" ||
ast.type === "importExport" ||
ast.type === "import" ||
ast.type === "export" ||
ast.type === "jsx"
) {
delete newObj.value;
@ -892,10 +894,15 @@ function clean(ast, newObj, parent) {
delete newObj.isAligned;
}
// for whitespace: "\n" and " " are considered the same
if (ast.type === "whitespace" && ast.value === "\n") {
newObj.value = " ";
// texts can be split or merged
if (ast.type === "text") {
return null;
}
if (ast.type === "inlineCode") {
newObj.value = ast.value.replace(/[ \t\n]+/g, " ");
}
// for insert pragma
if (
parent &&
@ -924,6 +931,7 @@ function hasPrettierIgnore(path) {
}
module.exports = {
preprocess,
print: genericPrint,
embed,
massageAstNode: clean,

View File

@ -173,7 +173,23 @@ function getFencedCodeBlockValue(node, originalText) {
}
}
/**
 * Build a transformed copy of a tree in pre-order.
 *
 * `handler(node, index, parentStack)` is invoked for every node; whatever it
 * returns is shallow-copied and, when the copy has `children`, each child is
 * processed in turn. `parentStack` lists the already-copied ancestors, nearest
 * first. The root is visited with `index === null` and an empty stack.
 *
 * @param {object} ast - root node of the tree
 * @param {function} handler - per-node transform
 * @returns {object} the root of the new tree
 */
function mapAst(ast, handler) {
  function visit(current, position, ancestors) {
    const stack = ancestors || [];
    const copy = Object.assign({}, handler(current, position, stack));

    if (copy.children) {
      copy.children = copy.children.map((child, childPosition) =>
        visit(child, childPosition, [copy].concat(stack))
      );
    }

    return copy;
  }

  return visit(ast, null, null);
}
module.exports = {
mapAst,
splitText,
punctuationPattern,
getFencedCodeBlockValue,

View File

@ -2,36 +2,10 @@
const createError = require("../common/parser-create-error");
const { hasPragma } = require("./pragma");
const { defineShortcut, mapNode } = require("./utils");
/**
 * Attach named accessors for well-known children, based on the node type:
 *   - "document": `head` (children[0]) and `body` (children[1])
 *   - "documentBody", "sequenceItem", "flowSequenceItem", "mappingKey",
 *     "mappingValue": `content` (children[0])
 *   - "mappingItem", "flowMappingItem": `key` (children[0]) and `value`
 *     (children[1])
 * Nodes of any other type are left untouched.
 *
 * @param {object} node - node to decorate (passed to `defineShortcut`)
 * @returns {object} the same node
 */
function defineShortcuts(node) {
  const childAt = position => () => node.children[position];
  const contentTypes = [
    "documentBody",
    "sequenceItem",
    "flowSequenceItem",
    "mappingKey",
    "mappingValue"
  ];
  const type = node.type;

  if (type === "document") {
    defineShortcut(node, "head", childAt(0));
    defineShortcut(node, "body", childAt(1));
  } else if (type === "mappingItem" || type === "flowMappingItem") {
    defineShortcut(node, "key", childAt(0));
    defineShortcut(node, "value", childAt(1));
  } else if (contentTypes.indexOf(type) !== -1) {
    defineShortcut(node, "content", childAt(0));
  }

  return node;
}
function parse(text) {
try {
const root = mapNode(
require("yaml-unist-parser").parse(text),
defineShortcuts
);
const root = require("yaml-unist-parser").parse(text);
/**
* suppress `comment not printed` error

View File

@ -16,7 +16,9 @@ const {
isLastDescendantNode,
isNextLineEmpty,
isNode,
isEmptyNode
isEmptyNode,
defineShortcut,
mapNode
} = require("./utils");
const docBuilders = require("../doc").builders;
const {
@ -37,6 +39,32 @@ const {
softline
} = docBuilders;
/**
 * Prepare a YAML AST for printing by running `defineShortcuts` over it via
 * `mapNode`.
 *
 * @param {object} ast - root node produced by the parser
 * @returns {object} whatever `mapNode` returns
 */
function preprocess(ast) {
  const astWithShortcuts = mapNode(ast, defineShortcuts);
  return astWithShortcuts;
}
/**
 * Attach named accessors for well-known children, based on the node type:
 *   - "document": `head` (children[0]) and `body` (children[1])
 *   - "documentBody", "sequenceItem", "flowSequenceItem", "mappingKey",
 *     "mappingValue": `content` (children[0])
 *   - "mappingItem", "flowMappingItem": `key` (children[0]) and `value`
 *     (children[1])
 * Nodes of any other type are left untouched.
 *
 * @param {object} node - node to decorate (passed to `defineShortcut`)
 * @returns {object} the same node
 */
function defineShortcuts(node) {
  const childAt = position => () => node.children[position];
  const contentTypes = [
    "documentBody",
    "sequenceItem",
    "flowSequenceItem",
    "mappingKey",
    "mappingValue"
  ];
  const type = node.type;

  if (type === "document") {
    defineShortcut(node, "head", childAt(0));
    defineShortcut(node, "body", childAt(1));
  } else if (type === "mappingItem" || type === "flowMappingItem") {
    defineShortcut(node, "key", childAt(0));
    defineShortcut(node, "value", childAt(1));
  } else if (contentTypes.indexOf(type) !== -1) {
    defineShortcut(node, "content", childAt(0));
  }

  return node;
}
function genericPrint(path, options, print) {
const node = path.getValue();
const parentNode = path.getParentNode();
@ -676,6 +704,7 @@ function clean(node, newNode /*, parent */) {
}
module.exports = {
preprocess,
print: genericPrint,
massageAstNode: clean,
insertPragma

View File

@ -12,10 +12,13 @@ const hardline = docBuilders.hardline;
const addAlignmentToDoc = docBuilders.addAlignmentToDoc;
const docUtils = doc.utils;
function printAstToDoc(ast, options, addAlignmentSize) {
addAlignmentSize = addAlignmentSize || 0;
function printAstToDoc(ast, options, addAlignmentSize = 0) {
const printer = options.printer;
if (printer.preprocess) {
ast = printer.preprocess(ast, options);
}
const cache = new Map();
function printGenerically(path, args) {
@ -59,14 +62,6 @@ function printAstToDoc(ast, options, addAlignmentSize) {
}
docUtils.propagateBreaks(doc);
if (
options.parser === "json" ||
options.parser === "json5" ||
options.parser === "json-stringify"
) {
doc = concat([doc, hardline]);
}
return doc;
}