prettier/src/language-markdown/parser-markdown.js

149 lines
3.9 KiB
JavaScript
Raw Normal View History

feat: support markdown (#2943) * feat(markdown): inital implementation * feat(markdown): support strong * fix: add missing default value * feat(markdown): support inlineCode * feat: support delete * feat: support link * feat: support image * feat: support blockquote * feat: support heading * feat: support code * feat: support yaml * feat: support html * feat: support list * feat: support thematicBreak * feat: support table * feat: support linkReference * feat: support imageReference * feat: support definition * feat: support footnote * feat: support footnoteReference * feat: support footnoteDefinition * test(cli): update snapshots * refactor: extract SINGLE_LINE_NODE_TYPES * refactor: printChildren * fix: correct newlines * test: add trailing newline * fix: blockquote formatting * fix: node types * fix: break line correctly * fix: remove unnecessary properties to make AST_COMPARE happy * fix: escape `|` in tableCell content * fix: unexpected line break * fix: ast difference from loose list * fix: html break lines * refactor: fix linting * fix: normalize ast * fix: escape specific chars * test: add more tests * fix: build markdown parser * chore: remove unnecessary *.log * fix: escape html entity * feat: support prettier-ignore * fix: line break for non-loose listItem * feat: support formatting `code` based on `lang` * fix: add `jsx` and `tsx` * fix: use multiparser * refactor: fix linting * test: update test case 😉 * feat: switch to `_` style emphasis * fix: sequence list should use different prefix * test: add tests * fix: do not print additional new line after `prettier-ignore` * fix(list): enforce `1.` to avoid unnecessary git diff * feat: enable `commonmark` option * feat: support `break` * fix: escape backslash * refactor: escape html entity using backslash * fix: respect autolink-style link * feat: support md`...` and markdown`...` * docs: replace ands with commas * fix: respect indented code block * fix: respect html entity * docs: add docs for modified 
MDAST * fix: inlineCode is breakline-able * feat: support backtick in inlineCode * feat: support a-lot-of-backtick in fenced code block * feat: use `~~~`-style code block in js template * fix: respect escaped chars * fix: use `*`-style emphasis for invalid `_`-style output * test: add test cases * fix: use `- - -`-style thematicBreak to avoid conflict with yaml * fix: remain the same content for linkReference identifier * fix: `inlineCode` gap can be a line break * fix: `html` should not print trailing spaces if it's in root * refactor: fix typo * fix: wrap `definition`'s url if there's whitespace * fix: remove unnecessary whitespace at the end of paragraph * fix: fix: remove unnecessary whitespace at the start of paragraph * fix: setence children length is possible 0 * fix: support continuous ordered list * fix: do not print addtional hardline after loose list * fix: use double-backtick style for single-backtick value in inlineCode * fix: support nested emphasis * fix: support space-url in link/image * fix: escape `)` in link/image url * fix: support single-quote in link/image/definition title * fix: respect alt in image/imageReference * fix: use `*`-style thematicBreak in list * fix: loose/tight list linebreaks * fix: print third linebreak before indented code block with a tight list in the previous * test: move bug cases * fix: remove unnecessary linebreaks * refactor: fix typo
2017-10-12 01:46:44 +03:00
"use strict";
const remarkParse = require("remark-parse");
const unified = require("unified");
2018-05-24 21:30:45 +03:00
const pragma = require("./pragma");
const parseFrontMatter = require("../utils/front-matter");
const { mapAst, INLINE_NODE_WRAPPER_TYPES } = require("./utils");
const mdx = require("./mdx");
const remarkMath = require("remark-math");
const htmlParser = require("../language-html/parser-html").parsers.html;
feat: support markdown (#2943) * feat(markdown): inital implementation * feat(markdown): support strong * fix: add missing default value * feat(markdown): support inlineCode * feat: support delete * feat: support link * feat: support image * feat: support blockquote * feat: support heading * feat: support code * feat: support yaml * feat: support html * feat: support list * feat: support thematicBreak * feat: support table * feat: support linkReference * feat: support imageReference * feat: support definition * feat: support footnote * feat: support footnoteReference * feat: support footnoteDefinition * test(cli): update snapshots * refactor: extract SINGLE_LINE_NODE_TYPES * refactor: printChildren * fix: correct newlines * test: add trailing newline * fix: blockquote formatting * fix: node types * fix: break line correctly * fix: remove unnecessary properties to make AST_COMPARE happy * fix: escape `|` in tableCell content * fix: unexpected line break * fix: ast difference from loose list * fix: html break lines * refactor: fix linting * fix: normalize ast * fix: escape specific chars * test: add more tests * fix: build markdown parser * chore: remove unnecessary *.log * fix: escape html entity * feat: support prettier-ignore * fix: line break for non-loose listItem * feat: support formatting `code` based on `lang` * fix: add `jsx` and `tsx` * fix: use multiparser * refactor: fix linting * test: update test case 😉 * feat: switch to `_` style emphasis * fix: sequence list should use different prefix * test: add tests * fix: do not print additional new line after `prettier-ignore` * fix(list): enforce `1.` to avoid unnecessary git diff * feat: enable `commonmark` option * feat: support `break` * fix: escape backslash * refactor: escape html entity using backslash * fix: respect autolink-style link * feat: support md`...` and markdown`...` * docs: replace ands with commas * fix: respect indented code block * fix: respect html entity * docs: add docs for modified 
MDAST * fix: inlineCode is breakline-able * feat: support backtick in inlineCode * feat: support a-lot-of-backtick in fenced code block * feat: use `~~~`-style code block in js template * fix: respect escaped chars * fix: use `*`-style emphasis for invalid `_`-style output * test: add test cases * fix: use `- - -`-style thematicBreak to avoid conflict with yaml * fix: remain the same content for linkReference identifier * fix: `inlineCode` gap can be a line break * fix: `html` should not print trailing spaces if it's in root * refactor: fix typo * fix: wrap `definition`'s url if there's whitespace * fix: remove unnecessary whitespace at the end of paragraph * fix: fix: remove unnecessary whitespace at the start of paragraph * fix: setence children length is possible 0 * fix: support continuous ordered list * fix: do not print addtional hardline after loose list * fix: use double-backtick style for single-backtick value in inlineCode * fix: support nested emphasis * fix: support space-url in link/image * fix: escape `)` in link/image url * fix: support single-quote in link/image/definition title * fix: respect alt in image/imageReference * fix: use `*`-style thematicBreak in list * fix: loose/tight list linebreaks * fix: print third linebreak before indented code block with a tight list in the previous * test: move bug cases * fix: remove unnecessary linebreaks * refactor: fix typo
2017-10-12 01:46:44 +03:00
/**
 * based on [MDAST](https://github.com/syntax-tree/mdast) with the following modifications:
 *
 * 1. restore unescaped character (Text)
 * 2. merge continuous Texts
 * 3. replace whitespaces in InlineCode#value with one whitespace
 *    reference: http://spec.commonmark.org/0.25/#example-605
 * 4. split Text into Sentence
 *
 * interface Word { value: string }
 * interface Whitespace { value: string }
 * interface Sentence { children: Array<Word | Whitespace> }
 * interface InlineCode { children: Array<Sentence> }
 */
/**
 * Builds the `parse` function used by the markdown and MDX parsers.
 *
 * @param {{ isMDX: boolean }} options - when `isMDX` is truthy the remark-parse
 *   options gain `blocks: [mdx.BLOCKS_REGEX]` and the `mdx.esSyntax` and
 *   `htmlToJsx` plugins are registered; otherwise `identity` is registered in
 *   their place so the plugin chain keeps the same shape.
 * @returns {function(string): Object} a function that parses `text` and returns
 *   the result of `processor.runSync` on the parsed tree.
 */
function createParse({ isMDX }) {
  return text => {
    // A new processor is assembled for every call.
    const processor = unified()
      .use(
        remarkParse,
        Object.assign(
          {
            footnotes: true,
            commonmark: true
          },
          // `Object.assign` ignores a `false` source, so nothing is added
          // for plain markdown.
          isMDX && { blocks: [mdx.BLOCKS_REGEX] }
        )
      )
      .use(frontMatter)
      .use(remarkMath)
      .use(isMDX ? mdx.esSyntax : identity)
      .use(liquid)
      .use(isMDX ? htmlToJsx : identity);
    return processor.runSync(processor.parse(text));
  };
}
/**
 * Returns its argument unchanged.
 *
 * `createParse` passes this to `.use()` as a stand-in when an MDX-only plugin
 * is not wanted.
 *
 * @param {*} value - any value.
 * @returns {*} the same `value`.
 */
function identity(value) {
  return value;
}
/**
 * Plugin (registered through `.use()` in `createParse` for MDX) that turns
 * `html` nodes into `jsx` nodes.
 *
 * An `html` node is left untouched when its value matches
 * `mdx.COMMENT_REGEX` or when its parent's type is listed in
 * `INLINE_NODE_WRAPPER_TYPES`. Every non-`html` node is returned as is.
 *
 * @returns {function(Object): *} a transformer that forwards the tree and the
 *   per-node callback to `mapAst`.
 */
function htmlToJsx() {
  const hasInlineWrapperParent = parent =>
    INLINE_NODE_WRAPPER_TYPES.indexOf(parent.type) !== -1;

  const toJsx = (node, _index, [parent]) => {
    if (node.type !== "html") {
      return node;
    }
    if (node.value.match(mdx.COMMENT_REGEX)) {
      return node;
    }
    if (hasInlineWrapperParent(parent)) {
      return node;
    }

    const htmlChildren = htmlParser.parse(node.value).children;

    // find out if there are adjacent JSX elements which should be allowed in mdx alike in markdown
    if (htmlChildren.length <= 1) {
      // Zero or one parsed child: keep the node whole, only retag it.
      return Object.assign({}, node, { type: "jsx" });
    }

    // Several parsed children: emit one node per child.
    // NOTE(review): presumably `mapAst` accepts an array result in place of
    // a single node - confirm in ./utils.
    const pieces = [];
    htmlChildren.forEach(child => {
      // The span offsets are used to index into `node.value`.
      const span = child.sourceSpan;
      const value = node.value.slice(span.start.offset, span.end.offset);
      if (!value) {
        return;
      }
      pieces.push({
        type: child.type === "element" ? "jsx" : child.type,
        value,
        position: span
      });
    });
    return pieces;
  };

  return ast => mapAst(ast, toJsx);
}
/**
 * Plugin that registers a `frontMatter` block tokenizer on the parser.
 *
 * Must be invoked with `this` bound to an object exposing `Parser.prototype`
 * with `blockMethods` and `blockTokenizers`. `blockMethods` is replaced by a
 * new array with "frontMatter" at the head, so the existing array is not
 * mutated.
 */
function frontMatter() {
  const proto = this.Parser.prototype;
  proto.blockMethods = ["frontMatter"].concat(proto.blockMethods);
  proto.blockTokenizers.frontMatter = tokenizer;

  /**
   * Eats the raw front matter found by `parseFrontMatter` and emits the
   * parsed front-matter object as the node. Returns `undefined` when
   * `parseFrontMatter` reports no front matter.
   */
  function tokenizer(eat, value) {
    const parsed = parseFrontMatter(value);

    if (parsed.frontMatter) {
      return eat(parsed.frontMatter.raw)(parsed.frontMatter);
    }
  }
  // NOTE(review): presumably limits this tokenizer to the very start of the
  // document - confirm against the remark-parse tokenizer options.
  tokenizer.onlyAtStart = true;
}
2018-05-15 20:22:41 +03:00
/**
 * Plugin that registers an inline `liquid` tokenizer so that `{% ... %}` and
 * `{{ ... }}` spans become `liquidNode` nodes.
 *
 * Must be invoked with `this` bound to an object exposing `Parser.prototype`
 * with `inlineMethods` and `inlineTokenizers`. "liquid" is spliced into
 * `inlineMethods` (in place) at the index of "text".
 */
function liquid() {
  const parserProto = this.Parser.prototype;
  const inlineMethods = parserProto.inlineMethods;
  const textIndex = inlineMethods.indexOf("text");
  inlineMethods.splice(textIndex, 0, "liquid");

  /** Reports where the next "{" is, i.e. where a liquid span could start. */
  function locateLiquid(value, fromIndex) {
    return value.indexOf("{", fromIndex);
  }

  /** Eats a leading liquid tag or output expression, if `value` starts with one. */
  function tokenizeLiquid(eat, value) {
    const found = value.match(/^({%[\s\S]*?%}|{{[\s\S]*?}})/);
    if (!found) {
      return;
    }
    const raw = found[0];
    return eat(raw)({
      type: "liquidNode",
      value: raw
    });
  }

  tokenizeLiquid.locator = locateLiquid;
  parserProto.inlineTokenizers.liquid = tokenizeLiquid;
}
const baseParser = {
2018-05-24 21:30:45 +03:00
astFormat: "mdast",
hasPragma: pragma.hasPragma,
locStart: node => node.position.start.offset,
locEnd: node => node.position.end.offset,
preprocess: text => text.replace(/\n\s+$/, "\n") // workaround for https://github.com/remarkjs/remark/issues/350
2018-05-24 21:30:45 +03:00
};
const markdownParser = Object.assign({}, baseParser, {
parse: createParse({ isMDX: false })
});
const mdxParser = Object.assign({}, baseParser, {
parse: createParse({ isMDX: true })
});
2018-05-24 21:30:45 +03:00
module.exports = {
parsers: {
remark: markdownParser,
2018-05-24 21:30:45 +03:00
// TODO: Delete this in 2.0
markdown: markdownParser,
mdx: mdxParser
2018-05-24 21:30:45 +03:00
}
};