2017-10-12 01:46:44 +03:00
|
|
|
"use strict";
|
|
|
|
|
|
|
|
const remarkParse = require("remark-parse");
|
|
|
|
const unified = require("unified");
|
2018-05-24 21:30:45 +03:00
|
|
|
const pragma = require("./pragma");
|
2018-05-21 17:02:09 +03:00
|
|
|
const parseFrontMatter = require("../utils/front-matter");
|
2018-09-02 11:20:22 +03:00
|
|
|
const { getOrderedListItemInfo, splitText } = require("./utils");
|
2018-08-13 17:23:09 +03:00
|
|
|
const mdx = require("./mdx");
|
2017-10-12 01:46:44 +03:00
|
|
|
|
2018-07-12 16:42:39 +03:00
|
|
|
// 0x0 ~ 0x10ffff
|
|
|
|
// Matches a string that consists of exactly one UTF-16 code unit (\u0000-\uffff)
// or exactly one surrogate pair (a high surrogate followed by a low surrogate).
const isSingleCharRegex = /^([\u0000-\uffff]|[\ud800-\udbff][\udc00-\udfff])$/;
|
|
|
|
|
2017-10-12 01:46:44 +03:00
|
|
|
/**
|
|
|
|
* based on [MDAST](https://github.com/syntax-tree/mdast) with the following modifications:
|
2017-11-07 11:42:38 +03:00
|
|
|
*
|
2017-10-12 01:46:44 +03:00
|
|
|
* 1. restore unescaped character (Text)
|
|
|
|
* 2. merge continuous Texts
|
2017-11-11 19:29:59 +03:00
|
|
|
* 3. replace whitespaces in InlineCode#value with one whitespace
|
|
|
|
* reference: http://spec.commonmark.org/0.25/#example-605
|
2017-10-12 01:46:44 +03:00
|
|
|
* 4. split Text into Sentence
|
2017-11-07 11:42:38 +03:00
|
|
|
*
|
2017-10-12 01:46:44 +03:00
|
|
|
* interface Word { value: string }
|
|
|
|
* interface Whitespace { value: string }
|
|
|
|
* interface Sentence { children: Array<Word | Whitespace> }
|
|
|
|
* interface InlineCode { children: Array<Sentence> }
|
|
|
|
*/
|
2018-08-13 17:23:09 +03:00
|
|
|
// Builds the `parse(text, parsers, opts)` function for either plain Markdown
// (`isMDX` falsy) or MDX (`isMDX` truthy).
//
// The returned function feeds `text` to a unified processor made of
// remark-parse (with `footnotes` and `commonmark` enabled, plus
// `blocks: [mdx.BLOCKS_REGEX]` in MDX mode) followed by the plugins listed
// below, and returns the result of `runSync` on the parsed tree.
// The `parsers` argument is accepted but not used here.
function createParse({ isMDX }) {
  return (text, parsers, opts) => {
    const remarkOptions = { footnotes: true, commonmark: true };
    if (isMDX) {
      remarkOptions.blocks = [mdx.BLOCKS_REGEX];
    }

    // Plugins are registered in exactly this order; in non-MDX mode the
    // MDX-only slots are filled with the no-op `identity` plugin.
    const plugins = [
      frontMatter,
      isMDX ? mdx.esSyntax : identity,
      liquid,
      restoreUnescapedCharacter(text),
      mergeContinuousTexts,
      transformInlineCode,
      transformIndentedCodeblockAndMarkItsParentList(text),
      markAlignedList(text, opts),
      splitTextIntoSentences(opts),
      isMDX ? htmlToJsx : identity,
      isMDX ? mergeContinuousImportExport : identity
    ];

    const processor = plugins.reduce(
      (chain, plugin) => chain.use(plugin),
      unified().use(remarkParse, remarkOptions)
    );

    return processor.runSync(processor.parse(text));
  };
}
|
|
|
|
|
|
|
|
/**
 * Pre-order tree mapper.
 *
 * Calls `handler(node, index, parentStack)` on a node, shallow-copies the
 * returned object, and then maps the copy's `children` (if any) the same way.
 *
 * - `index` is the child's position in its parent's `children`
 *   (`null` for the root).
 * - `parentStack` lists the already-copied ancestors, nearest parent first
 *   (an empty array for the root).
 *
 * @returns the copied root node
 */
function map(ast, handler) {
  const visit = (node, index, ancestors) => {
    // Copy the handler's result so that attaching new children never
    // mutates the object the handler returned.
    const copy = Object.assign({}, handler(node, index, ancestors));

    if (copy.children) {
      copy.children = copy.children.map((child, childIndex) =>
        visit(child, childIndex, [copy].concat(ancestors))
      );
    }

    return copy;
  };

  return visit(ast, null, []);
}
|
|
|
|
|
2018-08-13 17:23:09 +03:00
|
|
|
/**
 * Returns its argument unchanged.
 * Passed to `.use()` in place of plugins that are disabled for the current mode.
 */
function identity(x) {
  return x;
}
|
|
|
|
|
|
|
|
/**
 * Plugin that retypes `html` nodes as `jsx`.
 *
 * A node is left untouched when it is not of type `html`, when its whole
 * value is an HTML comment (`<!-- ... -->`), or when its direct parent is a
 * `paragraph` (i.e. inline html).
 */
function htmlToJsx() {
  const isHtmlComment = value => /^<!--[\s\S]*-->$/.test(value);

  return ast =>
    map(ast, (node, index, parentStack) => {
      const shouldConvert =
        node.type === "html" &&
        !isHtmlComment(node.value) &&
        // inline html stays as it is
        parentStack[0].type !== "paragraph";

      return shouldConvert ? Object.assign({}, node, { type: "jsx" }) : node;
    });
}
|
|
|
|
|
|
|
|
/**
 * Plugin that fuses adjacent `importExport` siblings into one node.
 * The merged value joins both values with a blank line ("\n\n") and the
 * merged position spans from the first node's start to the second node's end.
 */
function mergeContinuousImportExport() {
  const isImportExport = node => node.type === "importExport";

  const areBothImportExport = (prevNode, node) =>
    isImportExport(prevNode) && isImportExport(node);

  const joinNodes = (prevNode, node) => {
    return {
      type: "importExport",
      value: prevNode.value + "\n\n" + node.value,
      position: { start: prevNode.position.start, end: node.position.end }
    };
  };

  return mergeChildren(areBothImportExport, joinNodes);
}
|
|
|
|
|
2017-11-11 19:29:59 +03:00
|
|
|
/**
 * Plugin that collapses every run of whitespace inside an `inlineCode`
 * node's value into a single space. All other nodes pass through unchanged.
 */
function transformInlineCode() {
  const collapseWhitespace = code => code.replace(/\s+/g, " ");

  return ast =>
    map(
      ast,
      node =>
        node.type === "inlineCode"
          ? Object.assign({}, node, { value: collapseWhitespace(node.value) })
          : node
    );
}
|
|
|
|
|
|
|
|
/**
 * Creates a plugin that, for `text` nodes, swaps the parsed value for the
 * matching slice of `originalText` when all of the following hold:
 *
 * - the value is neither "*" nor "_" (those two are handled in the printer),
 * - the value is a single character according to `isSingleCharRegex`,
 * - the node's source span (end offset - start offset) differs in length
 *   from the value.
 *
 * Every `text` node is returned as a shallow copy; other nodes are returned
 * as they are.
 */
function restoreUnescapedCharacter(originalText) {
  const restore = node => {
    if (node.type !== "text") {
      return node;
    }

    const parsedValue = node.value;
    let value = parsedValue;

    const isCandidate =
      parsedValue !== "*" &&
      parsedValue !== "_" && // handle these two cases in printer
      isSingleCharRegex.test(parsedValue);

    if (isCandidate) {
      const { start, end } = node.position;
      if (end.offset - start.offset !== parsedValue.length) {
        value = originalText.slice(start.offset, end.offset);
      }
    }

    return Object.assign({}, node, { value });
  };

  return () => ast => map(ast, restore);
}
|
|
|
|
|
2018-08-13 17:23:09 +03:00
|
|
|
/**
 * Builds a tree transformer that merges neighbouring children.
 *
 * For every node with `children`, the children are walked left to right;
 * whenever `shouldMerge(previous, child)` is truthy, the previously kept
 * child is replaced by `mergeNode(previous, child)`, otherwise the child is
 * appended. Nodes without `children` are returned as they are.
 */
function mergeChildren(shouldMerge, mergeNode) {
  return ast =>
    map(ast, node => {
      if (!node.children) {
        return node;
      }

      const merged = [];
      node.children.forEach(child => {
        const lastIndex = merged.length - 1;
        const previous = merged[lastIndex];

        if (previous && shouldMerge(previous, child)) {
          merged[lastIndex] = mergeNode(previous, child);
          return;
        }

        merged.push(child);
      });

      return Object.assign({}, node, { children: merged });
    });
}
|
|
|
|
|
2018-08-13 17:23:09 +03:00
|
|
|
/**
 * Plugin that fuses adjacent `text` siblings into one `text` node whose
 * value is the concatenation of both values and whose position spans from
 * the first node's start to the second node's end.
 */
function mergeContinuousTexts() {
  const isText = node => node.type === "text";

  const areBothText = (prevNode, node) => isText(prevNode) && isText(node);

  const joinTexts = (prevNode, node) => {
    return {
      type: "text",
      value: prevNode.value + node.value,
      position: { start: prevNode.position.start, end: node.position.end }
    };
  };

  return mergeChildren(areBothText, joinTexts);
}
|
|
|
|
|
2018-09-02 11:20:22 +03:00
|
|
|
/**
 * Creates a plugin that replaces each `text` node with a `sentence` node
 * whose children come from `splitText(value, options)`.
 *
 * When the text's direct parent is a `paragraph`, leading whitespace is
 * trimmed from the paragraph's first child and trailing whitespace from its
 * last child before splitting.
 */
function splitTextIntoSentences(options) {
  const toSentence = (node, index, parentStack) => {
    if (node.type !== "text") {
      return node;
    }

    let text = node.value;
    const parentNode = parentStack[0];

    if (parentNode.type === "paragraph") {
      const isFirstChild = index === 0;
      if (isFirstChild) {
        text = text.trimLeft();
      }

      const isLastChild = index === parentNode.children.length - 1;
      if (isLastChild) {
        text = text.trimRight();
      }
    }

    return {
      type: "sentence",
      position: node.position,
      children: splitText(text, options)
    };
  };

  return () => ast => map(ast, toSentence);
}
|
|
|
|
|
2018-05-21 17:02:09 +03:00
|
|
|
/**
 * Plugin (called with the processor as `this`) that registers a
 * `frontMatter` block tokenizer on the parser prototype, ahead of all other
 * block methods.
 *
 * The tokenizer is flagged `onlyAtStart` and, when `parseFrontMatter`
 * reports front matter, eats its `raw` text and emits the front matter
 * object as the node. Otherwise it returns nothing.
 */
function frontMatter() {
  const tokenizer = function(eat, value) {
    const frontMatterNode = parseFrontMatter(value).frontMatter;

    if (!frontMatterNode) {
      return;
    }

    return eat(frontMatterNode.raw)(frontMatterNode);
  };
  tokenizer.onlyAtStart = true;

  const parserProto = this.Parser.prototype;
  // concat builds a new array, so the previous `blockMethods` array is not mutated
  parserProto.blockMethods = ["frontMatter"].concat(parserProto.blockMethods);
  parserProto.blockTokenizers.frontMatter = tokenizer;
}
|
|
|
|
|
2018-05-15 20:22:41 +03:00
|
|
|
/**
 * Plugin (called with the processor as `this`) that registers a `liquid`
 * inline tokenizer right before the `text` inline method.
 *
 * The tokenizer recognises `{% ... %}` and `{{ ... }}` at the start of the
 * remaining input (non-greedy, may span lines) and emits a `liquidNode`
 * holding the matched source. Its locator reports the next "{" position.
 */
function liquid() {
  const LIQUID_TAG_REGEX = /^({%[\s\S]*?%}|{{[\s\S]*?}})/;

  const tokenizer = function(eat, value) {
    const found = value.match(LIQUID_TAG_REGEX);

    if (!found) {
      return;
    }

    const source = found[0];
    return eat(source)({
      type: "liquidNode",
      value: source
    });
  };

  tokenizer.locator = (value, fromIndex) => value.indexOf("{", fromIndex);

  const parserProto = this.Parser.prototype;
  const inlineMethods = parserProto.inlineMethods;
  const textPosition = inlineMethods.indexOf("text");
  // inserted in place, directly in front of "text"
  inlineMethods.splice(textPosition, 0, "liquid");
  parserProto.inlineTokenizers.liquid = tokenizer;
}
|
|
|
|
|
2018-07-27 04:48:09 +03:00
|
|
|
/**
 * Creates a plugin that annotates `code` nodes and their list ancestors.
 *
 * - Each `code` node gets `isIndented`: whether its source slice of
 *   `originalText` starts (after an optional newline) with four or more
 *   spaces or a tab.
 * - For an indented code block, every ancestor of type `list` gets
 *   `hasIndentedCodeblock = true`, walking from the nearest parent upwards
 *   and stopping at the first ancestor that is already flagged.
 *
 * Nodes are annotated in place and returned as they are.
 */
function transformIndentedCodeblockAndMarkItsParentList(originalText) {
  // the first char may point to `\n`, e.g. `\n\t\tbar`, just ignore it
  const INDENTED_CODE_REGEX = /^\n?( {4,}|\t)/;

  const flagAncestorLists = ancestors => {
    for (const ancestor of ancestors) {
      // already flagged, so everything above it has been handled before
      if (ancestor.hasIndentedCodeblock) {
        return;
      }

      if (ancestor.type === "list") {
        ancestor.hasIndentedCodeblock = true;
      }
    }
  };

  const annotate = (node, index, parentStack) => {
    if (node.type !== "code") {
      return node;
    }

    const rawCode = originalText.slice(
      node.position.start.offset,
      node.position.end.offset
    );
    const isIndented = INDENTED_CODE_REGEX.test(rawCode);
    node.isIndented = isIndented;

    if (isIndented) {
      flagAncestorLists(parentStack);
    }

    return node;
  };

  return () => ast => map(ast, annotate);
}
|
|
|
|
|
|
|
|
/**
 * Creates a plugin that sets `isAligned` on every non-empty `list` node.
 *
 * A list nested (at any depth) inside a list ancestor whose `isAligned` is
 * falsy is always marked `false`; otherwise the flag is computed by the
 * checks described in `computeAlignment` below.
 *
 * `originalText` is forwarded to `getOrderedListItemInfo`, and
 * `options.tabWidth` is the column multiple used for the tab-stop checks.
 */
function markAlignedList(originalText, options) {
  // zero-based column of the item's first child, or -1 for an empty item
  const contentColumnOf = listItem =>
    listItem.children.length === 0
      ? -1
      : listItem.children[0].position.start.column - 1;

  const isOnTabStop = column => column % options.tabWidth === 0;

  // `leadingSpaces` comes from getOrderedListItemInfo in ./utils
  // NOTE(review): presumably the whitespace after the list marker; confirm there
  const hasWideLeadingSpaces = listItem =>
    getOrderedListItemInfo(listItem, originalText).leadingSpaces.length > 1;

  // Only the first two items of the list are inspected.
  const computeAlignment = list => {
    // unordered lists always count as aligned
    if (!list.ordered) {
      return true;
    }

    const firstItem = list.children[0];
    const secondItem = list.children[1];

    // first item reports more than one leading space: aligned
    if (hasWideLeadingSpaces(firstItem)) {
      return true;
    }

    // first item has no content at all: not aligned
    const firstColumn = contentColumnOf(firstItem);
    if (firstColumn === -1) {
      return false;
    }

    // single item: aligned only when its content starts on a tab stop
    if (list.children.length === 1) {
      return isOnTabStop(firstColumn);
    }

    // contents of the first two items start in different columns: not aligned
    const secondColumn = contentColumnOf(secondItem);
    if (firstColumn !== secondColumn) {
      return false;
    }

    // same column and that column is a tab stop: aligned
    if (isOnTabStop(firstColumn)) {
      return true;
    }

    // same column, not a tab stop: aligned only when the second item
    // reports more than one leading space
    return hasWideLeadingSpaces(secondItem);
  };

  const annotate = (node, index, parentStack) => {
    const isNonEmptyList = node.type === "list" && node.children.length !== 0;
    if (!isNonEmptyList) {
      return node;
    }

    // if one of its parents is not aligned, it's not possible to be aligned in sub-lists
    const hasUnalignedListAncestor = parentStack.some(
      parent => parent.type === "list" && !parent.isAligned
    );

    node.isAligned = hasUnalignedListAncestor ? false : computeAlignment(node);
    return node;
  };

  return () => ast => map(ast, annotate);
}
|
|
|
|
|
2018-08-13 17:23:09 +03:00
|
|
|
// Settings shared by every parser exported from this module; each concrete
// parser adds its own `parse` function on top of these.
const baseParser = {
  astFormat: "mdast",
  hasPragma: pragma.hasPragma,
  // node locations are the character offsets recorded in `node.position`
  locStart(node) {
    return node.position.start.offset;
  },
  locEnd(node) {
    return node.position.end.offset;
  },
  // workaround for https://github.com/remarkjs/remark/issues/350
  preprocess(text) {
    return text.replace(/\n\s+$/, "\n");
  }
};

// Copies the shared settings and attaches a parse function for the given mode.
const createParser = isMDX =>
  Object.assign({}, baseParser, { parse: createParse({ isMDX }) });

const markdownParser = createParser(false);

const mdxParser = createParser(true);

const parsers = {
  remark: markdownParser,
  // TODO: Delete this in 2.0
  markdown: markdownParser,
  mdx: mdxParser
};

module.exports = { parsers };
|