2017-10-12 01:46:44 +03:00
|
|
|
"use strict";
|
|
|
|
|
|
|
|
const remarkParse = require("remark-parse");
|
|
|
|
const unified = require("unified");
|
2018-05-15 04:17:15 +03:00
|
|
|
const parseFrontmatter = require("../utils/front-matter");
|
2017-12-26 04:23:50 +03:00
|
|
|
const util = require("../common/util");
|
2017-10-12 01:46:44 +03:00
|
|
|
|
|
|
|
/**
 * Parse markdown source text into a syntax tree.
 *
 * The tree is based on [MDAST](https://github.com/syntax-tree/mdast) with the
 * following modifications:
 *
 * 1. restore unescaped character (Text)
 * 2. merge continuous Texts
 * 3. replace whitespaces in InlineCode#value with one whitespace
 *    reference: http://spec.commonmark.org/0.25/#example-605
 * 4. split Text into Sentence
 *
 * interface Word { value: string }
 * interface Whitespace { value: string }
 * interface Sentence { children: Array<Word | Whitespace> }
 * interface InlineCode { children: Array<Sentence> }
 *
 * @param {string} text - markdown source to parse
 * @returns {Object} the root node produced by running every registered
 *   transform over the parsed tree
 */
function parse(text /*, parsers, opts*/) {
  // The transforms are registered in the same order as the numbered list
  // above; `restoreUnescapedCharacter` needs the raw source, so it is built
  // per call with `text` closed over.
  const processor = unified()
    .use(remarkParse, { footnotes: true, commonmark: true })
    .use(frontmatter)
    .use(restoreUnescapedCharacter(text))
    .use(mergeContinuousTexts)
    .use(transformInlineCode)
    .use(splitText);

  const tree = processor.parse(text);
  return processor.runSync(tree);
}
|
|
|
|
|
|
|
|
/**
 * Build a new tree by applying `handler` to every node in pre-order.
 *
 * The handler's result is shallow-copied, so the input tree is not mutated by
 * `map` itself. Children are taken from the handler's result (not from the
 * original node), which lets a handler replace or reshape `children` before
 * the traversal descends into them.
 *
 * @param {Object} ast - root node of the tree to transform
 * @param {function(Object, ?number, ?Object): Object} handler - called with
 *   `(node, index, parentNode)`; `index` and `parentNode` are `null` for the
 *   root. `parentNode` is the already-transformed copy of the parent.
 * @returns {Object} the transformed copy of the tree
 */
function map(ast, handler) {
  function visit(node, index, parentNode) {
    const mapped = Object.assign({}, handler(node, index, parentNode));

    if (mapped.children) {
      // `mapped.children` still refers to the pre-transform array while the
      // callbacks run; it is only replaced once every child has been visited.
      mapped.children = mapped.children.map((child, childIndex) =>
        visit(child, childIndex, mapped)
      );
    }

    return mapped;
  }

  return visit(ast, null, null);
}
|
|
|
|
|
2017-11-11 19:29:59 +03:00
|
|
|
/**
 * Plugin that normalizes whitespace inside inline code.
 *
 * Every run of whitespace in an `inlineCode` node's `value` is replaced by a
 * single space; all other nodes pass through unchanged.
 *
 * @returns {function(Object): Object} transformer taking and returning a tree
 */
function transformInlineCode() {
  return ast =>
    map(ast, node => {
      if (node.type !== "inlineCode") {
        return node;
      }

      const value = node.value.replace(/\s+/g, " ");
      return Object.assign({}, node, { value });
    });
}
|
|
|
|
|
|
|
|
/**
 * Create a plugin that restores the source spelling of single-character text
 * nodes.
 *
 * A `text` node whose value is exactly one character but whose position spans
 * more than one character of the source has its value replaced by the
 * corresponding slice of `originalText`. The values "*" and "_" are left
 * untouched here.
 *
 * @param {string} originalText - the exact source that was parsed; node
 *   offsets are used to slice into it
 * @returns {function(): function(Object): Object} plugin that yields the
 *   tree transformer
 */
function restoreUnescapedCharacter(originalText) {
  return () => ast =>
    map(ast, node => {
      if (node.type !== "text") {
        return node;
      }

      const value = node.value;
      // "*" and "_" are handled in the printer instead.
      if (value === "*" || value === "_" || value.length !== 1) {
        return Object.assign({}, node, { value });
      }

      const start = node.position.start.offset;
      const end = node.position.end.offset;
      // A one-character value covering more than one source character means
      // the parser changed the text, so take the original spelling back.
      const restored = end - start > 1 ? originalText.slice(start, end) : value;

      return Object.assign({}, node, { value: restored });
    });
}
|
|
|
|
|
|
|
|
/**
 * Plugin that merges adjacent `text` siblings into a single `text` node.
 *
 * The merged node's value is the concatenation of both values, and its
 * position runs from the start of the first node to the end of the last.
 * Nodes without `children` pass through unchanged.
 *
 * @returns {function(Object): Object} transformer taking and returning a tree
 */
function mergeContinuousTexts() {
  return ast =>
    map(ast, node => {
      if (!node.children) {
        return node;
      }

      const children = node.children.reduce((merged, child) => {
        const previous = merged[merged.length - 1];
        const bothText =
          previous && previous.type === "text" && child.type === "text";

        if (!bothText) {
          merged.push(child);
          return merged;
        }

        // Replace the trailing text node with one that also covers `child`.
        merged[merged.length - 1] = {
          type: "text",
          value: previous.value + child.value,
          position: {
            start: previous.position.start,
            end: child.position.end
          }
        };
        return merged;
      }, []);

      return Object.assign({}, node, { children });
    });
}
|
|
|
|
|
|
|
|
/**
 * Plugin that converts every `text` node into a `sentence` node.
 *
 * The sentence keeps the text node's position, and its children are the
 * result of `util.splitText` applied to the text value. Inside a paragraph,
 * leading whitespace is trimmed from the first child and trailing whitespace
 * from the last child before splitting.
 *
 * @returns {function(Object): Object} transformer taking and returning a tree
 */
function splitText() {
  return ast =>
    map(ast, (node, index, parentNode) => {
      if (node.type !== "text") {
        return node;
      }

      let value = node.value;

      // `map` passes `null` as the parent of the root node, so guard before
      // reading `type`.
      if (parentNode && parentNode.type === "paragraph") {
        const lastIndex = parentNode.children.length - 1;
        if (index === 0) {
          value = value.trimLeft();
        }
        if (index === lastIndex) {
          value = value.trimRight();
        }
      }

      return {
        type: "sentence",
        position: node.position,
        children: util.splitText(value)
      };
    });
}
|
|
|
|
|
2018-05-15 04:17:15 +03:00
|
|
|
/**
 * Plugin that registers a `frontmatter` block tokenizer on the parser.
 *
 * Must be invoked with `this` bound to an object exposing `Parser`. The
 * tokenizer is put at the head of `blockMethods`, so it is listed before the
 * existing block methods, and it is flagged `onlyAtStart`.
 *
 * @this {{ Parser: Function }}
 * @returns {void}
 */
function frontmatter() {
  const parserProto = this.Parser.prototype;

  // Build a new array instead of unshifting, so an array shared with other
  // parser instances is left untouched.
  parserProto.blockMethods = ["frontmatter"].concat(parserProto.blockMethods);
  parserProto.blockTokenizers.frontmatter = tokenizer;

  /**
   * Consume a front matter block if `value` has one.
   *
   * @param {Function} eat - called with the matched text; the function it
   *   returns is called with the node to add
   * @param {string} value - remaining source text
   * @returns {Object|undefined} the result of eating the front matter, or
   *   `undefined` when `parseFrontmatter` reports none
   */
  function tokenizer(eat, value) {
    const parsed = parseFrontmatter(value);

    if (!parsed.frontmatter) {
      return undefined;
    }

    return eat(parsed.frontmatter)({
      type: "frontmatter",
      value: parsed.frontmatter
    });
  }

  tokenizer.onlyAtStart = true;
}
|
|
|
|
|
2017-10-12 01:46:44 +03:00
|
|
|
module.exports = parse;
|