diff --git a/package.json b/package.json index e20ce126..22b8e54e 100644 --- a/package.json +++ b/package.json @@ -25,6 +25,7 @@ "dashify": "0.2.2", "dedent": "0.7.0", "diff": "3.2.0", + "domhandler": "2.4.2", "editorconfig": "0.15.0", "editorconfig-to-prettier": "0.0.6", "emoji-regex": "6.5.1", @@ -37,6 +38,7 @@ "globby": "6.1.0", "graphql": "0.13.2", "html-tag-names": "1.1.2", + "htmlparser2": "3.9.2", "ignore": "3.3.7", "jest-docblock": "23.2.0", "json-stable-stringify": "1.0.1", @@ -47,7 +49,6 @@ "minimatch": "3.0.4", "minimist": "1.2.0", "normalize-path": "3.0.0", - "parse5": "5.0.0", "parse5-htmlparser2-tree-adapter": "5.0.0", "postcss-less": "1.1.5", "postcss-media-query-parser": "0.2.3", diff --git a/scripts/build/config.js b/scripts/build/config.js index ebdf0ce5..e1fb4eb5 100644 --- a/scripts/build/config.js +++ b/scripts/build/config.js @@ -68,7 +68,7 @@ const parsers = [ } }, { - input: "src/language-html/parser-parse5.js", + input: "src/language-html/parser-html.js", target: "universal" }, { diff --git a/scripts/build/shims/events.js b/scripts/build/shims/events.js new file mode 100644 index 00000000..890a0781 --- /dev/null +++ b/scripts/build/shims/events.js @@ -0,0 +1 @@ +export class EventEmitter {} diff --git a/src/common/internal-plugins.js b/src/common/internal-plugins.js index d22252f7..ddd01eec 100644 --- a/src/common/internal-plugins.js +++ b/src/common/internal-plugins.js @@ -109,8 +109,8 @@ module.exports = [ require("../language-html"), { parsers: { - get parse5() { - return eval("require")("../language-html/parser-parse5").parsers.parse5; + get html() { + return eval("require")("../language-html/parser-html").parsers.html; } } }, diff --git a/src/language-html/clean.js b/src/language-html/clean.js index eebefcb7..81eac82c 100644 --- a/src/language-html/clean.js +++ b/src/language-html/clean.js @@ -1,7 +1,9 @@ "use strict"; module.exports = function(ast, newNode) { - delete newNode.sourceCodeLocation; + delete newNode.startIndex; + delete newNode.endIndex; + delete newNode.attribs; if (ast.type === "text") { return null; @@ -11,4 +13,12 @@ module.exports = function(ast, newNode) { if (ast.type === "yaml") { return null; } + + if (ast.type === "attribute") { + delete newNode.value; + } + + if (ast.type === "directive" && ast.name === "!doctype") { + delete newNode.data; + } }; diff --git a/src/language-html/embed.js b/src/language-html/embed.js index 923f1a95..a1b36c6a 100644 --- a/src/language-html/embed.js +++ b/src/language-html/embed.js @@ -20,7 +20,7 @@ function embed(path, print, textToDoc, options) { parent.attribs.type === "application/javascript") ) { const parser = options.parser === "flow" ? "flow" : "babylon"; - const doc = textToDoc(getText(options, node), { parser }); + const doc = textToDoc(node.data, { parser }); return concat([hardline, doc]); } @@ -30,17 +30,13 @@ function embed(path, print, textToDoc, options) { (parent.attribs.type === "application/x-typescript" || parent.attribs.lang === "ts") ) { - const doc = textToDoc( - getText(options, node), - { parser: "typescript" }, - options - ); + const doc = textToDoc(node.data, { parser: "typescript" }, options); return concat([hardline, doc]); } // Inline Styles if (parent.type === "style") { - const doc = textToDoc(getText(options, node), { parser: "css" }); + const doc = textToDoc(node.data, { parser: "css" }); return concat([hardline, doc]); } @@ -106,11 +102,4 @@ function replaceNewlinesWithLiterallines(doc) { ); } -function getText(options, node) { - return options.originalText.slice( - options.locStart(node), - options.locEnd(node) - ); -} - module.exports = embed; diff --git a/src/language-html/index.js b/src/language-html/index.js index b11929f7..3a7e9a78 100644 --- a/src/language-html/index.js +++ b/src/language-html/index.js @@ -6,8 +6,8 @@ const createLanguage = require("../utils/create-language"); const languages = [ createLanguage(require("linguist-languages/data/html"), { override: { - since: null, // unreleased - parsers: ["parse5"], + since: "1.15.0", + parsers: ["html"], vscodeLanguageIds: ["html"] } }) diff --git a/src/language-html/parser-html.js b/src/language-html/parser-html.js new file mode 100644 index 00000000..73023958 --- /dev/null +++ b/src/language-html/parser-html.js @@ -0,0 +1,136 @@ +"use strict"; + +const parseFrontMatter = require("../utils/front-matter"); +const { HTML_TAGS } = require("./utils"); + +function parse(text /*, parsers, opts*/) { + const { frontMatter, content } = parseFrontMatter(text); + + // Inline the require to avoid loading all the JS if we don't use it + const Parser = require("htmlparser2/lib/Parser"); + const DomHandler = require("domhandler"); + + /** + * modifications: + * - empty attributes (e.g., ``) are parsed as `{ [attr]: null }` instead of `{ [attr]: "" }` + * - trigger `Handler#onselfclosingtag()` + */ + class CustomParser extends Parser { + constructor(cbs, options) { + super(cbs, options); + this._attribvalue = null; + } + onattribdata(value) { + if (this._attribvalue === null) { + this._attribvalue = ""; + } + super.onattribdata(value); + } + onattribend() { + super.onattribend(); + this._attribvalue = null; + } + onselfclosingtag() { + if (this._options.xmlMode || this._options.recognizeSelfClosing) { + const name = this._tagname; + this.onopentagend(); + if (this._stack[this._stack.length - 1] === name) { + this._cbs.onselfclosingtag(); + this._cbs.onclosetag(name); + this._stack.pop(); + } + } else { + this.onopentagend(); + } + } + } + + /** + * modifications: + * - add `selfClosing` field + */ + class CustomDomHandler extends DomHandler { + onselfclosingtag() { + this._tagStack[this._tagStack.length - 1].selfClosing = true; + } + } + + const handler = new CustomDomHandler({ + withStartIndices: true, + withEndIndices: true + }); + + new CustomParser(handler, { + lowerCaseTags: true, // preserve lowercase tag names to avoid false check in htmlparser2 and apply the lowercasing later + lowerCaseAttributeNames: false, + recognizeSelfClosing: true + }).end(content); + + const ast = normalize({ type: "root", children: handler.dom }, text); + + if (frontMatter) { + ast.children.unshift(frontMatter); + } + + return ast; +} + +function normalize(node, text) { + delete node.parent; + delete node.next; + delete node.prev; + + let isCaseSensitiveTag = false; + + if (node.type === "tag" && !(node.name in HTML_TAGS)) { + isCaseSensitiveTag = true; + node.name = text.slice( + node.startIndex + 1, // < + node.startIndex + 1 + node.name.length + ); + } + + if (node.attribs) { + node.attributes = Object.keys(node.attribs).map(attributeKey => ({ + type: "attribute", + key: isCaseSensitiveTag ? attributeKey : attributeKey.toLowerCase(), + value: node.attribs[attributeKey] + })); + } + + if (node.children) { + node.children = node.children.map(child => normalize(child, text)); + } + + if ( + node.type === "tag" && + node.name === "textarea" && + node.children.length === 1 && + node.children[0].type === "text" && + node.children[0].data === "\n" && + !/<\/textarea>$/.test(text.slice(locStart(node), locEnd(node))) + ) { + node.children = []; + } + + return node; +} + +function locStart(node) { + return node.startIndex; +} + +function locEnd(node) { + return node.endIndex + 1; +} + +module.exports = { + parsers: { + html: { + parse, + astFormat: "htmlparser2", + locStart, + locEnd + } + } +}; diff --git a/src/language-html/parser-parse5.js b/src/language-html/parser-parse5.js deleted file mode 100644 index cd4ea1c8..00000000 --- a/src/language-html/parser-parse5.js +++ /dev/null @@ -1,89 +0,0 @@ -"use strict"; - -const htmlTagNames = require("html-tag-names"); -const parseFrontMatter = require("../utils/front-matter"); - -const nonFragmentRegex = /^\s*(\s*)*<(!doctype|html|head|body)[\s>]/i; - -function parse(text /*, parsers, opts*/) { - // Inline the require to avoid loading all the JS if we don't use it - const parse5 = require("parse5"); - const htmlparser2TreeAdapter = require("parse5-htmlparser2-tree-adapter"); - - const { frontMatter, content } = parseFrontMatter(text); - - const isFragment = !nonFragmentRegex.test(content); - const ast = (isFragment ? parse5.parseFragment : parse5.parse)(content, { - treeAdapter: htmlparser2TreeAdapter, - sourceCodeLocationInfo: true - }); - - const normalizedAst = normalize(ast, text); - - if (frontMatter) { - normalizedAst.children.unshift(frontMatter); - } - - return normalizedAst; -} - -function normalize(node, text) { - delete node.parent; - delete node.next; - delete node.prev; - - let isCaseSensitiveTag = false; - - // preserve case-sensitive tag names - if ( - node.type === "tag" && - node.sourceCodeLocation && - htmlTagNames.indexOf(node.name) === -1 - ) { - isCaseSensitiveTag = true; - node.name = text.slice( - node.sourceCodeLocation.startOffset + 1, // < - node.sourceCodeLocation.startOffset + 1 + node.name.length - ); - } - - if (node.attribs) { - node.attributes = Object.keys(node.attribs).map(attributeKey => { - const sourceCodeLocation = node.sourceCodeLocation.attrs[attributeKey]; - return { - type: "attribute", - key: isCaseSensitiveTag - ? text - .slice( - sourceCodeLocation.startOffset, - sourceCodeLocation.endOffset - ) - .split("=", 1)[0] - : attributeKey, - value: node.attribs[attributeKey], - sourceCodeLocation - }; - }); - } - - if (node.children) { - node.children = node.children.map(child => normalize(child, text)); - } - - return node; -} - -module.exports = { - parsers: { - parse5: { - parse, - astFormat: "htmlparser2", - locStart(node) { - return node.sourceCodeLocation && node.sourceCodeLocation.startOffset; - }, - locEnd(node) { - return node.sourceCodeLocation && node.sourceCodeLocation.endOffset; - } - } - } -}; diff --git a/src/language-html/printer-htmlparser2.js b/src/language-html/printer-htmlparser2.js index 4ab42327..b0e0b470 100644 --- a/src/language-html/printer-htmlparser2.js +++ b/src/language-html/printer-htmlparser2.js @@ -18,12 +18,11 @@ const { utils: { willBreak, isLineNext, isEmpty } } = require("../doc"); const { + VOID_TAGS, hasPrettierIgnore, - isBooleanAttributeNode, isPreTagNode, isScriptTagNode, isTextAreaTagNode, - isVoidTagNode, isWhitespaceOnlyText } = require("./utils"); @@ -37,7 +36,15 @@ function genericPrint(path, options, print) { case "directive": { return concat([ "<", - n.data.replace('!DOCTYPE html ""', "!DOCTYPE html"), + n.name === "!doctype" + ? n.data + .replace(/\s+/g, " ") + .replace( + /^(!doctype)(( html)?)/i, + (_, doctype, doctypeHtml) => + doctype.toUpperCase() + doctypeHtml.toLowerCase() + ) + : n.data, ">", hardline ]); @@ -46,7 +53,9 @@ function genericPrint(path, options, print) { const parentNode = path.getParentNode(); if (isPreTagNode(parentNode) || isTextAreaTagNode(parentNode)) { - return n.data; + return concat( + n.data.split(/(\n)/g).map((x, i) => (i % 2 === 1 ? hardline : x)) + ); } return n.data.replace(/\s+/g, " ").trim(); @@ -54,11 +63,11 @@ function genericPrint(path, options, print) { case "script": case "style": case "tag": { - const isVoid = isVoidTagNode(n); + const isVoid = n.name in VOID_TAGS; const openingPrinted = printOpeningTag(path, print, isVoid); // Print self closing tag - if (isVoid) { + if (isVoid || n.selfClosing) { return openingPrinted; } @@ -71,24 +80,9 @@ function genericPrint(path, options, print) { const children = printChildren(path, print, options); - // NOTE: If the next token is a U+000A LINE FEED (LF) character token, then ignore that token and move - // on to the next one. (Newlines at the start of textarea elements are ignored as an authoring convenience.) if (isPreTagNode(n) || isTextAreaTagNode(n)) { - const originalTagContent = options.originalText.slice( - n.sourceCodeLocation.startTag.endOffset, - n.sourceCodeLocation.endTag.startOffset - ); - const hasNewlineAfterTag = /^(\r\n|\r|\n)/.test(originalTagContent); - return dedentToRoot( - group( - concat([ - openingPrinted, - hasNewlineAfterTag ? hardline : "", - concat(children), - closingPrinted - ]) - ) + group(concat([openingPrinted, concat(children), closingPrinted])) ); } @@ -157,18 +151,8 @@ function genericPrint(path, options, print) { return concat([""]); } case "attribute": { - if (!n.value) { - if (isBooleanAttributeNode(n)) { - return n.key; - } - - const originalAttributeSourceCode = options.originalText.slice( - n.sourceCodeLocation.startOffset, - n.sourceCodeLocation.endOffset - ); - const hasEqualSign = originalAttributeSourceCode.indexOf("=") !== -1; - - return hasEqualSign ? concat([n.key, '=""']) : n.key; + if (n.value === null) { + return n.key; } return concat([n.key, '="', n.value.replace(/"/g, """), '"']); @@ -186,15 +170,23 @@ function genericPrint(path, options, print) { function printOpeningTag(path, print, isVoid) { const n = path.getValue(); + const selfClosing = isVoid || n.selfClosing; + // Don't break self-closing elements with no attributes - if (isVoid && !n.attributes.length) { + if (selfClosing && !n.attributes.length) { return concat(["<", n.name, " />"]); } // Don't break up opening elements with a single long text attribute if (n.attributes && n.attributes.length === 1 && n.attributes[0].value) { return group( - concat(["<", n.name, " ", concat(path.map(print, "attributes")), ">"]) + concat([ + "<", + n.name, + " ", + concat(path.map(print, "attributes")), + selfClosing ? " />" : ">" + ]) ); } @@ -205,7 +197,7 @@ function printOpeningTag(path, print, isVoid) { indent( concat(path.map(attr => concat([line, print(attr)]), "attributes")) ), - isVoid ? concat([line, "/>"]) : concat([softline, ">"]) + selfClosing ? concat([line, "/>"]) : concat([softline, ">"]) ]) ); } diff --git a/src/language-html/utils.js b/src/language-html/utils.js index ec0bbc99..b6165d65 100644 --- a/src/language-html/utils.js +++ b/src/language-html/utils.js @@ -1,51 +1,50 @@ "use strict"; -// https://html.spec.whatwg.org/multipage/indices.html#attributes-3 -const BOOLEAN_ATTRIBUTES = [ - "allowfullscreen", - "allowpaymentrequest", - "async", - "autofocus", - "autoplay", - "checked", - "controls", - "default", - "defer", - "disabled", - "formnovalidate", - "hidden", - "ismap", - "itemscope", - "loop", - "multiple", - "muted", - "nomodule", - "novalidate", - "open", - "readonly", - "required", - "reversed", - "selected", - "typemustmatch" -]; +const htmlTagNames = require("html-tag-names"); -// http://w3c.github.io/html/single-page.html#void-elements -const VOID_TAGS = [ +const HTML_TAGS = arrayToMap(htmlTagNames); + +// NOTE: must be same as the one in htmlparser2 so that the parsing won't be inconsistent +// https://github.com/fb55/htmlparser2/blob/v3.9.2/lib/Parser.js#L59-L91 +const VOID_TAGS = arrayToMap([ "area", "base", + "basefont", "br", "col", + "command", "embed", + "frame", "hr", "img", "input", + "isindex", + "keygen", "link", "meta", "param", "source", "track", - "wbr" -]; + "wbr", + + "path", + "circle", + "ellipse", + "line", + "rect", + "use", + "stop", + "polyline", + "polygon" +]); + +function arrayToMap(array) { + const map = Object.create(null); + for (const value of array) { + map[value] = true; + } + return map; +} function hasPrettierIgnore(path) { const node = path.getValue(); @@ -88,16 +87,6 @@ function isWhitespaceOnlyText(node) { return node.type === "text" && node.data.trim().length === 0; } -function isBooleanAttributeNode(node) { - return ( - node.type === "attribute" && BOOLEAN_ATTRIBUTES.indexOf(node.key) !== -1 - ); -} - -function isVoidTagNode(node) { - return node.type === "tag" && VOID_TAGS.indexOf(node.name) !== -1; -} - function isPreTagNode(node) { return node.type === "tag" && node.name === "pre"; } @@ -111,11 +100,11 @@ function isScriptTagNode(node) { } module.exports = { + HTML_TAGS, + VOID_TAGS, hasPrettierIgnore, - isBooleanAttributeNode, - isWhitespaceOnlyText, isPreTagNode, isScriptTagNode, isTextAreaTagNode, - isVoidTagNode + isWhitespaceOnlyText }; diff --git a/src/main/core-options.js b/src/main/core-options.js index cae2e915..4049f4aa 100644 --- a/src/main/core-options.js +++ b/src/main/core-options.js @@ -122,7 +122,7 @@ const options = { since: null, description: "Handlebars" }, - { value: "parse5", since: null, description: "HTML" } + { value: "html", since: "1.15.0", description: "HTML" } ] }, plugins: { diff --git a/tests/html_attributes/__snapshots__/jsfmt.spec.js.snap b/tests/html_attributes/__snapshots__/jsfmt.spec.js.snap index 9c99b6d4..c814e3de 100644 --- a/tests/html_attributes/__snapshots__/jsfmt.spec.js.snap +++ b/tests/html_attributes/__snapshots__/jsfmt.spec.js.snap @@ -1,6 +1,6 @@ // Jest Snapshot v1, https://goo.gl/fbAQLP -exports[`attributes.html - parse5-verify 1`] = ` +exports[`attributes.html - html-verify 1`] = ` @@ -131,7 +131,7 @@ and HTML5 Apps. It also documents Mozilla products, like Firefox OS." `; -exports[`boolean.html - parse5-verify 1`] = ` +exports[`boolean.html - html-verify 1`] = ` @@ -154,7 +154,7 @@ exports[`boolean.html - parse5-verify 1`] = ` - - +
`; -exports[`dobule-quotes.html - parse5-verify 1`] = ` +exports[`dobule-quotes.html - html-verify 1`] = ` John 'ShotGun' Nelson ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ John 'ShotGun' Nelson `; -exports[`single-quotes.html - parse5-verify 1`] = ` +exports[`single-quotes.html - html-verify 1`] = ` John "ShotGun" Nelson ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ John "ShotGun" Nelson `; -exports[`without-quotes.html - parse5-verify 1`] = ` +exports[`without-quotes.html - html-verify 1`] = `

String

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

String

diff --git a/tests/html_attributes/jsfmt.spec.js b/tests/html_attributes/jsfmt.spec.js index 3ebd9a9e..53763df9 100644 --- a/tests/html_attributes/jsfmt.spec.js +++ b/tests/html_attributes/jsfmt.spec.js @@ -1 +1 @@ -run_spec(__dirname, ["parse5"]); +run_spec(__dirname, ["html"]); diff --git a/tests/html_aurelia/__snapshots__/jsfmt.spec.js.snap b/tests/html_aurelia/__snapshots__/jsfmt.spec.js.snap index c54c58dc..fcd09ebc 100644 --- a/tests/html_aurelia/__snapshots__/jsfmt.spec.js.snap +++ b/tests/html_aurelia/__snapshots__/jsfmt.spec.js.snap @@ -1,13 +1,12 @@ // Jest Snapshot v1, https://goo.gl/fbAQLP -exports[`basic.html - parse5-verify 1`] = ` +exports[`basic.html - html-verify 1`] = ` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `; diff --git a/tests/html_aurelia/jsfmt.spec.js b/tests/html_aurelia/jsfmt.spec.js index 3ebd9a9e..53763df9 100644 --- a/tests/html_aurelia/jsfmt.spec.js +++ b/tests/html_aurelia/jsfmt.spec.js @@ -1 +1 @@ -run_spec(__dirname, ["parse5"]); +run_spec(__dirname, ["html"]); diff --git a/tests/html_basics/__snapshots__/jsfmt.spec.js.snap b/tests/html_basics/__snapshots__/jsfmt.spec.js.snap index 31cd3eed..cb98eaec 100644 --- a/tests/html_basics/__snapshots__/jsfmt.spec.js.snap +++ b/tests/html_basics/__snapshots__/jsfmt.spec.js.snap @@ -1,18 +1,18 @@ // Jest Snapshot v1, https://goo.gl/fbAQLP -exports[`comment.html - parse5-verify 1`] = ` +exports[`comment.html - html-verify 1`] = ` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `; -exports[`empty.html - parse5-verify 1`] = ` +exports[`empty.html - html-verify 1`] = ` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `; -exports[`empty-doc.html - parse5-verify 1`] = ` +exports[`empty-doc.html - html-verify 1`] = ` @@ -27,7 +27,7 @@ exports[`empty-doc.html - parse5-verify 1`] = ` `; -exports[`form.html - parse5-verify 1`] = ` +exports[`form.html - html-verify 1`] = `
@@ -168,7 +168,7 @@ exports[`form.html - parse5-verify 1`] = ` value="option1" checked /> - Option one is this and that—be sure to include why it's great + Option one is this and that—be sure to include why it's great
@@ -210,7 +210,7 @@ exports[`form.html - parse5-verify 1`] = ` `; -exports[`hello-world.html - parse5-verify 1`] = ` +exports[`hello-world.html - html-verify 1`] = ` @@ -229,7 +229,7 @@ exports[`hello-world.html - parse5-verify 1`] = ` - + Document @@ -242,7 +242,7 @@ exports[`hello-world.html - parse5-verify 1`] = ` `; -exports[`html-comments.html - parse5-verify 1`] = ` +exports[`html-comments.html - html-verify 1`] = ` @@ -253,7 +253,6 @@ exports[`html-comments.html - parse5-verify 1`] = ` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Anchor @@ -262,7 +261,7 @@ exports[`html-comments.html - parse5-verify 1`] = ` `; -exports[`html-fragment.html - parse5-verify 1`] = ` +exports[`html-fragment.html - html-verify 1`] = ` Link