const htmlawed = require('htmlawed'); const css = require('css'); function sanitizeHtml(html) { if (!html) return ''; else html = html+''; let styles = ''; // GitHub tends to insert some metadata script. Cut off them here, // because htmLawed has a global policy for bad tags and we use "leave content in place". html = html.replace(/]*>([\s\S]*)<\/script\s*>/ig, ''); html = html.replace(/]*>([\s\S]*?)(<\/style\s*>|(?=]*>))/ig, function(m, m1) { styles += m1+'\n'; return ''; }); html = html.replace(/^[\s\S]*?]*>([\s\S]*)<\/body>[\s\S]*$/i, '$1'); html = html.replace(/^[\s\S]*?]*>([\s\S]*)<\/html>[\s\S]*$/i, '$1'); if (styles) { html = '\n'+html; styles = ''; } html = htmlawed.sanitize(html||'', { safe: 1, elements: '* +style', keep_bad: 6, comment: 1 }); html = html.replace(/]+)>/ig, (m, m1) => ''); html = html.replace(/]*>([\s\S]*)<\/style\s*>/ig, function(m, m1) { let ast = css.parse(m1, { silent: true }); rewriteCss(ast); return ''; }); return html; } function rewriteCss(ast) { var rules = ast.rules || ast.stylesheet && ast.stylesheet.rules; if (ast.stylesheet && ast.stylesheet.parsingErrors) { delete ast.stylesheet.parsingErrors; } if (rules) { for (var i = 0; i < rules.length; i++) { if (rules[i].type == 'document') { // prune @document instructions (may spy on current URL) rules.splice(i--, 1); } else if (rules[i].type == 'rule' && (!rules[i].selectors || !rules[i].declarations)) rules.splice(i--, 1); else rewriteCss(rules[i]); } } else if (ast.type == 'rule') { for (var i = 0; i < ast.selectors.length; i++) { // FIXME: Do not hardcode css selector for frontend here // This will require generating unique substitution string, // so we may also generate 'blocked images' stubs when we do it. ast.selectors[i] = '.message-view .text '+ast.selectors[i]; } } } module.exports = sanitizeHtml;