add readme

master
Vitaliy Filippov 2016-08-27 14:54:38 +03:00
parent c2c23ac90d
commit 9b0eface1e
7 changed files with 2034 additions and 679 deletions

View File

@ -1,4 +1,4 @@
{
"plugins": [ "transform-es2015-destructuring", "transform-object-rest-spread" ],
"plugins": [ "transform-es2015-destructuring", "transform-object-rest-spread", "transform-es2015-arrow-functions" ],
"retainLines": true
}

43
README.md Normal file
View File

@ -0,0 +1,43 @@
# htmLawed
This is a JS rewrite of the very good and safe htmLawed HTML sanitizer: http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed/
It is safe against almost all possible XSS vectors; see test cases in htmLawed_TESTCASE.txt and rsnake_xss.txt.
## Install
`npm install htmlawed`
## Basic usage
```js
const htmlawed = require('htmlawed');
var safe = htmlawed.sanitize('<html code>', { safe: 1 });
```
## Config options
The same config as in the original PHP version is supported. See documentation here:
http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed/htmLawed_README.htm#s2.2
The most interesting options are:
* `safe`: sanitize against most XSS
* `elements`: space-delimited allowed HTML elements with '+' or '-' in front.
For example, `* +style` means "allow all standard elements and the `<style>` element".
* `keep_bad`: what to do with bad tags (6 is the default)
    * 0 = remove them
    * 1 = escape tags and element content (replace `<` and `>` with `&lt;` and `&gt;`)
    * 2 = remove tags, escape element content
    * 3, 4 = like 1, 2 but remove if text (#PCDATA) is invalid in parent element
    * 5, 6 = like 3, 4 but leave space characters in place
* `parent`: supposed parent element that will be wrapped around content
* `tidy`: -1 = compact/uglify HTML, 0 = no change (default), 1 = tidy/beautify HTML
* `abs_url`: -1 = make relative, 0 = no change (default), 1 = make absolute
* `base_url`: base URL for `abs_url` to work if not 0
# License
LGPL, because it's a rewrite of the original LGPL-licensed library.
Copyright (c) 2016+ Vitaliy Filippov (vitalif ~ mail.ru)

File diff suppressed because it is too large Load Diff

1288
htmLawed.src.js Normal file

File diff suppressed because it is too large Load Diff

View File

@ -1,24 +1,50 @@
{
"name": "htmlawed",
"version": "1.0.0",
"author": {
"name": "Vitaliy Filippov",
"email": "vitalif@yourcmc.ru",
"url": "http://yourcmc.ru/wiki/"
},
"name": "htmlawed",
"description": "JS port of htmLawed HTML filter library",
"main": "htmLawed.js",
"files": [
"htmLawed.js",
"htmLawed.src.js",
"htmLawed.min.js",
"test.js"
],
"repository": {
"type": "git",
"url": "https://github.com/vitalif/htmlawed"
},
"homepage": "https://github.com/vitalif/htmlawed",
"bugs": {
"url": "https://github.com/vitalif/htmlawed/issues"
},
"dependencies": {
},
"devDependencies": {
"babel-cli": "latest",
"babel-plugin-transform-es2015-destructuring": "latest",
"babel-plugin-transform-object-rest-spread": "latest",
"babel-plugin-transform-es2015-arrow-functions": "latest",
"eslint": "latest",
"eslint-plugin-no-regex-dot": "latest"
},
"scripts": {
}
"build": "eslint htmLawed.src.js && babel htmLawed.src.js > htmLawed.js",
"test": "nodejs test.js"
},
"keywords": [
"html",
"sanitizer"
],
"license": "LGPL",
"maintainers": [
{
"name": "vitalif",
"email": "vitalif@yourcmc.ru"
}
]
}

View File

@ -1,5 +0,0 @@
#!/bin/sh
# php -r 'require "htmLawed.php"; print htmLawed::sanitize(file_get_contents("test_xss.txt"), array("safe" => 1));' > test_php.htm
node_modules/.bin/eslint htmLawed.js
node_modules/.bin/babel htmLawed.js > htmLawed.c.js
nodejs htmLawed-test.js

View File

@ -1,5 +1,5 @@
const fs = require('fs');
const htmLawed = require('./htmLawed.c.js');
const htmLawed = require('./htmLawed.js');
var out1 = htmLawed.sanitize(fs.readFileSync('htmLawed_TESTCASE.txt', { encoding: 'utf8' }), { safe: 1, keep_bad: 1 });
var check1 = fs.readFileSync('htmLawed_TESTCASE_out.htm', { encoding: 'utf8' });
@ -23,4 +23,7 @@ while ((m = /^(\d+)\.\s*([^\n]+)\n\nInput code »\n([\s\S]*?)\n\nOutput code »\
tests = tests.substr(m[0].length);
}
htmLawed.sanitize('<body><style>a { }</style> <img style="abc: 1">zhopa</img> <p>Hello &nbsp; world!</p></body>', { safe: 1, elements: '* +style', style_pass: true });
var src = '<body><style>a { }</style> <img style="abc: 1">zhopa</img> <p>Hello &nbsp; world!</p></body>';
var res = '<style>a { }</style> <img style="abc: 1" src="src" alt="image" />zhopa <p>Hello &nbsp; world!</p>';
var ok = htmLawed.sanitize(src, { safe: 1, elements: '* +style', style_pass: true });
console.log("[STYLE_PASS] "+(ok ? "OK" : "NOT OK"));