correct unicode string lengths with "unicode" option

master
Evgeny Poberezkin 2015-06-04 23:08:45 +01:00
parent 62e0d23a00
commit b9b9affcf4
9 changed files with 168 additions and 147 deletions

View File

@ -38,8 +38,8 @@ ajv compiles schemas to functions and caches them in both cases (using stringifi
## Options ## Options
- _allErrors_: if true, jv will continue validating all rules collecting all errors (false by default) - _allErrors_: if true, ajv checks all rules collecting all errors, otherwise it will return after the first error (false by default)
- _verbose_: include the reference to the validated data in the errors (false by default) - _verbose_: include the reference to the validated data in the errors (false by default)
- _format_: if false, the formats won't be validated (true by default) - _format_: if false, the formats won't be validated (true by default)
- _uniqueItems_: if false, `uniqueItems` keyword will be ignored (true by default) - _uniqueItems_: if false, `uniqueItems` keyword will be ignored (true by default, i.e. uniqueItems is checked)
- _unicode_: if true, the lengths of strings with unicode pairs will be correct (false by default) - not implemented yet. - _unicode_: if true, the lengths of strings with unicode pairs will be correct and each pair will be counted as one character (false by default, as it is slower).

View File

@ -1,14 +1,14 @@
'use strict'; 'use strict';
module.exports = { module.exports = {
date: date, date: date,
'date-time': date_time, 'date-time': date_time,
uri: uri, uri: uri,
email: /^[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&''*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?$/, email: /^[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&''*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?$/,
hostname: hostname, hostname: hostname,
ipv4: /^(?:(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}(?:25[0-5]|2[0-4]\d|[01]?\d\d?)$/, ipv4: /^(?:(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}(?:25[0-5]|2[0-4]\d|[01]?\d\d?)$/,
ipv6: /^\s*(?:(?:(?:[0-9a-f]{1,4}:){7}(?:[0-9a-f]{1,4}|:))|(?:(?:[0-9a-f]{1,4}:){6}(?::[0-9a-f]{1,4}|(?:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(?:(?:[0-9a-f]{1,4}:){5}(?:(?:(?::[0-9a-f]{1,4}){1,2})|:(?:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(?:(?:[0-9a-f]{1,4}:){4}(?:(?:(?::[0-9a-f]{1,4}){1,3})|(?:(?::[0-9a-f]{1,4})?:(?:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(?:(?:[0-9a-f]{1,4}:){3}(?:(?:(?::[0-9a-f]{1,4}){1,4})|(?:(?::[0-9a-f]{1,4}){0,2}:(?:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(?:(?:[0-9a-f]{1,4}:){2}(?:(?:(?::[0-9a-f]{1,4}){1,5})|(?:(?::[0-9a-f]{1,4}){0,3}:(?:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(?:(?:[0-9a-f]{1,4}:){1}(?:(?:(?::[0-9a-f]{1,4}){1,6})|(?:(?::[0-9a-f]{1,4}){0,4}:(?:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(?::(?:(?:(?::[0-9a-f]{1,4}){1,7})|(?:(?::[0-9a-f]{1,4}){0,5}:(?:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:)))(?:%.+)?\s*$/i, ipv6: /^\s*(?:(?:(?:[0-9a-f]{1,4}:){7}(?:[0-9a-f]{1,4}|:))|(?:(?:[0-9a-f]{1,4}:){6}(?::[0-9a-f]{1,4}|(?:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(?:(?:[0-9a-f]{1,4}:){5}(?:(?:(?::[0-9a-f]{1,4}){1,2})|:(?:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(?:(?:[0-9a-f]{1,4}:){4}(?:(?:(?::[0-9a-f]{1,4}){1,3})|(?:(?::[0-9a-f]{1,4})?:(?:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(?:(?:[0-9a-f]{1,4}:){3}(?:(?:(?::[0-9a-f]{1,4}){1,4})|(?:(?::[0-9a-f]{1,4}){0,2}:(?:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(?:(?:[0-9a-f]{1,4}:){2}(?:(?:(?::[0-9a-f]{1,4}){1,5})|(?:(?::[0-9a-f]{1,4}){0,3}:(?:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(?:(?:[0-9a-f]{1,4}:){1}(?:(?:(?::[0-9a-f]{1,4}){1,6})|(?:(?::[0-9a-f]{1,4}){0,4}:(?:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(?::(?:(?:(?::[0-9a-f]{1,4}){1,7})|(?:(?::[0-9a-f]{1,4}){0,5}:(?:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:)))(?:%.+)?\s*$/i,
regex: regex regex: regex
}; };
var DATE = /^\d\d\d\d-(\d\d)-(\d\d)$/; var DATE = /^\d\d\d\d-(\d\d)-(\d\d)$/;
@ -19,48 +19,48 @@ var URI = /^(?:[a-z][a-z0-9+\-.]*:)?(?:\/\/(?:(?:[a-z0-9\-._~!$&'()*+,;=:]|%[0-9
function date(str) { function date(str) {
// full-date from http://tools.ietf.org/html/rfc3339#section-5.6 // full-date from http://tools.ietf.org/html/rfc3339#section-5.6
var matches = str.match(DATE); var matches = str.match(DATE);
if (!matches) return false; if (!matches) return false;
var month = +matches[1]; var month = +matches[1];
var day = +matches[2]; var day = +matches[2];
return month >= 1 && month <= 12 && day >= 1 && day <= DAYS[month]; return month >= 1 && month <= 12 && day >= 1 && day <= DAYS[month];
} }
function date_time(str) { function date_time(str) {
// http://tools.ietf.org/html/rfc3339#section-5.6 // http://tools.ietf.org/html/rfc3339#section-5.6
var dateTime = str.toLowerCase().split('t'); var dateTime = str.toLowerCase().split('t');
if (!date(dateTime[0])) return false; if (!date(dateTime[0])) return false;
var matches = dateTime[1].match(TIME); var matches = dateTime[1].match(TIME);
if (!matches) return false; if (!matches) return false;
var hour = matches[1]; var hour = matches[1];
var minute = matches[2]; var minute = matches[2];
var second = matches[3]; var second = matches[3];
return hour <= 23 && minute <= 59 && second <= 59; return hour <= 23 && minute <= 59 && second <= 59;
} }
function hostname(str) { function hostname(str) {
// http://tools.ietf.org/html/rfc1034#section-3.5 // http://tools.ietf.org/html/rfc1034#section-3.5
return str.length <= 255 && HOSTNAME.test(str); return str.length <= 255 && HOSTNAME.test(str);
} }
function uri(str) { function uri(str) {
// http://jmrware.com/articles/2009/uri_regexp/URI_regex.html + optional protocol + required "." // http://jmrware.com/articles/2009/uri_regexp/URI_regex.html + optional protocol + required "."
return str.indexOf('.') >= 0 && URI.test(str); return str.indexOf('.') >= 0 && URI.test(str);
} }
function regex(str) { function regex(str) {
try { try {
new RegExp(str); new RegExp(str);
return true; return true;
} catch(e) { } catch(e) {
return false; return false;
} }
} }

View File

@ -1,54 +1,54 @@
'use strict'; 'use strict';
var doT = require('dot') var doT = require('dot')
, fs = require('fs') , fs = require('fs')
, stableStringify = require('json-stable-stringify') , stableStringify = require('json-stable-stringify')
, formats = require('./formats') , formats = require('./formats')
, resolve = require('./resolve') , resolve = require('./resolve')
, equal = require('./equal'); , equal = require('./equal');
var RULES = require('./rules') var RULES = require('./rules')
, validateTemplate = fs.readFileSync(__dirname + '/validate.dot.js') , validateTemplate = fs.readFileSync(__dirname + '/validate.dot.js')
, validateGenerator = doT.compile(validateTemplate); , validateGenerator = doT.compile(validateTemplate);
module.exports = compile; module.exports = compile;
function compile(schema) { function compile(schema) {
var self = this; var self = this;
var validateCode = validateGenerator({ var validateCode = validateGenerator({
isRoot: true, isRoot: true,
schema: schema, schema: schema,
schemaPath: '', schemaPath: '',
RULES: RULES, RULES: RULES,
validate: validateGenerator, validate: validateGenerator,
copy: copy, copy: copy,
toHash: toHash, toHash: toHash,
resolveRef: resolveRef, resolveRef: resolveRef,
checkDataType: checkDataType, checkDataType: checkDataType,
checkDataTypes: checkDataTypes, checkDataTypes: checkDataTypes,
escapeQuotes: escapeQuotes, escapeQuotes: escapeQuotes,
stableStringify: stableStringify, stableStringify: stableStringify,
opts: this.opts opts: this.opts
}); });
// console.log('\n\n\n *** \n', validateCode); // console.log('\n\n\n *** \n', validateCode);
var validate; var validate;
eval('validate = ' + validateCode); eval('validate = ' + validateCode);
validate.schema = schema; validate.schema = schema;
validate.errors = []; validate.errors = [];
return validate; return validate;
function resolveRef(ref) { function resolveRef(ref) {
return resolve.call(self, compile, schema, ref); return resolve.call(self, compile, schema, ref);
} }
function validateRef(ref, data) { function validateRef(ref, data) {
var v = ref == '#' ? validate : self._schemas[ref]; var v = ref == '#' ? validate : self._schemas[ref];
var valid = v(data); var valid = v(data);
return { valid: valid, errors: v.errors }; return { valid: valid, errors: v.errors };
} }
} }
@ -58,58 +58,78 @@ function compile(schema) {
function copy(o, to) { function copy(o, to) {
to = to || {}; to = to || {};
for (var key in o) to[key] = o[key]; for (var key in o) to[key] = o[key];
return to; return to;
} }
function checkDataType(dataType, lvl) { function checkDataType(dataType, lvl) {
var data = 'data' + lvl; var data = 'data' + lvl;
switch (dataType) { switch (dataType) {
case 'null': return data + ' === null'; case 'null': return data + ' === null';
case 'array': return 'Array.isArray(' + data + ')'; case 'array': return 'Array.isArray(' + data + ')';
case 'object': return '(' + data + ' && typeof ' + data + ' == "object" && !Array.isArray(' + data + '))'; case 'object': return '(' + data + ' && typeof ' + data + ' == "object" && !Array.isArray(' + data + '))';
case 'integer': return '(typeof ' + data + ' == "number" && !(' + data + ' % 1))' case 'integer': return '(typeof ' + data + ' == "number" && !(' + data + ' % 1))'
default: return 'typeof ' + data + ' == "' + dataType + '"'; default: return 'typeof ' + data + ' == "' + dataType + '"';
} }
} }
function checkDataTypes(dataTypes, lvl) { function checkDataTypes(dataTypes, lvl) {
var data = 'data' + lvl; var data = 'data' + lvl;
switch (dataTypes.length) { switch (dataTypes.length) {
case 0: return 'true'; case 0: return 'true';
case 1: return checkDataType(dataTypes[0], lvl); case 1: return checkDataType(dataTypes[0], lvl);
default: default:
var code = '' var code = ''
var types = toHash(dataTypes); var types = toHash(dataTypes);
if (types.array && types.object) { if (types.array && types.object) {
code = types.null ? '(': '(' + data + ' && ' code = types.null ? '(': '(' + data + ' && '
code += 'typeof ' + data + ' == "object")'; code += 'typeof ' + data + ' == "object")';
delete types.null; delete types.null;
delete types.array; delete types.array;
delete types.object; delete types.object;
} }
if (types.number) delete types.integer; if (types.number) delete types.integer;
for (var t in types) for (var t in types)
code += (code ? '||' : '' ) + checkDataType(t, lvl); code += (code ? '||' : '' ) + checkDataType(t, lvl);
return code; return code;
}
}
// https://mathiasbynens.be/notes/javascript-encoding
// https://github.com/bestiejs/punycode.js - punycode.ucs2.decode
function ucs2length(str) {
var length = 0
, len = str.length
, pos = 0
, value;
while (pos < len) {
length++;
value = str.charCodeAt(pos++);
if (value >= 0xD800 && value <= 0xDBFF && pos < len) {
// high surrogate, and there is a next character
value = str.charCodeAt(pos);
if ((value & 0xFC00) == 0xDC00) pos++; // low surrogate
} }
}
return length;
} }
function toHash(arr, func) { function toHash(arr, func) {
var hash = {}; var hash = {};
arr.forEach(function (item) { arr.forEach(function (item) {
if (func) item = func(item); if (func) item = func(item);
hash[item] = true; hash[item] = true;
}); });
return hash; return hash;
} }
function escapeQuotes(str) { function escapeQuotes(str) {
return str.replace(/"/g, '\\"'); return str.replace(/"/g, '\\"');
} }

View File

@ -2,30 +2,30 @@
module.exports = function resolve(compile, rootSchema, ref) { module.exports = function resolve(compile, rootSchema, ref) {
var schema = rootSchema; var schema = rootSchema;
if (ref[0] != '#') if (ref[0] != '#')
schema = undefined; schema = undefined;
else if (ref != '#') { else if (ref != '#') {
if (this._schemas[ref]) if (this._schemas[ref])
schema = this._schemas[ref]; schema = this._schemas[ref];
else { else {
var parts = ref.split('/'); var parts = ref.split('/');
for (var i = 1; i < parts.length; i++) { for (var i = 1; i < parts.length; i++) {
if (!schema) break; if (!schema) break;
var part = unescape(parts[i]); var part = unescape(parts[i]);
schema = schema[part]; schema = schema[part];
if (schema.$ref) if (schema.$ref)
schema = resolve.call(this, compile, rootSchema, schema.$ref); schema = resolve.call(this, compile, rootSchema, schema.$ref);
} }
if (schema) this._schemas[ref] = compile.call(this, schema); if (schema) this._schemas[ref] = compile.call(this, schema);
}
} }
return schema; }
return schema;
}; };
function unescape(str) { function unescape(str) {
return decodeURIComponent(str) return decodeURIComponent(str)
.replace(/~1/g, '/') .replace(/~1/g, '/')
.replace(/~0/g, '~'); .replace(/~0/g, '~');
} }

View File

@ -34,6 +34,15 @@
#}} #}}
{{## def.strLength:
{{? it.opts.unicode }}
ucs2length(data{{=$dataLvl}})
{{??}}
data{{=$dataLvl}}.length
{{?}}
#}}
{{## def.cleanUp: {{## def.cleanUp:
{{ out = out.replace(/if \(valid[0-9]*\) \{\s*\}/g, ''); }} {{ out = out.replace(/if \(valid[0-9]*\) \{\s*\}/g, ''); }}
#}} #}}

View File

@ -1,6 +1,6 @@
{{# def.definitions }} {{# def.definitions }}
{{# def.setup:'maxLength' }} {{# def.setup:'maxLength' }}
var valid{{=$lvl}} = data{{=$dataLvl}}.length <= {{=$schema}}; var valid{{=$lvl}} = {{# def.strLength }} <= {{=$schema}};
{{# def.checkError:'maxLength' }} {{# def.checkError:'maxLength' }}

View File

@ -1,6 +1,6 @@
{{# def.definitions }} {{# def.definitions }}
{{# def.setup:'minLength' }} {{# def.setup:'minLength' }}
var valid{{=$lvl}} = data{{=$dataLvl}}.length >= {{=$schema}}; var valid{{=$lvl}} = {{# def.strLength }} >= {{=$schema}};
{{# def.checkError:'minLength' }} {{# def.checkError:'minLength' }}

View File

@ -1,6 +1,6 @@
{ {
"name": "ajv", "name": "ajv",
"version": "0.2.0", "version": "0.2.1",
"description": "Another JSON schema Validator", "description": "Another JSON schema Validator",
"main": "lib/ajv.js", "main": "lib/ajv.js",
"scripts": { "scripts": {

View File

@ -6,18 +6,10 @@ var glob = require('glob')
var ONLY_RULES, SKIP_RULES; var ONLY_RULES, SKIP_RULES;
// ONLY_RULES = [ // ONLY_RULES = [
// 'type', // 'type', 'not', 'allOf', 'anyOf', 'oneOf', 'enum',
// 'not', // 'maximum', 'minimum', 'multipleOf', 'maxLength', 'minLength', 'pattern',
// 'allOf',
// 'anyOf',
// 'oneOf',
// 'enum',
// 'maximum', 'minimum',
// 'multipleOf',
// 'maxLength', 'minLength', 'pattern',
// 'properties', 'patternProperties', 'additionalProperties', // 'properties', 'patternProperties', 'additionalProperties',
// 'dependencies', // 'dependencies', 'required',
// 'required',
// 'maxProperties', 'minProperties', 'maxItems', 'minItems', // 'maxProperties', 'minProperties', 'maxItems', 'minItems',
// 'items', 'additionalItems', 'uniqueItems', // 'items', 'additionalItems', 'uniqueItems',
// 'optional/format', 'optional/bignum', // 'optional/format', 'optional/bignum',
@ -31,8 +23,8 @@ SKIP_RULES = [
var Ajv = require('../lib/ajv') var Ajv = require('../lib/ajv')
, ajv = Ajv() , ajv = Ajv({ unicode: true })
, fullAjv = Ajv({ allErrors: true, verbose: true }); , fullAjv = Ajv({ unicode: true, allErrors: true, verbose: true });
var remoteRefs = { var remoteRefs = {
'http://localhost:1234/integer.json': require('./JSON-Schema-Test-Suite/remotes/integer.json'), 'http://localhost:1234/integer.json': require('./JSON-Schema-Test-Suite/remotes/integer.json'),
@ -67,7 +59,7 @@ describe('JSON-Schema tests', function () {
var fullValidate = fullAjv.compile(testSet.schema); var fullValidate = fullAjv.compile(testSet.schema);
testSet.tests.forEach(function (test) { testSet.tests.forEach(function (test) {
// if (test.description != 'valid') return; // if (test.description != 'one supplementary Unicode code point is not long enough') return;
// console.log(testSet.schema, '\n\n***\n\n', validate.toString()); // console.log(testSet.schema, '\n\n***\n\n', validate.toString());
it(test.description, function() { it(test.description, function() {
var valid = validate(test.data); var valid = validate(test.data);