Compare commits

...

1 Commit

Author SHA1 Message Date
Rahul Padigela 91de27804a improvement: test commit changes 2020-04-15 10:03:25 -07:00
2 changed files with 21 additions and 1 deletion

View File

@@ -36,7 +36,7 @@ function awsURIencode(input, encodeSlash, noEncodeStar) {
const encSlash = encodeSlash === undefined ? true : encodeSlash;
let encoded = '';
for (let i = 0; i < input.length; i++) {
const ch = input.charAt(i);
let ch = input.charAt(i);
if ((ch >= 'A' && ch <= 'Z') ||
(ch >= 'a' && ch <= 'z') ||
(ch >= '0' && ch <= '9') ||
@@ -50,6 +50,19 @@ function awsURIencode(input, encodeSlash, noEncodeStar) {
} else if (ch === '*') {
encoded = encoded.concat(noEncodeStar ? '*' : '%2A');
} else {
if (ch >= '\uD800' && ch <= '\uDBFF') {
// If this character is a high surrogate peek the next character
// and join it with this one if the next character is a low surrogate.
// Otherwise the encoded URI will contain the two surrogates as two distinct UTF-8
// sequences which is not valid UTF-8.
if (i + 1 < input.length) {
const ch2 = input.charAt(i+1);
if (ch2 >= '\uDC00' && ch2 <= '\uDFFF') {
i++;
ch += ch2;
}
}
}
encoded = encoded.concat(_toHexUTF8(ch));
}
}

View File

@@ -53,4 +53,11 @@ describe('should URIencode in accordance with AWS rules', () => {
const actualOutput = awsURIencode(input);
assert.strictEqual(actualOutput, expectedOutput);
});
// Regression check: a code point outside the Basic Multilingual Plane
// (here U+1F32E, held in the JS string as a UTF-16 surrogate pair) must be
// percent-encoded as one 4-byte UTF-8 sequence, not as two separate ones.
it('should encode codepoints that use surrogate pairs in UTF-16 as a single UTF-8 sequence', () => {
    const rawPath = '/s3amazonaws.com/I-like-🌮s';
    const encodedPath = awsURIencode(rawPath);
    // The emoji is expected as the bytes F0 9F 8C AE; the slashes appear as
    // %2F because encodeSlash is left at its default.
    assert.strictEqual(encodedPath, '%2Fs3amazonaws.com%2FI-like-%F0%9F%8C%AEs');
});
});