From 5c82426d94ca78abb025d0ffbaed37cfc990504e Mon Sep 17 00:00:00 2001
From: Pale Moon
Date: Fri, 9 Sep 2016 11:22:26 +0200
Subject: [PATCH] Add ES6 extended (braced) unicode literals.

This adds support for the form \u{xxxxxx} in string and template
literals.

When the character after "\u" is '{', getStringOrTemplateToken() hands
off to the new TokenStream::getBracedUnicode(). That helper calls
consumeKnownChar('{') for the brace the caller has already peeked, then
reads characters with getCharIgnoreEOL() until the closing '}',
accumulating them as hexadecimal digits. It returns false on EOF, on an
empty "\u{}", on any non-hex character, and as soon as the accumulated
value exceeds 0x10FFFF. Because the range check runs after every digit,
any number of leading zeros is accepted and the 32-bit accumulator
cannot overflow. On failure the caller reports JSMSG_MALFORMED_ESCAPE.

Values below 0x10000 are stored as a single code unit. Larger values are
stored as a UTF-16 surrogate pair: the 0xD800-based unit is appended to
tokenbuf and the 0xDC00-based unit is left in |c|.

A new test, js/src/tests/ecma_6/String/unicode-braced.js, covers valid
values, leading zeros, string and template literal forms, and malformed
escapes.
---
 js/src/frontend/TokenStream.cpp              | 49 +++++++++++++++
 js/src/frontend/TokenStream.h                |  1 +
 js/src/tests/ecma_6/String/unicode-braced.js | 64 ++++++++++++++++++++
 3 files changed, 114 insertions(+)
 create mode 100644 js/src/tests/ecma_6/String/unicode-braced.js

diff --git a/js/src/frontend/TokenStream.cpp b/js/src/frontend/TokenStream.cpp
index bee3486430..28708b2922 100644
--- a/js/src/frontend/TokenStream.cpp
+++ b/js/src/frontend/TokenStream.cpp
@@ -1656,6 +1656,37 @@ TokenStream::getTokenInternal(TokenKind* ttp, Modifier modifier)
     return false;
 }
 
+bool
+TokenStream::getBracedUnicode(uint32_t* cp)
+{
+    consumeKnownChar('{');
+
+    bool first = true; // true until at least one hex digit has been read
+    int32_t c;
+    uint32_t code = 0;
+    while (true) {
+        c = getCharIgnoreEOL();
+        if (c == EOF)
+            return false;
+        if (c == '}') {
+            if (first) // "\u{}": no digits between the braces
+                return false;
+            break;
+        }
+
+        if (!JS7_ISHEX(c))
+            return false;
+
+        code = (code << 4) | JS7_UNHEX(c);
+        if (code > 0x10FFFF) // tested after every digit, so |code| never overflows
+            return false;
+        first = false;
+    }
+
+    *cp = code;
+    return true;
+}
+
 bool
 TokenStream::getStringOrTemplateToken(int untilChar, Token** tp)
 {
@@ -1693,6 +1724,24 @@ TokenStream::getStringOrTemplateToken(int untilChar, Token** tp)
 
               // Unicode character specification.
case 'u': { + if (peekChar() == '{') { + uint32_t code; + if (!getBracedUnicode(&code)) { + reportError(JSMSG_MALFORMED_ESCAPE, "Unicode"); + return false; + } + + MOZ_ASSERT(code <= 0x10FFFF); + if (code < 0x10000) { + c = code; + } else { + if (!tokenbuf.append((code - 0x10000) / 1024 + 0xD800)) + return false; + c = ((code - 0x10000) % 1024) + 0xDC00; + } + break; + } + char16_t cp[4]; if (peekChars(4, cp) && JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1]) && JS7_ISHEX(cp[2]) && JS7_ISHEX(cp[3])) diff --git a/js/src/frontend/TokenStream.h b/js/src/frontend/TokenStream.h index 7d5b83ecd0..b291a9ed1c 100644 --- a/js/src/frontend/TokenStream.h +++ b/js/src/frontend/TokenStream.h @@ -771,6 +771,7 @@ class MOZ_STACK_CLASS TokenStream bool getTokenInternal(TokenKind* ttp, Modifier modifier); + bool getBracedUnicode(uint32_t* code); bool getStringOrTemplateToken(int untilChar, Token** tp); int32_t getChar(); diff --git a/js/src/tests/ecma_6/String/unicode-braced.js b/js/src/tests/ecma_6/String/unicode-braced.js new file mode 100644 index 0000000000..62d96b8441 --- /dev/null +++ b/js/src/tests/ecma_6/String/unicode-braced.js @@ -0,0 +1,64 @@ +var BUGNUMBER = 320500; +var summary = 'Add \\u{xxxxxx} string literals'; + +print(BUGNUMBER + ": " + summary); + +assertEq("\u{0}", String.fromCodePoint(0x0)); +assertEq("\u{1}", String.fromCodePoint(0x1)); +assertEq("\u{10}", String.fromCodePoint(0x10)); +assertEq("\u{100}", String.fromCodePoint(0x100)); +assertEq("\u{1000}", String.fromCodePoint(0x1000)); +assertEq("\u{D7FF}", String.fromCodePoint(0xD7FF)); +assertEq("\u{D800}", String.fromCodePoint(0xD800)); +assertEq("\u{DBFF}", String.fromCodePoint(0xDBFF)); +assertEq("\u{DC00}", String.fromCodePoint(0xDC00)); +assertEq("\u{DFFF}", String.fromCodePoint(0xDFFF)); +assertEq("\u{E000}", String.fromCodePoint(0xE000)); +assertEq("\u{10000}", String.fromCodePoint(0x10000)); +assertEq("\u{100000}", String.fromCodePoint(0x100000)); +assertEq("\u{10FFFF}", String.fromCodePoint(0x10FFFF)); 
+assertEq("\u{10ffff}", String.fromCodePoint(0x10FFFF));
+
+assertEq("A\u{1}\u{10}B\u{100}\u{1000}\u{10000}C\u{100000}",
+         "A" +
+         String.fromCodePoint(0x1) +
+         String.fromCodePoint(0x10) +
+         "B" +
+         String.fromCodePoint(0x100) +
+         String.fromCodePoint(0x1000) +
+         String.fromCodePoint(0x10000) +
+         "C" +
+         String.fromCodePoint(0x100000));
+// Single-quoted strings and template literals accept the same escape.
+assertEq('\u{10ffff}', String.fromCodePoint(0x10FFFF));
+assertEq(`\u{10ffff}`, String.fromCodePoint(0x10FFFF));
+assertEq(`\u{10ffff}${""}`, String.fromCodePoint(0x10FFFF));
+assertEq(`${""}\u{10ffff}`, String.fromCodePoint(0x10FFFF));
+assertEq(`${""}\u{10ffff}${""}`, String.fromCodePoint(0x10FFFF));
+// Leading zeros do not change the value.
+assertEq("\u{00}", String.fromCodePoint(0x0));
+assertEq("\u{00000000000000000}", String.fromCodePoint(0x0));
+assertEq("\u{00000000000001000}", String.fromCodePoint(0x1000));
+// Still true with 2^24 leading zeros.
+assertEq(eval(`"\\u{${"0".repeat(Math.pow(2, 24)) + "1234"}}"`), String.fromCodePoint(0x1234));
+// An upper-case \U is not a Unicode escape.
+assertEq("\U{0}", "U{0}");
+// Malformed or out-of-range braced escapes must throw a SyntaxError.
+assertThrowsInstanceOf(() => eval(`"\\u{-1}"`), SyntaxError);
+assertThrowsInstanceOf(() => eval(`"\\u{0.0}"`), SyntaxError);
+assertThrowsInstanceOf(() => eval(`"\\u{G}"`), SyntaxError);
+assertThrowsInstanceOf(() => eval(`"\\u{}"`), SyntaxError);
+assertThrowsInstanceOf(() => eval(`"\\u{{"`), SyntaxError);
+assertThrowsInstanceOf(() => eval(`"\\u{"`), SyntaxError);
+assertThrowsInstanceOf(() => eval(`"\\u{110000}"`), SyntaxError);
+assertThrowsInstanceOf(() => eval(`"\\u{00110000}"`), SyntaxError);
+assertThrowsInstanceOf(() => eval(`"\\u{100000000000000000000000000000}"`), SyntaxError);
+assertThrowsInstanceOf(() => eval(`"\\u{FFFFFFFFFFFFFFFFFFFFFFFFFFFFFF}"`), SyntaxError);
+assertThrowsInstanceOf(() => eval(`"\\u{ FFFF}"`), SyntaxError);
+assertThrowsInstanceOf(() => eval(`"\\u{FFFF }"`), SyntaxError);
+assertThrowsInstanceOf(() => eval(`"\\u{FF FF}"`), SyntaxError);
+assertThrowsInstanceOf(() => eval(`"\\u{F F F F}"`), SyntaxError);
+assertThrowsInstanceOf(() => eval(`"\\u{100000001}"`), SyntaxError);
+
+if (typeof reportCompare === "function")
+    reportCompare(true, true);