lambdaworld-archive/node_modules/shift-parser/src/tokenizer.js

1514 lines
46 KiB
JavaScript

/**
* Copyright 2014 Shape Security, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License")
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
const { getHexValue, isLineTerminator, isWhiteSpace, isIdentifierStart, isIdentifierPart, isDecimalDigit } = require('./utils');
const { ErrorMessages } = require('./errors');
/**
 * Coarse token categories. Each entry carries a display `name`; the `Ident`
 * and `Keyword` categories are additionally flagged with `isIdentifierName`.
 */
const TokenClass = (() => {
  const plain = name => ({ name });
  const identifierName = name => ({ name, isIdentifierName: true });
  return {
    Eof: plain('<End>'),
    Ident: identifierName('Identifier'),
    Keyword: identifierName('Keyword'),
    NumericLiteral: plain('Numeric'),
    TemplateElement: plain('Template'),
    Punctuator: plain('Punctuator'),
    StringLiteral: plain('String'),
    RegularExpression: plain('RegularExpression'),
    Illegal: plain('Illegal'),
  };
})();
/**
 * Every concrete token type the tokenizer can produce. Each entry is a distinct
 * object `{ klass, name }`: `klass` is one of the TokenClass categories and
 * `name` is the exact source text for punctuators and keywords (empty for
 * token types whose text varies, such as numbers and identifiers).
 */
const TokenType = (() => {
  const make = (klass, name) => ({ klass, name });
  const punctuator = name => make(TokenClass.Punctuator, name);
  const keyword = name => make(TokenClass.Keyword, name);
  return {
    EOS: make(TokenClass.Eof, 'EOS'),
    LPAREN: punctuator('('),
    RPAREN: punctuator(')'),
    LBRACK: punctuator('['),
    RBRACK: punctuator(']'),
    LBRACE: punctuator('{'),
    RBRACE: punctuator('}'),
    COLON: punctuator(':'),
    SEMICOLON: punctuator(';'),
    PERIOD: punctuator('.'),
    ELLIPSIS: punctuator('...'),
    ARROW: punctuator('=>'),
    CONDITIONAL: punctuator('?'),
    INC: punctuator('++'),
    DEC: punctuator('--'),
    ASSIGN: punctuator('='),
    ASSIGN_BIT_OR: punctuator('|='),
    ASSIGN_BIT_XOR: punctuator('^='),
    ASSIGN_BIT_AND: punctuator('&='),
    ASSIGN_SHL: punctuator('<<='),
    ASSIGN_SHR: punctuator('>>='),
    ASSIGN_SHR_UNSIGNED: punctuator('>>>='),
    ASSIGN_ADD: punctuator('+='),
    ASSIGN_SUB: punctuator('-='),
    ASSIGN_MUL: punctuator('*='),
    ASSIGN_DIV: punctuator('/='),
    ASSIGN_MOD: punctuator('%='),
    ASSIGN_EXP: punctuator('**='),
    COMMA: punctuator(','),
    OR: punctuator('||'),
    AND: punctuator('&&'),
    BIT_OR: punctuator('|'),
    BIT_XOR: punctuator('^'),
    BIT_AND: punctuator('&'),
    SHL: punctuator('<<'),
    SHR: punctuator('>>'),
    SHR_UNSIGNED: punctuator('>>>'),
    ADD: punctuator('+'),
    SUB: punctuator('-'),
    MUL: punctuator('*'),
    DIV: punctuator('/'),
    MOD: punctuator('%'),
    EXP: punctuator('**'),
    EQ: punctuator('=='),
    NE: punctuator('!='),
    EQ_STRICT: punctuator('==='),
    NE_STRICT: punctuator('!=='),
    LT: punctuator('<'),
    GT: punctuator('>'),
    LTE: punctuator('<='),
    GTE: punctuator('>='),
    INSTANCEOF: keyword('instanceof'),
    IN: keyword('in'),
    NOT: punctuator('!'),
    BIT_NOT: punctuator('~'),
    ASYNC: keyword('async'),
    AWAIT: keyword('await'),
    ENUM: keyword('enum'),
    DELETE: keyword('delete'),
    TYPEOF: keyword('typeof'),
    VOID: keyword('void'),
    BREAK: keyword('break'),
    CASE: keyword('case'),
    CATCH: keyword('catch'),
    CLASS: keyword('class'),
    CONTINUE: keyword('continue'),
    DEBUGGER: keyword('debugger'),
    DEFAULT: keyword('default'),
    DO: keyword('do'),
    ELSE: keyword('else'),
    EXPORT: keyword('export'),
    EXTENDS: keyword('extends'),
    FINALLY: keyword('finally'),
    FOR: keyword('for'),
    FUNCTION: keyword('function'),
    IF: keyword('if'),
    IMPORT: keyword('import'),
    LET: keyword('let'),
    NEW: keyword('new'),
    RETURN: keyword('return'),
    SUPER: keyword('super'),
    SWITCH: keyword('switch'),
    THIS: keyword('this'),
    THROW: keyword('throw'),
    TRY: keyword('try'),
    VAR: keyword('var'),
    WHILE: keyword('while'),
    WITH: keyword('with'),
    NULL: keyword('null'),
    TRUE: keyword('true'),
    FALSE: keyword('false'),
    YIELD: keyword('yield'),
    NUMBER: make(TokenClass.NumericLiteral, ''),
    STRING: make(TokenClass.StringLiteral, ''),
    REGEXP: make(TokenClass.RegularExpression, ''),
    IDENTIFIER: make(TokenClass.Ident, ''),
    CONST: keyword('const'),
    TEMPLATE: make(TokenClass.TemplateElement, ''),
    ESCAPED_KEYWORD: keyword(''),
    ILLEGAL: make(TokenClass.Illegal, ''),
  };
})();
// Short aliases that keep the lookup tables below compact:
// TT = TokenType, I = the ILLEGAL token type, F/T = false/true.
const TT = TokenType;
const I = TT.ILLEGAL;
const F = false;
const T = true;
// Lookup table indexed by character code (scanPunctuatorHelper reads it as
// ONE_CHAR_PUNCTUATOR[ch1.charCodeAt(0)]). Each slot holds the token type of the
// single-character punctuator with that code, or I (ILLEGAL) when the character
// is not a one-character punctuator. The table is positional: do not add or
// remove entries without re-checking every index.
const ONE_CHAR_PUNCTUATOR = [
I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, TT.NOT, I, I, I,
TT.MOD, TT.BIT_AND, I, TT.LPAREN, TT.RPAREN, TT.MUL, TT.ADD, TT.COMMA, TT.SUB, TT.PERIOD, TT.DIV, I, I, I, I, I, I, I,
I, I, I, TT.COLON, TT.SEMICOLON, TT.LT, TT.ASSIGN, TT.GT, TT.CONDITIONAL, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,
I, I, I, I, I, I, I, I, I, I, I, I, TT.LBRACK, I, TT.RBRACK, TT.BIT_XOR, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,
I, I, I, I, I, I, I, I, I, I, I, I, I, TT.LBRACE, TT.BIT_OR, TT.RBRACE, TT.BIT_NOT,
];
// Positional table of booleans (T = true, F = false).
// NOTE(review): presumably indexed by character code, like ONE_CHAR_PUNCTUATOR,
// and used to decide whether a character can begin a punctuator; the code that
// reads it is outside this view, so confirm before relying on that.
const PUNCTUATOR_START = [
F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, T, F, F, F, T, T,
F, T, T, T, T, T, T, F, T, F, F, F, F, F, F, F, F, F, F, T, T, T, T, T, T, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
F, F, F, F, F, F, F, F, F, F, F, F, F, T, F, T, T, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
F, F, F, F, F, F, T, T, T, T, F,
];
/**
 * Error carrying a source position. `message` is rewritten to
 * "[line:column]: msg"; the unprefixed text stays available as `description`.
 */
class JsError extends Error {
  /**
   * @param {number} index  offset stored as `index`
   * @param {number} line   line stored as `line` / `parseErrorLine`
   * @param {number} column column stored as `column` / `parseErrorColumn`
   * @param {string} msg    human-readable description
   */
  constructor(index, line, column, msg) {
    super(msg);
    this.index = index;
    // Safari defines `line` and `column` as non-writable, non-configurable
    // properties on Error objects, so these writes may throw there.
    try {
      Object.assign(this, { line, column });
    } catch (e) {}
    // Mirror the position under names Safari does not reserve.
    Object.assign(this, {
      parseErrorLine: line,
      parseErrorColumn: column,
      description: msg,
      message: `[${line}:${column}]: ${msg}`,
    });
  }
}
/**
 * Converts a Unicode code point to a string. Code points above U+FFFF are
 * encoded as a UTF-16 surrogate pair.
 *
 * @param {number} cp code point
 * @returns {string} one or two UTF-16 code units
 */
function fromCodePoint(cp) {
  if (cp > 0xFFFF) {
    const offset = cp - 0x10000;
    const lead = Math.floor(offset / 0x400) + 0xD800;
    const trail = offset % 0x400 + 0xDC00;
    return String.fromCharCode(lead, trail);
  }
  return String.fromCharCode(cp);
}
/**
 * Combines a UTF-16 surrogate pair into the code point it encodes.
 *
 * @param {number} lead  high surrogate code unit (0xD800-based)
 * @param {number} trail low surrogate code unit (0xDC00-based)
 * @returns {number} the combined code point
 */
function decodeUtf16(lead, trail) {
  const highBits = (lead - 0xD800) * 0x400;
  const lowBits = trail - 0xDC00;
  return highBits + lowBits + 0x10000;
}
class Tokenizer {
constructor(source) {
this.source = source;
this.index = 0;
this.line = 0;
this.lineStart = 0;
this.startIndex = 0;
this.startLine = 0;
this.startLineStart = 0;
this.lastIndex = 0;
this.lastLine = 0;
this.lastLineStart = 0;
this.hasLineTerminatorBeforeNext = false;
this.tokenIndex = 0;
}
saveLexerState() {
return {
source: this.source,
index: this.index,
line: this.line,
lineStart: this.lineStart,
startIndex: this.startIndex,
startLine: this.startLine,
startLineStart: this.startLineStart,
lastIndex: this.lastIndex,
lastLine: this.lastLine,
lastLineStart: this.lastLineStart,
lookahead: this.lookahead,
hasLineTerminatorBeforeNext: this.hasLineTerminatorBeforeNext,
tokenIndex: this.tokenIndex,
};
}
restoreLexerState(state) {
this.source = state.source;
this.index = state.index;
this.line = state.line;
this.lineStart = state.lineStart;
this.startIndex = state.startIndex;
this.startLine = state.startLine;
this.startLineStart = state.startLineStart;
this.lastIndex = state.lastIndex;
this.lastLine = state.lastLine;
this.lastLineStart = state.lastLineStart;
this.lookahead = state.lookahead;
this.hasLineTerminatorBeforeNext = state.hasLineTerminatorBeforeNext;
this.tokenIndex = state.tokenIndex;
}
createILLEGAL() {
this.startIndex = this.index;
this.startLine = this.line;
this.startLineStart = this.lineStart;
return this.index < this.source.length
? this.createError(ErrorMessages.UNEXPECTED_ILLEGAL_TOKEN, this.source.charAt(this.index))
: this.createError(ErrorMessages.UNEXPECTED_EOS);
}
createUnexpected(token) {
switch (token.type.klass) {
case TokenClass.Eof:
return this.createError(ErrorMessages.UNEXPECTED_EOS);
case TokenClass.Ident:
return this.createError(ErrorMessages.UNEXPECTED_IDENTIFIER);
case TokenClass.Keyword:
if (token.type === TokenType.ESCAPED_KEYWORD) {
return this.createError(ErrorMessages.UNEXPECTED_ESCAPED_KEYWORD);
}
return this.createError(ErrorMessages.UNEXPECTED_TOKEN, token.slice.text);
case TokenClass.NumericLiteral:
return this.createError(ErrorMessages.UNEXPECTED_NUMBER);
case TokenClass.TemplateElement:
return this.createError(ErrorMessages.UNEXPECTED_TEMPLATE);
case TokenClass.Punctuator:
return this.createError(ErrorMessages.UNEXPECTED_TOKEN, token.type.name);
case TokenClass.StringLiteral:
return this.createError(ErrorMessages.UNEXPECTED_STRING);
// the other token classes are RegularExpression and Illegal, but they cannot reach here
}
// istanbul ignore next
throw new Error('Unreachable: unexpected token of class ' + token.type.klass);
}
createError(message, ...params) {
let msg;
if (typeof message === 'function') {
msg = message(...params);
} else {
msg = message;
}
return new JsError(this.startIndex, this.startLine + 1, this.startIndex - this.startLineStart + 1, msg);
}
createErrorWithLocation(location, message) {
/* istanbul ignore next */
let msg = message.replace(/\{(\d+)\}/g, (_, n) => JSON.stringify(arguments[+n + 2]));
if (location.slice && location.slice.startLocation) {
location = location.slice.startLocation;
}
return new JsError(location.offset, location.line, location.column + 1, msg);
}
static cse2(id, ch1, ch2) {
return id.charAt(1) === ch1 && id.charAt(2) === ch2;
}
static cse3(id, ch1, ch2, ch3) {
return id.charAt(1) === ch1 && id.charAt(2) === ch2 && id.charAt(3) === ch3;
}
static cse4(id, ch1, ch2, ch3, ch4) {
return id.charAt(1) === ch1 && id.charAt(2) === ch2 && id.charAt(3) === ch3 && id.charAt(4) === ch4;
}
static cse5(id, ch1, ch2, ch3, ch4, ch5) {
return id.charAt(1) === ch1 && id.charAt(2) === ch2 && id.charAt(3) === ch3 && id.charAt(4) === ch4 && id.charAt(5) === ch5;
}
static cse6(id, ch1, ch2, ch3, ch4, ch5, ch6) {
return id.charAt(1) === ch1 && id.charAt(2) === ch2 && id.charAt(3) === ch3 && id.charAt(4) === ch4 && id.charAt(5) === ch5 && id.charAt(6) === ch6;
}
static cse7(id, ch1, ch2, ch3, ch4, ch5, ch6, ch7) {
return id.charAt(1) === ch1 && id.charAt(2) === ch2 && id.charAt(3) === ch3 && id.charAt(4) === ch4 && id.charAt(5) === ch5 && id.charAt(6) === ch6 && id.charAt(7) === ch7;
}
getKeyword(id) {
if (id.length === 1 || id.length > 10) {
return TokenType.IDENTIFIER;
}
/* istanbul ignore next */
switch (id.length) {
case 2:
switch (id.charAt(0)) {
case 'i':
switch (id.charAt(1)) {
case 'f':
return TokenType.IF;
case 'n':
return TokenType.IN;
default:
break;
}
break;
case 'd':
if (id.charAt(1) === 'o') {
return TokenType.DO;
}
break;
}
break;
case 3:
switch (id.charAt(0)) {
case 'v':
if (Tokenizer.cse2(id, 'a', 'r')) {
return TokenType.VAR;
}
break;
case 'f':
if (Tokenizer.cse2(id, 'o', 'r')) {
return TokenType.FOR;
}
break;
case 'n':
if (Tokenizer.cse2(id, 'e', 'w')) {
return TokenType.NEW;
}
break;
case 't':
if (Tokenizer.cse2(id, 'r', 'y')) {
return TokenType.TRY;
}
break;
case 'l':
if (Tokenizer.cse2(id, 'e', 't')) {
return TokenType.LET;
}
break;
}
break;
case 4:
switch (id.charAt(0)) {
case 't':
if (Tokenizer.cse3(id, 'h', 'i', 's')) {
return TokenType.THIS;
} else if (Tokenizer.cse3(id, 'r', 'u', 'e')) {
return TokenType.TRUE;
}
break;
case 'n':
if (Tokenizer.cse3(id, 'u', 'l', 'l')) {
return TokenType.NULL;
}
break;
case 'e':
if (Tokenizer.cse3(id, 'l', 's', 'e')) {
return TokenType.ELSE;
} else if (Tokenizer.cse3(id, 'n', 'u', 'm')) {
return TokenType.ENUM;
}
break;
case 'c':
if (Tokenizer.cse3(id, 'a', 's', 'e')) {
return TokenType.CASE;
}
break;
case 'v':
if (Tokenizer.cse3(id, 'o', 'i', 'd')) {
return TokenType.VOID;
}
break;
case 'w':
if (Tokenizer.cse3(id, 'i', 't', 'h')) {
return TokenType.WITH;
}
break;
}
break;
case 5:
switch (id.charAt(0)) {
case 'a':
if (Tokenizer.cse4(id, 's', 'y', 'n', 'c')) {
return TokenType.ASYNC;
}
if (Tokenizer.cse4(id, 'w', 'a', 'i', 't')) {
return TokenType.AWAIT;
}
break;
case 'w':
if (Tokenizer.cse4(id, 'h', 'i', 'l', 'e')) {
return TokenType.WHILE;
}
break;
case 'b':
if (Tokenizer.cse4(id, 'r', 'e', 'a', 'k')) {
return TokenType.BREAK;
}
break;
case 'f':
if (Tokenizer.cse4(id, 'a', 'l', 's', 'e')) {
return TokenType.FALSE;
}
break;
case 'c':
if (Tokenizer.cse4(id, 'a', 't', 'c', 'h')) {
return TokenType.CATCH;
} else if (Tokenizer.cse4(id, 'o', 'n', 's', 't')) {
return TokenType.CONST;
} else if (Tokenizer.cse4(id, 'l', 'a', 's', 's')) {
return TokenType.CLASS;
}
break;
case 't':
if (Tokenizer.cse4(id, 'h', 'r', 'o', 'w')) {
return TokenType.THROW;
}
break;
case 'y':
if (Tokenizer.cse4(id, 'i', 'e', 'l', 'd')) {
return TokenType.YIELD;
}
break;
case 's':
if (Tokenizer.cse4(id, 'u', 'p', 'e', 'r')) {
return TokenType.SUPER;
}
break;
}
break;
case 6:
switch (id.charAt(0)) {
case 'r':
if (Tokenizer.cse5(id, 'e', 't', 'u', 'r', 'n')) {
return TokenType.RETURN;
}
break;
case 't':
if (Tokenizer.cse5(id, 'y', 'p', 'e', 'o', 'f')) {
return TokenType.TYPEOF;
}
break;
case 'd':
if (Tokenizer.cse5(id, 'e', 'l', 'e', 't', 'e')) {
return TokenType.DELETE;
}
break;
case 's':
if (Tokenizer.cse5(id, 'w', 'i', 't', 'c', 'h')) {
return TokenType.SWITCH;
}
break;
case 'e':
if (Tokenizer.cse5(id, 'x', 'p', 'o', 'r', 't')) {
return TokenType.EXPORT;
}
break;
case 'i':
if (Tokenizer.cse5(id, 'm', 'p', 'o', 'r', 't')) {
return TokenType.IMPORT;
}
break;
}
break;
case 7:
switch (id.charAt(0)) {
case 'd':
if (Tokenizer.cse6(id, 'e', 'f', 'a', 'u', 'l', 't')) {
return TokenType.DEFAULT;
}
break;
case 'f':
if (Tokenizer.cse6(id, 'i', 'n', 'a', 'l', 'l', 'y')) {
return TokenType.FINALLY;
}
break;
case 'e':
if (Tokenizer.cse6(id, 'x', 't', 'e', 'n', 'd', 's')) {
return TokenType.EXTENDS;
}
break;
}
break;
case 8:
switch (id.charAt(0)) {
case 'f':
if (Tokenizer.cse7(id, 'u', 'n', 'c', 't', 'i', 'o', 'n')) {
return TokenType.FUNCTION;
}
break;
case 'c':
if (Tokenizer.cse7(id, 'o', 'n', 't', 'i', 'n', 'u', 'e')) {
return TokenType.CONTINUE;
}
break;
case 'd':
if (Tokenizer.cse7(id, 'e', 'b', 'u', 'g', 'g', 'e', 'r')) {
return TokenType.DEBUGGER;
}
break;
}
break;
case 10:
if (id === 'instanceof') {
return TokenType.INSTANCEOF;
}
break;
}
return TokenType.IDENTIFIER;
}
skipSingleLineComment(offset) {
this.index += offset;
while (this.index < this.source.length) {
/**
* @type {Number}
*/
let chCode = this.source.charCodeAt(this.index);
this.index++;
if (isLineTerminator(chCode)) {
this.hasLineTerminatorBeforeNext = true;
if (chCode === 0xD /* "\r" */ && this.source.charCodeAt(this.index) === 0xA /* "\n" */) {
this.index++;
}
this.lineStart = this.index;
this.line++;
return;
}
}
}
skipMultiLineComment() {
this.index += 2;
const length = this.source.length;
let isLineStart = false;
while (this.index < length) {
let chCode = this.source.charCodeAt(this.index);
if (chCode < 0x80) {
switch (chCode) {
case 42: // "*"
// Block comment ends with "*/".
if (this.source.charAt(this.index + 1) === '/') {
this.index = this.index + 2;
return isLineStart;
}
this.index++;
break;
case 10: // "\n"
isLineStart = true;
this.hasLineTerminatorBeforeNext = true;
this.index++;
this.lineStart = this.index;
this.line++;
break;
case 13: // "\r":
isLineStart = true;
this.hasLineTerminatorBeforeNext = true;
if (this.source.charAt(this.index + 1) === '\n') {
this.index++;
}
this.index++;
this.lineStart = this.index;
this.line++;
break;
default:
this.index++;
}
} else if (chCode === 0x2028 || chCode === 0x2029) {
isLineStart = true;
this.hasLineTerminatorBeforeNext = true;
this.index++;
this.lineStart = this.index;
this.line++;
} else {
this.index++;
}
}
throw this.createILLEGAL();
}
skipComment() {
this.hasLineTerminatorBeforeNext = false;
let isLineStart = this.index === 0;
const length = this.source.length;
while (this.index < length) {
let chCode = this.source.charCodeAt(this.index);
if (isWhiteSpace(chCode)) {
this.index++;
} else if (isLineTerminator(chCode)) {
this.hasLineTerminatorBeforeNext = true;
this.index++;
if (chCode === 13 /* "\r" */ && this.source.charAt(this.index) === '\n') {
this.index++;
}
this.lineStart = this.index;
this.line++;
isLineStart = true;
} else if (chCode === 47 /* "/" */) {
if (this.index + 1 >= length) {
break;
}
chCode = this.source.charCodeAt(this.index + 1);
if (chCode === 47 /* "/" */) {
this.skipSingleLineComment(2);
isLineStart = true;
} else if (chCode === 42 /* "*" */) {
isLineStart = this.skipMultiLineComment() || isLineStart;
} else {
break;
}
} else if (!this.moduleIsTheGoalSymbol && isLineStart && chCode === 45 /* "-" */) {
if (this.index + 2 >= length) {
break;
}
// U+003E is ">"
if (this.source.charAt(this.index + 1) === '-' && this.source.charAt(this.index + 2) === '>') {
// "-->" is a single-line comment
this.skipSingleLineComment(3);
} else {
break;
}
} else if (!this.moduleIsTheGoalSymbol && chCode === 60 /* "<" */) {
if (this.source.slice(this.index + 1, this.index + 4) === '!--') {
this.skipSingleLineComment(4);
isLineStart = true;
} else {
break;
}
} else {
break;
}
}
}
scanHexEscape2() {
if (this.index + 2 > this.source.length) {
return -1;
}
let r1 = getHexValue(this.source.charAt(this.index));
if (r1 === -1) {
return -1;
}
let r2 = getHexValue(this.source.charAt(this.index + 1));
if (r2 === -1) {
return -1;
}
this.index += 2;
return r1 << 4 | r2;
}
scanUnicode() {
if (this.source.charAt(this.index) === '{') {
// \u{HexDigits}
let i = this.index + 1;
let hexDigits = 0, ch;
while (i < this.source.length) {
ch = this.source.charAt(i);
let hex = getHexValue(ch);
if (hex === -1) {
break;
}
hexDigits = hexDigits << 4 | hex;
if (hexDigits > 0x10FFFF) {
throw this.createILLEGAL();
}
i++;
}
if (ch !== '}') {
throw this.createILLEGAL();
}
if (i === this.index + 1) {
++this.index; // This is so that the error is 'Unexpected "}"' instead of 'Unexpected "{"'.
throw this.createILLEGAL();
}
this.index = i + 1;
return hexDigits;
}
// \uHex4Digits
if (this.index + 4 > this.source.length) {
return -1;
}
let r1 = getHexValue(this.source.charAt(this.index));
if (r1 === -1) {
return -1;
}
let r2 = getHexValue(this.source.charAt(this.index + 1));
if (r2 === -1) {
return -1;
}
let r3 = getHexValue(this.source.charAt(this.index + 2));
if (r3 === -1) {
return -1;
}
let r4 = getHexValue(this.source.charAt(this.index + 3));
if (r4 === -1) {
return -1;
}
this.index += 4;
return r1 << 12 | r2 << 8 | r3 << 4 | r4;
}
getEscapedIdentifier() {
let id = '';
let check = isIdentifierStart;
while (this.index < this.source.length) {
let ch = this.source.charAt(this.index);
let code = ch.charCodeAt(0);
let start = this.index;
++this.index;
if (ch === '\\') {
if (this.index >= this.source.length) {
throw this.createILLEGAL();
}
if (this.source.charAt(this.index) !== 'u') {
throw this.createILLEGAL();
}
++this.index;
code = this.scanUnicode();
if (code < 0) {
throw this.createILLEGAL();
}
ch = fromCodePoint(code);
} else if (code >= 0xD800 && code <= 0xDBFF) {
if (this.index >= this.source.length) {
throw this.createILLEGAL();
}
let lowSurrogateCode = this.source.charCodeAt(this.index);
++this.index;
if (!(lowSurrogateCode >= 0xDC00 && lowSurrogateCode <= 0xDFFF)) {
throw this.createILLEGAL();
}
code = decodeUtf16(code, lowSurrogateCode);
ch = fromCodePoint(code);
}
if (!check(code)) {
if (id.length < 1) {
throw this.createILLEGAL();
}
this.index = start;
return id;
}
check = isIdentifierPart;
id += ch;
}
return id;
}
getIdentifier() {
let start = this.index;
let l = this.source.length;
let i = this.index;
let check = isIdentifierStart;
while (i < l) {
let ch = this.source.charAt(i);
let code = ch.charCodeAt(0);
if (ch === '\\' || code >= 0xD800 && code <= 0xDBFF) {
// Go back and try the hard one.
this.index = start;
return this.getEscapedIdentifier();
}
if (!check(code)) {
this.index = i;
return this.source.slice(start, i);
}
++i;
check = isIdentifierPart;
}
this.index = i;
return this.source.slice(start, i);
}
scanIdentifier() {
let startLocation = this.getLocation();
let start = this.index;
// Backslash (U+005C) starts an escaped character.
let id = this.source.charAt(this.index) === '\\' ? this.getEscapedIdentifier() : this.getIdentifier();
let slice = this.getSlice(start, startLocation);
slice.text = id;
let hasEscape = this.index - start !== id.length;
let type = this.getKeyword(id);
if (hasEscape && type !== TokenType.IDENTIFIER) {
type = TokenType.ESCAPED_KEYWORD;
}
return { type, value: id, slice, escaped: hasEscape };
}
getLocation() {
return {
line: this.startLine + 1,
column: this.startIndex - this.startLineStart,
offset: this.startIndex,
};
}
getLastTokenEndLocation() {
return {
line: this.lastLine + 1,
column: this.lastIndex - this.lastLineStart,
offset: this.lastIndex,
};
}
getSlice(start, startLocation) {
return { text: this.source.slice(start, this.index), start, startLocation, end: this.index };
}
/**
 * Determines which punctuator starts at the scan position, preferring the
 * longest match. Does not advance the scan position; scanPunctuator() does
 * that using the returned type's `name` length.
 *
 * @returns {Object} a TokenType entry; characters that are not punctuators
 *   yield whatever ONE_CHAR_PUNCTUATOR holds for their character code
 */
scanPunctuatorHelper() {
let ch1 = this.source.charAt(this.index);
switch (ch1) {
// Check for most common single-character punctuators.
case '.': {
// Only three consecutive dots form "..."; one or two dots yield a single PERIOD.
let ch2 = this.source.charAt(this.index + 1);
if (ch2 !== '.') return TokenType.PERIOD;
let ch3 = this.source.charAt(this.index + 2);
if (ch3 !== '.') return TokenType.PERIOD;
return TokenType.ELLIPSIS;
}
case '(':
return TokenType.LPAREN;
case ')':
case ';':
case ',':
return ONE_CHAR_PUNCTUATOR[ch1.charCodeAt(0)];
case '{':
return TokenType.LBRACE;
case '}':
case '[':
case ']':
case ':':
case '?':
case '~':
return ONE_CHAR_PUNCTUATOR[ch1.charCodeAt(0)];
default:
// "=" (U+003D) marks an assignment or comparison operator.
if (this.index + 1 < this.source.length && this.source.charAt(this.index + 1) === '=') {
switch (ch1) {
case '=':
if (this.index + 2 < this.source.length && this.source.charAt(this.index + 2) === '=') {
return TokenType.EQ_STRICT;
}
return TokenType.EQ;
case '!':
if (this.index + 2 < this.source.length && this.source.charAt(this.index + 2) === '=') {
return TokenType.NE_STRICT;
}
return TokenType.NE;
case '|':
return TokenType.ASSIGN_BIT_OR;
case '+':
return TokenType.ASSIGN_ADD;
case '-':
return TokenType.ASSIGN_SUB;
case '*':
return TokenType.ASSIGN_MUL;
case '<':
return TokenType.LTE;
case '>':
return TokenType.GTE;
case '/':
return TokenType.ASSIGN_DIV;
case '%':
return TokenType.ASSIGN_MOD;
case '^':
return TokenType.ASSIGN_BIT_XOR;
case '&':
return TokenType.ASSIGN_BIT_AND;
// istanbul ignore next
default:
break; // failed
}
}
}
// Not "<op>=": look for doubled characters (and their longer forms) or "=>".
if (this.index + 1 < this.source.length) {
let ch2 = this.source.charAt(this.index + 1);
if (ch1 === ch2) {
if (this.index + 2 < this.source.length) {
let ch3 = this.source.charAt(this.index + 2);
if (ch1 === '>' && ch3 === '>') {
// 4-character punctuator: >>>=
if (this.index + 3 < this.source.length && this.source.charAt(this.index + 3) === '=') {
return TokenType.ASSIGN_SHR_UNSIGNED;
}
return TokenType.SHR_UNSIGNED;
}
// 3-character punctuators: <<= >>= **=
if (ch1 === '<' && ch3 === '=') {
return TokenType.ASSIGN_SHL;
}
if (ch1 === '>' && ch3 === '=') {
return TokenType.ASSIGN_SHR;
}
if (ch1 === '*' && ch3 === '=') {
return TokenType.ASSIGN_EXP;
}
}
// Other 2-character punctuators: ++ -- << >> && ||
switch (ch1) {
case '*':
return TokenType.EXP;
case '+':
return TokenType.INC;
case '-':
return TokenType.DEC;
case '<':
return TokenType.SHL;
case '>':
return TokenType.SHR;
case '&':
return TokenType.AND;
case '|':
return TokenType.OR;
// istanbul ignore next
default:
break; // failed
}
} else if (ch1 === '=' && ch2 === '>') {
return TokenType.ARROW;
}
}
// Fall back to the single-character table.
return ONE_CHAR_PUNCTUATOR[ch1.charCodeAt(0)];
}
// 7.7 Punctuators
scanPunctuator() {
let startLocation = this.getLocation();
let start = this.index;
let subType = this.scanPunctuatorHelper();
this.index += subType.name.length;
return { type: subType, value: subType.name, slice: this.getSlice(start, startLocation) };
}
scanHexLiteral(start, startLocation) {
let i = this.index;
while (i < this.source.length) {
let ch = this.source.charAt(i);
let hex = getHexValue(ch);
if (hex === -1) {
break;
}
i++;
}
if (this.index === i) {
throw this.createILLEGAL();
}
if (i < this.source.length && isIdentifierStart(this.source.charCodeAt(i))) {
throw this.createILLEGAL();
}
this.index = i;
let slice = this.getSlice(start, startLocation);
return { type: TokenType.NUMBER, value: parseInt(slice.text.substr(2), 16), slice };
}
scanBinaryLiteral(start, startLocation) {
let offset = this.index - start;
while (this.index < this.source.length) {
let ch = this.source.charAt(this.index);
if (ch !== '0' && ch !== '1') {
break;
}
this.index++;
}
if (this.index - start <= offset) {
throw this.createILLEGAL();
}
if (this.index < this.source.length && (isIdentifierStart(this.source.charCodeAt(this.index))
|| isDecimalDigit(this.source.charCodeAt(this.index)))) {
throw this.createILLEGAL();
}
return {
type: TokenType.NUMBER,
value: parseInt(this.getSlice(start, startLocation).text.substr(offset), 2),
slice: this.getSlice(start, startLocation),
octal: false,
noctal: false,
};
}
scanOctalLiteral(start, startLocation) {
while (this.index < this.source.length) {
let ch = this.source.charAt(this.index);
if (ch >= '0' && ch <= '7') {
this.index++;
} else if (isIdentifierPart(ch.charCodeAt(0))) {
throw this.createILLEGAL();
} else {
break;
}
}
if (this.index - start === 2) {
throw this.createILLEGAL();
}
return {
type: TokenType.NUMBER,
value: parseInt(this.getSlice(start, startLocation).text.substr(2), 8),
slice: this.getSlice(start, startLocation),
octal: false,
noctal: false,
};
}
scanLegacyOctalLiteral(start, startLocation) {
let isOctal = true;
while (this.index < this.source.length) {
let ch = this.source.charAt(this.index);
if (ch >= '0' && ch <= '7') {
this.index++;
} else if (ch === '8' || ch === '9') {
isOctal = false;
this.index++;
} else if (isIdentifierPart(ch.charCodeAt(0))) {
throw this.createILLEGAL();
} else {
break;
}
}
let slice = this.getSlice(start, startLocation);
if (!isOctal) {
this.eatDecimalLiteralSuffix();
return {
type: TokenType.NUMBER,
slice,
value: +slice.text,
octal: true,
noctal: !isOctal,
};
}
return {
type: TokenType.NUMBER,
slice,
value: parseInt(slice.text.substr(1), 8),
octal: true,
noctal: !isOctal,
};
}
scanNumericLiteral() {
let ch = this.source.charAt(this.index);
// assert(ch === "." || "0" <= ch && ch <= "9")
let startLocation = this.getLocation();
let start = this.index;
if (ch === '0') {
this.index++;
if (this.index < this.source.length) {
ch = this.source.charAt(this.index);
if (ch === 'x' || ch === 'X') {
this.index++;
return this.scanHexLiteral(start, startLocation);
} else if (ch === 'b' || ch === 'B') {
this.index++;
return this.scanBinaryLiteral(start, startLocation);
} else if (ch === 'o' || ch === 'O') {
this.index++;
return this.scanOctalLiteral(start, startLocation);
} else if (ch >= '0' && ch <= '9') {
return this.scanLegacyOctalLiteral(start, startLocation);
}
} else {
let slice = this.getSlice(start, startLocation);
return {
type: TokenType.NUMBER,
value: +slice.text,
slice,
octal: false,
noctal: false,
};
}
} else if (ch !== '.') {
// Must be "1".."9"
ch = this.source.charAt(this.index);
while (ch >= '0' && ch <= '9') {
this.index++;
if (this.index === this.source.length) {
let slice = this.getSlice(start, startLocation);
return {
type: TokenType.NUMBER,
value: +slice.text,
slice,
octal: false,
noctal: false,
};
}
ch = this.source.charAt(this.index);
}
}
this.eatDecimalLiteralSuffix();
if (this.index !== this.source.length && isIdentifierStart(this.source.charCodeAt(this.index))) {
throw this.createILLEGAL();
}
let slice = this.getSlice(start, startLocation);
return {
type: TokenType.NUMBER,
value: +slice.text,
slice,
octal: false,
noctal: false,
};
}
eatDecimalLiteralSuffix() {
let ch = this.source.charAt(this.index);
if (ch === '.') {
this.index++;
if (this.index === this.source.length) {
return;
}
ch = this.source.charAt(this.index);
while (ch >= '0' && ch <= '9') {
this.index++;
if (this.index === this.source.length) {
return;
}
ch = this.source.charAt(this.index);
}
}
// EOF not reached here
if (ch === 'e' || ch === 'E') {
this.index++;
if (this.index === this.source.length) {
throw this.createILLEGAL();
}
ch = this.source.charAt(this.index);
if (ch === '+' || ch === '-') {
this.index++;
if (this.index === this.source.length) {
throw this.createILLEGAL();
}
ch = this.source.charAt(this.index);
}
if (ch >= '0' && ch <= '9') {
while (ch >= '0' && ch <= '9') {
this.index++;
if (this.index === this.source.length) {
break;
}
ch = this.source.charAt(this.index);
}
} else {
throw this.createILLEGAL();
}
}
}
/**
 * Consumes one backslash escape. The scan position must be at the backslash;
 * on return it is just past the escape.
 *
 * @param {string} str text accumulated so far; the decoded character(s) are
 *   appended to it (a line continuation appends nothing)
 * @param {?string} octal previously recorded octal-escape text, passed through
 *   unchanged unless this escape is itself octal-like
 * @returns {Array} `[str, octal]` — the extended text and, if this escape was
 *   an octal escape other than a lone "0" digit, its digits as they appear in the source
 * @throws {JsError} on end of input, a malformed "u"/"x" escape, or an escaped
 *   "8"/"9"
 */
scanStringEscape(str, octal) {
// Step over the backslash.
this.index++;
if (this.index === this.source.length) {
throw this.createILLEGAL();
}
let ch = this.source.charAt(this.index);
if (isLineTerminator(ch.charCodeAt(0))) {
// Line continuation: backslash + line break contributes no characters.
this.index++;
if (ch === '\r' && this.source.charAt(this.index) === '\n') {
this.index++;
}
this.lineStart = this.index;
this.line++;
} else {
switch (ch) {
case 'n':
str += '\n';
this.index++;
break;
case 'r':
str += '\r';
this.index++;
break;
case 't':
str += '\t';
this.index++;
break;
case 'u':
case 'x': {
let unescaped;
this.index++;
if (this.index >= this.source.length) {
throw this.createILLEGAL();
}
// Both scanners return a negative value for a malformed escape.
unescaped = ch === 'u' ? this.scanUnicode() : this.scanHexEscape2();
if (unescaped < 0) {
throw this.createILLEGAL();
}
str += fromCodePoint(unescaped);
break;
}
case 'b':
str += '\b';
this.index++;
break;
case 'f':
str += '\f';
this.index++;
break;
case 'v':
str += '\u000B';
this.index++;
break;
default:
if (ch >= '0' && ch <= '7') {
let octalStart = this.index;
// octLen counts digits consumed; starting at 1 for a leading 4-7 caps
// the escape at two digits, starting at 0 allows three.
let octLen = 1;
// 3 digits are only allowed when string starts
// with 0, 1, 2, 3
if (ch >= '0' && ch <= '3') {
octLen = 0;
}
let code = 0;
while (octLen < 3 && ch >= '0' && ch <= '7') {
this.index++;
// A lone first "0" is not recorded as an octal escape; anything else is.
if (octLen > 0 || ch !== '0') {
octal = this.source.slice(octalStart, this.index);
}
code *= 8;
// String subtraction coerces both digit characters to numbers.
code += ch - '0';
octLen++;
if (this.index === this.source.length) {
throw this.createILLEGAL();
}
ch = this.source.charAt(this.index);
}
// A single "0" digit directly followed by 8 or 9 is also recorded as octal.
if (code === 0 && octLen === 1 && (ch === '8' || ch === '9')) {
octal = this.source.slice(octalStart, this.index + 1);
}
str += String.fromCharCode(code);
} else if (ch === '8' || ch === '9') {
throw this.createILLEGAL();
} else {
// Any other escaped character stands for itself.
str += ch;
this.index++;
}
}
}
return [str, octal];
}
// 7.8.4 String Literals
scanStringLiteral() {
let str = '';
let quote = this.source.charAt(this.index);
// assert((quote === "\"" || quote === """), "String literal must starts with a quote")
let startLocation = this.getLocation();
let start = this.index;
this.index++;
let octal = null;
while (this.index < this.source.length) {
let ch = this.source.charAt(this.index);
if (ch === quote) {
this.index++;
return { type: TokenType.STRING, slice: this.getSlice(start, startLocation), str, octal };
} else if (ch === '\\') {
[str, octal] = this.scanStringEscape(str, octal);
} else if (isLineTerminator(ch.charCodeAt(0)) && ch !== 0x2028 && ch !== 0x2029) {
throw this.createILLEGAL();
} else {
str += ch;
this.index++;
}
}
throw this.createILLEGAL();
}
scanTemplateElement() {
let startLocation = this.getLocation();
let start = this.index;
this.index++;
while (this.index < this.source.length) {
let ch = this.source.charCodeAt(this.index);
switch (ch) {
case 0x60: { // `
this.index++;
return { type: TokenType.TEMPLATE, tail: true, slice: this.getSlice(start, startLocation) };
}
case 0x24: { // $
if (this.source.charCodeAt(this.index + 1) === 0x7B) { // {
this.index += 2;
return { type: TokenType.TEMPLATE, tail: false, slice: this.getSlice(start, startLocation) };
}
this.index++;
break;
}
case 0x5C: { // \\
let octal = this.scanStringEscape('', null)[1];
if (octal != null) {
throw this.createError(ErrorMessages.NO_OCTALS_IN_TEMPLATES);
}
break;
}
case 0x0D: { // \r
this.line++;
this.index++;
if (this.index < this.source.length && this.source.charAt(this.index) === '\n') {
this.index++;
}
this.lineStart = this.index;
break;
}
case 0x0A: // \r
case 0x2028:
case 0x2029: {
this.line++;
this.index++;
this.lineStart = this.index;
break;
}
default:
this.index++;
}
}
throw this.createILLEGAL();
}
scanRegExp(str) {
let startLocation = this.getLocation();
let start = this.index;
let terminated = false;
let classMarker = false;
while (this.index < this.source.length) {
let ch = this.source.charAt(this.index);
if (ch === '\\') {
str += ch;
this.index++;
ch = this.source.charAt(this.index);
// ECMA-262 7.8.5
if (isLineTerminator(ch.charCodeAt(0))) {
throw this.createError(ErrorMessages.UNTERMINATED_REGEXP);
}
str += ch;
this.index++;
} else if (isLineTerminator(ch.charCodeAt(0))) {
throw this.createError(ErrorMessages.UNTERMINATED_REGEXP);
} else {
if (classMarker) {
if (ch === ']') {
classMarker = false;
}
} else if (ch === '/') {
terminated = true;
str += ch;
this.index++;
break;
} else if (ch === '[') {
classMarker = true;
}
str += ch;
this.index++;
}
}
if (!terminated) {
throw this.createError(ErrorMessages.UNTERMINATED_REGEXP);
}
while (this.index < this.source.length) {
let ch = this.source.charAt(this.index);
if (ch === '\\') {
throw this.createError(ErrorMessages.INVALID_REGEXP_FLAGS);
}
if (!isIdentifierPart(ch.charCodeAt(0))) {
break;
}
this.index++;
str += ch;
}
return { type: TokenType.REGEXP, value: str, slice: this.getSlice(start, startLocation) };
}
advance() {
let startLocation = this.getLocation();
this.lastIndex = this.index;
this.lastLine = this.line;
this.lastLineStart = this.lineStart;
this.skipComment();
this.startIndex = this.index;
this.startLine = this.line;
this.startLineStart = this.lineStart;
if (this.lastIndex === 0) {
this.lastIndex = this.index;
this.lastLine = this.line;
this.lastLineStart = this.lineStart;
}
if (this.index >= this.source.length) {
return { type: TokenType.EOS, slice: this.getSlice(this.index, startLocation) };
}
let charCode = this.source.charCodeAt(this.index);
if (charCode < 0x80) {
if (PUNCTUATOR_START[charCode]) {
return this.scanPunctuator();
}
if (isIdentifierStart(charCode) || charCode === 0x5C /* backslash (\) */) {
return this.scanIdentifier();
}
// Dot (.) U+002E can also start a floating-point number, hence the need
// to check the next character.
if (charCode === 0x2E) {
if (this.index + 1 < this.source.length && isDecimalDigit(this.source.charCodeAt(this.index + 1))) {
return this.scanNumericLiteral();
}
return this.scanPunctuator();
}
// String literal starts with single quote (U+0027) or double quote (U+0022).
if (charCode === 0x27 || charCode === 0x22) {
return this.scanStringLiteral();
}
// Template literal starts with back quote (U+0060)
if (charCode === 0x60) {
return this.scanTemplateElement();
}
if (charCode /* "0" */ >= 0x30 && charCode <= 0x39 /* "9" */) {
return this.scanNumericLiteral();
}
// Slash (/) U+002F can also start a regex.
throw this.createILLEGAL();
} else {
if (isIdentifierStart(charCode) || charCode >= 0xD800 && charCode <= 0xDBFF) {
return this.scanIdentifier();
}
throw this.createILLEGAL();
}
}
eof() {
return this.lookahead.type === TokenType.EOS;
}
lex() {
let prevToken = this.lookahead;
this.lookahead = this.advance();
this.tokenIndex++;
return prevToken;
}
}
// Public exports of this module. `default` and `Tokenizer` both refer to the
// same Tokenizer class, so it can be reached under either property name.
module.exports = {
default: Tokenizer,
Tokenizer,
TokenClass,
TokenType,
JsError,
};