import { isIdentifierStart, isIdentifierChar } from "../util/identifier"; import { types as tt, keywords as keywordTypes } from "./types"; import { SourceLocation } from "../locutil"; import { lineBreak, lineBreakG, isNewLine, nonASCIIwhitespace } from "../util/whitespace"; // Object type used to represent tokens. Note that normally, tokens // simply exist as properties on the parser object. This is only // used for the onToken callback and the external tokenizer. export class Token { constructor(p) { this.type = p.type; this.value = p.value; this.start = p.start; this.end = p.end; this.loc = new SourceLocation(p.startLoc, p.endLoc); } } // ## Tokenizer // Are we running under Rhino? /* global Packages */ const isRhino = typeof Packages === "object" && Object.prototype.toString.call(Packages) === "[object JavaPackage]"; // Parse a regular expression. Some context-awareness is necessary, // since a '/' inside a '[]' set does not end the expression. function tryCreateRegexp(src, flags, throwErrorStart) { try { return new RegExp(src, flags); } catch (e) { if (throwErrorStart !== undefined) { if (e instanceof SyntaxError) this.raise(throwErrorStart, "Error parsing regular expression: " + e.message); this.raise(e); } } } var regexpUnicodeSupport = !!tryCreateRegexp("\uffff", "u"); function codePointToString(code) { // UTF-16 Decoding if (code <= 0xFFFF) return String.fromCharCode(code); return String.fromCharCode(((code - 0x10000) >> 10) + 0xD800, ((code - 0x10000) & 1023) + 0xDC00); } // Used to signal to callers of `readWord1` whether the word // contained any escape sequences. This is needed because words with // escape sequences must not be interpreted as keywords. var containsEsc; export default class Tokenizer { constructor() { // The current position of the tokenizer in the input. this.pos = this.lineStart = 0; this.curLine = 1; // Properties of the current token: // Its type this.type = tt.eof; // For tokens that include more information than their type, the value this.value = null; // Its start and end offset this.start = this.end = this.pos; // And, if locations are used, the {line, column} object // corresponding to those offsets this.startLoc = this.endLoc = this.curPosition(); // Position information for the previous token this.lastTokEndLoc = this.lastTokStartLoc = null; this.lastTokStart = this.lastTokEnd = this.pos; // The context stack is used to superficially track syntactic // context to predict whether a regular expression is allowed in a // given position. this.context = this.initialContext(); this.exprAllowed = true; } // Move to the next token next() { if (!this.isLookahead) { this.tokens.push(new Token(this)); } this.lastTokEnd = this.end; this.lastTokStart = this.start; this.lastTokEndLoc = this.endLoc; this.lastTokStartLoc = this.startLoc; this.nextToken(); }; getToken() { this.next(); return new Token(this); } // Toggle strict mode. Re-reads the next number or string to please // pedantic tests (`"use strict"; 010;` should fail). setStrict(strict) { this.strict = strict; if (this.type !== tt.num && this.type !== tt.string) return; this.pos = this.start; while (this.pos < this.lineStart) { this.lineStart = this.input.lastIndexOf("\n", this.lineStart - 2) + 1; --this.curLine; } this.nextToken(); } curContext() { return this.context[this.context.length - 1]; } // Read a single token, updating the parser object's token-related // properties. nextToken() { let curContext = this.curContext(); if (!curContext || !curContext.preserveSpace) this.skipSpace(); this.start = this.pos; this.startLoc = this.curPosition(); if (this.pos >= this.input.length) return this.finishToken(tt.eof); if (curContext.override) { return curContext.override(this); } else { return this.readToken(this.fullCharCodeAtPos()); } } readToken(code) { // Identifier or keyword. '\uXXXX' sequences are allowed in // identifiers, so '\' also dispatches to that. if (isIdentifierStart(code, true) || code === 92 /* '\' */) return this.readWord(); return this.getTokenFromCode(code); } fullCharCodeAtPos() { let code = this.input.charCodeAt(this.pos); if (code <= 0xd7ff || code >= 0xe000) return code; let next = this.input.charCodeAt(this.pos + 1); return (code << 10) + next - 0x35fdc00; } pushComment(block, text, start, end, startLoc, endLoc) { var comment = { type: block ? "CommentBlock" : "CommentLine", value: text, start: start, end: end, loc: new SourceLocation(startLoc, endLoc), range: [start, end] }; this.tokens.push(comment); this.comments.push(comment); this.addComment(comment); } skipBlockComment() { let startLoc = this.curPosition(); let start = this.pos, end = this.input.indexOf("*/", this.pos += 2); if (end === -1) this.raise(this.pos - 2, "Unterminated comment"); this.pos = end + 2; lineBreakG.lastIndex = start; let match; while ((match = lineBreakG.exec(this.input)) && match.index < this.pos) { ++this.curLine; this.lineStart = match.index + match[0].length; } this.pushComment(true, this.input.slice(start + 2, end), start, this.pos, startLoc, this.curPosition()); } skipLineComment(startSkip) { let start = this.pos; let startLoc = this.curPosition(); let ch = this.input.charCodeAt(this.pos += startSkip); while (this.pos < this.input.length && ch !== 10 && ch !== 13 && ch !== 8232 && ch !== 8233) { ++this.pos; ch = this.input.charCodeAt(this.pos); } this.pushComment(false, this.input.slice(start + startSkip, this.pos), start, this.pos, startLoc, this.curPosition()); } // Called at the start of the parse and after every token. Skips // whitespace and comments, and. skipSpace() { loop: while (this.pos < this.input.length) { let ch = this.input.charCodeAt(this.pos); switch (ch) { case 32: case 160: // ' ' ++this.pos; break; case 13: if (this.input.charCodeAt(this.pos + 1) === 10) { ++this.pos; } case 10: case 8232: case 8233: ++this.pos; ++this.curLine; this.lineStart = this.pos; break; case 47: // '/' switch (this.input.charCodeAt(this.pos + 1)) { case 42: // '*' this.skipBlockComment(); break; case 47: this.skipLineComment(2); break; default: break loop; } break; default: if (ch > 8 && ch < 14 || ch >= 5760 && nonASCIIwhitespace.test(String.fromCharCode(ch))) { ++this.pos; } else { break loop; } } } } // Called at the end of every token. Sets `end`, `val`, and // maintains `context` and `exprAllowed`, and skips the space after // the token, so that the next one's `start` will point at the // right position. finishToken(type, val) { this.end = this.pos; this.endLoc = this.curPosition(); let prevType = this.type; this.type = type; this.value = val; this.updateContext(prevType); } // ### Token reading // This is the function that is called to fetch the next token. It // is somewhat obscure, because it works in character codes rather // than characters, and because operator parsing has been inlined // into it. // // All in the name of speed. // readToken_dot() { let next = this.input.charCodeAt(this.pos + 1); if (next >= 48 && next <= 57) return this.readNumber(true); let next2 = this.input.charCodeAt(this.pos + 2); if (next === 46 && next2 === 46) { // 46 = dot '.' this.pos += 3; return this.finishToken(tt.ellipsis); } else { ++this.pos; return this.finishToken(tt.dot); } } readToken_slash() { // '/' let next = this.input.charCodeAt(this.pos + 1); if (this.exprAllowed) { ++this.pos; return this.readRegexp(); } if (next === 61) return this.finishOp(tt.assign, 2); return this.finishOp(tt.slash, 1); } readToken_mult_modulo(code) { // '%*' var type = code === 42 ? tt.star : tt.modulo; var width = 1; var next = this.input.charCodeAt(this.pos + 1); if (next === 42 && this.options.features["es7.exponentiationOperator"]) { // '*' width++; next = this.input.charCodeAt(this.pos + 2); type = tt.exponent; } if (next === 61) { width++; type = tt.assign; } return this.finishOp(type, width); } readToken_pipe_amp(code) { // '|&' let next = this.input.charCodeAt(this.pos + 1); if (next === code) return this.finishOp(code === 124 ? tt.logicalOR : tt.logicalAND, 2); if (next === 61) return this.finishOp(tt.assign, 2); return this.finishOp(code === 124 ? tt.bitwiseOR : tt.bitwiseAND, 1); } readToken_caret() { // '^' let next = this.input.charCodeAt(this.pos + 1); if (next === 61) { return this.finishOp(tt.assign, 2); } else { return this.finishOp(tt.bitwiseXOR, 1); } } readToken_plus_min(code) { // '+-' let next = this.input.charCodeAt(this.pos + 1); if (next === code) { if (next === 45 && this.input.charCodeAt(this.pos + 2) === 62 && lineBreak.test(this.input.slice(this.lastTokEnd, this.pos))) { // A `-->` line comment this.skipLineComment(3); this.skipSpace(); return this.nextToken(); } return this.finishOp(tt.incDec, 2); } if (next === 61) { return this.finishOp(tt.assign, 2); } else { return this.finishOp(tt.plusMin, 1); } } readToken_lt_gt(code) { // '<>' let next = this.input.charCodeAt(this.pos + 1); let size = 1; if (next === code) { size = code === 62 && this.input.charCodeAt(this.pos + 2) === 62 ? 3 : 2; if (this.input.charCodeAt(this.pos + size) === 61) return this.finishOp(tt.assign, size + 1); return this.finishOp(tt.bitShift, size); } if (next === 33 && code === 60 && this.input.charCodeAt(this.pos + 2) === 45 && this.input.charCodeAt(this.pos + 3) === 45) { if (this.inModule) this.unexpected(); // `