// @flow import type { Options } from "../options"; import type { Position } from "../util/location"; import * as charCodes from "charcodes"; import { isIdentifierStart, isIdentifierChar, isKeyword, } from "../util/identifier"; import { types as tt, keywords as keywordTypes, type TokenType } from "./types"; import { type TokContext, types as ct } from "./context"; import LocationParser from "../parser/location"; import { SourceLocation } from "../util/location"; import { lineBreak, lineBreakG, isNewLine, nonASCIIwhitespace, } from "../util/whitespace"; import State from "./state"; const VALID_REGEX_FLAGS = "gmsiyu"; // The following character codes are forbidden from being // an immediate sibling of NumericLiteralSeparator _ const forbiddenNumericSeparatorSiblings = { decBinOct: [ charCodes.dot, charCodes.uppercaseB, charCodes.uppercaseE, charCodes.uppercaseO, charCodes.underscore, // multiple separators are not allowed charCodes.lowercaseB, charCodes.lowercaseE, charCodes.lowercaseO, ], hex: [ charCodes.dot, charCodes.uppercaseX, charCodes.underscore, // multiple separators are not allowed charCodes.lowercaseX, ], }; const allowedNumericSeparatorSiblings = {}; allowedNumericSeparatorSiblings.bin = [ // 0 - 1 charCodes.digit0, charCodes.digit1, ]; allowedNumericSeparatorSiblings.oct = [ // 0 - 7 ...allowedNumericSeparatorSiblings.bin, charCodes.digit2, charCodes.digit3, charCodes.digit4, charCodes.digit5, charCodes.digit6, charCodes.digit7, ]; allowedNumericSeparatorSiblings.dec = [ // 0 - 9 ...allowedNumericSeparatorSiblings.oct, charCodes.digit8, charCodes.digit9, ]; allowedNumericSeparatorSiblings.hex = [ // 0 - 9, A - F, a - f, ...allowedNumericSeparatorSiblings.dec, charCodes.uppercaseA, charCodes.uppercaseB, charCodes.uppercaseC, charCodes.uppercaseD, charCodes.uppercaseE, charCodes.uppercaseF, charCodes.lowercaseA, charCodes.lowercaseB, charCodes.lowercaseC, charCodes.lowercaseD, charCodes.lowercaseE, charCodes.lowercaseF, ]; // Object type used to represent tokens. Note that normally, tokens // simply exist as properties on the parser object. This is only // used for the onToken callback and the external tokenizer. export class Token { constructor(state: State) { this.type = state.type; this.value = state.value; this.start = state.start; this.end = state.end; this.loc = new SourceLocation(state.startLoc, state.endLoc); } type: TokenType; value: any; start: number; end: number; loc: SourceLocation; } // ## Tokenizer function codePointToString(code: number): string { // UTF-16 Decoding if (code <= 0xffff) { return String.fromCharCode(code); } else { return String.fromCharCode( ((code - 0x10000) >> 10) + 0xd800, ((code - 0x10000) & 1023) + 0xdc00, ); } } export default class Tokenizer extends LocationParser { // Forward-declarations // parser/util.js +unexpected: (pos?: ?number, messageOrType?: string | TokenType) => empty; isLookahead: boolean; constructor(options: Options, input: string) { super(); this.state = new State(); this.state.init(options, input); this.isLookahead = false; } // Move to the next token next(): void { if (this.options.tokens && !this.isLookahead) { this.state.tokens.push(new Token(this.state)); } this.state.lastTokEnd = this.state.end; this.state.lastTokStart = this.state.start; this.state.lastTokEndLoc = this.state.endLoc; this.state.lastTokStartLoc = this.state.startLoc; this.nextToken(); } // TODO eat(type: TokenType): boolean { if (this.match(type)) { this.next(); return true; } else { return false; } } // TODO match(type: TokenType): boolean { return this.state.type === type; } // TODO isKeyword(word: string): boolean { return isKeyword(word); } // TODO lookahead(): State { const old = this.state; this.state = old.clone(true); this.isLookahead = true; this.next(); this.isLookahead = false; const curr = this.state; this.state = old; return curr; } // Toggle strict mode. Re-reads the next number or string to please // pedantic tests (`"use strict"; 010;` should fail). setStrict(strict: boolean): void { this.state.strict = strict; if (!this.match(tt.num) && !this.match(tt.string)) return; this.state.pos = this.state.start; while (this.state.pos < this.state.lineStart) { this.state.lineStart = this.input.lastIndexOf("\n", this.state.lineStart - 2) + 1; --this.state.curLine; } this.nextToken(); } curContext(): TokContext { return this.state.context[this.state.context.length - 1]; } // Read a single token, updating the parser object's token-related // properties. nextToken(): void { const curContext = this.curContext(); if (!curContext || !curContext.preserveSpace) this.skipSpace(); this.state.containsOctal = false; this.state.octalPosition = null; this.state.start = this.state.pos; this.state.startLoc = this.state.curPosition(); if (this.state.pos >= this.input.length) { this.finishToken(tt.eof); return; } if (curContext.override) { curContext.override(this); } else { this.readToken(this.fullCharCodeAtPos()); } } readToken(code: number): void { // Identifier or keyword. '\uXXXX' sequences are allowed in // identifiers, so '\' also dispatches to that. if (isIdentifierStart(code) || code === charCodes.backslash) { this.readWord(); } else { this.getTokenFromCode(code); } } fullCharCodeAtPos(): number { const code = this.input.charCodeAt(this.state.pos); if (code <= 0xd7ff || code >= 0xe000) return code; const next = this.input.charCodeAt(this.state.pos + 1); return (code << 10) + next - 0x35fdc00; } pushComment( block: boolean, text: string, start: number, end: number, startLoc: Position, endLoc: Position, ): void { const comment = { type: block ? "CommentBlock" : "CommentLine", value: text, start: start, end: end, loc: new SourceLocation(startLoc, endLoc), }; if (!this.isLookahead) { if (this.options.tokens) this.state.tokens.push(comment); this.state.comments.push(comment); this.addComment(comment); } } skipBlockComment(): void { const startLoc = this.state.curPosition(); const start = this.state.pos; const end = this.input.indexOf("*/", (this.state.pos += 2)); if (end === -1) this.raise(this.state.pos - 2, "Unterminated comment"); this.state.pos = end + 2; lineBreakG.lastIndex = start; let match; while ( (match = lineBreakG.exec(this.input)) && match.index < this.state.pos ) { ++this.state.curLine; this.state.lineStart = match.index + match[0].length; } this.pushComment( true, this.input.slice(start + 2, end), start, this.state.pos, startLoc, this.state.curPosition(), ); } skipLineComment(startSkip: number): void { const start = this.state.pos; const startLoc = this.state.curPosition(); let ch = this.input.charCodeAt((this.state.pos += startSkip)); if (this.state.pos < this.input.length) { while ( ch !== charCodes.lineFeed && ch !== charCodes.carriageReturn && ch !== charCodes.lineSeparator && ch !== charCodes.paragraphSeparator && ++this.state.pos < this.input.length ) { ch = this.input.charCodeAt(this.state.pos); } } this.pushComment( false, this.input.slice(start + startSkip, this.state.pos), start, this.state.pos, startLoc, this.state.curPosition(), ); } // Called at the start of the parse and after every token. Skips // whitespace and comments, and. skipSpace(): void { loop: while (this.state.pos < this.input.length) { const ch = this.input.charCodeAt(this.state.pos); switch (ch) { case charCodes.space: case charCodes.nonBreakingSpace: ++this.state.pos; break; case charCodes.carriageReturn: if ( this.input.charCodeAt(this.state.pos + 1) === charCodes.lineFeed ) { ++this.state.pos; } case charCodes.lineFeed: case charCodes.lineSeparator: case charCodes.paragraphSeparator: ++this.state.pos; ++this.state.curLine; this.state.lineStart = this.state.pos; break; case charCodes.slash: switch (this.input.charCodeAt(this.state.pos + 1)) { case charCodes.asterisk: this.skipBlockComment(); break; case charCodes.slash: this.skipLineComment(2); break; default: break loop; } break; default: if ( (ch > charCodes.backSpace && ch < charCodes.shiftOut) || (ch >= charCodes.oghamSpaceMark && nonASCIIwhitespace.test(String.fromCharCode(ch))) ) { ++this.state.pos; } else { break loop; } } } } // Called at the end of every token. Sets `end`, `val`, and // maintains `context` and `exprAllowed`, and skips the space after // the token, so that the next one's `start` will point at the // right position. finishToken(type: TokenType, val: any): void { this.state.end = this.state.pos; this.state.endLoc = this.state.curPosition(); const prevType = this.state.type; this.state.type = type; this.state.value = val; this.updateContext(prevType); } // ### Token reading // This is the function that is called to fetch the next token. It // is somewhat obscure, because it works in character codes rather // than characters, and because operator parsing has been inlined // into it. // // All in the name of speed. // readToken_numberSign(code: number): void { if (this.state.pos === 0 && this.readToken_interpreter()) { return; } const nextPos = this.state.pos + 1; const next = this.input.charCodeAt(nextPos); // if (isIdentifierStart(next)) { if ( (this.hasPlugin("classPrivateProperties") || this.hasPlugin("classPrivateMethods")) && this.state.classLevel > 0 ) { ++this.state.pos; this.finishToken(tt.hash); return; } else if ( this.getPluginOption("pipelineOperator", "proposal") === "smart" ) { if (next >= charCodes.digit0 && next <= charCodes.digit9) { this.raise( this.state.pos, `Unexpected digit after topic reference: '#${String.fromCodePoint( next, )}'`, ); } else { this.finishOp(tt.primaryTopicReference, 1); } } else { this.raise( this.state.pos, `Unexpected character '${codePointToString(code)}'`, ); } } readToken_dot(): void { const next = this.input.charCodeAt(this.state.pos + 1); if (next >= charCodes.digit0 && next <= charCodes.digit9) { this.readNumber(true); return; } const next2 = this.input.charCodeAt(this.state.pos + 2); if (next === charCodes.dot && next2 === charCodes.dot) { this.state.pos += 3; this.finishToken(tt.ellipsis); } else { ++this.state.pos; this.finishToken(tt.dot); } } readToken_slash(): void { // '/' if (this.state.exprAllowed) { ++this.state.pos; this.readRegexp(); return; } const next = this.input.charCodeAt(this.state.pos + 1); if (next === charCodes.equalsTo) { this.finishOp(tt.assign, 2); } else { this.finishOp(tt.slash, 1); } } readToken_interpreter(): boolean { if (this.state.pos !== 0 || this.state.input.length < 2) return false; const start = this.state.pos; this.state.pos += 1; let ch = this.input.charCodeAt(this.state.pos); if (ch !== charCodes.exclamationMark) return false; while ( ch !== charCodes.lineFeed && ch !== charCodes.carriageReturn && ch !== charCodes.lineSeparator && ch !== charCodes.paragraphSeparator && ++this.state.pos < this.input.length ) { ch = this.input.charCodeAt(this.state.pos); } const value = this.input.slice(start + 2, this.state.pos); this.finishToken(tt.interpreterDirective, value); return true; } readToken_mult_modulo(code: number): void { // '%*' let type = code === charCodes.asterisk ? tt.star : tt.modulo; let width = 1; let next = this.input.charCodeAt(this.state.pos + 1); const exprAllowed = this.state.exprAllowed; // Exponentiation operator ** if (code === charCodes.asterisk && next === charCodes.asterisk) { width++; next = this.input.charCodeAt(this.state.pos + 2); type = tt.exponent; } if (next === charCodes.equalsTo && !exprAllowed) { width++; type = tt.assign; } this.finishOp(type, width); } readToken_pipe_amp(code: number): void { // '|&' const next = this.input.charCodeAt(this.state.pos + 1); if (next === code) { if (this.input.charCodeAt(this.state.pos + 2) === charCodes.equalsTo) { this.finishOp(tt.assign, 3); } else { this.finishOp( code === charCodes.verticalBar ? tt.logicalOR : tt.logicalAND, 2, ); } return; } if (code === charCodes.verticalBar) { // '|>' if (next === charCodes.greaterThan) { this.finishOp(tt.pipeline, 2); return; } else if (next === charCodes.rightCurlyBrace && this.hasPlugin("flow")) { // '|}' this.finishOp(tt.braceBarR, 2); return; } } if (next === charCodes.equalsTo) { this.finishOp(tt.assign, 2); return; } this.finishOp( code === charCodes.verticalBar ? tt.bitwiseOR : tt.bitwiseAND, 1, ); } readToken_caret(): void { // '^' const next = this.input.charCodeAt(this.state.pos + 1); if (next === charCodes.equalsTo) { this.finishOp(tt.assign, 2); } else { this.finishOp(tt.bitwiseXOR, 1); } } readToken_plus_min(code: number): void { // '+-' const next = this.input.charCodeAt(this.state.pos + 1); if (next === code) { if ( next === charCodes.dash && !this.inModule && this.input.charCodeAt(this.state.pos + 2) === charCodes.greaterThan && lineBreak.test(this.input.slice(this.state.lastTokEnd, this.state.pos)) ) { // A `-->` line comment this.skipLineComment(3); this.skipSpace(); this.nextToken(); return; } this.finishOp(tt.incDec, 2); return; } if (next === charCodes.equalsTo) { this.finishOp(tt.assign, 2); } else { this.finishOp(tt.plusMin, 1); } } readToken_lt_gt(code: number): void { // '<>' const next = this.input.charCodeAt(this.state.pos + 1); let size = 1; if (next === code) { size = code === charCodes.greaterThan && this.input.charCodeAt(this.state.pos + 2) === charCodes.greaterThan ? 3 : 2; if (this.input.charCodeAt(this.state.pos + size) === charCodes.equalsTo) { this.finishOp(tt.assign, size + 1); return; } this.finishOp(tt.bitShift, size); return; } if ( next === charCodes.exclamationMark && code === charCodes.lessThan && !this.inModule && this.input.charCodeAt(this.state.pos + 2) === charCodes.dash && this.input.charCodeAt(this.state.pos + 3) === charCodes.dash ) { // `