perf: minor tokenizer tweaks (#13652)

This commit is contained in:
Mickey Rose 2021-08-09 21:20:44 +02:00 committed by GitHub
parent 8a09993e39
commit da1d166ea6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 34 additions and 21 deletions

View File

@ -201,11 +201,10 @@ export default class StatementParser extends ExpressionParser {
if (isIdentifierStart(nextCh)) { if (isIdentifierStart(nextCh)) {
keywordRelationalOperator.lastIndex = next; keywordRelationalOperator.lastIndex = next;
const matched = keywordRelationalOperator.exec(this.input); if (keywordRelationalOperator.test(this.input)) {
if (matched !== null) {
// We have seen `in` or `instanceof` so far, now check if the identifier // We have seen `in` or `instanceof` so far, now check if the identifier
// ends here // ends here
const endCh = this.codePointAtPos(next + matched[0].length); const endCh = this.codePointAtPos(keywordRelationalOperator.lastIndex);
if (!isIdentifierChar(endCh) && endCh !== charCodes.backslash) { if (!isIdentifierChar(endCh) && endCh !== charCodes.backslash) {
return false; return false;
} }

View File

@ -4,7 +4,7 @@ import { types as tt, TokenType } from "../tokenizer/types";
import Tokenizer from "../tokenizer"; import Tokenizer from "../tokenizer";
import State from "../tokenizer/state"; import State from "../tokenizer/state";
import type { Node } from "../types"; import type { Node } from "../types";
import { lineBreak } from "../util/whitespace"; import { lineBreak, skipWhiteSpaceToLineBreak } from "../util/whitespace";
import { isIdentifierChar } from "../util/identifier"; import { isIdentifierChar } from "../util/identifier";
import ClassScopeHandler from "../util/class-scope"; import ClassScopeHandler from "../util/class-scope";
import ExpressionScopeHandler from "../util/expression-scope"; import ExpressionScopeHandler from "../util/expression-scope";
@ -119,9 +119,8 @@ export default class UtilParser extends Tokenizer {
} }
hasFollowingLineBreak(): boolean { hasFollowingLineBreak(): boolean {
return lineBreak.test( skipWhiteSpaceToLineBreak.lastIndex = this.state.end;
this.input.slice(this.state.end, this.nextTokenStart()), return skipWhiteSpaceToLineBreak.test(this.input);
);
} }
// TODO // TODO

View File

@ -237,9 +237,7 @@ export default class Tokenizer extends ParserErrors {
nextTokenStartSince(pos: number): number { nextTokenStartSince(pos: number): number {
skipWhiteSpace.lastIndex = pos; skipWhiteSpace.lastIndex = pos;
const skip = skipWhiteSpace.exec(this.input); return skipWhiteSpace.test(this.input) ? skipWhiteSpace.lastIndex : pos;
// $FlowIgnore: The skipWhiteSpace ensures to match any string
return pos + skip[0].length;
} }
lookaheadCharCode(): number { lookaheadCharCode(): number {
@ -307,18 +305,14 @@ export default class Tokenizer extends ParserErrors {
let startLoc; let startLoc;
if (!this.isLookahead) startLoc = this.state.curPosition(); if (!this.isLookahead) startLoc = this.state.curPosition();
const start = this.state.pos; const start = this.state.pos;
const end = this.input.indexOf("*/", this.state.pos + 2); const end = this.input.indexOf("*/", start + 2);
if (end === -1) throw this.raise(start, Errors.UnterminatedComment); if (end === -1) throw this.raise(start, Errors.UnterminatedComment);
this.state.pos = end + 2; this.state.pos = end + 2;
lineBreakG.lastIndex = start; lineBreakG.lastIndex = start + 2;
let match; while (lineBreakG.test(this.input) && lineBreakG.lastIndex <= end) {
while (
(match = lineBreakG.exec(this.input)) &&
match.index < this.state.pos
) {
++this.state.curLine; ++this.state.curLine;
this.state.lineStart = match.index + match[0].length; this.state.lineStart = lineBreakG.lastIndex;
} }
// If we are doing a lookahead right now we need to advance the position (above code) // If we are doing a lookahead right now we need to advance the position (above code)
@ -326,11 +320,10 @@ export default class Tokenizer extends ParserErrors {
if (this.isLookahead) return; if (this.isLookahead) return;
/*:: invariant(startLoc) */ /*:: invariant(startLoc) */
const value = this.input.slice(start + 2, end);
const comment = { const comment = {
type: "CommentBlock", type: "CommentBlock",
value: value, value: this.input.slice(start + 2, end),
start: start, start,
end: end + 2, end: end + 2,
loc: new SourceLocation(startLoc, this.state.curPosition()), loc: new SourceLocation(startLoc, this.state.curPosition()),
}; };

View File

@ -23,6 +23,28 @@ export function isNewLine(code: number): boolean {
export const skipWhiteSpace = /(?:\s|\/\/.*|\/\*[^]*?\*\/)*/g; export const skipWhiteSpace = /(?:\s|\/\/.*|\/\*[^]*?\*\/)*/g;
export const skipWhiteSpaceInLine =
/(?:[^\S\n\r\u2028\u2029]|\/\/.*|\/\*.*?\*\/)*/y;
// Skip whitespace and single-line comments, including /* no newline here */.
// After this RegExp matches, its lastIndex points to a line terminator, or
// the start of multi-line comment (which is effectively a line terminator),
// or the end of string.
export const skipWhiteSpaceToLineBreak = new RegExp(
// Unfortunately JS doesn't support Perl's atomic /(?>pattern)/ or
// possessive quantifiers, so we use a trick to prevent backtracking
// when the look-ahead for line terminator fails.
"(?=(" +
// Capture the whitespace and comments that should be skipped inside
// a look-ahead assertion, and then re-match the group as a unit.
skipWhiteSpaceInLine.source +
"))\\1" +
// Look-ahead for either line terminator, start of multi-line comment,
// or end of string.
/(?=[\n\r\u2028\u2029]|\/\*(?!.*?\*\/)|$)/.source,
"y", // sticky
);
// https://tc39.github.io/ecma262/#sec-white-space // https://tc39.github.io/ecma262/#sec-white-space
export function isWhitespace(code: number): boolean { export function isWhitespace(code: number): boolean {
switch (code) { switch (code) {