From da1d166ea6b538aeda1faba0d2438a44f2a0c1af Mon Sep 17 00:00:00 2001 From: Mickey Rose Date: Mon, 9 Aug 2021 21:20:44 +0200 Subject: [PATCH] perf: minor tokenizer tweaks (#13652) --- packages/babel-parser/src/parser/statement.js | 5 ++--- packages/babel-parser/src/parser/util.js | 7 +++--- packages/babel-parser/src/tokenizer/index.js | 21 ++++++------------ packages/babel-parser/src/util/whitespace.js | 22 +++++++++++++++++++ 4 files changed, 34 insertions(+), 21 deletions(-) diff --git a/packages/babel-parser/src/parser/statement.js b/packages/babel-parser/src/parser/statement.js index 5e72f7c094..71bbb711bd 100644 --- a/packages/babel-parser/src/parser/statement.js +++ b/packages/babel-parser/src/parser/statement.js @@ -201,11 +201,10 @@ export default class StatementParser extends ExpressionParser { if (isIdentifierStart(nextCh)) { keywordRelationalOperator.lastIndex = next; - const matched = keywordRelationalOperator.exec(this.input); - if (matched !== null) { + if (keywordRelationalOperator.test(this.input)) { // We have seen `in` or `instanceof` so far, now check if the identfier // ends here - const endCh = this.codePointAtPos(next + matched[0].length); + const endCh = this.codePointAtPos(keywordRelationalOperator.lastIndex); if (!isIdentifierChar(endCh) && endCh !== charCodes.backslash) { return false; } diff --git a/packages/babel-parser/src/parser/util.js b/packages/babel-parser/src/parser/util.js index 648a5743c5..6c73a4b385 100644 --- a/packages/babel-parser/src/parser/util.js +++ b/packages/babel-parser/src/parser/util.js @@ -4,7 +4,7 @@ import { types as tt, TokenType } from "../tokenizer/types"; import Tokenizer from "../tokenizer"; import State from "../tokenizer/state"; import type { Node } from "../types"; -import { lineBreak } from "../util/whitespace"; +import { lineBreak, skipWhiteSpaceToLineBreak } from "../util/whitespace"; import { isIdentifierChar } from "../util/identifier"; import ClassScopeHandler from "../util/class-scope"; import ExpressionScopeHandler from "../util/expression-scope"; @@ -119,9 +119,8 @@ export default class UtilParser extends Tokenizer { } hasFollowingLineBreak(): boolean { - return lineBreak.test( - this.input.slice(this.state.end, this.nextTokenStart()), - ); + skipWhiteSpaceToLineBreak.lastIndex = this.state.end; + return skipWhiteSpaceToLineBreak.test(this.input); } // TODO diff --git a/packages/babel-parser/src/tokenizer/index.js b/packages/babel-parser/src/tokenizer/index.js index d3cf76f376..85a0a60395 100644 --- a/packages/babel-parser/src/tokenizer/index.js +++ b/packages/babel-parser/src/tokenizer/index.js @@ -237,9 +237,7 @@ export default class Tokenizer extends ParserErrors { nextTokenStartSince(pos: number): number { skipWhiteSpace.lastIndex = pos; - const skip = skipWhiteSpace.exec(this.input); - // $FlowIgnore: The skipWhiteSpace ensures to match any string - return pos + skip[0].length; + return skipWhiteSpace.test(this.input) ? skipWhiteSpace.lastIndex : pos; } lookaheadCharCode(): number { @@ -307,18 +305,14 @@ export default class Tokenizer extends ParserErrors { let startLoc; if (!this.isLookahead) startLoc = this.state.curPosition(); const start = this.state.pos; - const end = this.input.indexOf("*/", this.state.pos + 2); + const end = this.input.indexOf("*/", start + 2); if (end === -1) throw this.raise(start, Errors.UnterminatedComment); this.state.pos = end + 2; - lineBreakG.lastIndex = start; - let match; - while ( - (match = lineBreakG.exec(this.input)) && - match.index < this.state.pos - ) { + lineBreakG.lastIndex = start + 2; + while (lineBreakG.test(this.input) && lineBreakG.lastIndex <= end) { ++this.state.curLine; - this.state.lineStart = match.index + match[0].length; + this.state.lineStart = lineBreakG.lastIndex; } // If we are doing a lookahead right now we need to advance the position (above code) @@ -326,11 +320,10 @@ export default class Tokenizer extends ParserErrors { if (this.isLookahead) return; /*:: invariant(startLoc) */ - const value = this.input.slice(start + 2, end); const comment = { type: "CommentBlock", - value: value, - start: start, + value: this.input.slice(start + 2, end), + start, end: end + 2, loc: new SourceLocation(startLoc, this.state.curPosition()), }; diff --git a/packages/babel-parser/src/util/whitespace.js b/packages/babel-parser/src/util/whitespace.js index 2fdbc3d423..e13eb5c1f2 100644 --- a/packages/babel-parser/src/util/whitespace.js +++ b/packages/babel-parser/src/util/whitespace.js @@ -23,6 +23,28 @@ export function isNewLine(code: number): boolean { export const skipWhiteSpace = /(?:\s|\/\/.*|\/\*[^]*?\*\/)*/g; +export const skipWhiteSpaceInLine = + /(?:[^\S\n\r\u2028\u2029]|\/\/.*|\/\*.*?\*\/)*/y; + +// Skip whitespace and single-line comments, including /* no newline here */. +// After this RegExp matches, its lastIndex points to a line terminator, or +// the start of multi-line comment (which is effectively a line terminator), +// or the end of string. +export const skipWhiteSpaceToLineBreak = new RegExp( + // Unfortunately JS doesn't support Perl's atomic /(?>pattern)/ or + // possessive quantifiers, so we use a trick to prevent backtracking + // when the look-ahead for line terminator fails. + "(?=(" + + // Capture the whitespace and comments that should be skipped inside + // a look-ahead assertion, and then re-match the group as a unit. + skipWhiteSpaceInLine.source + + "))\\1" + + // Look-ahead for either line terminator, start of multi-line comment, + // or end of string. + /(?=[\n\r\u2028\u2029]|\/\*(?!.*?\*\/)|$)/.source, + "y", // sticky +); + // https://tc39.github.io/ecma262/#sec-white-space export function isWhitespace(code: number): boolean { switch (code) {