perf: minor tokenizer tweaks (#13652)

This commit is contained in:
Mickey Rose 2021-08-09 21:20:44 +02:00 committed by GitHub
parent 8a09993e39
commit da1d166ea6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 34 additions and 21 deletions

View File

@ -201,11 +201,10 @@ export default class StatementParser extends ExpressionParser {
if (isIdentifierStart(nextCh)) {
keywordRelationalOperator.lastIndex = next;
const matched = keywordRelationalOperator.exec(this.input);
if (matched !== null) {
if (keywordRelationalOperator.test(this.input)) {
// We have seen `in` or `instanceof` so far, now check if the identifier
// ends here
const endCh = this.codePointAtPos(next + matched[0].length);
const endCh = this.codePointAtPos(keywordRelationalOperator.lastIndex);
if (!isIdentifierChar(endCh) && endCh !== charCodes.backslash) {
return false;
}

View File

@ -4,7 +4,7 @@ import { types as tt, TokenType } from "../tokenizer/types";
import Tokenizer from "../tokenizer";
import State from "../tokenizer/state";
import type { Node } from "../types";
import { lineBreak } from "../util/whitespace";
import { lineBreak, skipWhiteSpaceToLineBreak } from "../util/whitespace";
import { isIdentifierChar } from "../util/identifier";
import ClassScopeHandler from "../util/class-scope";
import ExpressionScopeHandler from "../util/expression-scope";
@ -119,9 +119,8 @@ export default class UtilParser extends Tokenizer {
}
// Reports whether a line break follows the current token, looking at the
// input from `this.state.end` onwards.
// NOTE(review): this listing looks like a diff whose +/- markers were
// stripped, so two implementations appear back to back below. As written,
// everything after the first `return` is unreachable — confirm which variant
// is meant to be live before editing.
hasFollowingLineBreak(): boolean {
// Variant 1: slice the input between the end of the current token and the
// start of the next token, then test that slice against `lineBreak`.
return lineBreak.test(
this.input.slice(this.state.end, this.nextTokenStart()),
);
// Variant 2: position the sticky `skipWhiteSpaceToLineBreak` regex at the end
// of the current token and test it against the whole input, so no
// intermediate substring is created.
skipWhiteSpaceToLineBreak.lastIndex = this.state.end;
return skipWhiteSpaceToLineBreak.test(this.input);
}
// TODO

View File

@ -237,9 +237,7 @@ export default class Tokenizer extends ParserErrors {
// Returns the offset reached after applying `skipWhiteSpace` to the input
// starting at `pos`.
// NOTE(review): this listing looks like a diff whose +/- markers were
// stripped, so two implementations appear back to back below. As written,
// the second `return` is unreachable — confirm which variant is meant to be
// live before editing.
nextTokenStartSince(pos: number): number {
// Start the regex search at `pos`.
skipWhiteSpace.lastIndex = pos;
// Variant 1: run exec() and add the length of the matched text to `pos`.
const skip = skipWhiteSpace.exec(this.input);
// $FlowIgnore: The skipWhiteSpace ensures to match any string
return pos + skip[0].length;
// Variant 2: run test() and read the end of the match from `lastIndex`
// instead of building a match array; falls back to `pos` if test() fails.
return skipWhiteSpace.test(this.input) ? skipWhiteSpace.lastIndex : pos;
}
lookaheadCharCode(): number {
@ -307,18 +305,14 @@ export default class Tokenizer extends ParserErrors {
let startLoc;
if (!this.isLookahead) startLoc = this.state.curPosition();
const start = this.state.pos;
const end = this.input.indexOf("*/", this.state.pos + 2);
const end = this.input.indexOf("*/", start + 2);
if (end === -1) throw this.raise(start, Errors.UnterminatedComment);
this.state.pos = end + 2;
lineBreakG.lastIndex = start;
let match;
while (
(match = lineBreakG.exec(this.input)) &&
match.index < this.state.pos
) {
lineBreakG.lastIndex = start + 2;
while (lineBreakG.test(this.input) && lineBreakG.lastIndex <= end) {
++this.state.curLine;
this.state.lineStart = match.index + match[0].length;
this.state.lineStart = lineBreakG.lastIndex;
}
// If we are doing a lookahead right now we need to advance the position (above code)
@ -326,11 +320,10 @@ export default class Tokenizer extends ParserErrors {
if (this.isLookahead) return;
/*:: invariant(startLoc) */
const value = this.input.slice(start + 2, end);
const comment = {
type: "CommentBlock",
value: value,
start: start,
value: this.input.slice(start + 2, end),
start,
end: end + 2,
loc: new SourceLocation(startLoc, this.state.curPosition()),
};

View File

@ -23,6 +23,28 @@ export function isNewLine(code: number): boolean {
// Matches any run of whitespace, line comments and block comments; the run
// may extend across line terminators.
export const skipWhiteSpace = /(?:\s|\/\/.*|\/\*[^]*?\*\/)*/g;

// Sticky variant that stays on the current line: it accepts whitespace other
// than line terminators, line comments, and block comments that are opened
// and closed without an intervening line terminator.
export const skipWhiteSpaceInLine =
  /(?:[^\S\n\r\u2028\u2029]|\/\/.*|\/\*.*?\*\/)*/y;

// Sticky pattern that consumes the same in-line whitespace and comments as
// skipWhiteSpaceInLine (e.g. /* no newline here */) and succeeds only when
// the text that follows is one of:
//   - a line terminator,
//   - the opening of a block comment that is not closed on the same line
//     (which acts like a line terminator), or
//   - the end of the input.
// After a successful match, lastIndex is left at that position.
//
// JS regular expressions have neither Perl's atomic groups /(?>pattern)/ nor
// possessive quantifiers. To stop the engine from backtracking into the
// skipped text when the trailing look-ahead fails, the skippable run is
// captured inside a look-ahead and then consumed as a single unit through
// the \1 back-reference.
export const skipWhiteSpaceToLineBreak = new RegExp(
  [
    // Capture the skippable run inside a look-ahead ...
    "(?=(",
    skipWhiteSpaceInLine.source,
    // ... and re-match the captured text as a unit.
    "))\\1",
    // Then require a line terminator, an unterminated-on-this-line block
    // comment opener, or the end of the string.
    /(?=[\n\r\u2028\u2029]|\/\*(?!.*?\*\/)|$)/.source,
  ].join(""),
  "y", // sticky
);
// https://tc39.github.io/ecma262/#sec-white-space
export function isWhitespace(code: number): boolean {
switch (code) {