perf: minor tokenizer tweaks (#13652)
This commit is contained in:
parent
8a09993e39
commit
da1d166ea6
@ -201,11 +201,10 @@ export default class StatementParser extends ExpressionParser {
|
|||||||
|
|
||||||
if (isIdentifierStart(nextCh)) {
|
if (isIdentifierStart(nextCh)) {
|
||||||
keywordRelationalOperator.lastIndex = next;
|
keywordRelationalOperator.lastIndex = next;
|
||||||
const matched = keywordRelationalOperator.exec(this.input);
|
if (keywordRelationalOperator.test(this.input)) {
|
||||||
if (matched !== null) {
|
|
||||||
// We have seen `in` or `instanceof` so far, now check if the identifier
|
// We have seen `in` or `instanceof` so far, now check if the identifier
|
||||||
// ends here
|
// ends here
|
||||||
const endCh = this.codePointAtPos(next + matched[0].length);
|
const endCh = this.codePointAtPos(keywordRelationalOperator.lastIndex);
|
||||||
if (!isIdentifierChar(endCh) && endCh !== charCodes.backslash) {
|
if (!isIdentifierChar(endCh) && endCh !== charCodes.backslash) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -4,7 +4,7 @@ import { types as tt, TokenType } from "../tokenizer/types";
|
|||||||
import Tokenizer from "../tokenizer";
|
import Tokenizer from "../tokenizer";
|
||||||
import State from "../tokenizer/state";
|
import State from "../tokenizer/state";
|
||||||
import type { Node } from "../types";
|
import type { Node } from "../types";
|
||||||
import { lineBreak } from "../util/whitespace";
|
import { lineBreak, skipWhiteSpaceToLineBreak } from "../util/whitespace";
|
||||||
import { isIdentifierChar } from "../util/identifier";
|
import { isIdentifierChar } from "../util/identifier";
|
||||||
import ClassScopeHandler from "../util/class-scope";
|
import ClassScopeHandler from "../util/class-scope";
|
||||||
import ExpressionScopeHandler from "../util/expression-scope";
|
import ExpressionScopeHandler from "../util/expression-scope";
|
||||||
@ -119,9 +119,8 @@ export default class UtilParser extends Tokenizer {
|
|||||||
}
|
}
|
||||||
|
|
||||||
hasFollowingLineBreak(): boolean {
|
hasFollowingLineBreak(): boolean {
|
||||||
return lineBreak.test(
|
skipWhiteSpaceToLineBreak.lastIndex = this.state.end;
|
||||||
this.input.slice(this.state.end, this.nextTokenStart()),
|
return skipWhiteSpaceToLineBreak.test(this.input);
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO
|
// TODO
|
||||||
|
|||||||
@ -237,9 +237,7 @@ export default class Tokenizer extends ParserErrors {
|
|||||||
|
|
||||||
nextTokenStartSince(pos: number): number {
|
nextTokenStartSince(pos: number): number {
|
||||||
skipWhiteSpace.lastIndex = pos;
|
skipWhiteSpace.lastIndex = pos;
|
||||||
const skip = skipWhiteSpace.exec(this.input);
|
return skipWhiteSpace.test(this.input) ? skipWhiteSpace.lastIndex : pos;
|
||||||
// $FlowIgnore: The skipWhiteSpace ensures to match any string
|
|
||||||
return pos + skip[0].length;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
lookaheadCharCode(): number {
|
lookaheadCharCode(): number {
|
||||||
@ -307,18 +305,14 @@ export default class Tokenizer extends ParserErrors {
|
|||||||
let startLoc;
|
let startLoc;
|
||||||
if (!this.isLookahead) startLoc = this.state.curPosition();
|
if (!this.isLookahead) startLoc = this.state.curPosition();
|
||||||
const start = this.state.pos;
|
const start = this.state.pos;
|
||||||
const end = this.input.indexOf("*/", this.state.pos + 2);
|
const end = this.input.indexOf("*/", start + 2);
|
||||||
if (end === -1) throw this.raise(start, Errors.UnterminatedComment);
|
if (end === -1) throw this.raise(start, Errors.UnterminatedComment);
|
||||||
|
|
||||||
this.state.pos = end + 2;
|
this.state.pos = end + 2;
|
||||||
lineBreakG.lastIndex = start;
|
lineBreakG.lastIndex = start + 2;
|
||||||
let match;
|
while (lineBreakG.test(this.input) && lineBreakG.lastIndex <= end) {
|
||||||
while (
|
|
||||||
(match = lineBreakG.exec(this.input)) &&
|
|
||||||
match.index < this.state.pos
|
|
||||||
) {
|
|
||||||
++this.state.curLine;
|
++this.state.curLine;
|
||||||
this.state.lineStart = match.index + match[0].length;
|
this.state.lineStart = lineBreakG.lastIndex;
|
||||||
}
|
}
|
||||||
|
|
||||||
// If we are doing a lookahead right now we need to advance the position (above code)
|
// If we are doing a lookahead right now we need to advance the position (above code)
|
||||||
@ -326,11 +320,10 @@ export default class Tokenizer extends ParserErrors {
|
|||||||
if (this.isLookahead) return;
|
if (this.isLookahead) return;
|
||||||
/*:: invariant(startLoc) */
|
/*:: invariant(startLoc) */
|
||||||
|
|
||||||
const value = this.input.slice(start + 2, end);
|
|
||||||
const comment = {
|
const comment = {
|
||||||
type: "CommentBlock",
|
type: "CommentBlock",
|
||||||
value: value,
|
value: this.input.slice(start + 2, end),
|
||||||
start: start,
|
start,
|
||||||
end: end + 2,
|
end: end + 2,
|
||||||
loc: new SourceLocation(startLoc, this.state.curPosition()),
|
loc: new SourceLocation(startLoc, this.state.curPosition()),
|
||||||
};
|
};
|
||||||
|
|||||||
@ -23,6 +23,28 @@ export function isNewLine(code: number): boolean {
|
|||||||
|
|
||||||
export const skipWhiteSpace = /(?:\s|\/\/.*|\/\*[^]*?\*\/)*/g;
|
export const skipWhiteSpace = /(?:\s|\/\/.*|\/\*[^]*?\*\/)*/g;
|
||||||
|
|
||||||
|
export const skipWhiteSpaceInLine =
|
||||||
|
/(?:[^\S\n\r\u2028\u2029]|\/\/.*|\/\*.*?\*\/)*/y;
|
||||||
|
|
||||||
|
// Skip whitespace and single-line comments, including /* no newline here */.
|
||||||
|
// After this RegExp matches, its lastIndex points to a line terminator, or
|
||||||
|
// the start of a multi-line comment (which is effectively a line terminator),
|
||||||
|
// or the end of string.
|
||||||
|
export const skipWhiteSpaceToLineBreak = new RegExp(
|
||||||
|
// Unfortunately JS doesn't support Perl's atomic /(?>pattern)/ or
|
||||||
|
// possessive quantifiers, so we use a trick to prevent backtracking
|
||||||
|
// when the look-ahead for line terminator fails.
|
||||||
|
"(?=(" +
|
||||||
|
// Capture the whitespace and comments that should be skipped inside
|
||||||
|
// a look-ahead assertion, and then re-match the group as a unit.
|
||||||
|
skipWhiteSpaceInLine.source +
|
||||||
|
"))\\1" +
|
||||||
|
// Look-ahead for either line terminator, start of multi-line comment,
|
||||||
|
// or end of string.
|
||||||
|
/(?=[\n\r\u2028\u2029]|\/\*(?!.*?\*\/)|$)/.source,
|
||||||
|
"y", // sticky
|
||||||
|
);
|
||||||
|
|
||||||
// https://tc39.github.io/ecma262/#sec-white-space
|
// https://tc39.github.io/ecma262/#sec-white-space
|
||||||
export function isWhitespace(code: number): boolean {
|
export function isWhitespace(code: number): boolean {
|
||||||
switch (code) {
|
switch (code) {
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user