Faster readRegexp (#13453)

This commit is contained in:
Huáng Jùnliàng 2021-06-10 19:00:21 -04:00 committed by GitHub
parent 0eb2853732
commit 6c8b2336f6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 82 additions and 31 deletions

View File

@ -0,0 +1,22 @@
import Benchmark from "benchmark";
import baseline from "@babel-baseline/parser";
import current from "../../lib/index.js";
import { report } from "../util.mjs";
// Shared Benchmark.js suite: benchCases() adds every case to it, and it is
// run once at the bottom of this file.
const suite = new Benchmark.Suite();
/**
 * Builds the benchmark source: a single `const a = /.../igsudm` declaration
 * whose regexp body is the character class `[/\\]` repeated `length / 4`
 * times.
 *
 * Each repetition is 5 characters long, so the regexp body holds
 * `5 * length / 4` characters in total.
 *
 * @param {number} length Size parameter; a quarter of it is used as the
 *   repetition count.
 * @returns {string} The source text handed to the parser.
 */
function createInput(length) {
  const characterClass = "[/\\\\]";
  const pattern = characterClass.repeat(length / 4);
  return `const a = /${pattern}/igsudm`;
}
/**
 * Registers one benchmark case per input size on the shared `suite`.
 *
 * The input for each size is built once, at registration time, so that only
 * the `parse` call runs inside the benchmarked callback.
 *
 * @param {string} name Label prefix identifying the implementation.
 * @param {{ parse: Function }} implementation Object whose
 *   `parse(input, options)` method is benchmarked.
 * @param {*} [options] Passed through unchanged as the second argument of
 *   `parse`; `undefined` when omitted.
 */
function benchCases(name, implementation, options) {
  const sizes = [256, 512, 1024, 2048];
  sizes.forEach(size => {
    const source = createInput(size);
    const label = `${name} ${size}-size RegExp literal `;
    const runCase = () => {
      implementation.parse(source, options);
    };
    suite.add(label, runCase);
  });
}
// Register the cases for the baseline parser first, then for the current
// build, so both are measured within the same suite run.
const implementations = [
  ["baseline", baseline],
  ["current", current],
];
for (const [label, parser] of implementations) {
  benchCases(label, parser);
}

// `report` is invoked on every "cycle" event emitted by the suite.
suite.on("cycle", report).run();

View File

@ -0,0 +1,22 @@
import Benchmark from "benchmark";
import baseline from "../../lib/index-v2.js";
import current from "../../lib/index.js";
import { report } from "../util.mjs";
// Shared Benchmark.js suite: benchCases() adds every case to it, and it is
// run once at the bottom of this file.
const suite = new Benchmark.Suite();
/**
 * Builds the benchmark source: `length` consecutive copies of the statement
 * `/x/dgimsuy;`, i.e. many tiny regexp literals that each carry seven flags.
 *
 * @param {number} length Number of regexp-literal statements to emit.
 * @returns {string} The source text handed to the parser.
 */
function createInput(length) {
  const statement = "/x/dgimsuy;";
  return statement.repeat(length);
}
/**
 * Registers one benchmark case per input size on the shared `suite`.
 *
 * The input for each size is built once, at registration time, so that only
 * the `parse` call runs inside the benchmarked callback.
 *
 * @param {string} name Label prefix identifying the implementation.
 * @param {{ parse: Function }} implementation Object whose
 *   `parse(input, options)` method is benchmarked.
 * @param {*} [options] Passed through unchanged as the second argument of
 *   `parse`; `undefined` when omitted.
 */
function benchCases(name, implementation, options) {
  const counts = [256, 512, 1024, 2048];
  counts.forEach(count => {
    const source = createInput(count);
    const label = `${name} ${count} small regexp literal with all flags`;
    const runCase = () => {
      implementation.parse(source, options);
    };
    suite.add(label, runCase);
  });
}
// Register the cases for the baseline parser first, then for the current
// build, so both are measured within the same suite run.
const implementations = [
  ["baseline", baseline],
  ["current", current],
];
for (const [label, parser] of implementations) {
  benchCases(label, parser);
}

// `report` is invoked on every "cycle" event emitted by the suite.
suite.on("cycle", report).run();

View File

@ -33,7 +33,7 @@
"node": ">=6.0.0" "node": ">=6.0.0"
}, },
"devDependencies": { "devDependencies": {
"@babel-baseline/parser": "npm:@babel/parser@^7.14.4", "@babel-baseline/parser": "npm:@babel/parser@^7.14.5",
"@babel/code-frame": "workspace:*", "@babel/code-frame": "workspace:*",
"@babel/helper-fixtures": "workspace:*", "@babel/helper-fixtures": "workspace:*",
"@babel/helper-validator-identifier": "workspace:*", "@babel/helper-validator-identifier": "workspace:*",

View File

@ -12,7 +12,6 @@ import { type TokContext, types as ct } from "./context";
import ParserErrors, { Errors, type ErrorTemplate } from "../parser/error"; import ParserErrors, { Errors, type ErrorTemplate } from "../parser/error";
import { SourceLocation } from "../util/location"; import { SourceLocation } from "../util/location";
import { import {
lineBreak,
lineBreakG, lineBreakG,
isNewLine, isNewLine,
isWhitespace, isWhitespace,
@ -21,7 +20,15 @@ import {
import State from "./state"; import State from "./state";
import type { LookaheadState } from "./state"; import type { LookaheadState } from "./state";
const VALID_REGEX_FLAGS = new Set(["g", "m", "s", "i", "y", "u", "d"]); const VALID_REGEX_FLAGS = new Set([
charCodes.lowercaseG,
charCodes.lowercaseM,
charCodes.lowercaseS,
charCodes.lowercaseI,
charCodes.lowercaseY,
charCodes.lowercaseU,
charCodes.lowercaseD,
]);
// The following character codes are forbidden from being // The following character codes are forbidden from being
// an immediate sibling of NumericLiteralSeparator _ // an immediate sibling of NumericLiteralSeparator _
@ -976,53 +983,52 @@ export default class Tokenizer extends ParserErrors {
readRegexp(): void { readRegexp(): void {
const start = this.state.start + 1; const start = this.state.start + 1;
let escaped, inClass; let escaped, inClass;
for (;;) { let { pos } = this.state;
if (this.state.pos >= this.length) { for (; ; ++pos) {
if (pos >= this.length) {
throw this.raise(start, Errors.UnterminatedRegExp); throw this.raise(start, Errors.UnterminatedRegExp);
} }
const ch = this.input.charAt(this.state.pos); const ch = this.input.charCodeAt(pos);
if (lineBreak.test(ch)) { if (isNewLine(ch)) {
throw this.raise(start, Errors.UnterminatedRegExp); throw this.raise(start, Errors.UnterminatedRegExp);
} }
if (escaped) { if (escaped) {
escaped = false; escaped = false;
} else { } else {
if (ch === "[") { if (ch === charCodes.leftSquareBracket) {
inClass = true; inClass = true;
} else if (ch === "]" && inClass) { } else if (ch === charCodes.rightSquareBracket && inClass) {
inClass = false; inClass = false;
} else if (ch === "/" && !inClass) { } else if (ch === charCodes.slash && !inClass) {
break; break;
} }
escaped = ch === "\\"; escaped = ch === charCodes.backslash;
} }
++this.state.pos;
} }
const content = this.input.slice(start, this.state.pos); const content = this.input.slice(start, pos);
++this.state.pos; ++pos;
let mods = ""; let mods = "";
while (this.state.pos < this.length) { while (pos < this.length) {
const char = this.input[this.state.pos]; const cp = this.codePointAtPos(pos);
const charCode = this.codePointAtPos(this.state.pos); // It doesn't matter if cp > 0xffff, the loop will either throw or break because we check on cp
const char = String.fromCharCode(cp);
if (VALID_REGEX_FLAGS.has(char)) { if (VALID_REGEX_FLAGS.has(cp)) {
if (mods.indexOf(char) > -1) { if (mods.includes(char)) {
this.raise(this.state.pos + 1, Errors.DuplicateRegExpFlags); this.raise(pos + 1, Errors.DuplicateRegExpFlags);
} }
} else if ( } else if (isIdentifierChar(cp) || cp === charCodes.backslash) {
isIdentifierChar(charCode) || this.raise(pos + 1, Errors.MalformedRegExpFlags);
charCode === charCodes.backslash
) {
this.raise(this.state.pos + 1, Errors.MalformedRegExpFlags);
} else { } else {
break; break;
} }
++this.state.pos; ++pos;
mods += char; mods += char;
} }
this.state.pos = pos;
this.finishToken(tt.regexp, { this.finishToken(tt.regexp, {
pattern: content, pattern: content,

View File

@ -5,12 +5,12 @@ __metadata:
version: 4 version: 4
cacheKey: 7 cacheKey: 7
"@babel-baseline/parser@npm:@babel/parser@^7.14.4": "@babel-baseline/parser@npm:@babel/parser@^7.14.5":
version: 7.14.4 version: 7.14.5
resolution: "@babel/parser@npm:7.14.4" resolution: "@babel/parser@npm:7.14.5"
bin: bin:
parser: ./bin/babel-parser.js parser: ./bin/babel-parser.js
checksum: 3bc067c1ee0e0178d365e1b2988ea1a0d6d37af37870ea1a7e80729b3bdc40acda083cac44ce72f63a5b31a489e35120f617bd41f312dec4c86cf814cff8e64a checksum: 55c14793888cb7d54275811e7f13136875df1ee4fc368f3f10cff46ebdf95b6a072e706a0486be0ac5686a597cbfb82f33b5f66aa6ba80ff50b73bca945035c6
languageName: node languageName: node
linkType: hard linkType: hard
@ -658,6 +658,7 @@ __metadata:
resolution: "@babel/helper-module-transforms@condition:BABEL_8_BREAKING?:workspace:^7.14.5#2510a1" resolution: "@babel/helper-module-transforms@condition:BABEL_8_BREAKING?:workspace:^7.14.5#2510a1"
dependencies: dependencies:
"@babel/helper-module-transforms-BABEL_8_BREAKING-false": "npm:@babel/helper-module-transforms@workspace:^7.14.5" "@babel/helper-module-transforms-BABEL_8_BREAKING-false": "npm:@babel/helper-module-transforms@workspace:^7.14.5"
checksum: eb4895913562bf398b8bf7e6c68a0380f153f52f2715b3685f9d07e376725227678c2f920dfe0772012dfed655e037534619de86bb9f3284b92555f8bf9d0f42
languageName: node languageName: node
linkType: hard linkType: hard
@ -972,7 +973,7 @@ __metadata:
version: 0.0.0-use.local version: 0.0.0-use.local
resolution: "@babel/parser@workspace:packages/babel-parser" resolution: "@babel/parser@workspace:packages/babel-parser"
dependencies: dependencies:
"@babel-baseline/parser": "npm:@babel/parser@^7.14.4" "@babel-baseline/parser": "npm:@babel/parser@^7.14.5"
"@babel/code-frame": "workspace:*" "@babel/code-frame": "workspace:*"
"@babel/helper-fixtures": "workspace:*" "@babel/helper-fixtures": "workspace:*"
"@babel/helper-validator-identifier": "workspace:*" "@babel/helper-validator-identifier": "workspace:*"