Faster identifier tokenizing (#13262)

* add benchmark
* perf: faster identifier tokenizing
  - Move iterator identifier parsing to the Flow plugin
  - If the character is an identifier start, pass it to readWord1
2021-05-06 18:47:19 -04:00 · 2021-05-06 18:47:19 -04:00 · a8fea4037d
commit a8fea4037d
parent 1879491af7
6 changed files with 79 additions and 28 deletions
--- a/packages/babel-parser/benchmark/many-identifiers/1-length.bench.mjs
+++ b/packages/babel-parser/benchmark/many-identifiers/1-length.bench.mjs
@@ -0,0 +1,23 @@
+import Benchmark from "benchmark";
+import baseline from "@babel-baseline/parser";
+import current from "../../lib/index.js";
+import { report } from "../util.mjs";
+
+const suite = new Benchmark.Suite();
+function createInput(length) {
+  return "a;".repeat(length);
+}
+current.parse("a");
+function benchCases(name, implementation, options) {
+  for (const length of [64, 128, 256, 512, 1024]) {
+    const input = createInput(length);
+    suite.add(`${name} ${length} length-1 identifiers`, () => {
+      implementation.parse(input, options);
+    });
+  }
+}
+
+benchCases("baseline", baseline);
+benchCases("current", current);
+
+suite.on("cycle", report).run();
--- a/packages/babel-parser/benchmark/many-identifiers/2-length.bench.mjs
+++ b/packages/babel-parser/benchmark/many-identifiers/2-length.bench.mjs
@@ -0,0 +1,23 @@
+import Benchmark from "benchmark";
+import baseline from "@babel-baseline/parser";
+import current from "../../lib/index.js";
+import { report } from "../util.mjs";
+
+const suite = new Benchmark.Suite();
+function createInput(length) {
+  return "aa;".repeat(length);
+}
+current.parse("a");
+function benchCases(name, implementation, options) {
+  for (const length of [64, 128, 256, 512, 1024]) {
+    const input = createInput(length);
+    suite.add(`${name} ${length} length-2 identifiers`, () => {
+      implementation.parse(input, options);
+    });
+  }
+}
+
+benchCases("baseline", baseline);
+benchCases("current", current);
+
+suite.on("cycle", report).run();
--- a/packages/babel-parser/src/plugins/flow/index.js
+++ b/packages/babel-parser/src/plugins/flow/index.js
@@ -2219,6 +2219,22 @@ export default (superClass: Class<Parser>): Class<Parser> =>
      }
    }

+    isIterator(word: string): boolean {
+      return word === "iterator" || word === "asyncIterator";
+    }
+
+    readIterator(): void {
+      const word = super.readWord1();
+      const fullWord = "@@" + word;
+
+      // Allow @@iterator and @@asyncIterator as a identifier only inside type
+      if (!this.isIterator(word) || !this.state.inType) {
+        this.raise(this.state.pos, Errors.InvalidIdentifier, fullWord);
+      }
+
+      this.finishToken(tt.name, fullWord);
+    }
+
    // ensure that inside flow types, we bypass the jsx parser plugin
    getTokenFromCode(code: number): void {
      const next = this.input.charCodeAt(this.state.pos + 1);
@@ -2236,8 +2252,8 @@ export default (superClass: Class<Parser>): Class<Parser> =>
        // allow double nullable types in Flow: ??string
        return this.finishOp(tt.question, 1);
      } else if (isIteratorStart(code, next)) {
-        this.state.isIterator = true;
-        return super.readWord();
+        this.state.pos += 2; // eat "@@"
+        return this.readIterator();
      } else {
        return super.getTokenFromCode(code);
      }
--- a/packages/babel-parser/src/tokenizer/context.js
+++ b/packages/babel-parser/src/tokenizer/context.js
@@ -76,10 +76,6 @@ tt.name.updateContext = function (prevType) {
    }
  }
  this.state.exprAllowed = allowed;
-
-  if (this.state.isIterator) {
-    this.state.isIterator = false;
-  }
 };

 tt.braceL.updateContext = function (prevType) {
--- a/packages/babel-parser/src/tokenizer/index.js
+++ b/packages/babel-parser/src/tokenizer/index.js
@@ -453,7 +453,10 @@ export default class Tokenizer extends ParserErrors {
        this.finishToken(tt.bracketHashL);
      }
      this.state.pos += 2;
-    } else if (isIdentifierStart(next) || next === charCodes.backslash) {
+    } else if (isIdentifierStart(next)) {
+      ++this.state.pos;
+      this.finishToken(tt.privateName, this.readWord1(next));
+    } else if (next === charCodes.backslash) {
      ++this.state.pos;
      this.finishToken(tt.privateName, this.readWord1());
    } else {
@@ -920,7 +923,7 @@ export default class Tokenizer extends ParserErrors {

      default:
        if (isIdentifierStart(code)) {
-          this.readWord();
+          this.readWord(code);
          return;
        }
    }
@@ -1457,19 +1460,23 @@ export default class Tokenizer extends ParserErrors {
  //
  // Incrementally adds only escaped chars, adding other chunks as-is
  // as a micro-optimization.
+  //
+  // When `firstCode` is given, it assumes it is always an identifier start and
+  // will skip reading start position again

-  readWord1(): string {
-    let word = "";
+  readWord1(firstCode: number | void): string {
    this.state.containsEsc = false;
+    let word = "";
    const start = this.state.pos;
    let chunkStart = this.state.pos;
+    if (firstCode !== undefined) {
+      this.state.pos += firstCode <= 0xffff ? 1 : 2;
+    }

    while (this.state.pos < this.length) {
      const ch = this.codePointAtPos(this.state.pos);
      if (isIdentifierChar(ch)) {
        this.state.pos += ch <= 0xffff ? 1 : 2;
-      } else if (this.state.isIterator && ch === charCodes.atSign) {
-        ++this.state.pos;
      } else if (ch === charCodes.backslash) {
        this.state.containsEsc = true;

@@ -1501,25 +1508,12 @@ export default class Tokenizer extends ParserErrors {
    return word + this.input.slice(chunkStart, this.state.pos);
  }

-  isIterator(word: string): boolean {
-    return word === "@@iterator" || word === "@@asyncIterator";
-  }
-
  // Read an identifier or keyword token. Will check for reserved
  // words when necessary.

-  readWord(): void {
-    const word = this.readWord1();
+  readWord(firstCode: number | void): void {
+    const word = this.readWord1(firstCode);
    const type = keywordTypes.get(word) || tt.name;
-
-    // Allow @@iterator and @@asyncIterator as a identifier only inside type
-    if (
-      this.state.isIterator &&
-      (!this.isIterator(word) || !this.state.inType)
-    ) {
-      this.raise(this.state.pos, Errors.InvalidIdentifier, word);
-    }
-
    this.finishToken(type, word);
  }

--- a/packages/babel-parser/src/tokenizer/state.js
+++ b/packages/babel-parser/src/tokenizer/state.js
@@ -64,7 +64,6 @@ export default class State {
  noAnonFunctionType: boolean = false;
  inPropertyName: boolean = false;
  hasFlowComment: boolean = false;
-  isIterator: boolean = false;
  isAmbientContext: boolean = false;
  inAbstractClass: boolean = false;