From 4229ef792e58a19218a8c737f04d6ee741453e1d Mon Sep 17 00:00:00 2001 From: Marijn Haverbeke Date: Tue, 20 Nov 2012 16:57:07 +0100 Subject: [PATCH] Give readToken sub-functions meaningful names --- acorn.js | 33 ++++++------- index.html | 140 ++++++++++++++++++++++++++++++++++++----------------- 2 files changed, 111 insertions(+), 62 deletions(-) diff --git a/acorn.js b/acorn.js index 43425c8590..9160e58542 100644 --- a/acorn.js +++ b/acorn.js @@ -487,47 +487,47 @@ // The `forceRegexp` parameter is used in the one case where the // `tokRegexpAllowed` trick does not work. See `parseStatement`. - function readToken_46(code) { // '.' + function readToken_dot(code) { var next = input.charCodeAt(tokPos+1); if (next >= 48 && next <= 57) return readNumber(String.fromCharCode(code)); ++tokPos; return finishToken(_dot); } - function readToken_47() { // '/' + function readToken_slash() { // '/' var next = input.charCodeAt(tokPos+1); if (tokRegexpAllowed) {++tokPos; return readRegexp();} if (next === 61) return finishOp(_assign, 2); return finishOp(_slash, 1); } - function readToken_37_42() { // '%*' + function readToken_mult_modulo() { // '%*' var next = input.charCodeAt(tokPos+1); if (next === 61) return finishOp(_assign, 2); return finishOp(_bin10, 1); } - function readToken_124_38(code) { // '|&' + function readToken_pipe_amp(code) { // '|&' var next = input.charCodeAt(tokPos+1); if (next === code) return finishOp(code === 124 ? _bin1 : _bin2, 2); if (next === 61) return finishOp(_assign, 2); return finishOp(code === 124 ? _bin3 : _bin5, 1); } - function readToken_94() { // '^' + function readToken_caret() { // '^' var next = input.charCodeAt(tokPos+1); if (next === 61) return finishOp(_assign, 2); return finishOp(_bin4, 1); } - function readToken_43_45(code) { // '+-' + function readToken_plus_min(code) { // '+-' var next = input.charCodeAt(tokPos+1); if (next === code) return finishOp(_incdec, 2); if (next === 61) return finishOp(_assign, 2); return finishOp(_plusmin, 1); } - function readToken_60_62(code) { // '<>' + function readToken_lt_gt(code) { // '<>' var next = input.charCodeAt(tokPos+1); var size = 1; if (next === code) { @@ -540,19 +540,18 @@ return finishOp(_bin7, size); } - function readToken_61_33(code) { // '=!' + function readToken_eq_excl(code) { // '=!' var next = input.charCodeAt(tokPos+1); if (next === 61) return finishOp(_bin6, input.charCodeAt(tokPos+2) === 61 ? 3 : 2); return finishOp(code === 61 ? _eq : _prefix, 1); } function getTokenFromCode(code) { - switch(code) { // The interpretation of a dot depends on whether it is followed // by a digit. case 46: // '.' - return readToken_46(code); + return readToken_dot(code); // Punctuation tokens. case 40: ++tokPos; return finishToken(_parenL); @@ -585,25 +584,25 @@ // of the type given by its first argument. case 47: // '/' - return readToken_47(code); + return readToken_slash(code); case 37: case 42: // '%*' - return readToken_37_42(); + return readToken_mult_modulo(); case 124: case 38: // '|&' - return readToken_124_38(code); + return readToken_pipe_amp(code); case 94: // '^' - return readToken_94(); + return readToken_caret(); case 43: case 45: // '+-' - return readToken_43_45(code); + return readToken_plus_min(code); case 60: case 62: // '<>' - return readToken_60_62(code); + return readToken_lt_gt(code); case 61: case 33: // '=!' - return readToken_61_33(code); + return readToken_eq_excl(code); case 126: // '~' return finishOp(_prefix, 1); diff --git a/index.html b/index.html index 14bbb8685e..8a4419bc47 100644 --- a/index.html +++ b/index.html @@ -296,22 +296,69 @@ into it.

All in the name of speed.

The forceRegexp parameter is used in the one case where the -tokRegexpAllowed trick does not work. See parseStatement.

  function readToken(forceRegexp) {
-    tokStart = tokPos;
-    if (options.locations) tokStartLoc = curLineLoc();
-    tokCommentsBefore = tokComments;
-    if (forceRegexp) return readRegexp();
-    if (tokPos >= inputLen) return finishToken(_eof);
-
-    var code = input.charCodeAt(tokPos);

Identifier or keyword. '\uXXXX' sequences are allowed in -identifiers, so '\' also dispatches to that.

    if (isIdentifierStart(code) || code === 92 /* '\' */) return readWord();
+tokRegexpAllowed trick does not work. See parseStatement.

  function readToken_dot(code) {
     var next = input.charCodeAt(tokPos+1);
+    if (next >= 48 && next <= 57) return readNumber(String.fromCharCode(code));
+    ++tokPos;
+    return finishToken(_dot);
+  }
 
-    switch(code) {

The interpretation of a dot depends on whether it is followed + function readToken_slash() { // '/' + var next = input.charCodeAt(tokPos+1); + if (tokRegexpAllowed) {++tokPos; return readRegexp();} + if (next === 61) return finishOp(_assign, 2); + return finishOp(_slash, 1); + } + + function readToken_mult_modulo() { // '%*' + var next = input.charCodeAt(tokPos+1); + if (next === 61) return finishOp(_assign, 2); + return finishOp(_bin10, 1); + } + + function readToken_pipe_amp(code) { // '|&' + var next = input.charCodeAt(tokPos+1); + if (next === code) return finishOp(code === 124 ? _bin1 : _bin2, 2); + if (next === 61) return finishOp(_assign, 2); + return finishOp(code === 124 ? _bin3 : _bin5, 1); + } + + function readToken_caret() { // '^' + var next = input.charCodeAt(tokPos+1); + if (next === 61) return finishOp(_assign, 2); + return finishOp(_bin4, 1); + } + + function readToken_plus_min(code) { // '+-' + var next = input.charCodeAt(tokPos+1); + if (next === code) return finishOp(_incdec, 2); + if (next === 61) return finishOp(_assign, 2); + return finishOp(_plusmin, 1); + } + + function readToken_lt_gt(code) { // '<>' + var next = input.charCodeAt(tokPos+1); + var size = 1; + if (next === code) { + size = code === 62 && input.charCodeAt(tokPos+2) === 62 ? 3 : 2; + if (input.charCodeAt(tokPos + size) === 61) return finishOp(_assign, size + 1); + return finishOp(_bin8, size); + } + if (next === 61) + size = input.charCodeAt(tokPos+2) === 61 ? 3 : 2; + return finishOp(_bin7, size); + } + + function readToken_eq_excl(code) { // '=!' + var next = input.charCodeAt(tokPos+1); + if (next === 61) return finishOp(_bin6, input.charCodeAt(tokPos+2) === 61 ? 3 : 2); + return finishOp(code === 61 ? _eq : _prefix, 1); + } + + function getTokenFromCode(code) { + switch(code) {

The interpretation of a dot depends on whether it is followed by a digit.

    case 46: // '.'
-      if (next >= 48 && next <= 57) return readNumber(String.fromCharCode(code));
-      ++tokPos;
-      return finishToken(_dot);

Punctuation tokens.

    case 40: ++tokPos; return finishToken(_parenL);
+      return readToken_dot(code);

Punctuation tokens.

    case 40: ++tokPos; return finishToken(_parenL);
     case 41: ++tokPos; return finishToken(_parenR);
     case 59: ++tokPos; return finishToken(_semi);
     case 44: ++tokPos; return finishToken(_comma);
@@ -320,57 +367,60 @@ by a digit.

case 123: ++tokPos; return finishToken(_braceL); case 125: ++tokPos; return finishToken(_braceR); case 58: ++tokPos; return finishToken(_colon); - case 63: ++tokPos; return finishToken(_question);

'0x' is a hexadecimal number.

    case 48: // '0'
-      if (next === 120 || next === 88) return readHexNumber();

Anything else beginning with a digit is an integer, octal + case 63: ++tokPos; return finishToken(_question);

'0x' is a hexadecimal number.

    case 48: // '0'
+      var next = input.charCodeAt(tokPos+1);
+      if (next === 120 || next === 88) return readHexNumber();

Anything else beginning with a digit is an integer, octal number, or float.

    case 49: case 50: case 51: case 52: case 53: case 54: case 55: case 56: case 57: // 1-9
-      return readNumber(String.fromCharCode(code));

Quotes produce strings.

    case 34: case 39: // '"', "'"
-      return readString(code);

Operators are parsed inline in tiny state machines. '=' (61) is + return readNumber(String.fromCharCode(code));

Quotes produce strings.

    case 34: case 39: // '"', "'"
+      return readString(code);

Operators are parsed inline in tiny state machines. '=' (61) is often referred to. finishOp simply skips the amount of characters it is given as second argument, and returns a token of the type given by its first argument.

    case 47: // '/'
-      if (tokRegexpAllowed) {++tokPos; return readRegexp();}
-      if (next === 61) return finishOp(_assign, 2);
-      return finishOp(_slash, 1);
+      return readToken_slash(code);
 
     case 37: case 42: // '%*'
-      if (next === 61) return finishOp(_assign, 2);
-      return finishOp(_bin10, 1);
+      return readToken_mult_modulo();
 
     case 124: case 38: // '|&'
-      if (next === code) return finishOp(code === 124 ? _bin1 : _bin2, 2);
-      if (next === 61) return finishOp(_assign, 2);
-      return finishOp(code === 124 ? _bin3 : _bin5, 1);
+      return readToken_pipe_amp(code);
 
     case 94: // '^'
-      if (next === 61) return finishOp(_assign, 2);
-      return finishOp(_bin4, 1);
+      return readToken_caret();
 
     case 43: case 45: // '+-'
-      if (next === code) return finishOp(_incdec, 2);
-      if (next === 61) return finishOp(_assign, 2);
-      return finishOp(_plusmin, 1);
+      return readToken_plus_min(code);
 
     case 60: case 62: // '<>'
-      var size = 1;
-      if (next === code) {
-        size = code === 62 && input.charCodeAt(tokPos+2) === 62 ? 3 : 2;
-        if (input.charCodeAt(tokPos + size) === 61) return finishOp(_assign, size + 1);
-        return finishOp(_bin8, size);
-      }
-      if (next === 61)
-        size = input.charCodeAt(tokPos+2) === 61 ? 3 : 2;
-      return finishOp(_bin7, size);
+      return readToken_lt_gt(code);
 
     case 61: case 33: // '=!'
-      if (next === 61) return finishOp(_bin6, input.charCodeAt(tokPos+2) === 61 ? 3 : 2);
-      return finishOp(code === 61 ? _eq : _prefix, 1);
+      return readToken_eq_excl(code);
 
     case 126: // '~'
       return finishOp(_prefix, 1);
-    }

If we are here, we either found a non-ASCII identifier -character, or something that's entirely disallowed.

    var ch = String.fromCharCode(code);
-    if (ch === "\\" || nonASCIIidentifierStart.test(ch)) return readWord();
-    raise(tokPos, "Unexpected character '" + ch + "'");
+    }
+
+    return false;
+  }
+
+  function readToken(forceRegexp) {
+    tokStart = tokPos;
+    if (options.locations) tokStartLoc = curLineLoc();
+    tokCommentsBefore = tokComments;
+    if (forceRegexp) return readRegexp();
+    if (tokPos >= inputLen) return finishToken(_eof);
+
+    var code = input.charCodeAt(tokPos);

Identifier or keyword. '\uXXXX' sequences are allowed in +identifiers, so '\' also dispatches to that.

    if (isIdentifierStart(code) || code === 92 /* '\' */) return readWord();
+    
+    var tok = getTokenFromCode(code);
+
+    if(tok === false) {

If we are here, we either found a non-ASCII identifier +character, or something that's entirely disallowed.

      var ch = String.fromCharCode(code);
+      if (ch === "\\" || nonASCIIidentifierStart.test(ch)) return readWord();
+      raise(tokPos, "Unexpected character '" + ch + "'");
+    } 
+    return tok;
   }
 
   function finishOp(type, size) {