Update parser whitespace for clarity (#8539)
* Update parser whitespace for clarity. Has the nice benefit of not requiring a charCode-to-String conversion.
* Remove test262 exceptions. Per https://github.com/tc39/ecma262/pull/1218#issuecomment-395340891, ECMA-262 follows whatever the latest Unicode version specifies for the Zs (Space_Separator) category. MONGOLIAN VOWEL SEPARATOR was moved to the Cf (Other_Format) category in Unicode 8.
This commit is contained in:
parent
595240f071
commit
5899940156
@ -16,7 +16,7 @@ import {
|
||||
lineBreak,
|
||||
lineBreakG,
|
||||
isNewLine,
|
||||
nonASCIIwhitespace,
|
||||
isWhitespace,
|
||||
} from "../util/whitespace";
|
||||
import State from "./state";
|
||||
|
||||
@ -331,11 +331,6 @@ export default class Tokenizer extends LocationParser {
|
||||
loop: while (this.state.pos < this.input.length) {
|
||||
const ch = this.input.charCodeAt(this.state.pos);
|
||||
switch (ch) {
|
||||
case charCodes.space:
|
||||
case charCodes.nonBreakingSpace:
|
||||
++this.state.pos;
|
||||
break;
|
||||
|
||||
case charCodes.carriageReturn:
|
||||
if (
|
||||
this.input.charCodeAt(this.state.pos + 1) === charCodes.lineFeed
|
||||
@ -367,11 +362,7 @@ export default class Tokenizer extends LocationParser {
|
||||
break;
|
||||
|
||||
default:
|
||||
if (
|
||||
(ch > charCodes.backSpace && ch < charCodes.shiftOut) ||
|
||||
(ch >= charCodes.oghamSpaceMark &&
|
||||
nonASCIIwhitespace.test(String.fromCharCode(ch)))
|
||||
) {
|
||||
if (isWhitespace(ch)) {
|
||||
++this.state.pos;
|
||||
} else {
|
||||
break loop;
|
||||
|
||||
@ -1,13 +1,53 @@
|
||||
// @flow
|
||||
|
||||
import * as charCodes from "charcodes";
|
||||
|
||||
// Matches a whole line break (where CRLF is considered a single
|
||||
// line break). Used to count lines.
|
||||
|
||||
export const lineBreak = /\r\n?|\n|\u2028|\u2029/;
|
||||
export const lineBreakG = new RegExp(lineBreak.source, "g");
|
||||
|
||||
// https://tc39.github.io/ecma262/#sec-line-terminators
|
||||
export function isNewLine(code: number): boolean {
|
||||
return code === 10 || code === 13 || code === 0x2028 || code === 0x2029;
|
||||
switch (code) {
|
||||
case charCodes.lineFeed:
|
||||
case charCodes.carriageReturn:
|
||||
case charCodes.lineSeparator:
|
||||
case charCodes.paragraphSeparator:
|
||||
return true;
|
||||
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
export const nonASCIIwhitespace = /[\u1680\u180e\u2000-\u200a\u202f\u205f\u3000\ufeff]/;
|
||||
// https://tc39.github.io/ecma262/#sec-white-space
|
||||
export function isWhitespace(code: number): boolean {
|
||||
switch (code) {
|
||||
case 0x0009: // CHARACTER TABULATION
|
||||
case 0x000b: // LINE TABULATION
|
||||
case 0x000c: // FORM FEED
|
||||
case charCodes.space:
|
||||
case charCodes.nonBreakingSpace:
|
||||
case charCodes.oghamSpaceMark:
|
||||
case 0x2000: // EN QUAD
|
||||
case 0x2001: // EM QUAD
|
||||
case 0x2002: // EN SPACE
|
||||
case 0x2003: // EM SPACE
|
||||
case 0x2004: // THREE-PER-EM SPACE
|
||||
case 0x2005: // FOUR-PER-EM SPACE
|
||||
case 0x2006: // SIX-PER-EM SPACE
|
||||
case 0x2007: // FIGURE SPACE
|
||||
case 0x2008: // PUNCTUATION SPACE
|
||||
case 0x2009: // THIN SPACE
|
||||
case 0x200a: // HAIR SPACE
|
||||
case 0x202f: // NARROW NO-BREAK SPACE
|
||||
case 0x205f: // MEDIUM MATHEMATICAL SPACE
|
||||
case 0x3000: // IDEOGRAPHIC SPACE
|
||||
case 0xfeff: // ZERO WIDTH NO-BREAK SPACE
|
||||
return true;
|
||||
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
@ -683,8 +683,6 @@ language/statements/while/labelled-fn-stmt.js(default)
|
||||
language/statements/with/decl-async-fun.js(default)
|
||||
language/statements/with/decl-async-gen.js(default)
|
||||
language/statements/with/labelled-fn-stmt.js(default)
|
||||
language/white-space/mongolian-vowel-separator.js(default)
|
||||
language/white-space/mongolian-vowel-separator.js(strict mode)
|
||||
|
||||
language/identifiers/unicode-escape-nls-err.js(default)
|
||||
language/identifiers/unicode-escape-nls-err.js(strict mode)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user