diff --git a/packages/babel-parser/src/tokenizer/index.js b/packages/babel-parser/src/tokenizer/index.js index 2043832b1c..860762f688 100644 --- a/packages/babel-parser/src/tokenizer/index.js +++ b/packages/babel-parser/src/tokenizer/index.js @@ -16,7 +16,7 @@ import { lineBreak, lineBreakG, isNewLine, - nonASCIIwhitespace, + isWhitespace, } from "../util/whitespace"; import State from "./state"; @@ -331,11 +331,6 @@ export default class Tokenizer extends LocationParser { loop: while (this.state.pos < this.input.length) { const ch = this.input.charCodeAt(this.state.pos); switch (ch) { - case charCodes.space: - case charCodes.nonBreakingSpace: - ++this.state.pos; - break; - case charCodes.carriageReturn: if ( this.input.charCodeAt(this.state.pos + 1) === charCodes.lineFeed @@ -367,11 +362,7 @@ export default class Tokenizer extends LocationParser { break; default: - if ( - (ch > charCodes.backSpace && ch < charCodes.shiftOut) || - (ch >= charCodes.oghamSpaceMark && - nonASCIIwhitespace.test(String.fromCharCode(ch))) - ) { + if (isWhitespace(ch)) { ++this.state.pos; } else { break loop; diff --git a/packages/babel-parser/src/util/whitespace.js b/packages/babel-parser/src/util/whitespace.js index 0588213ecd..656e772fb4 100644 --- a/packages/babel-parser/src/util/whitespace.js +++ b/packages/babel-parser/src/util/whitespace.js @@ -1,13 +1,53 @@ // @flow +import * as charCodes from "charcodes"; + // Matches a whole line break (where CRLF is considered a single // line break). Used to count lines. - export const lineBreak = /\r\n?|\n|\u2028|\u2029/; export const lineBreakG = new RegExp(lineBreak.source, "g"); +// https://tc39.github.io/ecma262/#sec-line-terminators export function isNewLine(code: number): boolean { - return code === 10 || code === 13 || code === 0x2028 || code === 0x2029; + switch (code) { + case charCodes.lineFeed: + case charCodes.carriageReturn: + case charCodes.lineSeparator: + case charCodes.paragraphSeparator: + return true; + + default: + return false; + } } -export const nonASCIIwhitespace = /[\u1680\u180e\u2000-\u200a\u202f\u205f\u3000\ufeff]/; +// https://tc39.github.io/ecma262/#sec-white-space +export function isWhitespace(code: number): boolean { + switch (code) { + case 0x0009: // CHARACTER TABULATION + case 0x000b: // LINE TABULATION + case 0x000c: // FORM FEED + case charCodes.space: + case charCodes.nonBreakingSpace: + case charCodes.oghamSpaceMark: + case 0x2000: // EN QUAD + case 0x2001: // EM QUAD + case 0x2002: // EN SPACE + case 0x2003: // EM SPACE + case 0x2004: // THREE-PER-EM SPACE + case 0x2005: // FOUR-PER-EM SPACE + case 0x2006: // SIX-PER-EM SPACE + case 0x2007: // FIGURE SPACE + case 0x2008: // PUNCTUATION SPACE + case 0x2009: // THIN SPACE + case 0x200a: // HAIR SPACE + case 0x202f: // NARROW NO-BREAK SPACE + case 0x205f: // MEDIUM MATHEMATICAL SPACE + case 0x3000: // IDEOGRAPHIC SPACE + case 0xfeff: // ZERO WIDTH NO-BREAK SPACE + return true; + + default: + return false; + } +} diff --git a/scripts/tests/test262/test262_whitelist.txt b/scripts/tests/test262/test262_whitelist.txt index 8d34948baa..ec84454aec 100644 --- a/scripts/tests/test262/test262_whitelist.txt +++ b/scripts/tests/test262/test262_whitelist.txt @@ -683,8 +683,6 @@ language/statements/while/labelled-fn-stmt.js(default) language/statements/with/decl-async-fun.js(default) language/statements/with/decl-async-gen.js(default) language/statements/with/labelled-fn-stmt.js(default) -language/white-space/mongolian-vowel-separator.js(default) -language/white-space/mongolian-vowel-separator.js(strict mode) language/identifiers/unicode-escape-nls-err.js(default) language/identifiers/unicode-escape-nls-err.js(strict mode)