src/tokenizer/index.js
`TokContext` and `types` are imported from `./context` and are used to define the token contexts and tokens.
...
import { types as tt, keywords as keywordTypes, type TokenType } from "./types";
import { type TokContext, types as ct } from "./context";
import ParserErrors, { Errors } from "../parser/error";
...
import State from "./state";
...
The Token class
Here we define the `Token` class, which is used to create a token object from the current state.
export class Token {
constructor(state: State) {
this.type = state.type;
this.value = state.value;
this.start = state.start;
this.end = state.end;
this.loc = new SourceLocation(state.startLoc, state.endLoc);
}
type: TokenType;
value: any;
start: number;
end: number;
loc: SourceLocation;
}
The Tokenizer class
The `Tokenizer` class extends `ParserErrors` and is used to tokenize the input code.
export default class Tokenizer extends ParserErrors {
isLookahead: boolean;
// Token store.
tokens: Array<Token | N.Comment> = [];
constructor(options: Options, input: string) {
super();
this.state = new State();
this.state.init(options);
this.input = input;
this.length = input.length;
this.isLookahead = false;
}
pushToken(token: Token | N.Comment) {
// Pop out invalid tokens trapped by try-catch parsing.
// Those parsing branches are mainly created by typescript and flow plugins.
this.tokens.length = this.state.tokensLength;
this.tokens.push(token);
++this.state.tokensLength;
}
// Move to the next token
next(): void {
if (!this.isLookahead) {
this.checkKeywordEscapes();
if (this.options.tokens) {
this.pushToken(new Token(this.state));
}
}
this.state.lastTokEnd = this.state.end;
this.state.lastTokStart = this.state.start;
this.state.lastTokEndLoc = this.state.endLoc;
this.state.lastTokStartLoc = this.state.startLoc;
this.nextToken();
}
match(type: TokenType): boolean {
return this.state.type === type;
}
lookahead(): State {
const old = this.state;
this.state = old.clone(true);
this.isLookahead = true;
this.next();
this.isLookahead = false;
const curr = this.state;
this.state = old;
return curr;
}
nextTokenStart(): number {
return this.nextTokenStartSince(this.state.pos);
}
The curContext method
Inside the `Tokenizer` class, we define the `curContext` method, which returns the current context of the parser.
curContext(): TokContext {
return this.state.context[this.state.context.length - 1];
}
The nextToken method
The `nextToken` method reads a single token, updating the parser object's token-related properties.
nextToken(): void {
const curContext = this.curContext();
if (!curContext?.preserveSpace) this.skipSpace();
this.state.octalPositions = [];
this.state.start = this.state.pos;
this.state.startLoc = this.state.curPosition();
if (this.state.pos >= this.length) {
this.finishToken(tt.eof);
return;
}
const override = curContext?.override; // Override the next token type
if (override) {
override(this);
} else {
this.getTokenFromCode(this.input.codePointAt(this.state.pos));
}
}
Methods Dealing with Comments and Spaces
The following methods deal with comments and whitespace.
pushComment(
block: boolean,
text: string,
start: number,
end: number,
startLoc: Position,
endLoc: Position,
): void {
const comment = {
type: block ? "CommentBlock" : "CommentLine",
value: text,
start: start,
end: end,
loc: new SourceLocation(startLoc, endLoc),
};
if (this.options.tokens) this.pushToken(comment);
this.state.comments.push(comment);
this.addComment(comment);
}
skipBlockComment(): void {
const startLoc = this.state.curPosition();
const start = this.state.pos;
const end = this.input.indexOf("*/", this.state.pos + 2);
if (end === -1) throw this.raise(start, Errors.UnterminatedComment);
this.state.pos = end + 2;
lineBreakG.lastIndex = start;
let match;
while (
(match = lineBreakG.exec(this.input)) &&
match.index < this.state.pos
) {
++this.state.curLine;
this.state.lineStart = match.index + match[0].length;
}
// If we are doing a lookahead right now we need to advance the position (above code)
// but we do not want to push the comment to the state.
if (this.isLookahead) return;
this.pushComment(
true,
this.input.slice(start + 2, end),
start,
this.state.pos,
startLoc,
this.state.curPosition(),
);
}
skipLineComment(startSkip: number): void {
const start = this.state.pos;
const startLoc = this.state.curPosition();
let ch = this.input.charCodeAt((this.state.pos += startSkip));
if (this.state.pos < this.length) {
while (!isNewLine(ch) && ++this.state.pos < this.length) {
ch = this.input.charCodeAt(this.state.pos);
}
}
// If we are doing a lookahead right now we need to advance the position (above code)
// but we do not want to push the comment to the state.
if (this.isLookahead) return;
this.pushComment(
false,
this.input.slice(start + startSkip, this.state.pos),
start,
this.state.pos,
startLoc,
this.state.curPosition(),
);
}
// Skips whitespace and comments starting at the current position, and
// keeps `curLine` / `lineStart` up to date for every line terminator
// crossed. `nextToken` calls this before reading a token unless the
// current context preserves space. Stops at the first character that is
// neither whitespace nor the start of a comment, or at end of input.
skipSpace(): void {
loop: while (this.state.pos < this.length) {
const ch = this.input.charCodeAt(this.state.pos);
switch (ch) {
// Plain horizontal whitespace: just advance.
case charCodes.space:
case charCodes.nonBreakingSpace:
case charCodes.tab:
++this.state.pos;
break;
case charCodes.carriageReturn:
// Treat "\r\n" as a single line break: step over the "\r" here and
// let the shared line-terminator code below consume the "\n".
if (
this.input.charCodeAt(this.state.pos + 1) === charCodes.lineFeed
) {
++this.state.pos;
}
// fall through
case charCodes.lineFeed:
case charCodes.lineSeparator:
case charCodes.paragraphSeparator:
++this.state.pos;
++this.state.curLine;
this.state.lineStart = this.state.pos;
break;
case charCodes.slash:
// A slash is skippable only when it opens a comment; otherwise it
// belongs to the next token and skipping ends.
switch (this.input.charCodeAt(this.state.pos + 1)) {
case charCodes.asterisk:
this.skipBlockComment();
break;
case charCodes.slash:
this.skipLineComment(2);
break;
default:
break loop;
}
break;
default:
// Any other character: skip it if `isWhitespace` accepts it, else stop.
if (isWhitespace(ch)) {
++this.state.pos;
} else {
break loop;
}
}
}
}
The finishToken method
The `finishToken` method is called at the end of every token. It sets `end`, `endLoc`, `type` and `value` on the state and, outside of lookahead, calls `updateContext` to maintain `context` and `exprAllowed`. Whitespace after the token is skipped later, by `nextToken`, so that the next token's `start` will point at the right position.
finishToken(type: TokenType, val: any): void {
this.state.end = this.state.pos;
this.state.endLoc = this.state.curPosition();
const prevType = this.state.type;
this.state.type = type;
this.state.value = val;
if (!this.isLookahead) this.updateContext(prevType);
}
The readToken_* family of methods
The `readToken_*` family of methods follows.
These are the functions that are called to fetch the next token. They are somewhat obscure, because they work in character codes rather than characters, and because operator parsing has been inlined into them. All in the name of speed.
readToken_numberSign
// number sign is "#"
readToken_numberSign(): void {
if (this.state.pos === 0 && this.readToken_interpreter()) {
return;
}
const nextPos = this.state.pos + 1;
const next = this.input.charCodeAt(nextPos);
if (next >= charCodes.digit0 && next <= charCodes.digit9) {
throw this.raise(this.state.pos, Errors.UnexpectedDigitAfterHash);
}
if (
next === charCodes.leftCurlyBrace ||
(next === charCodes.leftSquareBracket && this.hasPlugin("recordAndTuple"))
) {
// When we see `#{`, it is likely to be a hash record.
// However we don't yell at `#[` since users may intend to use "computed private fields",
// which is not allowed in the spec. Throwing expecting recordAndTuple is
// misleading
this.expectPlugin("recordAndTuple");
if (this.getPluginOption("recordAndTuple", "syntaxType") !== "hash") {
throw this.raise(
this.state.pos,
next === charCodes.leftCurlyBrace
? Errors.RecordExpressionHashIncorrectStartSyntaxType
: Errors.TupleExpressionHashIncorrectStartSyntaxType,
);
}
if (next === charCodes.leftCurlyBrace) {
// #{
this.finishToken(tt.braceHashL);
} else {
// #[
this.finishToken(tt.bracketHashL);
}
this.state.pos += 2;
} else {
this.finishOp(tt.hash, 1);
}
}
readToken_dot
readToken_dot(): void {
const next = this.input.charCodeAt(this.state.pos + 1);
if (next >= charCodes.digit0 && next <= charCodes.digit9) {
this.readNumber(true);
return;
}
if (
next === charCodes.dot &&
this.input.charCodeAt(this.state.pos + 2) === charCodes.dot
) {
this.state.pos += 3;
this.finishToken(tt.ellipsis);
} else {
++this.state.pos;
this.finishToken(tt.dot);
}
}
readToken_slash
readToken_slash(): void {
// '/'
if (this.state.exprAllowed && !this.state.inType) {
++this.state.pos;
this.readRegexp();
return;
}
const next = this.input.charCodeAt(this.state.pos + 1);
if (next === charCodes.equalsTo) {
this.finishOp(tt.assign, 2);
} else {
this.finishOp(tt.slash, 1);
}
}
readToken_interpreter
readToken_interpreter(): boolean {
if (this.state.pos !== 0 || this.length < 2) return false;
let ch = this.input.charCodeAt(this.state.pos + 1);
if (ch !== charCodes.exclamationMark) return false;
const start = this.state.pos;
this.state.pos += 1;
while (!isNewLine(ch) && ++this.state.pos < this.length) {
ch = this.input.charCodeAt(this.state.pos);
}
const value = this.input.slice(start + 2, this.state.pos);
this.finishToken(tt.interpreterDirective, value);
return true;
}
readToken_mult_modulo
readToken_mult_modulo(code: number): void {
// '%*'
let type = code === charCodes.asterisk ? tt.star : tt.modulo;
let width = 1;
let next = this.input.charCodeAt(this.state.pos + 1);
const exprAllowed = this.state.exprAllowed;
// Exponentiation operator **
if (code === charCodes.asterisk && next === charCodes.asterisk) {
width++;
next = this.input.charCodeAt(this.state.pos + 2);
type = tt.exponent;
}
if (next === charCodes.equalsTo && !exprAllowed) {
width++;
type = tt.assign;
}
this.finishOp(type, width);
}
readToken_pipe_amp
readToken_pipe_amp(code: number): void {
// '||' '&&' '||=' '&&='
const next = this.input.charCodeAt(this.state.pos + 1);
if (next === code) {
if (this.input.charCodeAt(this.state.pos + 2) === charCodes.equalsTo) {
this.finishOp(tt.assign, 3);
} else {
this.finishOp(
code === charCodes.verticalBar ? tt.logicalOR : tt.logicalAND,
2,
);
}
return;
}
if (code === charCodes.verticalBar) {
// '|>'
if (next === charCodes.greaterThan) {
this.finishOp(tt.pipeline, 2);
return;
}
// '|}'
if (
this.hasPlugin("recordAndTuple") &&
next === charCodes.rightCurlyBrace
) {
if (this.getPluginOption("recordAndTuple", "syntaxType") !== "bar") {
throw this.raise(
this.state.pos,
Errors.RecordExpressionBarIncorrectEndSyntaxType,
);
}
this.finishOp(tt.braceBarR, 2);
return;
}
// '|]'
if (
this.hasPlugin("recordAndTuple") &&
next === charCodes.rightSquareBracket
) {
if (this.getPluginOption("recordAndTuple", "syntaxType") !== "bar") {
throw this.raise(
this.state.pos,
Errors.TupleExpressionBarIncorrectEndSyntaxType,
);
}
this.finishOp(tt.bracketBarR, 2);
return;
}
}
if (next === charCodes.equalsTo) {
this.finishOp(tt.assign, 2);
return;
}
this.finishOp(
code === charCodes.verticalBar ? tt.bitwiseOR : tt.bitwiseAND,
1,
);
}
readToken_caret
readToken_caret(): void {
// '^'
const next = this.input.charCodeAt(this.state.pos + 1);
if (next === charCodes.equalsTo) {
this.finishOp(tt.assign, 2);
} else {
this.finishOp(tt.bitwiseXOR, 1);
}
}
readToken_plus_min
readToken_plus_min(code: number): void {
// '+-'
const next = this.input.charCodeAt(this.state.pos + 1);
if (next === code) {
if (
next === charCodes.dash &&
!this.inModule &&
this.input.charCodeAt(this.state.pos + 2) === charCodes.greaterThan &&
(this.state.lastTokEnd === 0 ||
lineBreak.test(
this.input.slice(this.state.lastTokEnd, this.state.pos),
))
) {
// A `-->` line comment
this.skipLineComment(3);
this.skipSpace();
this.nextToken();
return;
}
this.finishOp(tt.incDec, 2);
return;
}
if (next === charCodes.equalsTo) {
this.finishOp(tt.assign, 2);
} else {
this.finishOp(tt.plusMin, 1);
}
}
readToken_lt_gt
readToken_lt_gt(code: number): void {
// '<>'
const next = this.input.charCodeAt(this.state.pos + 1);
let size = 1;
if (next === code) {
size =
code === charCodes.greaterThan &&
this.input.charCodeAt(this.state.pos + 2) === charCodes.greaterThan
? 3
: 2;
if (this.input.charCodeAt(this.state.pos + size) === charCodes.equalsTo) {
this.finishOp(tt.assign, size + 1);
return;
}
this.finishOp(tt.bitShift, size);
return;
}
if (
next === charCodes.exclamationMark &&
code === charCodes.lessThan &&
!this.inModule &&
this.input.charCodeAt(this.state.pos + 2) === charCodes.dash &&
this.input.charCodeAt(this.state.pos + 3) === charCodes.dash
) {
// `<!--`, an XML-style comment that should be interpreted as a line comment
this.skipLineComment(4);
this.skipSpace();
this.nextToken();
return;
}
if (next === charCodes.equalsTo) {
// <= | >=
size = 2;
}
this.finishOp(tt.relational, size);
}
readToken_eq_excl
readToken_eq_excl(code: number): void {
// '=!'
const next = this.input.charCodeAt(this.state.pos + 1);
if (next === charCodes.equalsTo) {
this.finishOp(
tt.equality,
this.input.charCodeAt(this.state.pos + 2) === charCodes.equalsTo
? 3
: 2,
);
return;
}
if (code === charCodes.equalsTo && next === charCodes.greaterThan) {
// '=>'
this.state.pos += 2;
this.finishToken(tt.arrow);
return;
}
this.finishOp(code === charCodes.equalsTo ? tt.eq : tt.bang, 1);
}
readToken_question
readToken_question(): void {
// '?'
const next = this.input.charCodeAt(this.state.pos + 1);
const next2 = this.input.charCodeAt(this.state.pos + 2);
if (next === charCodes.questionMark && !this.state.inType) {
if (next2 === charCodes.equalsTo) {
// '??='
this.finishOp(tt.assign, 3);
} else {
// '??'
this.finishOp(tt.nullishCoalescing, 2);
}
} else if (
next === charCodes.dot &&
!(next2 >= charCodes.digit0 && next2 <= charCodes.digit9)
) {
// '.' not followed by a number
this.state.pos += 2;
this.finishToken(tt.questionDot);
} else {
++this.state.pos;
this.finishToken(tt.question);
}
}
getTokenFromCode
getTokenFromCode(code: number): void {
switch (code) {
// The interpretation of a dot depends on whether it is followed
// by a digit or another two dots.
case charCodes.dot:
this.readToken_dot();
return;
// Punctuation tokens.
case charCodes.leftParenthesis:
++this.state.pos;
this.finishToken(tt.parenL);
return;
case charCodes.rightParenthesis:
++this.state.pos;
this.finishToken(tt.parenR);
return;
case charCodes.semicolon:
++this.state.pos;
this.finishToken(tt.semi);
return;
case charCodes.comma:
++this.state.pos;
this.finishToken(tt.comma);
return;
case charCodes.leftSquareBracket:
if (
this.hasPlugin("recordAndTuple") &&
this.input.charCodeAt(this.state.pos + 1) === charCodes.verticalBar
) {
if (this.getPluginOption("recordAndTuple", "syntaxType") !== "bar") {
throw this.raise(
this.state.pos,
Errors.TupleExpressionBarIncorrectStartSyntaxType,
);
}
// [|
this.finishToken(tt.bracketBarL);
this.state.pos += 2;
} else {
++this.state.pos;
this.finishToken(tt.bracketL);
}
return;
case charCodes.rightSquareBracket:
++this.state.pos;
this.finishToken(tt.bracketR);
return;
case charCodes.leftCurlyBrace:
if (
this.hasPlugin("recordAndTuple") &&
this.input.charCodeAt(this.state.pos + 1) === charCodes.verticalBar
) {
if (this.getPluginOption("recordAndTuple", "syntaxType") !== "bar") {
throw this.raise(
this.state.pos,
Errors.RecordExpressionBarIncorrectStartSyntaxType,
);
}
// {|
this.finishToken(tt.braceBarL);
this.state.pos += 2;
} else {
++this.state.pos;
this.finishToken(tt.braceL);
}
return;
case charCodes.rightCurlyBrace:
++this.state.pos;
this.finishToken(tt.braceR);
return;
case charCodes.colon:
if (
this.hasPlugin("functionBind") &&
this.input.charCodeAt(this.state.pos + 1) === charCodes.colon
) {
this.finishOp(tt.doubleColon, 2);
} else {
++this.state.pos;
this.finishToken(tt.colon);
}
return;
case charCodes.questionMark:
this.readToken_question();
return;
case charCodes.graveAccent:
++this.state.pos;
this.finishToken(tt.backQuote);
return;
case charCodes.digit0: {
const next = this.input.charCodeAt(this.state.pos + 1);
// '0x', '0X' - hex number
if (next === charCodes.lowercaseX || next === charCodes.uppercaseX) {
this.readRadixNumber(16);
return;
}
// '0o', '0O' - octal number
if (next === charCodes.lowercaseO || next === charCodes.uppercaseO) {
this.readRadixNumber(8);
return;
}
// '0b', '0B' - binary number
if (next === charCodes.lowercaseB || next === charCodes.uppercaseB) {
this.readRadixNumber(2);
return;
}
}
// Anything else beginning with a digit is an integer, octal
// number, or float. (fall through)
case charCodes.digit1:
case charCodes.digit2:
case charCodes.digit3:
case charCodes.digit4:
case charCodes.digit5:
case charCodes.digit6:
case charCodes.digit7:
case charCodes.digit8:
case charCodes.digit9:
this.readNumber(false);
return;
// Quotes produce strings.
case charCodes.quotationMark:
case charCodes.apostrophe:
this.readString(code);
return;
// Operators are parsed inline in tiny state machines. '=' (charCodes.equalsTo) is
// often referred to. `finishOp` simply skips the amount of
// characters it is given as second argument, and returns a token
// of the type given by its first argument.
case charCodes.slash:
this.readToken_slash();
return;
case charCodes.percentSign:
case charCodes.asterisk:
this.readToken_mult_modulo(code);
return;
case charCodes.verticalBar:
case charCodes.ampersand:
this.readToken_pipe_amp(code);
return;
case charCodes.caret:
this.readToken_caret();
return;
case charCodes.plusSign:
case charCodes.dash:
this.readToken_plus_min(code);
return;
case charCodes.lessThan:
case charCodes.greaterThan:
this.readToken_lt_gt(code);
return;
case charCodes.equalsTo:
case charCodes.exclamationMark:
this.readToken_eq_excl(code);
return;
case charCodes.tilde:
this.finishOp(tt.tilde, 1);
return;
case charCodes.atSign:
++this.state.pos;
this.finishToken(tt.at);
return;
case charCodes.numberSign:
this.readToken_numberSign();
return;
case charCodes.backslash:
this.readWord();
return;
default:
if (isIdentifierStart(code)) {
this.readWord();
return;
}
}
throw this.raise(
this.state.pos,
Errors.InvalidOrUnexpectedToken,
String.fromCodePoint(code),
);
}
finishOp
finishOp(type: TokenType, size: number): void {
const str = this.input.slice(this.state.pos, this.state.pos + size);
this.state.pos += size;
this.finishToken(type, str);
}
readRegexp
readRegexp(): void {
const start = this.state.pos;
let escaped, inClass;
for (;;) {
if (this.state.pos >= this.length) {
throw this.raise(start, Errors.UnterminatedRegExp);
}
const ch = this.input.charAt(this.state.pos);
if (lineBreak.test(ch)) {
throw this.raise(start, Errors.UnterminatedRegExp);
}
if (escaped) {
escaped = false;
} else {
if (ch === "[") {
inClass = true;
} else if (ch === "]" && inClass) {
inClass = false;
} else if (ch === "/" && !inClass) {
break;
}
escaped = ch === "\\";
}
++this.state.pos;
}
const content = this.input.slice(start, this.state.pos);
++this.state.pos;
let mods = "";
while (this.state.pos < this.length) {
const char = this.input[this.state.pos];
const charCode = this.input.codePointAt(this.state.pos);
if (VALID_REGEX_FLAGS.has(char)) {
if (mods.indexOf(char) > -1) {
this.raise(this.state.pos + 1, Errors.DuplicateRegExpFlags);
}
} else if (
isIdentifierChar(charCode) ||
charCode === charCodes.backslash
) {
this.raise(this.state.pos + 1, Errors.MalformedRegExpFlags);
} else {
break;
}
++this.state.pos;
mods += char;
}
this.finishToken(tt.regexp, {
pattern: content,
flags: mods,
});
}
readInt
Read an integer in the given radix. Return `null` if zero digits were read, the integer value otherwise. When `len` is given, this will return `null` unless the integer has exactly `len` digits.
When `forceLen` is `true`, it means that we already know that in case of a malformed number we have to skip `len` characters anyway, instead of bailing out early. For example, in "\u{123Z}" we want to read up to `}` anyway, while in "\u00Z" we will stop at `Z` instead of consuming four characters (and thus the closing quote).
// Reads consecutive digits of the given radix starting at the current
// position, advancing `state.pos` past them.
//
// radix:             numeric base of the digits.
// len:               when given, the maximum number of loop steps; the
//                    result is null unless exactly `len` characters were
//                    consumed.
// forceLen:          when true, an out-of-range character does not stop
//                    the scan; it is consumed and the result becomes null.
// allowNumSeparator: when false, a "_" separator is reported with
//                    Errors.NumericSeparatorInEscapeSequence.
//
// Returns the parsed value, or null when nothing was consumed, the length
// requirement was not met, or an invalid character was seen under forceLen.
readInt(
radix: number,
len?: number,
forceLen?: boolean,
allowNumSeparator: boolean = true,
): number | null {
const start = this.state.pos;
// Sibling tables for "_" validation: one set for hex, one shared by the
// other radixes (forbidden), and one per radix (allowed).
const forbiddenSiblings =
radix === 16
? forbiddenNumericSeparatorSiblings.hex
: forbiddenNumericSeparatorSiblings.decBinOct;
const allowedSiblings =
radix === 16
? allowedNumericSeparatorSiblings.hex
: radix === 10
? allowedNumericSeparatorSiblings.dec
: radix === 8
? allowedNumericSeparatorSiblings.oct
: allowedNumericSeparatorSiblings.bin;
let invalid = false;
let total = 0;
// With no `len` the loop is bounded only by the `break` below.
for (let i = 0, e = len == null ? Infinity : len; i < e; ++i) {
const code = this.input.charCodeAt(this.state.pos);
let val;
if (this.hasPlugin("numericSeparator")) {
if (code === charCodes.underscore) {
const prev = this.input.charCodeAt(this.state.pos - 1);
const next = this.input.charCodeAt(this.state.pos + 1);
// A separator must be followed by an allowed character and must not
// touch a forbidden one; a NaN `next` means it is the last character
// of the input.
if (allowedSiblings.indexOf(next) === -1) {
this.raise(this.state.pos, Errors.UnexpectedNumericSeparator);
} else if (
forbiddenSiblings.indexOf(prev) > -1 ||
forbiddenSiblings.indexOf(next) > -1 ||
Number.isNaN(next)
) {
this.raise(this.state.pos, Errors.UnexpectedNumericSeparator);
}
if (!allowNumSeparator) {
this.raise(this.state.pos, Errors.NumericSeparatorInEscapeSequence);
}
// Ignore this _ character
// (note that `continue` still runs `++i`, so the separator uses up one
// loop step).
++this.state.pos;
continue;
}
}
// Map the character to its digit value. `charCodes.lineFeed` is used as
// the numeric constant for the first letter digit (presumably 10, so
// "a"/"A" map to 10 — confirm against charCodes).
if (code >= charCodes.lowercaseA) {
val = code - charCodes.lowercaseA + charCodes.lineFeed;
} else if (code >= charCodes.uppercaseA) {
val = code - charCodes.uppercaseA + charCodes.lineFeed;
} else if (charCodes.isDigit(code)) {
val = code - charCodes.digit0; // 0-9
} else {
// Not a digit in any radix (this also covers NaN at end of input).
val = Infinity;
}
if (val >= radix) {
// If we are in "errorRecovery" mode and we found a digit which is too big,
// don't break the loop.
if (this.options.errorRecovery && val <= 9) {
val = 0;
this.raise(this.state.start + i + 2, Errors.InvalidDigit, radix);
} else if (forceLen) {
// Keep consuming so the caller ends up past the whole sequence, but
// remember that the result is unusable.
val = 0;
invalid = true;
} else {
break;
}
}
++this.state.pos;
total = total * radix + val;
}
if (
this.state.pos === start ||
(len != null && this.state.pos - start !== len) ||
invalid
) {
return null;
}
return total;
}
readRadixNumber
readRadixNumber(radix: number): void {
const start = this.state.pos;
let isBigInt = false;
this.state.pos += 2; // 0x
const val = this.readInt(radix);
if (val == null) {
this.raise(this.state.start + 2, Errors.InvalidDigit, radix);
}
const next = this.input.charCodeAt(this.state.pos);
if (next === charCodes.underscore) {
this.expectPlugin("numericSeparator", this.state.pos);
}
if (next === charCodes.lowercaseN) {
++this.state.pos;
isBigInt = true;
}
if (isIdentifierStart(this.input.codePointAt(this.state.pos))) {
throw this.raise(this.state.pos, Errors.NumberIdentifier);
}
if (isBigInt) {
const str = this.input.slice(start, this.state.pos).replace(/[_n]/g, "");
this.finishToken(tt.bigint, str);
return;
}
this.finishToken(tt.num, val);
}
readNumber
Read an integer, octal integer, or floating-point number.
readNumber(startsWithDot: boolean): void {
const start = this.state.pos;
let isFloat = false;
let isBigInt = false;
let isNonOctalDecimalInt = false;
if (!startsWithDot && this.readInt(10) === null) {
this.raise(start, Errors.InvalidNumber);
}
let octal =
this.state.pos - start >= 2 &&
this.input.charCodeAt(start) === charCodes.digit0;
if (octal) {
if (this.state.strict) {
this.raise(start, Errors.StrictOctalLiteral);
}
if (/[89]/.test(this.input.slice(start, this.state.pos))) {
octal = false;
isNonOctalDecimalInt = true;
}
}
let next = this.input.charCodeAt(this.state.pos);
if (next === charCodes.dot && !octal) {
++this.state.pos;
this.readInt(10);
isFloat = true;
next = this.input.charCodeAt(this.state.pos);
}
if (
(next === charCodes.uppercaseE || next === charCodes.lowercaseE) &&
!octal
) {
next = this.input.charCodeAt(++this.state.pos);
if (next === charCodes.plusSign || next === charCodes.dash) {
++this.state.pos;
}
if (this.readInt(10) === null) this.raise(start, "Invalid number");
isFloat = true;
next = this.input.charCodeAt(this.state.pos);
}
// disallow numeric separators in non octal decimals and legacy octal likes
if (this.hasPlugin("numericSeparator") && (octal || isNonOctalDecimalInt)) {
const underscorePos = this.input
.slice(start, this.state.pos)
.indexOf("_");
if (underscorePos > 0) {
this.raise(underscorePos + start, Errors.ZeroDigitNumericSeparator);
}
}
if (next === charCodes.underscore) {
this.expectPlugin("numericSeparator", this.state.pos);
}
if (next === charCodes.lowercaseN) {
// disallow floats, legacy octal syntax and non octal decimals
// new style octal ("0o") is handled in this.readRadixNumber
if (isFloat || octal || isNonOctalDecimalInt) {
this.raise(start, "Invalid BigIntLiteral");
}
++this.state.pos;
isBigInt = true;
}
if (isIdentifierStart(this.input.codePointAt(this.state.pos))) {
throw this.raise(this.state.pos, Errors.NumberIdentifier);
}
// remove "_" for numeric literal separator, and "n" for BigInts
const str = this.input.slice(start, this.state.pos).replace(/[_n]/g, "");
if (isBigInt) {
this.finishToken(tt.bigint, str);
return;
}
const val = octal ? parseInt(str, 8) : parseFloat(str);
this.finishToken(tt.num, val);
}
readCodePoint
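Read the code point of a Unicode escape, with the position just after the `\u`: either the braced form `{...}` (any number of hex digits, at most 0x10FFFF) or exactly four hex digits.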
readCodePoint(throwOnInvalid: boolean): number | null {
const ch = this.input.charCodeAt(this.state.pos);
let code;
if (ch === charCodes.leftCurlyBrace) {
const codePos = ++this.state.pos;
code = this.readHexChar(
this.input.indexOf("}", this.state.pos) - this.state.pos,
true,
throwOnInvalid,
);
++this.state.pos;
if (code !== null && code > 0x10ffff) {
if (throwOnInvalid) {
this.raise(codePos, Errors.InvalidCodePoint);
} else {
return null;
}
}
} else {
code = this.readHexChar(4, false, throwOnInvalid);
}
return code;
}
readString
readString(quote: number): void {
let out = "",
chunkStart = ++this.state.pos;
for (;;) {
if (this.state.pos >= this.length) {
throw this.raise(this.state.start, Errors.UnterminatedString);
}
const ch = this.input.charCodeAt(this.state.pos);
if (ch === quote) break;
if (ch === charCodes.backslash) {
out += this.input.slice(chunkStart, this.state.pos);
// $FlowFixMe
out += this.readEscapedChar(false);
chunkStart = this.state.pos;
} else if (
ch === charCodes.lineSeparator ||
ch === charCodes.paragraphSeparator
) {
++this.state.pos;
++this.state.curLine;
this.state.lineStart = this.state.pos;
} else if (isNewLine(ch)) {
throw this.raise(this.state.start, Errors.UnterminatedString);
} else {
++this.state.pos;
}
}
out += this.input.slice(chunkStart, this.state.pos++);
this.finishToken(tt.string, out);
}
readTmplToken
Reads template string tokens.
// Reads the next token inside a template literal: a `template` chunk of
// literal text, or, when positioned directly on a delimiter right after a
// template chunk, the "${" or closing backquote token itself.
// Throws Errors.UnterminatedTemplate when the input ends first.
readTmplToken(): void {
let out = "",
chunkStart = this.state.pos,
containsInvalid = false;
for (;;) {
if (this.state.pos >= this.length) {
throw this.raise(this.state.start, Errors.UnterminatedTemplate);
}
const ch = this.input.charCodeAt(this.state.pos);
// A backquote or "${" ends the current chunk of literal text.
if (
ch === charCodes.graveAccent ||
(ch === charCodes.dollarSign &&
this.input.charCodeAt(this.state.pos + 1) ===
charCodes.leftCurlyBrace)
) {
// No text was consumed and the current token is already a template
// chunk: emit the delimiter itself.
if (this.state.pos === this.state.start && this.match(tt.template)) {
if (ch === charCodes.dollarSign) {
this.state.pos += 2;
this.finishToken(tt.dollarBraceL);
return;
} else {
++this.state.pos;
this.finishToken(tt.backQuote);
return;
}
}
out += this.input.slice(chunkStart, this.state.pos);
// The value is null when the chunk contained an invalid escape.
this.finishToken(tt.template, containsInvalid ? null : out);
return;
}
if (ch === charCodes.backslash) {
out += this.input.slice(chunkStart, this.state.pos);
// In template mode an invalid escape yields null rather than an error.
const escaped = this.readEscapedChar(true);
if (escaped === null) {
containsInvalid = true;
} else {
out += escaped;
}
chunkStart = this.state.pos;
} else if (isNewLine(ch)) {
out += this.input.slice(chunkStart, this.state.pos);
++this.state.pos;
// "\r\n" and "\r" are both recorded as "\n" in the value; other
// newline characters are kept as they are.
switch (ch) {
case charCodes.carriageReturn:
if (this.input.charCodeAt(this.state.pos) === charCodes.lineFeed) {
++this.state.pos;
}
// fall through
case charCodes.lineFeed:
out += "\n";
break;
default:
out += String.fromCharCode(ch);
break;
}
++this.state.curLine;
this.state.lineStart = this.state.pos;
chunkStart = this.state.pos;
} else {
++this.state.pos;
}
}
}
readEscapedChar
Used to read escaped characters
// Reads one escape sequence, with the position on the backslash, and
// returns the text it stands for.
//
// inTemplate: true when reading inside a template literal. Invalid
//             escapes then return null instead of being reported, and
//             octal escapes and "\8" / "\9" are rejected with null.
//
// Returns the resulting string ("" for a line continuation), or null for
// an invalid escape in a template.
readEscapedChar(inTemplate: boolean): string | null {
const throwOnInvalid = !inTemplate;
// Step over the backslash, read the escape character, then step over it.
const ch = this.input.charCodeAt(++this.state.pos);
++this.state.pos;
switch (ch) {
case charCodes.lowercaseN:
return "\n";
case charCodes.lowercaseR:
return "\r";
case charCodes.lowercaseX: {
// "\x" followed by two hex digits.
const code = this.readHexChar(2, false, throwOnInvalid);
return code === null ? null : String.fromCharCode(code);
}
case charCodes.lowercaseU: {
const code = this.readCodePoint(throwOnInvalid);
return code === null ? null : String.fromCodePoint(code);
}
case charCodes.lowercaseT:
return "\t";
case charCodes.lowercaseB:
return "\b";
case charCodes.lowercaseV:
return "\u000b";
case charCodes.lowercaseF:
return "\f";
// A backslash followed by a line terminator is a line continuation and
// contributes nothing to the value.
case charCodes.carriageReturn:
if (this.input.charCodeAt(this.state.pos) === charCodes.lineFeed) {
++this.state.pos;
}
// fall through
case charCodes.lineFeed:
this.state.lineStart = this.state.pos;
++this.state.curLine;
// fall through
case charCodes.lineSeparator:
case charCodes.paragraphSeparator:
return "";
case charCodes.digit8:
case charCodes.digit9:
if (inTemplate) {
return null;
}
// fall through
default:
if (ch >= charCodes.digit0 && ch <= charCodes.digit7) {
// Octal escape: up to three octal digits, starting at the digit
// already consumed.
const codePos = this.state.pos - 1;
const match = this.input
.substr(this.state.pos - 1, 3)
.match(/^[0-7]+/);
// This is never null, because of the if condition above.
/*:: invariant(match !== null) */
let octalStr = match[0];
let octal = parseInt(octalStr, 8);
// Values above 255 do not fit: drop the last digit.
if (octal > 255) {
octalStr = octalStr.slice(0, -1);
octal = parseInt(octalStr, 8);
}
this.state.pos += octalStr.length - 1;
const next = this.input.charCodeAt(this.state.pos);
// Only a lone "\0" not followed by 8 or 9 is accepted without
// further checks.
if (
octalStr !== "0" ||
next === charCodes.digit8 ||
next === charCodes.digit9
) {
if (inTemplate) {
return null;
} else if (this.state.strict) {
this.raise(codePos, Errors.StrictOctalLiteral);
} else {
// This property is used to throw an error for
// an octal literal in a directive that occurs prior
// to a "use strict" directive.
this.state.octalPositions.push(codePos);
}
}
return String.fromCharCode(octal);
}
// Any other character escapes to itself.
return String.fromCharCode(ch);
}
}
readHexChar
Used to read character escape sequences (‘\x’, ‘\u’).
readHexChar(
len: number,
forceLen: boolean,
throwOnInvalid: boolean,
): number | null {
const codePos = this.state.pos;
const n = this.readInt(16, len, forceLen, false);
if (n === null) {
if (throwOnInvalid) {
this.raise(codePos, Errors.InvalidEscapeSequence);
} else {
this.state.pos = codePos - 1;
}
}
return n;
}
readWord1
Read an identifier, and return it as a string. Sets this.state.containsEsc
to whether the word contained a ‘\u’ escape.
Incrementally adds only escaped chars, adding other chunks as-is
as a micro-optimization.
// Reads an identifier starting at the current position and returns its
// text with unicode escapes resolved. Sets `state.containsEsc` to whether
// the word contained an escape. Unescaped runs are copied as whole slices;
// only escapes are appended individually.
readWord1(): string {
let word = "";
this.state.containsEsc = false;
const start = this.state.pos;
let chunkStart = this.state.pos;
while (this.state.pos < this.length) {
const ch = this.input.codePointAt(this.state.pos);
if (isIdentifierChar(ch)) {
// Code points above 0xffff occupy two UTF-16 code units.
this.state.pos += ch <= 0xffff ? 1 : 2;
} else if (this.state.isIterator && ch === charCodes.atSign) {
// "@" is accepted as part of the word while `state.isIterator` is set.
++this.state.pos;
} else if (ch === charCodes.backslash) {
this.state.containsEsc = true;
word += this.input.slice(chunkStart, this.state.pos);
const escStart = this.state.pos;
// The first character must be a valid identifier start; later ones
// only need to be valid identifier characters.
const identifierCheck =
this.state.pos === start ? isIdentifierStart : isIdentifierChar;
// Only "\u" escapes are accepted inside identifiers.
if (this.input.charCodeAt(++this.state.pos) !== charCodes.lowercaseU) {
this.raise(this.state.pos, Errors.MissingUnicodeEscape);
continue;
}
++this.state.pos;
const esc = this.readCodePoint(true);
if (esc !== null) {
if (!identifierCheck(esc)) {
this.raise(escStart, Errors.EscapedCharNotAnIdentifier);
}
word += String.fromCodePoint(esc);
}
chunkStart = this.state.pos;
} else {
break;
}
}
return word + this.input.slice(chunkStart, this.state.pos);
}
isIterator
Check if a word is an iterator.
isIterator(word: string): boolean {
return word === "@@iterator" || word === "@@asyncIterator";
}
readWord
Read an identifier or keyword token. Will check for reserved words when necessary.
readWord(): void {
const word = this.readWord1();
const type = keywordTypes.get(word) || tt.name;
// Allow @@iterator and @@asyncIterator as a identifier only inside type
if (
this.state.isIterator &&
(!this.isIterator(word) || !this.state.inType)
) {
this.raise(this.state.pos, Errors.InvalidIdentifier, word);
}
this.finishToken(type, word);
}
checkKeywordEscapes
The `checkKeywordEscapes` method checks whether the current token is a keyword that contains an escape sequence. If so, it raises an error indicating that an invalid escaped reserved word was found.
checkKeywordEscapes(): void {
const kw = this.state.type.keyword;
if (kw && this.state.containsEsc) {
this.raise(this.state.start, Errors.InvalidEscapedReservedWord, kw);
}
}
braceIsBlock
The `braceIsBlock` method determines whether a brace `{` should be interpreted as the start of a block statement or not, based on the previous token type and the current parsing context.
braceIsBlock(prevType: TokenType): boolean {
const parent = this.curContext();
if (parent === ct.functionExpression || parent === ct.functionStatement) {
return true;
}
if (
prevType === tt.colon &&
(parent === ct.braceStatement || parent === ct.braceExpression)
) {
return !parent.isExpr;
}
// The check for `tt.name && exprAllowed` detects whether we are
// after a `yield` or `of` construct. See the `updateContext` for
// `tt.name`.
if (
prevType === tt._return ||
(prevType === tt.name && this.state.exprAllowed)
) {
return lineBreak.test(
this.input.slice(this.state.lastTokEnd, this.state.start),
);
}
if (
prevType === tt._else || // If the previous token is one of else,
prevType === tt.semi || // semicolon (;),
prevType === tt.eof || // end of file,
prevType === tt.parenR || // right parenthesis ),
prevType === tt.arrow // arrow (=>),
) { // the brace is a block.
return true;
}
if (prevType === tt.braceL) {
return parent === ct.braceStatement;
}
if ( // If the previous token is a var declaration or a name, is not the start of a block
prevType === tt._var ||
prevType === tt._const ||
prevType === tt.name
) {
return false;
}
if (prevType === tt.relational) {
// `class C<T> { ... }`
return true;
}
// The brace is a block if expressions are not allowed in the current state.
return !this.state.exprAllowed;
}
updateContext
The `updateContext` method updates the tokenizer's context after each token, based on the current token type and the previous token type. In particular it maintains `exprAllowed`, which later tokenization consults: for example, `readToken_slash` uses it to decide whether a `/` begins a regular expression or is a division operator.
updateContext(prevType: TokenType): void {
const type = this.state.type;
let update;
if (type.keyword && (prevType === tt.dot || prevType === tt.questionDot)) {
this.state.exprAllowed = false;
} else if ((update = type.updateContext)) {
update.call(this, prevType);
} else {
this.state.exprAllowed = type.beforeExpr;
}
}
}