// View full text (page-extraction residue; not part of the program)
/* Token: { type, value, line, pos } */

/**
 * Numeric codes for every token kind the lexer can produce.
 * The key names double as the printable token names (see Lexer#getTokenType).
 */
const TokenType = {
  Keyword_if: 1,
  Keyword_else: 2,
  Keyword_print: 3,
  Keyword_putc: 4,
  Keyword_while: 5,
  Op_add: 6,
  Op_and: 7,
  Op_assign: 8,
  Op_divide: 9,
  Op_equal: 10,
  Op_greater: 11,
  Op_greaterequal: 12,
  Op_less: 13,
  // Was misspelled "Op_Lessequal", so the "<=" case in getToken() produced type `undefined`.
  Op_lessequal: 14,
  Op_mod: 15,
  Op_multiply: 16,
  Op_not: 17,
  Op_notequal: 18,
  Op_or: 19,
  Op_subtract: 20,
  Integer: 21,
  String: 22,
  Identifier: 23,
  Semicolon: 24,
  Comma: 25,
  LeftBrace: 26,
  RightBrace: 27,
  LeftParen: 28,
  RightParen: 29,
  End_of_input: 99,
};

/**
 * Hand-written scanner that turns a source string into tokens, one per
 * getToken() call. End of input is represented by `this.chr === undefined`.
 */
class Lexer {
  /**
   * @param {string} source - complete program text to tokenize
   */
  constructor(source) {
    this.source = source;
    this.pos = 1; // position in line
    this.position = 0; // position in source
    this.line = 1;
    // charAt(0) yields "" for an empty source; normalize so EOF is always `undefined`.
    this.chr = this.source.length > 0 ? this.source.charAt(0) : undefined;
    // The first character never goes through getNextChar(), so apply the
    // same newline bookkeeping here.
    if (this.chr === "\n") {
      this.line++;
      this.pos = 0;
    }
    this.keywords = {
      if: TokenType.Keyword_if,
      else: TokenType.Keyword_else,
      print: TokenType.Keyword_print,
      putc: TokenType.Keyword_putc,
      while: TokenType.Keyword_while,
    };
  }

  /**
   * Render a character for an error message; tolerates end of input.
   * @param {string|undefined} chr
   * @returns {string}
   */
  static describeChar(chr) {
    if (chr === undefined) {
      return "end of input";
    }
    return "(" + chr.charCodeAt(0) + ") '" + chr + "'";
  }

  /**
   * Advance one character, updating line/column counters.
   * @returns {string|undefined} the new current character, or undefined at end of input
   */
  getNextChar() {
    this.pos++;
    this.position++;
    if (this.position >= this.source.length) {
      this.chr = undefined;
      return this.chr;
    }
    this.chr = this.source.charAt(this.position);
    if (this.chr === "\n") {
      this.line++;
      this.pos = 0; // the next character will be column 1
    }
    return this.chr;
  }

  /**
   * Report a lexical error and terminate the process with exit code 1.
   * Callers rely on this never returning.
   * @param {number} line
   * @param {number} pos
   * @param {string} message
   */
  error(line, pos, message) {
    if (line > 0 && pos > 0) {
      console.log(message + " in line " + line + ", pos " + pos + "\n");
    } else {
      console.log(message);
    }
    process.exit(1);
  }

  /**
   * Decide between a two-character operator and its one-character fallback.
   * @param {string} expect - second character that completes the long operator
   * @param {number} ifyes - token type when `expect` follows
   * @param {number} ifno - token type otherwise; End_of_input means "no valid fallback"
   * @param {number} line
   * @param {number} pos
   * @returns {{type: number, value: string, line: number, pos: number}}
   */
  follow(expect, ifyes, ifno, line, pos) {
    if (this.getNextChar() === expect) {
      this.getNextChar();
      return { type: ifyes, value: "", line, pos };
    }
    if (ifno === TokenType.End_of_input) {
      // describeChar() copes with EOF, where this.chr is undefined.
      this.error(line, pos, "follow: unrecognized character: " + Lexer.describeChar(this.chr));
    }
    return { type: ifno, value: "", line, pos };
  }

  /**
   * Called on "/": returns a divide token, or skips a block comment and
   * returns the token that follows it.
   */
  div_or_comment(line, pos) {
    if (this.getNextChar() !== "*") {
      return { type: TokenType.Op_divide, value: "/", line, pos };
    }
    this.getNextChar();
    while (true) {
      // getNextChar() signals EOF with undefined; the NUL check is kept from the
      // original so a literal NUL inside a comment is still rejected.
      if (this.chr === undefined || this.chr === "\u0000") {
        this.error(line, pos, "EOF in comment");
      } else if (this.chr === "*") {
        // Do not advance again on a mismatch: the character just read may
        // itself be the "*" that starts the closing "*/".
        if (this.getNextChar() === "/") {
          this.getNextChar();
          return this.getToken();
        }
      } else {
        this.getNextChar();
      }
    }
  }

  /**
   * Called on "'": scans a character constant ('x', '\n' or '\\') and returns
   * it as an Integer token whose value is the character code.
   */
  char_lit(line, pos) {
    let c = this.getNextChar(); // skip opening quote
    if (c === undefined) {
      this.error(line, pos, "EOF while scanning character constant");
    }
    let n = c.charCodeAt(0);
    if (c === "'") {
      this.error(line, pos, "empty character constant");
    } else if (c === "\\") {
      c = this.getNextChar();
      if (c === "n") {
        n = 10;
      } else if (c === "\\") {
        n = 92;
      } else {
        this.error(line, pos, "unknown escape sequence \\" + c);
      }
    }
    if (this.getNextChar() !== "'") {
      this.error(line, pos, "multi-character constant");
    }
    this.getNextChar();
    return { type: TokenType.Integer, value: n, line, pos };
  }

  /**
   * Scans a string literal delimited by `start`; the value excludes the
   * delimiters and escape sequences are kept verbatim.
   */
  string_lit(start, line, pos) {
    let value = "";
    while (this.getNextChar() !== start) {
      if (this.chr === undefined) {
        this.error(line, pos, "EOF while scanning string literal");
      }
      if (this.chr === "\n") {
        this.error(line, pos, "EOL while scanning string literal");
      }
      value += this.chr;
    }
    this.getNextChar();
    return { type: TokenType.String, value, line, pos };
  }

  /**
   * Scans a run of word characters and classifies it as an Integer
   * (value is the digit text), a keyword, or an Identifier.
   */
  identifier_or_integer(line, pos) {
    let is_number = true;
    let text = "";
    // RegExp#test coerces undefined to the string "undefined", which matches \w;
    // without the explicit EOF guard a trailing identifier never terminates.
    while (this.chr !== undefined && /\w/.test(this.chr)) {
      text += this.chr;
      if (!/\d/.test(this.chr)) {
        is_number = false;
      }
      this.getNextChar();
    }
    if (text === "") {
      this.error(
        line,
        pos,
        "identifier_or_integer: unrecognized character: " + Lexer.describeChar(this.chr)
      );
    }
    if (/\d/.test(text.charAt(0))) {
      if (!is_number) {
        this.error(line, pos, "invalid number: " + text);
      }
      return { type: TokenType.Integer, value: text, line, pos };
    }
    // Own-property check: `in` would also match inherited names such as
    // "toString" or "constructor" and misclassify them as keywords.
    if (Object.prototype.hasOwnProperty.call(this.keywords, text)) {
      return { type: this.keywords[text], value: "", line, pos };
    }
    return { type: TokenType.Identifier, value: text, line, pos };
  }

  /**
   * Skip whitespace and return the next token.
   * @returns {{type: number, value: (string|number), line: number, pos: number}}
   */
  getToken() {
    // Ignore whitespace
    while (this.chr !== undefined && /\s/.test(this.chr)) {
      this.getNextChar();
    }
    const line = this.line;
    const pos = this.pos;

    // Emit a one-character token whose value is the character itself.
    const single = (type) => {
      const value = this.chr;
      this.getNextChar();
      return { type, value, line, pos };
    };

    switch (this.chr) {
      case undefined:
        return { type: TokenType.End_of_input, value: "", line, pos };
      case "/":
        return this.div_or_comment(line, pos);
      case "'":
        return this.char_lit(line, pos);
      case '"':
        return this.string_lit(this.chr, line, pos);
      case "<":
        return this.follow("=", TokenType.Op_lessequal, TokenType.Op_less, line, pos);
      case ">":
        return this.follow("=", TokenType.Op_greaterequal, TokenType.Op_greater, line, pos);
      case "=":
        return this.follow("=", TokenType.Op_equal, TokenType.Op_assign, line, pos);
      case "!":
        return this.follow("=", TokenType.Op_notequal, TokenType.Op_not, line, pos);
      case "&":
        return this.follow("&", TokenType.Op_and, TokenType.End_of_input, line, pos);
      case "|":
        return this.follow("|", TokenType.Op_or, TokenType.End_of_input, line, pos);
      case "{":
        return single(TokenType.LeftBrace);
      case "}":
        return single(TokenType.RightBrace);
      case "(":
        return single(TokenType.LeftParen);
      case ")":
        return single(TokenType.RightParen);
      case "+":
        return single(TokenType.Op_add);
      case "-":
        return single(TokenType.Op_subtract);
      case "*":
        return single(TokenType.Op_multiply);
      case "%":
        return single(TokenType.Op_mod);
      case ";":
        return single(TokenType.Semicolon);
      case ",":
        return single(TokenType.Comma);
      default:
        return this.identifier_or_integer(line, pos);
    }
  }

  /* https://stackoverflow.com/questions/9907419/how-to-get-a-key-in-a-javascript-object-by-its-value */
  /**
   * Reverse lookup: name of the TokenType key holding `value`.
   * @param {number} value
   * @returns {string|undefined}
   */
  getTokenType(value) {
    return Object.keys(TokenType).find((key) => TokenType[key] === value);
  }

  /**
   * Build the printable line for a token: right-aligned line (width 5) and
   * column (width 7), the token name padded to a 16-character field, then the
   * value for Integer/Identifier/String tokens.
   * The earlier substr()-based padding used one-space pad strings and so
   * truncated any multi-digit line or column number.
   * @param {{type: number, value: (string|number), line: number, pos: number}} t
   * @returns {string}
   */
  formatToken(t) {
    let result = String(t.line).padStart(5);
    result += String(t.pos).padStart(7);
    result += (" " + this.getTokenType(t.type)).padEnd(16).slice(0, 16);
    switch (t.type) {
      case TokenType.Integer:
      case TokenType.Identifier:
        result += " " + t.value;
        break;
      case TokenType.String:
        result += ' "' + t.value + '"';
        break;
    }
    return result;
  }

  /** Print one token to stdout. */
  printToken(t) {
    console.log(this.formatToken(t));
  }

  /** Tokenize the remaining input, printing every token including End_of_input. */
  printTokens() {
    let t;
    do {
      t = this.getToken();
      this.printToken(t);
    } while (t.type !== TokenType.End_of_input);
  }
}
const fs = require("fs")
// Command-line entry point: tokenize the file named by the first CLI argument
// and print one line per token.
const inputPath = process.argv[2];
if (inputPath === undefined) {
  console.error("usage: node <script> <source-file>");
  process.exit(1);
}
fs.readFile(inputPath, "utf8", (err, data) => {
  if (err) {
    // Previously ignored: `data` is undefined on failure and the Lexer
    // constructor would crash with an unrelated TypeError.
    console.error("cannot read " + inputPath + ": " + err.message);
    process.exit(1);
  }
  const lexer = new Lexer(data); // was an implicit global `l`
  lexer.printTokens();
});