当前位置: 代码迷 >> 综合 >> 词法分析器(Lexical Analyzer)【JavaScript实现】
  详细解决方案

词法分析器(Lexical Analyzer)【JavaScript实现】

热度:57   发布时间:2023-10-26 06:17:26.0

查看正文内容

/* Token: { type, value, line, pos } */

/**
 * Numeric codes for every kind of token the lexer can produce.
 * The key names double as the printable token names (see Lexer#getTokenType).
 */
const TokenType = {
    Keyword_if: 1, Keyword_else: 2, Keyword_print: 3, Keyword_putc: 4, Keyword_while: 5,
    Op_add: 6, Op_and: 7, Op_assign: 8, Op_divide: 9, Op_equal: 10, Op_greater: 11,
    // Spelled Op_lessequal (lower-case "l") so it matches the lookup in getToken().
    Op_greaterequal: 12, Op_less: 13, Op_lessequal: 14, Op_mod: 15, Op_multiply: 16, Op_not: 17,
    Op_notequal: 18, Op_or: 19, Op_subtract: 20,
    Integer: 21, String: 22, Identifier: 23,
    Semicolon: 24, Comma: 25,
    LeftBrace: 26, RightBrace: 27,
    LeftParen: 28, RightParen: 29,
    End_of_input: 99,
};

/**
 * Hand-written lexer. `this.chr` always holds the current character, or
 * `undefined` once the end of the source has been reached.
 */
class Lexer {
    /**
     * @param {string} source - Complete program text to tokenize.
     */
    constructor(source) {
        this.source = source;
        this.pos = 1;        // position in line
        this.position = 0;   // position in source
        this.line = 1;
        // charAt(0) returns "" for an empty source; use the same EOF marker
        // (undefined) that getNextChar() uses so getToken() reports End_of_input.
        this.chr = this.source.length > 0 ? this.source.charAt(0) : undefined;
        if (this.chr === "\n") {
            // Mirror getNextChar(): a newline starts a new line at column 0.
            this.line++;
            this.pos = 0;
        }
        this.keywords = {
            "if": TokenType.Keyword_if,
            "else": TokenType.Keyword_else,
            "print": TokenType.Keyword_print,
            "putc": TokenType.Keyword_putc,
            "while": TokenType.Keyword_while,
        };
    }

    /**
     * Advances to the next character and updates line/pos bookkeeping.
     * @returns {string|undefined} The new current character, or undefined at end of input.
     */
    getNextChar() {
        this.pos++;
        this.position++;
        if (this.position >= this.source.length) {
            this.chr = undefined;
            return this.chr;
        }
        this.chr = this.source.charAt(this.position);
        if (this.chr === "\n") {
            this.line++;
            this.pos = 0;
        }
        return this.chr;
    }

    /**
     * Prints a diagnostic and terminates the process with exit code 1.
     * @param {number} line - Line of the offending token (location is omitted unless > 0).
     * @param {number} pos - Column of the offending token (location is omitted unless > 0).
     * @param {string} message - Description of the problem.
     */
    error(line, pos, message) {
        if (line > 0 && pos > 0) {
            console.log(`${message} in line ${line}, pos ${pos}\n`);
        } else {
            console.log(message);
        }
        process.exit(1);
    }

    /**
     * Renders the current character for diagnostics without failing at end of input.
     * @returns {string} "(code) 'c'" for a character, or "EOF".
     */
    describeChr() {
        if (this.chr === undefined) {
            return "EOF";
        }
        return `(${this.chr.charCodeAt(0)}) '${this.chr}'`;
    }

    /**
     * Resolves one- versus two-character operators such as "<" and "<=".
     * @param {string} expect - Character that completes the two-character operator.
     * @param {number} ifyes - Token type when `expect` follows.
     * @param {number} ifno - Token type otherwise; End_of_input means the lone character is illegal.
     * @param {number} line - Line where the operator starts.
     * @param {number} pos - Column where the operator starts.
     * @returns {{type: number, value: string, line: number, pos: number}}
     */
    follow(expect, ifyes, ifno, line, pos) {
        if (this.getNextChar() === expect) {
            this.getNextChar();
            return { type: ifyes, value: "", line, pos };
        }
        if (ifno === TokenType.End_of_input) {
            this.error(line, pos, `follow: unrecognized character: ${this.describeChr()}`);
        }
        return { type: ifno, value: "", line, pos };
    }

    /**
     * Handles "/": either the division operator or the start of a block comment.
     * After a comment, scanning resumes and the next real token is returned.
     * @param {number} line - Line of the "/".
     * @param {number} pos - Column of the "/".
     * @returns {{type: number, value: *, line: number, pos: number}}
     */
    div_or_comment(line, pos) {
        if (this.getNextChar() !== "*") {
            return { type: TokenType.Op_divide, value: "/", line, pos };
        }
        this.getNextChar();
        while (true) {
            // End of input is signalled by undefined (see getNextChar).
            if (this.chr === undefined) {
                this.error(line, pos, "EOF in comment");
            } else if (this.chr === "*") {
                // Do not advance again on a miss: the character just read may
                // itself be the "*" that starts the closing "*/".
                if (this.getNextChar() === "/") {
                    this.getNextChar();
                    return this.getToken();
                }
            } else {
                this.getNextChar();
            }
        }
    }

    /**
     * Scans a character constant ('a', '\n' or '\\') into an Integer token
     * whose value is the character code.
     * @param {number} line - Line of the opening quote.
     * @param {number} pos - Column of the opening quote.
     * @returns {{type: number, value: number, line: number, pos: number}}
     */
    char_lit(line, pos) {
        let c = this.getNextChar(); // skip opening quote
        if (c === undefined) {
            this.error(line, pos, "EOF while scanning character constant");
        }
        let n = c.charCodeAt(0);
        if (c === "'") {
            this.error(line, pos, "empty character constant");
        } else if (c === "\\") {
            c = this.getNextChar();
            if (c === "n") {
                n = 10;
            } else if (c === "\\") {
                n = 92;
            } else {
                this.error(line, pos, "unknown escape sequence \\" + c);
            }
        }
        if (this.getNextChar() !== "'") {
            this.error(line, pos, "multi-character constant");
        }
        this.getNextChar();
        return { type: TokenType.Integer, value: n, line, pos };
    }

    /**
     * Scans a string literal; escape sequences are kept verbatim.
     * @param {string} start - The quote character that opened (and must close) the literal.
     * @param {number} line - Line of the opening quote.
     * @param {number} pos - Column of the opening quote.
     * @returns {{type: number, value: string, line: number, pos: number}}
     */
    string_lit(start, line, pos) {
        let value = "";
        while (this.getNextChar() !== start) {
            if (this.chr === undefined) {
                this.error(line, pos, "EOF while scanning string literal");
            }
            if (this.chr === "\n") {
                this.error(line, pos, "EOL while scanning string literal");
            }
            value += this.chr;
        }
        this.getNextChar();
        return { type: TokenType.String, value, line, pos };
    }

    /**
     * Scans a run of word characters and classifies it as an integer,
     * a keyword or an identifier.
     * @param {number} line - Line of the first character.
     * @param {number} pos - Column of the first character.
     * @returns {{type: number, value: string, line: number, pos: number}}
     */
    identifier_or_integer(line, pos) {
        let is_number = true;
        let text = "";
        // The explicit undefined check matters: RegExp#test stringifies its
        // argument, and the text "undefined" would match \w forever at EOF.
        while (this.chr !== undefined && /\w/.test(this.chr)) {
            text += this.chr;
            if (!/\d/.test(this.chr)) {
                is_number = false;
            }
            this.getNextChar();
        }
        if (text === "") {
            this.error(line, pos, `identifier_or_integer: unrecognized character: ${this.describeChr()}`);
        }
        if (/\d/.test(text.charAt(0))) {
            if (!is_number) {
                this.error(line, pos, "invalid number: " + text);
            }
            return { type: TokenType.Integer, value: text, line, pos };
        }
        // Own-property check so identifiers such as "toString" or
        // "constructor" are not mistaken for keywords via Object.prototype.
        if (Object.prototype.hasOwnProperty.call(this.keywords, text)) {
            return { type: this.keywords[text], value: "", line, pos };
        }
        return { type: TokenType.Identifier, value: text, line, pos };
    }

    /**
     * Skips whitespace and returns the next token.
     * @returns {{type: number, value: *, line: number, pos: number}}
     */
    getToken() {
        // Ignore whitespace
        while (this.chr !== undefined && /\s/.test(this.chr)) {
            this.getNextChar();
        }
        const line = this.line;
        const pos = this.pos;

        // Single-character token: its value is the character itself.
        const single = (type) => {
            const value = this.chr;
            this.getNextChar();
            return { type, value, line, pos };
        };

        switch (this.chr) {
            case undefined: return { type: TokenType.End_of_input, value: "", line: this.line, pos: this.pos };
            case "/": return this.div_or_comment(line, pos);
            case "'": return this.char_lit(line, pos);
            case "\"": return this.string_lit(this.chr, line, pos);

            case "<": return this.follow("=", TokenType.Op_lessequal, TokenType.Op_less, line, pos);
            case ">": return this.follow("=", TokenType.Op_greaterequal, TokenType.Op_greater, line, pos);
            case "=": return this.follow("=", TokenType.Op_equal, TokenType.Op_assign, line, pos);
            case "!": return this.follow("=", TokenType.Op_notequal, TokenType.Op_not, line, pos);
            // A lone "&" or "|" is illegal; End_of_input tells follow() to report it.
            case "&": return this.follow("&", TokenType.Op_and, TokenType.End_of_input, line, pos);
            case "|": return this.follow("|", TokenType.Op_or, TokenType.End_of_input, line, pos);

            case "{": return single(TokenType.LeftBrace);
            case "}": return single(TokenType.RightBrace);
            case "(": return single(TokenType.LeftParen);
            case ")": return single(TokenType.RightParen);
            case "+": return single(TokenType.Op_add);
            case "-": return single(TokenType.Op_subtract);
            case "*": return single(TokenType.Op_multiply);
            case "%": return single(TokenType.Op_mod);
            case ";": return single(TokenType.Semicolon);
            case ",": return single(TokenType.Comma);

            default: return this.identifier_or_integer(line, pos);
        }
    }

    /*
    https://stackoverflow.com/questions/9907419/how-to-get-a-key-in-a-javascript-object-by-its-value
    */
    /**
     * Reverse lookup of a token-type code.
     * @param {number} value - One of the TokenType codes.
     * @returns {string|undefined} The TokenType key name, or undefined if no key has that value.
     */
    getTokenType(value) {
        return Object.keys(TokenType).find((key) => TokenType[key] === value);
    }

    /**
     * Prints one token as fixed-width columns: line right-aligned in 5,
     * pos right-aligned in 7, a space plus the type name padded to 16,
     * then the value for Integer, Identifier and String tokens.
     * @param {{type: number, value: *, line: number, pos: number}} t - Token to print.
     */
    printToken(t) {
        let result = String(t.line).padStart(5);
        result += String(t.pos).padStart(7);
        result += ` ${this.getTokenType(t.type)}`.padEnd(16);
        switch (t.type) {
            case TokenType.Integer:
            case TokenType.Identifier:
                result += " " + t.value;
                break;
            case TokenType.String:
                result += " \"" + t.value + "\"";
                break;
        }
        console.log(result);
    }

    /**
     * Tokenizes the remaining input and prints every token, including the
     * final End_of_input token.
     */
    printTokens() {
        let t;
        while ((t = this.getToken()).type !== TokenType.End_of_input) {
            this.printToken(t);
        }
        this.printToken(t);
    }
}
const fs = require("fs")
// Entry point: tokenize the file named by the first command-line argument
// and print one line per token.
const sourcePath = process.argv[2];
if (sourcePath === undefined) {
    console.error(`usage: node ${process.argv[1]} <source-file>`);
    process.exit(1);
}
fs.readFile(sourcePath, "utf8", (err, data) => {
    if (err) {
        // Without this check `data` is undefined and the Lexer constructor
        // would fail with an unrelated TypeError.
        console.error(`cannot read ${sourcePath}: ${err.message}`);
        process.exit(1);
    }
    const lexer = new Lexer(data);
    lexer.printTokens();
});
  相关解决方案