// Scraped page header ("view the main content"); not part of the program.
import java.io.File;
import java.io.FileNotFoundException;
import java.util.HashMap;
import java.util.Map;
import java.util.Scanner;

/**
 * Hand-written lexer for a small C-like language.
 *
 * <p>The lexer walks the source string one character at a time, keeping the
 * current character in {@code chr}. A NUL character ({@code '\u0000'}) is used
 * as the in-band end-of-input marker. Each call to {@link #getToken()} returns
 * the next token together with the line and column where it started.
 *
 * <p>Lexical errors are reported through {@link #error(int, int, String)},
 * which prints a message and terminates the JVM.
 */
public class Lexer {
    /** Sentinel stored in {@code chr} once the end of the source is reached. */
    private static final char EOF = '\u0000';

    /** Current line; starts at 1 and is bumped each time a newline is read. */
    private int line;
    /** Column within the current line; reset to 0 when a newline is read. */
    private int pos;
    /** Index of the current character within {@code s}. */
    private int position;
    /** Current character, or {@link #EOF} past the end of the source. */
    private char chr;
    /** Complete source text being scanned. */
    private final String s;
    /** Reserved words mapped to their dedicated token types. */
    final Map<String, TokenType> keywords = new HashMap<>();

    /** A single lexical token plus the source location where it begins. */
    static class Token {
        public TokenType tokentype;
        public String value;
        public int line;
        public int pos;

        Token(TokenType token, String value, int line, int pos) {
            this.tokentype = token;
            this.value = value;
            this.line = line;
            this.pos = pos;
        }

        /**
         * Formats the token as {@code line pos type [value]}; the value column
         * is only printed for integers, identifiers and (quoted) strings.
         */
        @Override
        public String toString() {
            String result = String.format("%5d %5d %-15s", this.line, this.pos, this.tokentype);
            switch (this.tokentype) {
                case Integer:
                    result += String.format(" %4s", value);
                    break;
                case Identifier:
                    result += String.format(" %s", value);
                    break;
                case String:
                    result += String.format(" \"%s\"", value);
                    break;
                default:
                    // Operators, keywords and punctuation carry no value.
                    break;
            }
            return result;
        }
    }

    /** Every token kind the lexer can produce. */
    enum TokenType {
        End_of_input, Op_multiply, Op_divide, Op_mod, Op_add, Op_subtract,
        Op_negate, Op_not, Op_less, Op_lessequal, Op_greater, Op_greaterequal,
        Op_equal, Op_notequal, Op_assign, Op_and, Op_or, Keyword_if,
        Keyword_else, Keyword_while, Keyword_print, Keyword_putc, LeftParen, RightParen,
        LeftBrace, RightBrace, Semicolon, Comma, Identifier, Integer, String
    }

    /**
     * Prints an error message and exits the JVM with status 1.
     *
     * @param line source line of the error; the location is omitted from the
     *             message unless both {@code line} and {@code pos} are positive
     * @param pos  source column of the error
     * @param msg  description of the problem
     */
    static void error(int line, int pos, String msg) {
        if (line > 0 && pos > 0) {
            System.out.printf("%s in line %d, pos %d\n", msg, line, pos);
        } else {
            System.out.println(msg);
        }
        System.exit(1);
    }

    /**
     * Creates a lexer positioned on the first character of {@code source}.
     *
     * @param source program text to scan; an empty string yields an immediate
     *               {@code End_of_input} token
     */
    Lexer(String source) {
        this.line = 1;
        this.pos = 0;
        this.position = 0;
        this.s = source;
        // Guard the empty source: charAt(0) would throw on "".
        this.chr = source.isEmpty() ? EOF : source.charAt(0);
        this.keywords.put("if", TokenType.Keyword_if);
        this.keywords.put("else", TokenType.Keyword_else);
        this.keywords.put("print", TokenType.Keyword_print);
        this.keywords.put("putc", TokenType.Keyword_putc);
        this.keywords.put("while", TokenType.Keyword_while);
    }

    /**
     * Resolves a one- or two-character operator by looking at the character
     * after the current one.
     *
     * @param expect second character that completes the two-character operator
     * @param ifyes  token type when {@code expect} follows
     * @param ifno   token type otherwise; {@code End_of_input} means the
     *               single-character form is not a valid token and is an error
     * @param line   line where the operator starts
     * @param pos    column where the operator starts
     * @return the recognised operator token
     */
    Token follow(char expect, TokenType ifyes, TokenType ifno, int line, int pos) {
        if (getNextChar() == expect) {
            getNextChar();
            return new Token(ifyes, "", line, pos);
        }
        if (ifno == TokenType.End_of_input) {
            error(line, pos, String.format("follow: unrecognized character: (%d) '%c'", (int) this.chr, this.chr));
        }
        return new Token(ifno, "", line, pos);
    }

    /**
     * Scans a character literal such as {@code 'a'}, {@code '\n'} or
     * {@code '\\'} and returns it as an {@code Integer} token holding the
     * character code in decimal.
     *
     * @param line line where the literal starts
     * @param pos  column where the literal starts
     * @return the integer token for the literal
     */
    Token char_lit(int line, int pos) {
        char c = getNextChar(); // skip opening quote
        int n = (int) c;
        if (c == '\'') {
            error(line, pos, "empty character constant");
        } else if (c == '\\') {
            c = getNextChar();
            if (c == 'n') {
                n = 10;
            } else if (c == '\\') {
                n = '\\';
            } else {
                error(line, pos, String.format("unknown escape sequence \\%c", c));
            }
        }
        if (getNextChar() != '\'') {
            error(line, pos, "multi-character constant");
        }
        getNextChar();
        return new Token(TokenType.Integer, "" + n, line, pos);
    }

    /**
     * Scans a string literal; the current character must be the opening
     * delimiter. Escape sequences are kept verbatim in the token value.
     *
     * @param start delimiter character that also terminates the literal
     * @param line  line where the literal starts
     * @param pos   column where the literal starts
     * @return a {@code String} token whose value excludes the delimiters
     */
    Token string_lit(char start, int line, int pos) {
        StringBuilder result = new StringBuilder();
        while (getNextChar() != start) {
            if (this.chr == EOF) {
                error(line, pos, "EOF while scanning string literal");
            }
            if (this.chr == '\n') {
                error(line, pos, "EOL while scanning string literal");
            }
            result.append(this.chr);
        }
        getNextChar();
        return new Token(TokenType.String, result.toString(), line, pos);
    }

    /**
     * Handles a leading {@code /}: either the division operator or the start
     * of a block comment, in which case the comment is skipped and the token
     * after it is returned.
     *
     * @param line line where the {@code /} appears
     * @param pos  column where the {@code /} appears
     * @return {@code Op_divide}, or the first token after the comment
     */
    Token div_or_comment(int line, int pos) {
        if (getNextChar() != '*') {
            return new Token(TokenType.Op_divide, "", line, pos);
        }
        getNextChar();
        while (true) {
            if (this.chr == EOF) {
                error(line, pos, "EOF in comment");
            } else if (this.chr == '*') {
                // Do not advance when the check fails: the character just read
                // may itself be a '*' that starts the real terminator.
                if (getNextChar() == '/') {
                    getNextChar();
                    return getToken();
                }
            } else {
                getNextChar();
            }
        }
    }

    /**
     * Scans a run of letters, digits and underscores and classifies it as an
     * integer, a keyword or an identifier.
     *
     * @param line line where the run starts
     * @param pos  column where the run starts
     * @return the classified token
     */
    Token identifier_or_integer(int line, int pos) {
        boolean is_number = true;
        StringBuilder buf = new StringBuilder();
        while (Character.isAlphabetic(this.chr) || Character.isDigit(this.chr) || this.chr == '_') {
            buf.append(this.chr);
            if (!Character.isDigit(this.chr)) {
                is_number = false;
            }
            getNextChar();
        }
        String text = buf.toString();
        if (text.equals("")) {
            error(line, pos, String.format("identifer_or_integer unrecopgnized character: (%d) %c", (int) this.chr, this.chr));
        }
        if (Character.isDigit(text.charAt(0))) {
            // Starts with a digit, so every character must be a digit.
            if (!is_number) {
                error(line, pos, String.format("invaslid number: %s", text));
            }
            return new Token(TokenType.Integer, text, line, pos);
        }
        if (this.keywords.containsKey(text)) {
            return new Token(this.keywords.get(text), "", line, pos);
        }
        return new Token(TokenType.Identifier, text, line, pos);
    }

    /**
     * Skips whitespace and returns the next token.
     *
     * @return the next token; {@code End_of_input} once the source is exhausted
     */
    Token getToken() {
        int line, pos;
        while (Character.isWhitespace(this.chr)) {
            getNextChar();
        }
        // Remember where the token starts before any helper consumes input.
        line = this.line;
        pos = this.pos;

        switch (this.chr) {
            case EOF: return new Token(TokenType.End_of_input, "", this.line, this.pos);
            case '/': return div_or_comment(line, pos);
            case '\'': return char_lit(line, pos);
            case '<': return follow('=', TokenType.Op_lessequal, TokenType.Op_less, line, pos);
            case '>': return follow('=', TokenType.Op_greaterequal, TokenType.Op_greater, line, pos);
            case '=': return follow('=', TokenType.Op_equal, TokenType.Op_assign, line, pos);
            case '!': return follow('=', TokenType.Op_notequal, TokenType.Op_not, line, pos);
            // A lone '&' or '|' is not a token; End_of_input makes follow() report it.
            case '&': return follow('&', TokenType.Op_and, TokenType.End_of_input, line, pos);
            case '|': return follow('|', TokenType.Op_or, TokenType.End_of_input, line, pos);
            case '"': return string_lit(this.chr, line, pos);
            case '{': getNextChar(); return new Token(TokenType.LeftBrace, "", line, pos);
            case '}': getNextChar(); return new Token(TokenType.RightBrace, "", line, pos);
            case '(': getNextChar(); return new Token(TokenType.LeftParen, "", line, pos);
            case ')': getNextChar(); return new Token(TokenType.RightParen, "", line, pos);
            case '+': getNextChar(); return new Token(TokenType.Op_add, "", line, pos);
            case '-': getNextChar(); return new Token(TokenType.Op_subtract, "", line, pos);
            case '*': getNextChar(); return new Token(TokenType.Op_multiply, "", line, pos);
            case '%': getNextChar(); return new Token(TokenType.Op_mod, "", line, pos);
            case ';': getNextChar(); return new Token(TokenType.Semicolon, "", line, pos);
            case ',': getNextChar(); return new Token(TokenType.Comma, "", line, pos);
            default: return identifier_or_integer(line, pos);
        }
    }

    /**
     * Advances to the next character and updates the line/column counters.
     *
     * @return the new current character, or NUL at end of input
     */
    char getNextChar() {
        this.pos++;
        this.position++;
        if (this.position >= this.s.length()) {
            this.chr = EOF;
            return this.chr;
        }
        this.chr = this.s.charAt(this.position);
        if (this.chr == '\n') {
            this.line++;
            this.pos = 0;
        }
        return this.chr;
    }

    /** Prints every remaining token, one per line, including the final {@code End_of_input}. */
    void printTokens() {
        Token t;
        while ((t = getToken()).tokentype != TokenType.End_of_input) {
            System.out.println(t);
        }
        System.out.println(t);
    }

    /**
     * Tokenizes the file named by the first argument and prints its tokens.
     *
     * @param args command-line arguments; {@code args[0]} is the source file path
     */
    public static void main(String[] args) {
        if (args.length > 0) {
            // try-with-resources closes the Scanner (and the file) on every path.
            try (Scanner scanner = new Scanner(new File(args[0]))) {
                // The leading space makes the first real character column 1.
                StringBuilder source = new StringBuilder(" ");
                // hasNext() stops at the last non-blank token, so trailing
                // blank lines do not shift the End_of_input line number.
                while (scanner.hasNext()) {
                    source.append(scanner.nextLine()).append("\n");
                }
                Lexer l = new Lexer(source.toString());
                l.printTokens();
            } catch (FileNotFoundException e) {
                error(-1, -1, "Exception: " + e.getMessage());
            }
        } else {
            error(-1, -1, "No args");
        }
    }
}