当前位置: 代码迷 >> 综合 >> 语法分析器(syntax analyzer)【C实现】
  详细解决方案

语法分析器(syntax analyzer)【C实现】

热度:13   发布时间:2023-10-26 06:12:31.0

查看正文内容

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#include <stdbool.h>
#include <ctype.h>

/* Number of elements in a true array (not a pointer). */
#define NELEMS(arr) (sizeof(arr) / sizeof((arr)[0]))

/*
 * Token kinds delivered by the lexer.  The numeric values are used as
 * indices into the atr[] attribute table, so the order matters.
 */
typedef enum {
    tk_EOI, tk_Mul, tk_Div, tk_Mod, tk_Add, tk_Sub, tk_Negate, tk_Not,
    tk_Lss, tk_Leq, tk_Gtr, tk_Geq, tk_Eql, tk_Neq, tk_Assign, tk_And,
    tk_Or, tk_If, tk_Else, tk_While, tk_Print, tk_Putc, tk_Lparen,
    tk_Rparen, tk_Lbrace, tk_Rbrace, tk_Semi, tk_Comma, tk_Ident,
    tk_Integer, tk_String
} TokenType;

/*
 * Node kinds of the syntax tree.  The numeric values are used as indices
 * into the Display_nodes[] name table, so the order matters.
 */
typedef enum {
    nd_Ident, nd_String, nd_Integer, nd_Sequence, nd_If, nd_Prtc, nd_Prts,
    nd_Prti, nd_While, nd_Assign, nd_Negate, nd_Not, nd_Mul, nd_Div,
    nd_Mod, nd_Add, nd_Sub, nd_Lss, nd_Leq, nd_Gtr, nd_Geq, nd_Eql,
    nd_Neq, nd_And, nd_Or
} NodeType;

/* One token as read from the lexer output. */
typedef struct {
    TokenType tok;
    int err_ln;
    int err_col;
    char *text;             /* ident or string literal or integer value */
} tok_s;

/* Syntax tree node: interior nodes use left/right, leaves use value. */
typedef struct Tree {
    NodeType node_type;
    struct Tree *left;
    struct Tree *right;
    char *value;
} Tree;

// dependency: Ordered by tok, must remain in same order as TokenType enum
struct {
    char       *text, *enum_text;TokenType   tok;bool        right_associative, is_binary, is_unary;int         precedence;NodeType    node_type;
} atr[] = {
    {
    "EOI",             "End_of_input"   , tk_EOI,     false, false, false, -1, -1        },{
    "*",               "Op_multiply"    , tk_Mul,     false, true,  false, 13, nd_Mul    },{
    "/",               "Op_divide"      , tk_Div,     false, true,  false, 13, nd_Div    },{
    "%",               "Op_mod"         , tk_Mod,     false, true,  false, 13, nd_Mod    },{
    "+",               "Op_add"         , tk_Add,     false, true,  false, 12, nd_Add    },{
    "-",               "Op_subtract"    , tk_Sub,     false, true,  false, 12, nd_Sub    },{
    "-",               "Op_negate"      , tk_Negate,  false, false, true,  14, nd_Negate },{
    "!",               "Op_not"         , tk_Not,     false, false, true,  14, nd_Not    },{
    "<",               "Op_less"        , tk_Lss,     false, true,  false, 10, nd_Lss    },{
    "<=",              "Op_lessequal"   , tk_Leq,     false, true,  false, 10, nd_Leq    },{
    ">",               "Op_greater"     , tk_Gtr,     false, true,  false, 10, nd_Gtr    },{
    ">=",              "Op_greaterequal", tk_Geq,     false, true,  false, 10, nd_Geq    },{
    "==",              "Op_equal"       , tk_Eql,     false, true,  false,  9, nd_Eql    },{
    "!=",              "Op_notequal"    , tk_Neq,     false, true,  false,  9, nd_Neq    },{
    "=",               "Op_assign"      , tk_Assign,  false, false, false, -1, nd_Assign },{
    "&&",              "Op_and"         , tk_And,     false, true,  false,  5, nd_And    },{
    "||",              "Op_or"          , tk_Or,      false, true,  false,  4, nd_Or     },{
    "if",              "Keyword_if"     , tk_If,      false, false, false, -1, nd_If     },{
    "else",            "Keyword_else"   , tk_Else,    false, false, false, -1, -1        },{
    "while",           "Keyword_while"  , tk_While,   false, false, false, -1, nd_While  },{
    "print",           "Keyword_print"  , tk_Print,   false, false, false, -1, -1        },{
    "putc",            "Keyword_putc"   , tk_Putc,    false, false, false, -1, -1        },{
    "(",               "LeftParen"      , tk_Lparen,  false, false, false, -1, -1        },{
    ")",               "RightParen"     , tk_Rparen,  false, false, false, -1, -1        },{
    "{",               "LeftBrace"      , tk_Lbrace,  false, false, false, -1, -1        },{
    "}",               "RightBrace"     , tk_Rbrace,  false, false, false, -1, -1        },{
    ";",               "Semicolon"      , tk_Semi,    false, false, false, -1, -1        },{
    ",",               "Comma"          , tk_Comma,   false, false, false, -1, -1        },{
    "Ident",           "Identifier"     , tk_Ident,   false, false, false, -1, nd_Ident  },{
    "Integer literal", "Integer"        , tk_Integer, false, false, false, -1, nd_Integer},{
    "String literal",  "String"         , tk_String,  false, false, false, -1, nd_String },
};char *Display_nodes[] = {
    "Identifier", "String", "Integer", "Sequence", "If", "Prtc","Prts", "Prti", "While", "Assign", "Negate", "Not", "Multiply", "Divide", "Mod","Add", "Subtract", "Less", "LessEqual", "Greater", "GreaterEqual", "Equal","NotEqual", "And", "Or"};static tok_s tok;
static FILE *source_fp, *dest_fp;Tree *paren_expr();void error(int err_line, int err_col, const char *fmt, ... ) {
    va_list ap;char buf[1000];va_start(ap, fmt);vsprintf(buf, fmt, ap);va_end(ap);printf("(%d, %d) error: %s\n", err_line, err_col, buf);exit(1);
}char *read_line(int *len) {
    static char *text = NULL;static int textmax = 0;for (*len = 0; ; (*len)++) {
    int ch = fgetc(source_fp);if (ch == EOF || ch == '\n') {
    if (*len == 0)return NULL;break;}if (*len + 1 >= textmax) {
    textmax = (textmax == 0 ? 128 : textmax * 2);text = realloc(text, textmax);}text[*len] = ch;}text[*len] = '\0';return text;
}char *rtrim(char *text, int *len) {
             // remove trailing spacesfor (; *len > 0 && isspace(text[*len - 1]); --(*len));text[*len] = '\0';return text;
}TokenType get_enum(const char *name) {
          // return internal version of namefor (size_t i = 0; i < NELEMS(atr); i++) {
    if (strcmp(atr[i].enum_text, name) == 0)return atr[i].tok;}error(0, 0, "Unknown token %s\n", name);return 0;
}tok_s gettok() {
    int len;tok_s tok;char *yytext = read_line(&len);yytext = rtrim(yytext, &len);// [ ]*{lineno}[ ]+{colno}[ ]+token[ ]+optional// get line and columntok.err_ln  = atoi(strtok(yytext, " "));tok.err_col = atoi(strtok(NULL, " "));// get the token namechar *name = strtok(NULL, " ");tok.tok = get_enum(name);// if there is extra data, get itchar *p = name + strlen(name);if (p != &yytext[len]) {
    for (++p; isspace(*p); ++p);tok.text = strdup(p);}return tok;
}Tree *make_node(NodeType node_type, Tree *left, Tree *right) {
    Tree *t = calloc(sizeof(Tree), 1);t->node_type = node_type;t->left = left;t->right = right;return t;
}Tree *make_leaf(NodeType node_type, char *value) {
    Tree *t = calloc(sizeof(Tree), 1);t->node_type = node_type;t->value = strdup(value);return t;
}void expect(const char msg[], TokenType s) {
    if (tok.tok == s) {
    tok = gettok();return;}error(tok.err_ln, tok.err_col, "%s: Expecting '%s', found '%s'\n", msg, atr[s].text, atr[tok.tok].text);
}Tree *expr(int p) {
    Tree *x = NULL, *node;TokenType op;switch (tok.tok) {
    case tk_Lparen:x = paren_expr();break;case tk_Sub: case tk_Add:op = tok.tok;tok = gettok();node = expr(atr[tk_Negate].precedence);x = (op == tk_Sub) ? make_node(nd_Negate, node, NULL) : node;break;case tk_Not:tok = gettok();x = make_node(nd_Not, expr(atr[tk_Not].precedence), NULL);break;case tk_Ident:x = make_leaf(nd_Ident, tok.text);tok = gettok();break;case tk_Integer:x = make_leaf(nd_Integer, tok.text);tok = gettok();break;default:error(tok.err_ln, tok.err_col, "Expecting a primary, found: %s\n", atr[tok.tok].text);}while (atr[tok.tok].is_binary && atr[tok.tok].precedence >= p) {
    TokenType op = tok.tok;tok = gettok();int q = atr[op].precedence;if (!atr[op].right_associative)q++;node = expr(q);x = make_node(atr[op].node_type, x, node);}return x;
}Tree *paren_expr() {
    expect("paren_expr", tk_Lparen);Tree *t = expr(0);expect("paren_expr", tk_Rparen);return t;
}Tree *stmt() {
    Tree *t = NULL, *v, *e, *s, *s2;switch (tok.tok) {
    case tk_If:tok = gettok();e = paren_expr();s = stmt();s2 = NULL;if (tok.tok == tk_Else) {
    tok = gettok();s2 = stmt();}t = make_node(nd_If, e, make_node(nd_If, s, s2));break;case tk_Putc:tok = gettok();e = paren_expr();t = make_node(nd_Prtc, e, NULL);expect("Putc", tk_Semi);break;case tk_Print: /* print '(' expr {',' expr} ')' */tok = gettok();for (expect("Print", tk_Lparen); ; expect("Print", tk_Comma)) {
    if (tok.tok == tk_String) {
    e = make_node(nd_Prts, make_leaf(nd_String, tok.text), NULL);tok = gettok();} elsee = make_node(nd_Prti, expr(0), NULL);t = make_node(nd_Sequence, t, e);if (tok.tok != tk_Comma)break;}expect("Print", tk_Rparen);expect("Print", tk_Semi);break;case tk_Semi:tok = gettok();break;case tk_Ident:v = make_leaf(nd_Ident, tok.text);tok = gettok();expect("assign", tk_Assign);e = expr(0);t = make_node(nd_Assign, v, e);expect("assign", tk_Semi);break;case tk_While:tok = gettok();e = paren_expr();s = stmt();t = make_node(nd_While, e, s);break;case tk_Lbrace:         /* {stmt} */for (expect("Lbrace", tk_Lbrace); tok.tok != tk_Rbrace && tok.tok != tk_EOI;)t = make_node(nd_Sequence, t, stmt());expect("Lbrace", tk_Rbrace);break;case tk_EOI:break;default: error(tok.err_ln, tok.err_col, "expecting start of statement, found '%s'\n", atr[tok.tok].text);}return t;
}Tree *parse() {
    Tree *t = NULL;tok = gettok();do {
    t = make_node(nd_Sequence, t, stmt());} while (t != NULL && tok.tok != tk_EOI);return t;
}void prt_ast(Tree *t) {
    if (t == NULL)printf(";\n");else {
    printf("%-14s ", Display_nodes[t->node_type]);if (t->node_type == nd_Ident || t->node_type == nd_Integer || t->node_type == nd_String) {
    printf("%s\n", t->value);} else {
    printf("\n");prt_ast(t->left);prt_ast(t->right);}}
}void init_io(FILE **fp, FILE *std, const char mode[], const char fn[]) {
    if (fn[0] == '\0')*fp = std;else if ((*fp = fopen(fn, mode)) == NULL)error(0, 0, "Can't open %s\n", fn);
}int main(int argc, char *argv[]) {
    init_io(&source_fp, stdin,  "r",  argc > 1 ? argv[1] : "");init_io(&dest_fp,   stdout, "wb", argc > 2 ? argv[2] : "");prt_ast(parse());
}

输出:

Sequence
Sequence
Sequence
Sequence
Sequence
;
Assign
Identifier     count
Integer        1
Assign
Identifier     n
Integer        1
Assign
Identifier     limit
Integer        100
While
Less
Identifier     n
Identifier     limit
Sequence
Sequence
Sequence
Sequence
Sequence
;
Assign
Identifier     k
Integer        3
Assign
Identifier     p
Integer        1
Assign
Identifier     n
Add
Identifier     n
Integer        2
While
And
LessEqual
Multiply
Identifier     k
Identifier     k
Identifier     n
Identifier     p
Sequence
Sequence
;
Assign
Identifier     p
NotEqual
Multiply
Divide
Identifier     n
Identifier     k
Identifier     k
Identifier     n
Assign
Identifier     k
Add
Identifier     k
Integer        2
If
Identifier     p
If
Sequence
Sequence
;
Sequence
Sequence
;
Prti
Identifier     n
;
Prts
String         " is prime\n"
;
Assign
Identifier     count
Add
Identifier     count
Integer        1
;
Sequence
Sequence
Sequence
;
Prts
String         "Total primes found: "
;
Prti
Identifier     count
;
Prts
String         "\n"
;
  相关解决方案