作业描述:
支持2种操作:
1. 赋值 a = 4.0; b= a* 2.2;
2. 打印 print(a); print(b-7);
(突然意识到自己写的好像是 跳转表 的实现,不过人家用的是goto,我是标记一下然后continue)2018-3-11
https://github.com/rsy56640/Assignment-in-WHUISS/tree/master/Principle_of_Compiling/1st
(github上的代码会更新,这篇文章不会更新)
2018-3-10
第一次写这个,感觉写的太恶心了。。。
说恶心是不无道理的:
一开始想写一个自动机的,但是后来发现状态转移矩阵太稀疏了,还不如直接if。
而且写的过程中不断地增加边界条件,简直是贪心不足蛇吞象
比如这种:
a + +.72; //支持
b = a +-3.14;//支持
a ++.22 //报错
另外异常类没有规划好,这个的确是个比较为难的事,
不过还好遵循Herb老爷子的教诲:总是使用NVI, http://www.gotw.ca/publications/mill18.htm
所以最后写了5.6个异常类,,,尴尬。。
总结一下经验:
- 首先确定好所有状态和边界,不要在写的时候再心血来潮地加上一些诡异的边界条件!!
不要心软,多余的操作直接报错。 - 还有一个小问题:规划好异常类,比如第几句,是否报告句内具体位置,是否显示id名称等等。
- 下次还是直接用lex生成器吧,再也不写了。。。
Token.h
#pragma once
#ifndef _TOKEN_H
#define _TOKEN_H
#include <iostream>
#include <vector>
#include <string>
#include <stack>
#include <unordered_map>
#include <unordered_set>
#include <type_traits>
#include <utility>
#include <cassert>namespace lexical
{//id:0 const:1 op:2 print:3 equal:4 lp:5 rp:6enum class type { identifier, constant, opcode, print, equal, lp, rp };//such as: a = 3.14 + 7;// b = a / 2.0;// print(b);// print(a - 1);//class Tokenclass Token{public://default constructor for vectorToken():_ty(type(0)),_symbol(""),_num(0.0),_is_double(true){}Token(type ty, _STD string symbol = "", double num = 0.0, bool is_double = true):_ty(ty),_symbol(symbol),_num(num),_is_double(is_double){}type _ty;_STD string _symbol; //id, op, lp, rp, semicolondouble _num; //constbool _is_double;};//id:0 const:1 op:2 print:3 equal:4 lp:5 rp:6//*c points to one sentence including the last semicolon//@ parameter list://@ char* c: one sentence that is seperated by semicolon.//@ _STD size_t size: sentence size that does not include semicolon.//@ return value: token_list.extern _STD vector<Token> seperate_one_sentence(char* c, _STD size_t size);//Base class for Exceptionclass Exception_base{public:void print_exception(){this->print();}private:virtual void print() = 0;};//class Lexical_Exceptionclass Lexical_Exception :public Exception_base{public:Lexical_Exception(_STD string msg, _STD size_t sentence_num, _STD size_t pos):_msg(msg),_sentence_num(sentence_num),_pos(pos){}friend _STD ostream& operator<<(_STD ostream& os, Lexical_Exception& e){os << e._msg << " in the sentence: " << e._sentence_num << " at position: " << e._pos;return os;}private:void print(){std::cout << (*this) << std::endl;}_STD string _msg;_STD size_t _sentence_num;_STD size_t _pos;};//convert char to intinline static int char_to_int(char c){return static_cast<int>(c) - 48;}//record the sentence num according to the semicolon.static _STD size_t sentence_num = 0;//class Id_Exceptionclass Id_Exception :public Exception_base{public:Id_Exception(_STD string msg, _STD string identifier,_STD size_t sentence_num, _STD size_t pos):_msg(msg),_identifier(identifier),_sentence_num(sentence_num),_pos(pos){}friend _STD ostream& operator<<(_STD ostream& os, Id_Exception& e){os << e._msg << e._identifier << " in the sentence "<< e._sentence_num << " at " << e._pos;return os;}private:void print(){std::cout << (*this) << std::endl;}_STD string _msg;_STD string _identifier;_STD size_t _sentence_num;_STD size_t _pos;};//symbol tablestatic _STD unordered_map<_STD string, _STD pair<double, bool> > symbol_table;//class Token_list exceptionclass Token_list_Exception :public Exception_base{public:Token_list_Exception(_STD string msg, _STD size_t sentence_num, Token token):_msg(msg),_sentence_num(sentence_num),_token(token){}friend _STD ostream& operator<<(_STD ostream& os, Token_list_Exception& e){os << e._msg;if (static_cast<int>(e._token._ty) == 1)os << e._token._num;else os << e._token._symbol;os << " in the sentence " << e._sentence_num;return os;}private:void print(){std::cout << (*this) << std::endl;}_STD string _msg;_STD size_t _sentence_num;Token _token;};//id:0 const:1 op:2 print:3 equal:4 lp:5 rp:6//execute one sentence//@ parameter list://@ _STD vector<Token> token_list: token list which contains the seperating tokensextern void execution_one_sentence(_STD vector<Token> token_list);//class expression exceptionclass Expr_Exception :public Exception_base{public:Expr_Exception(_STD string msg, _STD size_t sentence_num, _STD size_t pos, Token token):_msg(msg),_sentence_num(sentence_num),_pos(pos),_token(token){}friend _STD ostream& operator<<(_STD ostream& os, Expr_Exception& e){os << e._msg;if (static_cast<int>(e._token._ty) == 0)os << " the identifier: " << e._token._symbol;if (static_cast<int>(e._token._ty) == 1)os << " the number: " << e._token._num;if (static_cast<int>(e._token._ty) >= 2)os << " the operator: " << e._token._symbol;os << " in the sentence " << e._sentence_num << " at " << e._pos;return os;}private:void print(){std::cout << (*this) << std::endl;}_STD string _msg;_STD size_t _sentence_num;_STD size_t _pos;Token _token;};//id:0 const:1 op:2 print:3 equal:4 lp:5 rp:6//calcuate the expression//@ parameter list://@ _STD vector<Token> expr_list: an expression list.//@ const _STD size_t pos: the relative position to the sentence.//@ return value: the result of the expression.extern _STD pair<double, bool>calculate(_STD vector<Token> expr_list, const _STD size_t pos);}
#endif // !_TOKEN_H
Token.cpp
#include "token.h"namespace lexical
{//id:0 const:1 op:2 print:3 equal:4 lp:5 rp:6//*c points to one sentence including the last semicolon_STD vector<Token> seperate_one_sentence(char* c, _STD size_t size){sentence_num++;const static _STD vector<double> digit_after_dot ={1.0, 0.1, 0.01, 0.001, 0.0001, 0.00001, 0.000001, 0.0000001,0.00000001, 0.000000001, 0.0000000001, 0.00000000001,0.000000000001, 0.0000000000001, 0.00000000000001,0.000000000000001, 0.0000000000000001, 0.00000000000000001,0.000000000000000001, 0.0000000000000000001, 0.00000000000000000001};const static _STD unordered_set<int> digit ={1,2,3,4,5,6,7,8,9,0};const static _STD unordered_set<char> op ={'+','-','*','/','=','(',')'};const static _STD unordered_set<char> token ={'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm','n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z','A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M','N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z','0', '1', '2', '3', '4', '5', '6', '7', '8', '9','$','_'};_STD vector<Token> token_list;const int length = size;int cur_pos = 0;bool after_equal = false;//ignore the spacewhile (cur_pos < length){if (c[cur_pos] == ' '){cur_pos++;continue;}//a constbool is_const = false;bool has_dot = false;bool is_minus = false;double num = 0.0;int carry_after_dot = 0;//such as +1, a + -2if (c[cur_pos] == '+' || c[cur_pos] == '-'){//valid situation//there is already an op or equal before +, -if (token_list.empty()|| static_cast<int>(token_list.back()._ty) == 2 //op|| static_cast<int>(token_list.back()._ty) == 4 //equal|| static_cast<int>(token_list.back()._ty) == 5 //left parenthesis){//lest the case that x ++2;if (cur_pos > 0 && c[cur_pos] == c[cur_pos - 1])throw Lexical_Exception(_STD string("unexpected operator").append(" '").append(_STD string(&c[cur_pos], 1)).append("'"),sentence_num, cur_pos);is_minus = (c[cur_pos] == '-');cur_pos++;//invalid successed contentwhile (c[cur_pos] == ' ')cur_pos++;if (digit.find(char_to_int(c[cur_pos])) == digit.end()&& c[cur_pos] != '.')throw Lexical_Exception(_STD string("unexpected operator").append(" '").append(_STD string(&c[cur_pos], 1)).append("'"),sentence_num, cur_pos);}}while (digit.find(char_to_int(c[cur_pos])) != digit.end()|| c[cur_pos] == '.'){is_const = true;if (c[cur_pos] == '.'){if (has_dot)throw Lexical_Exception(_STD string("unexpected operator").append(" '.'"),sentence_num, cur_pos);has_dot = true;carry_after_dot = 0;cur_pos++;continue;}if (!has_dot){num *= 10.0;num += static_cast<double>(char_to_int(c[cur_pos]));cur_pos++;}else{//too many after dotif (carry_after_dot >= 20){cur_pos++;continue;}carry_after_dot++;num += static_cast<double>(char_to_int(c[cur_pos]))* digit_after_dot[carry_after_dot];cur_pos++;}}if (is_const){if (is_minus)num *= -1.0;if (has_dot)token_list.emplace_back(static_cast<type>(1), static_cast<_STD string>(""), num);else{token_list.emplace_back(static_cast<type>(1), static_cast<_STD string>(""),static_cast<int>(num), false);}continue;}//an operandif (op.find(c[cur_pos]) != op.end()){// op "=" type 4if (c[cur_pos] == '='){token_list.emplace_back(static_cast<type>(4), _STD string(&c[cur_pos], 1));cur_pos++;continue;}// op "(" type 5if (c[cur_pos] == '('){token_list.emplace_back(static_cast<type>(5), _STD string(&c[cur_pos], 1));cur_pos++;continue;}// op ")" type 6if (c[cur_pos] == ')'){token_list.emplace_back(static_cast<type>(6), _STD string(&c[cur_pos], 1));cur_pos++;continue;}//other optoken_list.emplace_back(static_cast<type>(2), _STD string(&c[cur_pos], 1));cur_pos++;continue;}//a wordbool is_word = false;int word_count = 0;_STD string tmp;while (token.find(c[cur_pos]) != token.end()){is_word = true;cur_pos++;word_count++;}if (is_word){tmp = _STD forward<_STD string>(_STD string(&c[cur_pos - word_count], word_count));//Token "print"if (tmp.compare("print") == 0){token_list.emplace_back(static_cast<type>(3), tmp);continue;}token_list.emplace_back(static_cast<type>(0), tmp);continue;}//other than the const/op/word//throw exceptionthrow Lexical_Exception(_STD string("unexpected operator").append(" '").append(_STD string(&c[cur_pos], 1)).append("'"),sentence_num, cur_pos);}return token_list;}//id:0 const:1 op:2 print:3 equal:4 lp:5 rp:6//execute one sentencevoid execution_one_sentence(_STD vector<Token> token_list){//allow 3 operations://1. do declaration ex: a; //initialize with 0.0//2. assignment ex: a = 4.0 + -.72; b = a; c = b = a * +.777;//3. print ex: print(a); print(a + b * 4);const int size = token_list.size();if (0 == size)return;//1. do declarationif (size == 1){auto& token = token_list[0];if (static_cast<int>(token._ty) == 0){symbol_table[token._symbol] = _STD make_pair(0.0, true);return;}throw Token_list_Exception("unecpected token ", sentence_num, token_list[0]);}//2. assignment//for such statement://c = b = a * 2; //first calculate the expression a*2//then push the b,c into symbol tableif (static_cast<int>(token_list[0]._ty) == 0){//assignment has right associativity//so find the last '='int cur = size - 1;for (; cur >= 0; cur--)if (static_cast<int>(token_list[cur]._ty) == 4)break;//no '='if (cur == -1)throw Token_list_Exception("unecpected token ", sentence_num, token_list[1]);//the last is '='if (cur == size - 1)throw Token_list_Exception("unecpected token ", sentence_num, token_list[cur]);//verify whether the sequence satisfy "id = id = ... = expr"for (int i = 1; i < cur; ++i){if (static_cast<int>(token_list[i]._ty) != (i & 1) ? 4 : 0)throw Token_list_Exception("unecpected token", sentence_num, token_list[i]);}//put expr into the calculation function//then process from right to left//fork the expr_STD vector<Token> expr(size - cur - 1);for (int i = cur + 1, j = 0; i < size; ++i, ++j)expr[j] = token_list[i];auto number = calculate(expr, cur + 1);//process from right to leftint i = cur - 1;for (; i > 0; i -= 2){auto& token = token_list[i];auto search = symbol_table.find(token._symbol);if (search != symbol_table.end())throw Token_list_Exception("There is no such identifier: ", sentence_num, token);symbol_table[token._symbol] = number;}//now i = 0symbol_table[token_list[0]._symbol] = number;return;}//3. print//only one form:// 'print' '(' 'expr' ')'//put expr into the calculate functionif (static_cast<int>(token_list[0]._ty) == 3){//verify the formif (size < 4|| static_cast<int>(token_list[1]._ty) != 5|| static_cast<int>(token_list[size - 1]._ty) != 6)throw Token_list_Exception("unexpected ", sentence_num, token_list[size - 1]);//calculate the expr_STD vector<Token> expr(size - 3);for (int i = 2, j = 0; i < size - 1; ++i, ++j)expr[j] = token_list[i];auto number = calculate(expr, 3);if (!number.second)std::cout << static_cast<int>(number.first) << std::endl;else std::cout << static_cast<double>(number.first) << std::endl;return;}//no id/print matchthrow Token_list_Exception("unecpected token ", sentence_num, token_list[0]);}//id:0 const:1 op:2 print:3 equal:4 lp:5 rp:6//calcuate the expression_STD pair<double, bool>calculate(_STD vector<Token> expr_list, const _STD size_t pos){//[[maybe_unused]]bool is_double = false; //judge whether the result is doubleint size = expr_list.size();//if (size == 0)return NULL;int cur_pos = 0;bool next_identifer = true; //only identifier/constant/left parenthesis permittedint left_parenthesis = 0, right_parenthesis = 0;//verify whether the expr_list is legalwhile (cur_pos < size){//left parenthesisif (static_cast<int>(expr_list[cur_pos]._ty) == 5){cur_pos++;left_parenthesis++;next_identifer = true;continue;}//right parenthesisif (static_cast<int>(expr_list[cur_pos]._ty) == 6){if (next_identifer)throw Expr_Exception("There is something wrong near",sentence_num, cur_pos + pos, expr_list[cur_pos]);cur_pos++;right_parenthesis++;next_identifer = false;continue;}//identifier / constantif (static_cast<int>(expr_list[cur_pos]._ty) < 2){if (!next_identifer)throw Expr_Exception("There is something wrong near",sentence_num, cur_pos + pos, expr_list[cur_pos]);cur_pos++;next_identifer = false;continue;}//operatorif (static_cast<int>(expr_list[cur_pos]._ty) == 2){if (next_identifer)throw Lexical_Exception("There is something wrong near",sentence_num, cur_pos + pos);cur_pos++;next_identifer = true;continue;}}//end verifyif (next_identifer)throw Expr_Exception("unexpected end of expr",sentence_num, cur_pos + pos, expr_list[cur_pos - 1]);if (left_parenthesis != right_parenthesis)throw Expr_Exception("Parentheses does not match",sentence_num, cur_pos + pos, expr_list[cur_pos - 1]);//using two stacks to calculate the expression//priority: +, -: 1 *, /: 2 (: 3 ):0//meet number, push into stack.//meet operator, compare the current priority with the previous,// if previous are higher or equal, then do the previous operation,// if current are higher, then push the current operator into the stack.struct Operator{char _c;int _priority;Operator(char c, int priority):_c(c),_priority(priority){}};struct Number{double _num;bool _is_double;Number(double num, bool is_double):_num(num),_is_double(is_double){}};_STD stack<Operator> operator_stack;_STD stack<Number> number_stack;cur_pos = 0;while (cur_pos < size){auto& token = expr_list[cur_pos];//idnetifierif (static_cast<int>(token._ty) == 0){auto search = symbol_table.find(token._symbol);if (search != symbol_table.end()){cur_pos++;number_stack.emplace((*search).second.first, (*search).second.second);}else{throw Id_Exception("There is no such identifier: ",token._symbol, sentence_num, cur_pos + pos);}continue;}//constantif (static_cast<int>(token._ty) == 1){cur_pos++;number_stack.emplace(token._num, token._is_double);continue;}//operator +, -, *, /if (static_cast<int>(token._ty) == 2){const char c = token._symbol[0];cur_pos++;//no operator in the stackif (operator_stack.empty()){if (c == '+' || c == '-')operator_stack.emplace(c, 1);else operator_stack.emplace(c, 2);continue;}// if previous are higher or equal, then do the previous operation,// if current are higher, then push the current operator into the stack.auto& op = operator_stack.top();int _priority = op._priority;int cur_priority = 1;if (c == '*' || c == '/')cur_priority = 2;//left parenthesisif (_priority == 3){operator_stack.emplace(c, cur_priority);continue;}//push currentif (cur_priority > _priority)operator_stack.emplace(c, cur_priority);//do previous operationelse{auto& num2 = number_stack.top(); number_stack.pop();auto& num1 = number_stack.top(); number_stack.pop();operator_stack.pop();double result = 0.0;bool is_double = false;//do operationswitch (op._c){case '+':result = num1._num + num2._num;is_double = num1._is_double || num2._is_double;break;case '-':result = num1._num - num2._num;is_double = num1._is_double || num2._is_double;break;case '*':result = num1._num * num2._num;is_double = num1._is_double || num2._is_double;break;case '/':result = num1._num / num2._num;is_double = num1._is_double || num2._is_double;if (!is_double)result = static_cast<int>(result);break;}//push current op, and push the new constantoperator_stack.emplace(c, cur_priority);number_stack.emplace(result, is_double);}}//left parenthesisif (static_cast<int>(token._ty) == 5){cur_pos++;operator_stack.emplace('(', 3);continue;}//right parenthesisif (static_cast<int>(token._ty) == 6){//while (!operator_stack.empty() && operator_stack.top()._c != '('){auto& op = operator_stack.top(); operator_stack.pop();auto& num2 = number_stack.top(); number_stack.pop();auto& num1 = number_stack.top(); number_stack.pop();double result = 0.0;bool is_double = false;//do operationswitch (op._c){case '+':result = num1._num + num2._num;is_double = num1._is_double || num2._is_double;break;case '-':result = num1._num - num2._num;is_double = num1._is_double || num2._is_double;break;case '*':result = num1._num * num2._num;is_double = num1._is_double || num2._is_double;break;case '/':result = num1._num / num2._num;is_double = num1._is_double || num2._is_double;if (!is_double)result = static_cast<int>(result);break;}number_stack.emplace(result, is_double);}//end while//if emptyif (operator_stack.empty())throw Lexical_Exception("unexpected right parenthesis ')'", sentence_num, cur_pos + pos);//if top = '( 'else{operator_stack.pop();cur_pos++;continue;}}// end if right perenthesis}//end traverse of the expression//do the final calculationwhile (!operator_stack.empty()){auto& op = operator_stack.top(); operator_stack.pop();auto& num2 = number_stack.top(); number_stack.pop();auto& num1 = number_stack.top(); number_stack.pop();double result = 0.0;bool is_double = false;//do operationswitch (op._c){case '+':result = num1._num + num2._num;is_double = num1._is_double || num2._is_double;break;case '-':result = num1._num - num2._num;is_double = num1._is_double || num2._is_double;break;case '*':result = num1._num * num2._num;is_double = num1._is_double || num2._is_double;break;case '/':result = num1._num / num2._num;is_double = num1._is_double || num2._is_double;if (!is_double)result = static_cast<int>(result);break;}number_stack.emplace(result, is_double);}//end whileauto& result = number_stack.top();return _STD make_pair(result._num, result._is_double);}//end the function calaulate}//end namespace
main.cpp
#include "token.h"
using namespace lexical;int main()
{//id:0 const:1 op:2 print:3 equal:4 lp:5 rp:6const int num = 4;char* c[num];c[0] = "a=(3+4.0)* -.3 +9/4.5;";c[1] = " b =(a + + 7.1 ) -.722* (-3.770224058066800548405486 - a);";c[2] = "print(a);";c[3] = "print(b-3);";using lexical::symbol_table;_STD vector<int> index(4);for (int i = 0; i < num; ++i)for (; c[i][index[i]] != ';'; ++index[i]);_STD vector<Token> v[num];[[maybe_unused]]_STD pair<double, bool> result;try{for (int i = 0; i < num; ++i){v[i] = seperate_one_sentence(c[i], index[i]);execution_one_sentence(v[i]);}}catch (Exception_base& e){e.print_exception();}/*symbol_table["rsy"] = _STD make_pair(0.0, true);_STD cout << static_cast<double>(symbol_table["a"].first)<< "\t" << symbol_table["b"].first << _STD endl;*/system("pause");return 0;
}