+1 480 409 0818 

Scanning and Parsing C Code

// Scanner and recursive-descent parser for a small C-like language.
//
// The listing has three sections: the lexical analyzer, the parser and the
// driver program. The driver section originally pulled the two classes in via
// #include "LexicalAnalyzer.h" and #include "Parser.h"; here both classes are
// defined ahead of main() so the listing builds as one translation unit.

#include <cstddef>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>

using namespace std;

// ---------------------------------------------------------------------------
// Lexical analyzer
// ---------------------------------------------------------------------------

#define N_RESERVED_WORDS 9

// Token codes. The first N_RESERVED_WORDS entries are the reserved words and
// appear in the same order as RESERVED_WORDS / RVALS below.
enum
{
    ift, elset, whilet, floatt, integert, chart, breakt, continuet, voidt,
    addopt, mulopt, assignopt, relopt,
    lparent, rparent, lbracet, rbracet, lbrkt, rbrkt,
    commat, semit, periodt,
    valuet, valuert, idt, stringt, eoft, unknownt
};

// Symbolic name of every token code, indexed by the code.
const char *const TYPE[] =
{
    "ift", "elset", "whilet", "floatt", "integert", "chart", "breakt",
    "continuet", "voidt", "addopt", "mulopt", "assignopt", "relopt",
    "lparent", "rparent", "lbracet", "rbracet", "lbrkt", "rbrkt", "commat",
    "semit", "periodt", "valuet", "valuert", "idt", "stringt", "eoft",
    "unknownt"
};

// Text shown in diagnostics for every token code, indexed by the code.
// These strings are written with operator<<, so '%' needs no escaping.
const char *const PRINT[] =
{
    "if", "else", "while", "float", "int", "char", "break", "continue",
    "void", "+, - or ||", "*, /, % or &&", "=", "==, !=, <, <=, > or >=",
    "(", ")", "{", "}", "[", "]", ",", ";", ".",
    "valuet", "valuert", "idt", "stringt", "eoft", "unknownt"
};

// Reserved-word spellings and, at the same index, their token codes.
const char *const RESERVED_WORDS[] =
{
    "if", "else", "while", "float", "int", "char", "break", "continue", "void"
};
const int RVALS[] =
{
    ift, elset, whilet, floatt, integert, chart, breakt, continuet, voidt
};

// Class used to perform the lexical analysis.
//
// The whole input file is loaded into memory by the constructor and scanned
// one token per GetNextToken() call with a hand-written state machine.
// Every lexical error prints a message to cout and terminates the program
// with exit(1).
class LexicalAnalyzer
{
private:
    string code; // text of the file being analyzed
    int pos;     // index in `code` of the next character to read

public:
    int nline;      // current line number in code (starts at 1)
    int Token;      // code of the most recently scanned token
    string Lexeme;  // source text of the most recently scanned token
    int Value;      // value of the last valuet token
    float ValueR;   // value of the last valuert token
    string Literal; // contents of the last stringt token, without the quotes

    // Loads `filename` and positions the scanner at its first character.
    // Terminates the program if the file cannot be opened (see loadFile).
    LexicalAnalyzer(string filename)
    {
        code = loadFile(filename);
        pos = 0;
        nline = 1;
        Token = unknownt; // no token has been scanned yet
        Value = 0;
        ValueR = 0.0f;
    }

    // Returns true if the given character is an ASCII letter.
    bool isLetter(char c)
    {
        if (c >= 'a' && c <= 'z')
            return true;
        if (c >= 'A' && c <= 'Z')
            return true;
        return false;
    }

    // Returns true if the given character is a decimal digit.
    bool isDigit(char c)
    {
        if (c >= '0' && c <= '9')
            return true;
        return false;
    }

    // Scans the next token and stores it in Token / Lexeme (plus Value,
    // ValueR or Literal for the token kinds that carry a value).
    //
    // Positive states are "still reading"; a negative state means a complete
    // token was recognized. Once end of file has been reported, further calls
    // keep reporting eoft.
    void GetNextToken()
    {
        const int length = static_cast<int>(code.length());

        // End of file was already delivered by an earlier call: there is
        // nothing left to scan, so report it again instead of running the
        // state machine on an empty range.
        if (pos > length)
        {
            Token = eoft;
            Lexeme.clear();
            return;
        }

        int start = pos;     // index where the current token begins
        int state = 0;
        int type = unknownt; // token code for single-character symbols

        while (state >= 0 && pos <= length)
        {
            // One step past the last character yields '\0', which state 0
            // interprets as end of file.
            char ch = (pos < length) ? code[pos] : '\0';
            pos++;

            switch (state)
            {
            case 0: // initial state
                start = pos - 1;
                if (isLetter(ch))
                    state = 1;
                else if (isDigit(ch))
                    state = 2;
                else
                {
                    switch (ch)
                    {
                    case '/': state = 5; break;
                    case '=': state = 8; break;
                    case '!': state = 9; break;
                    case '<': state = 9; break;
                    case '>': state = 9; break;
                    case '+':
                    case '-': state = 10; break;
                    case '|': state = 11; break;
                    case '*': state = 12; break;
                    case '%': state = 12; break;
                    case '&': state = 13; break;
                    case '(': type = lparent; state = 14; break;
                    case ')': type = rparent; state = 14; break;
                    case '{': type = lbracet; state = 14; break;
                    case '}': type = rbracet; state = 14; break;
                    case '[': type = lbrkt; state = 14; break;
                    case ']': type = rbrkt; state = 14; break;
                    case ',': type = commat; state = 14; break;
                    case ';': type = semit; state = 14; break;
                    case '.': type = periodt; state = 14; break;
                    case '"': state = 15; break;
                    case '\n': nline++; break;
                    case ' ':
                    case '\t':
                    case '\r': // carriage return of a CRLF line ending
                        break;
                    case '\0': state = -100; break; // end of file
                    default:
                        cout << "Error: invalid character " << ch << " in line " << nline << endl;
                        exit(1);
                    }
                }
                break;

            case 1: // idt
                if (!isLetter(ch) && !isDigit(ch) && ch != '_')
                {
                    state = -1; // end reading token
                    pos--;      // try same char again
                }
                break;

            case 2: // digits
                if (!isDigit(ch))
                {
                    if (ch != '.')
                    {
                        state = -2;
                        pos--; // try same char again
                    }
                    else
                        state = 3;
                }
                break;

            case 3: // digits followed by '.': at least one more digit is required
                if (!isDigit(ch))
                {
                    cout << "Error: unexpected character after . in line " << nline << endl;
                    exit(1);
                }
                else
                    state = 4;
                break;

            case 4: // real
                if (!isDigit(ch))
                {
                    state = -3;
                    pos--; // try same char again
                }
                break;

            case 5: // seen '/': either a comment opener or the division operator
                if (ch == '*')
                    state = 6;
                else
                {
                    state = 12; // mulop /
                    pos--;      // try same char again
                }
                break;

            case 6: // inside a comment
                if (ch == '*') // first char of ending comment
                    state = 7;
                else if (ch == '\n')
                    nline++; // keep line numbers right across multi-line comments
                break;

            case 7: // inside a comment, previous char was '*'
                if (ch == '/') // end of comment
                    state = 0;
                else if (ch != '*') // another '*' may still be followed by '/'
                {
                    if (ch == '\n')
                        nline++;
                    state = 6;
                }
                break;

            case 8: // seen '='
                if (ch == '=')
                    state = -4; // relop ==
                else
                {
                    type = assignopt;
                    state = -5; // assignop =
                    pos--;      // try same char again
                }
                break;

            case 9: // relop: seen '!', '<' or '>'
                if (ch == '=')
                    state = -4;
                else
                {
                    // code[pos-2] is the operator character itself; a lone
                    // '!' is not a token of this language.
                    if (code[pos - 2] == '!')
                    {
                        cout << "Error: invalid character ! in line " << nline << endl;
                        exit(1);
                    }
                    state = -4;
                    pos--; // try same char again
                }
                break;

            case 10:   // addop
                pos--; // try same char again
                state = -6;
                break;

            case 11: // seen '|'
                if (ch == '|')
                    state = -6; // addop ||
                else
                {
                    cout << "Error: invalid character | in line " << nline << endl;
                    exit(1);
                }
                break;

            case 12:   // mulop
                pos--; // try same char again
                state = -7;
                break;

            case 13: // seen '&'
                if (ch == '&')
                    state = -7; // mulop &&
                else
                {
                    cout << "Error: invalid character & in line " << nline << endl;
                    exit(1);
                }
                break;

            case 14:   // other symbols
                pos--; // try same char again
                state = -8; // symbol
                break;

            case 15: // string literal
                if (ch == '"')
                    state = -9;
                else if (ch == '\n')
                {
                    cout << "Error: missing closing \" for string in line " << nline << endl;
                    exit(1);
                }
                break;
            }
        }

        // Input ran out while a token (or comment) was still being read.
        if (state > 0)
        {
            cout << "Error: unexpected end of file found in line " << nline << endl;
            exit(1);
        }

        Lexeme = code.substr(start, pos - start);
        switch (state)
        {
        case -1: // idt, unless the spelling is a reserved word
            Token = idt;
            for (int i = 0; i < N_RESERVED_WORDS; i++)
            {
                if (Lexeme == RESERVED_WORDS[i])
                {
                    Token = RVALS[i];
                    break;
                }
            }
            if (Lexeme.length() > 27)
            {
                cout << "Error: identifier " << Lexeme << " in line " << nline << " is too long." << endl;
                exit(1);
            }
            break;
        case -2: // integer
            Token = valuet;
            Value = atoi(Lexeme.c_str());
            break;
        case -3: // real
            Token = valuert;
            ValueR = static_cast<float>(atof(Lexeme.c_str()));
            break;
        case -4: // relop
            Token = relopt;
            break;
        case -5: // assignop
            Token = assignopt;
            break;
        case -6: // addop
            Token = addopt;
            break;
        case -7: // mulop
            Token = mulopt;
            break;
        case -8: // symbol
            Token = type;
            break;
        case -9: // string
            Token = stringt;
            // Lexeme spans both quote characters; Literal keeps only the
            // text between them (printCurrentToken adds quotes itself).
            Literal = Lexeme.substr(1, Lexeme.length() - 2);
            break;
        case -100: // eof
            Token = eoft;
            break;
        }
    }

    // Reads the whole file and returns it as a string.
    // Prints an error and terminates the program if the file cannot be opened.
    string loadFile(string filename)
    {
        ifstream input(filename.c_str());
        if (!input.is_open())
        {
            cout << "Error opening file: " << filename << endl;
            exit(1);
        }
        stringstream ss;
        ss << input.rdbuf();
        return ss.str();
    }

    // Returns a printable form of the current token: its value for numbers,
    // the quoted text for strings, the spelling for identifiers, and the
    // PRINT entry for everything else.
    string printCurrentToken()
    {
        stringstream ss;
        switch (Token)
        {
        case valuet:
            ss << Value;
            break;
        case valuert:
            ss << ValueR;
            break;
        case stringt:
            ss << "\"" << Literal << "\"";
            break;
        case eoft:
            ss << "EOF";
            break;
        case idt:
            ss << Lexeme;
            break;
        default:
            ss << PRINT[Token];
        }
        return ss.str();
    }

    // Returns a printable description of the token kind `symbol`, used to
    // tell the user what the parser expected.
    string printToken(int symbol)
    {
        stringstream ss;
        switch (symbol)
        {
        case valuet:
            ss << "Integer Value";
            break;
        case valuert:
            ss << "Real Value";
            break;
        case stringt:
            ss << "Literal";
            break;
        case eoft:
            ss << "EOF";
            break;
        case idt:
            ss << "Identifier";
            break;
        default:
            ss << PRINT[symbol];
        }
        return ss.str();
    }
};

// ---------------------------------------------------------------------------
// Parser
// ---------------------------------------------------------------------------

// Recursive parser class.
//
// Each parseXxx() method recognizes one nonterminal of the grammar by looking
// at the lexer's current token. Syntax errors print a message to cout and
// terminate the program with exit(1).
class Parser
{
private:
    LexicalAnalyzer lex;

    // If the current symbol is equal to the symbol needed, return true
    // and advance to next symbol, else return false.
    bool accept(int symbol)
    {
        if (lex.Token == symbol)
        {
            lex.GetNextToken();
            return true;
        }
        else
            return false;
    }

    // If the current symbol is equal to the expected symbol argument, advance
    // to next symbol, else generate an error.
    void expect(int symbol)
    {
        if (!accept(symbol))
        {
            cout << "Error: unexpected symbol: " << lex.printCurrentToken() << " in line " << lex.nline
                 << ", expected " << lex.printToken(symbol) << endl;
            exit(1);
        }
    }

    // TYPE -> float | int | char
    // Returns true and consumes the token if a type keyword was present.
    bool parseType()
    {
        if (lex.Token == floatt || lex.Token == integert || lex.Token == chart)
        {
            lex.GetNextToken();
            return true;
        }
        else
            return false;
    }

    // PARAMTAIL -> , TYPE idt PARAMTAIL | empty
    void parseParamTail()
    {
        if (accept(commat))
        {
            if (parseType())
            {
                expect(idt);
                parseParamTail();
            }
            else
            {
                cout << "Error: unexpected symbol: " << lex.printCurrentToken() << " in line " << lex.nline
                     << ", unknown type" << endl;
                exit(1);
            }
        }
    }

    // PARAMLIST -> TYPE idt PARAMTAIL | empty
    void parseParamList()
    {
        if (parseType())
        {
            expect(idt);
            parseParamTail();
        }
    }

    // STAT_LIST -> empty (statements are not parsed yet)
    void parseStatList()
    {
        // empty
    }

    // RET_STAT -> empty (return statements are not parsed yet)
    void parseRetStat()
    {
        // empty
    }

    // IDTAIL -> , idt IDTAIL | empty
    void parseIdTail()
    {
        if (accept(commat))
        {
            expect(idt);
            parseIdTail();
        }
    }

    // IDLIST -> idt IDTAIL ; DECL
    void parseIdList()
    {
        expect(idt);
        parseIdTail();
        expect(semit);
        parseDecl();
    }

    // DECL -> TYPE IDLIST | empty
    void parseDecl()
    {
        if (parseType())
            parseIdList();
    }

    // COMPOUND -> { DECL STAT_LIST RET_STAT }
    void parseCompound()
    {
        expect(lbracet);
        parseDecl();
        parseStatList();
        parseRetStat();
        expect(rbracet);
    }

    // REST -> ( PARAMLIST ) COMPOUND | IDTAIL ; PROG
    // The first alternative is a function definition, the second the
    // remainder of a global variable declaration.
    void parseRest()
    {
        if (accept(lparent))
        {
            parseParamList();
            expect(rparent);
            parseCompound();
        }
        else
        {
            parseIdTail();
            expect(semit);
            parseProgram();
        }
    }

    // PROG -> TYPE idt REST PROG | empty
    void parseProgram()
    {
        if (parseType())
        {
            expect(idt);
            parseRest();
            parseProgram();
        }
    }

public:
    // Saves a copy of the lexical analyzer to use it in the parsing.
    Parser(const LexicalAnalyzer &Lexa) : lex(Lexa)
    {
    }

    // Parses the whole program held by the lexer. Prints a success message
    // when the input is consumed up to end of file; otherwise reports the
    // offending token and terminates with exit(1) like every other error.
    void parse()
    {
        lex.GetNextToken();
        parseProgram();
        if (lex.Token == eoft)
            cout << "Program parsing ended successfully." << endl;
        else
        {
            cout << "Error: unexpected symbol: " << lex.printCurrentToken() << " in line " << lex.nline
                 << ", expected end of file" << endl;
            exit(1);
        }
    }
};

// ---------------------------------------------------------------------------
// Driver
// ---------------------------------------------------------------------------

// Main program: asks for a file name, scans and parses that file.
int main()
{
    string filename;

    cout << "Please enter the name of the file to load: ";
    getline(cin, filename);

    LexicalAnalyzer lex(filename); // create lexical analyzer
    Parser parser(lex);            // create parser
    parser.parse();                // do the parsing
    return 0;
}