// cykp.cpp CYK Parser input grammar file, strings to parse, accept or reject // for use with Context Free Grammars, CFG's // // Grammar definition of a CFG G = ( V, T, P, S ) // where V is set of variables // T is the set of terminal symbols // P is the productions of the grammar // S is the starting variable, S is in V // // Input Conventions: // // the file name extension for the grammar is typically .g // free form plain text input file // typical execution is cykp your-grammar.g < strings.dat > your_output.out // or cykp < grammar-and-input-strings > your-output.out // // A comment line has "// " as the first three characters // A line may end with a comment using whitespace "//" // Anything after whitespace after last expected input is a comment // // There are only a few keywords: // start. variable, terminal, trace and verbose // any line not starting with these keywords is a production // // all input must end with a comment "//" or a semicolon ";" // // Terminal symbols must be entered, else execution stops. // // Variables do not need to be entered, they are collected from // the initial production variable, V from V -> // // A production is of the form: // V -> alpha // where V is a variable, alpha is space separated // // variables and/or terminals // // the standard " | " is allowed in productions // // productions must end with " //" or " ;" // // special input-symbol #b for the blank character, // #e for epsilon, the null string // // at least one production is required, // // variable, terminal, trace and productions may continue across end-of-line // and thus must be terminated by a comment, "//", or a semicolon, ";" // // Use #b to input a blank (space) character in the string // // Method: read input grammar // eliminate useless variables that can not become terminals // eliminate useless variables that can not be reached from S // eliminate epsilon productions, A -> #e // eliminate unit productions, A -> B // make productions Chomsky Normal Form // make productions Greibach Normal Form // // This is one of a series of automata simulators and formal language programs // dfa deterministic finite automata (with epsilon transitions and halt) // nfa nondeterministic finite automata // tm Turing machine, deterministic (recognizer and function computation) // ntm nondeterministic Turing machine (language recognizer only) // // compile with GNU g++ 2.8.1 or later or Microsoft 6.0 or later // or cl /GX /ML cykp.cpp ... // g++ -o cykp cykp.cpp cyk_eliminate.cpp cyk_chomsky.cpp \ // cyk_greibach.cpp cyk_parse.cpp // ANSI/ISO C++ // release JSS 2/15/00 v0.8 // update JSS 3/27/00 v0.9 // update JSS 4/18/00 v1.0 Greibach working // update JSS 4/2/01 v1.1 Accept "|" in productions // update JSS 3/16/04 v1.2 Greibach not working, add input "greibach" #include "cykp.h" // all C++ and cykp type definitions int verbose = 0; // verbosity,0=minimum, 1=some, 2=each step, 3>=debug bool accept_null = false; // if null string in language // (set in epsilon elim, test in parse) int main(int argc, char *argv[]) // cykp grammar.g < input-file > output-file { // cykp < grammar-and-input > output-file // primary objects var_type V; // V = all named variables ter_type T; // T = all possible terminals variable S; // S = starting variable prod_type P; // P = productions of the grammar var_type trace; // variables to trace during parsing or "all" production p1(""); // a production, temp variable var_type VT; // temp for testing V intersect T is phi prod_type PT; // temp for collecting valid productions // primary iterators var_type::const_iterator v_iter; // for V and VT ter_type::const_iterator t_iter; // for T prod_type::const_iterator p_iter; // for P and PT alpha::const_iterator a_iter; // for w // control variables bool accepted = false; // input string parsed OK bool bad_production; // ill formed production bool do_greibach = false; // until bug fixed // input variables string keyword; // for inputting candidate keyword variable from_var; // for production variable string item; // from V union T, an element of alpha variable v_item; // for inputting a variable terminal t_item; // for inputting a terminal string input_string; // temp input string variable trace_var; // trace variable or "all" // keywords and special symbols string key_start = "start"; // followed by a state string key_var = "variable"; // followed by a string for variable name string key_ter = "terminal"; // followed by a terminal symbol, #b string key_enddef = "enddef"; // end of grammar, input strings may follow string key_trace = "trace"; // variables to trace string key_verbos = "verbose"; // amount of printout string key_greibach = "greibach"; // do greibach string comm = "//"; // start comment string empty = ""; // empty string cout << "cykp v1.2 starting" << endl; // open grammar file (if no file, read from standard input) // to be implemented in v1.1 while(!cin.eof()) // main input loop { cin >> keyword; if(cin.eof()) break; if(keyword==key_start) { if(S!=empty) cout << "Over writing a previous start." << endl; cin >> S; V.insert(S); if(S=="//") cout << "start looks like a comment?" << endl; cin.ignore(255,'\n'); continue; } else if(keyword==key_var) { while(!cin.eof()) { cin >> v_item; if(cin.eof()) break; // make 3 lines a function, check ends also if(v_item=="//") break; if(v_item==";") break; V.insert(v_item); } cin.ignore(255,'\n'); continue; } else if(keyword==key_ter) { while(!cin.eof()) { cin >> t_item; if(cin.eof()) break; if(t_item=="//") break; if(t_item==";") break; if(t_item=="#b") t_item = " "; // blank special T.insert(t_item); } cin.ignore(255,'\n'); continue; } else if(keyword==key_trace) { while(!cin.eof()) { cin >> trace_var; if(cin.eof()) break; if(trace_var=="//") break; if(trace_var==";") break; trace.insert(trace_var); } cin.ignore(255,'\n'); continue; } else if(keyword==key_verbos) { cin >> verbose; cin.ignore(255,'\n'); continue; } else if(keyword==key_greibach) { do_greibach = true; cin.ignore(255,'\n'); continue; } else if(keyword==key_enddef) // stop inputting { // now may only read input strings cin.ignore(255,'\n'); break; } else if(keyword=="//") // skip comment "// " { cin.ignore(255,'\n'); continue; } else if(keyword==";") // stray ";" { cin.ignore(255,'\n'); continue; } else // should be a production { from_var = keyword; V.insert(from_var); cin >> item; if(cin.eof()) break; if(!(item == "->" || item == ":=")) // not a well formed production { cout << from_var << ' ' << item << " is not V -> or V :=" << " Thus, ignoring line" << endl; cin.ignore(255,'\n'); continue; } p1 = production(from_var); while(!cin.eof()) { cin >> item; if(cin.eof()) break; if(cin.eof()) break; if(item=="//") break; if(item==";") break; if(item=="|") { if(verbose>=3) cout << "adding production " << p1 << endl; P.push_back(p1); p1.w.clear(); // clear right hand side of production continue; } p1.w.push_back(item); } cin.ignore(255,'\n'); // delete trailing comments if(verbose>=3) cout << "adding production " << p1 << endl; P.push_back(p1); } // end 'if' check for key words } // end main input loop if(verbose>0)cout << "reading input finished." << endl; // print primary data objects, check for errors and warnings if(P.size()==0) { cout << "No productions input prevent parsing. Stopping." << endl; return 1; } if(verbose>0) { cout << "Start symbol: " << S << endl; cout << "Variables: "; for(v_iter=V.begin(); v_iter!=V.end(); v_iter++) cout << *v_iter << " "; cout << endl; } if(T.size()==0) { cout << "No terminal symbols input prevent parsing. Stopping." << endl; return 1; } set_intersection(V.begin(), V.end(), T.begin(), T.end(), inserter(VT, VT.begin())); if(VT.size()!=0) // V intersect T must be empty { cout << "V intersect T non empty. Stopping." << endl; return 1; } if(verbose>0) { cout << "Terminals: "; for(t_iter=T.begin(); t_iter!=T.end(); t_iter++) cout << *t_iter << " "; cout << endl; } // sort and print productions p1 = production(""); sort(P.begin(), P.end(), production::smaller ); if(verbose>0) cout << "Sorted productions:" << endl; for(p_iter=P.begin(); p_iter!=P.end(); p_iter++) { // test for duplicate or bad, eliminate bad_production = false; if(verbose>0) cout << *p_iter << endl; if(p1 == *p_iter) { if(verbose>0) cout << " duplicate production" << endl; continue; // skips saving production } p1 = *p_iter; for(a_iter=p1.w.begin(); a_iter!=p1.w.end(); a_iter++) { if(T.find(*a_iter)==T.end() && V.find(*a_iter)==V.end() && (!(*a_iter=="#e"))) { if(verbose>0)cout << " " << *a_iter << " item in production is not a terminal or variable" << endl; bad_production = true; } } if(!bad_production) PT.push_back(p1); } if(!trace.empty()) { if(trace.find("all")!=trace.end()) trace = V; if(verbose>0) { cout << "Trace:"; for(v_iter=trace.begin(); v_iter!=trace.end(); v_iter++) cout << *v_iter << " "; cout << endl; } } if(PT.size()==0) { cout << "no good productions left, stopping." << endl; return 1; } cyk_eliminate(V, T, PT, S); cyk_chomsky (V, T, PT, S); if(do_greibach) cyk_greibach (V, T, PT, S); // main parsing for each input string while(!cin.eof()) //read input strings to parse, one per line { cin >> input_string; if(cin.eof()) return 0; if(input_string=="//") // comment { cin.ignore(255,'\n'); continue; } if(input_string=="#e") input_string = ""; // null string // parse input string with processed grammar cout << endl << "about to parse " << input_string << endl; accepted = cyk_parse(V, T, PT, S, input_string); } return 0; } // end of main // for outputting objects of class production ostream &operator<<(ostream &stream, production p) { alpha::const_iterator a_iter; // for w stream << p.V << " -> "; for(a_iter=p.w.begin(); a_iter!=p.w.end(); a_iter++) { stream << *a_iter << '\t'; // output terminals and variables in rule } return stream; } // end cykp.cpp