// cyk_eliminate.cpp Eliminate useless variables, epsilon productions // and unit productions // (1a) variables that can never be terminals // (1b) variables that can not be reached from S // (2) eliminate epsilon productions // (3) eliminate unit productions, A -> B #include "cykp.h" void cyk_eliminate(var_type &V, ter_type &T, prod_type &P, variable &S) { // (1a) p89, lemma 4.1, Fig 4.7 var_type newv; // initially phi var_type temp; // just for verbose printout var_type NV; // nullable variables int nvar; // number nullable in production int ivar; // combinations of nullable int kvar; // index of nullable prod_type::iterator p_iter; // new values will be returned prod_type::iterator p2iter; // loop in loop ter_type::const_iterator t_iter; // not going to change alpha::iterator a_iter; // right side of production int n_newv; // number of items in newv, test oldv=newv int n_newt; // number of items in newt var_type::const_iterator v_iter; // for verbose printout prod_type P2; // temporary holder of productions bool nullable; // flag in loop production tp(""); // a temporary production string epsilon = "#e"; // represents epsilon in a rule if(verbose>3) cout << "cyk_eliminate 1a)" << endl; for(p_iter=P.begin(); p_iter!=P.end(); p_iter++) // 2) { p_iter->color = 0; // flag to delete production not set to 2 by end for(a_iter=p_iter->w.begin(); a_iter!=p_iter->w.end(); /* a_iter++ */ ) { if(T.find(*a_iter)==T.end() && *a_iter!=epsilon) break; if(++a_iter==p_iter->w.end()) newv.insert(p_iter->V); } } // end 2) if(verbose>3) { cout << "newv= "; for(v_iter=newv.begin(); v_iter!=newv.end(); v_iter++) cout << *v_iter << " "; cout << endl; } if(verbose>3) cout << "cyk_eliminate 1a) n" << endl; n_newv = 0; while(n_newv != newv.size()) // 3) { n_newv = newv.size(); // 4) for(p_iter=P.begin(); p_iter!=P.end(); p_iter++) { // 5) for(a_iter=p_iter->w.begin(); a_iter!=p_iter->w.end(); /* a_iter++ */ ) { if(T.find(*a_iter)==T.end() && newv.find(*a_iter)==newv.end()) break; if(++a_iter==p_iter->w.end()) newv.insert(p_iter->V); } } } if(verbose>3) { cout << "newv= "; for(v_iter=newv.begin(); v_iter!=newv.end(); v_iter++) cout << *v_iter << " "; cout << endl; } set_difference(V.begin(), V.end(), newv.begin(), newv.end(), inserter(temp, temp.begin())); if(verbose>3) { cout << "eliminate temp= "; for(v_iter=temp.begin(); v_iter!=temp.end(); v_iter++) cout << *v_iter << " "; cout << endl; } if(temp.size()>0) { if(verbose>=2) cout << "lemma 4.1 variables removed: "; for(v_iter=temp.begin(); v_iter!=temp.end(); v_iter++) { if(verbose>=2) cout << *v_iter << " "; for(p_iter=P.begin(); p_iter!=P.end(); p_iter++) { if(*v_iter == p_iter->V) { p_iter->color = 1; // mark for delete V -> for V in temp continue; } for(a_iter=p_iter->w.begin(); a_iter!=p_iter->w.end(); a_iter++) { if(*v_iter == *a_iter) { p_iter->color = 1; // mark for delete V -> ... A ...; A in temp break; } } } } if(verbose>=2) cout << endl; } V = newv; // 6) if(verbose>3) cout << "cyk_eliminate 1b)" << endl; // (1b) page 89, lemma 4.2 eliminate productions, terminals and variables // that can not be sentential forms (reached from S) newv.clear(); newv.insert(S); // new variables n_newv = 0; n_newt = 0; ter_type newt; // new terminals while(n_newv != newv.size() || n_newt != newt.size()) { n_newv = newv.size(); n_newt = newt.size(); for(p_iter=P.begin(); p_iter!=P.end(); p_iter++) // loop through productions { if(p_iter->color > 0) continue; // already processed or deleted if(newv.find(p_iter->V)!=newv.end()) { p_iter->color = 2; for(a_iter=p_iter->w.begin(); a_iter!=p_iter->w.end(); a_iter++) { if(T.find(*a_iter)!=T.end()) newt.insert(*a_iter); if(V.find(*a_iter)!=V.end()) newv.insert(*a_iter); } } } // end loop through productions } // end while loop if(verbose>3) { cout << "newv= "; for(v_iter=newv.begin(); v_iter!=newv.end(); v_iter++) cout << *v_iter << " "; cout << endl; cout << "newt= "; for(v_iter=newt.begin(); v_iter!=newt.end(); v_iter++) cout << *v_iter << " "; cout << endl; } if(verbose>=2) { temp.clear(); set_difference(V.begin(), V.end(), newv.begin(), newv.end(), inserter(temp, temp.begin())); if(temp.size()>0) { cout << "lemma 4.2 variables removed: "; for(v_iter=temp.begin(); v_iter!=temp.end(); v_iter++) { cout << *v_iter << " "; } cout << endl; } temp.clear(); set_difference(T.begin(), T.end(), newt.begin(), newt.end(), inserter(temp, temp.begin())); if(temp.size()>0) { cout << "lemma 4.2 terminals removed: "; for(v_iter=temp.begin(); v_iter!=temp.end(); v_iter++) { cout << *v_iter << " "; } cout << endl; } } // end if(verbose>=2) V = newv; T = newt; for(p_iter=P.begin(); p_iter!=P.end(); p_iter++) // 2) { if(p_iter->color==2) { P2.push_back(*p_iter); // productions to keep } else { if(verbose>=2) { cout << "deleting useless production:" << endl; cout << p_iter->V << " -> " ; for(a_iter=p_iter->w.begin(); a_iter!=p_iter->w.end(); a_iter++) { cout << *a_iter << " " ; } cout << endl; } } } // end lemma 4.2 P = P2; // update P from P2 if(verbose>3) cout << "cyk_eliminate 2)" << endl; // (2) p 90, Theorem 4.3 eliminate epsilon productions if(verbose>=3) cout << "Eliminate epslion Productions" << endl; // remove S -> #e and flag accept_null for parser // eliminate #e from other productions, expand combinations NV.clear(); n_newv = -1; while(NV.size() != n_newv) { n_newv = NV.size(); for(p_iter=P.begin(); p_iter!=P.end(); p_iter++) // 2) { p_iter->color = 0; // flag to delete production not zero by end if(p_iter->w.size() == 1 && p_iter->w[0] == "#e") { p_iter->color = 1; NV.insert(p_iter->V); if(verbose>=3) cout << "nullable variable: " << p_iter->V << endl; continue; } nullable = true; for(a_iter=p_iter->w.begin(); a_iter!=p_iter->w.end(); a_iter++) { if(*a_iter=="#e") { p_iter->color = 1; // bad production cout << "deleting bad epsilon production:" << endl; cout << p_iter->V << " -> " ; for(a_iter=p_iter->w.begin(); a_iter!=p_iter->w.end(); a_iter++) { cout << *a_iter << " " ; } cout << endl; break; } if(NV.find(*a_iter) == NV.end()) // not nullable { nullable = false; break; } } // end a_iter loop through right hand side if(nullable) { p_iter->color = 1; NV.insert(p_iter->V); if(verbose>=3) cout << "nullable variable: " << p_iter->V << endl; } } // end of loop through productions } // end of while loop finding nullable variables if(NV.find(S)!=NV.end()) accept_null = true; // flag set for parser P2.clear(); for(p_iter=P.begin(); p_iter!=P.end(); p_iter++) { if(p_iter->color==0) { P2.push_back(*p_iter); // productions to keep } else { if(verbose>=2) { cout << "deleting epsilon production:" << endl; cout << p_iter->V << " -> " ; for(a_iter=p_iter->w.begin(); a_iter!=p_iter->w.end(); a_iter++) { cout << *a_iter << " " ; } cout << endl; } } } // end lemma 4.3 P = P2; // update P from P2 // BUT, still have to generate ALL combinations of rules with // nullable variables !!! for(p_iter=P.begin(); p_iter!=P.end(); p_iter++) { // count nullable variables (actually 2^nullable) nvar = 1; for(a_iter=p_iter->w.begin(); a_iter!=p_iter->w.end(); a_iter++) { if(NV.find(*a_iter)!=NV.end()) nvar=nvar+nvar; // double it. } if(nvar==1) continue; nvar--; // not keep all for(ivar=0; ivarV); kvar=0; for(a_iter=p_iter->w.begin(); a_iter!=p_iter->w.end(); a_iter++) { if(NV.find(*a_iter)!=NV.end()) { if(((ivar>>kvar)%2)==1) tp.w.push_back(*a_iter); kvar++; } else { tp.w.push_back(*a_iter); } } if(verbose>=2) { cout << "adding with no #e:" << ivar << endl; cout << tp.V << " -> " ; for(a_iter=tp.w.begin(); a_iter!=tp.w.end(); a_iter++) { cout << *a_iter << " " ; } cout << endl; } P2.push_back(tp); } } // end eliminate epsilon productions P = P2; // update P from P2 if(verbose>3) cout << "cyk_eliminate 3)" << endl; // (3) p 91, Theorem 4.4 eliminate unit productions if(verbose>=3) cout << "Eliminate Unit Productions" << endl; variable v_keep; variable v_repl; bool found = true ; int n_unit = 1; int i_unit = 0; for(p_iter=P.begin(); p_iter!=P.end(); p_iter++) p_iter->color = 0; while(found && i_unitw.size()==1 && T.find(p_iter->w[0])==T.end() && p_iter->w[0]!=epsilon) // "#e" { // have unit production v_keep = p_iter->V; v_repl = p_iter->w[0]; p_iter->color = 1; // mark erase unit production n_unit++; if(v_keep==v_repl) continue; // A -> A just delete for(p2iter=P.begin(); p2iter!=P.end(); p2iter++) // copy/substitute { if(p2iter->V == v_repl && (p2iter->w.size()!=1 || V.find(p2iter->w[0])==V.end() )) { tp = *p2iter; tp.V = v_keep; P2.push_back(tp); found = true; } } } } if(found) { for(p_iter=P.begin(); p_iter!=P.end(); p_iter++) P2.push_back(*p_iter); P = P2; } } // end eliminate unit productions // sort, remove duplicate rules and deleted rules P2.clear(); production p1(""); sort(P.begin(), P.end(), production::smaller ); for(p_iter=P.begin(); p_iter!=P.end(); p_iter++) { // test for duplicate and skip if((p1 == *p_iter) || (p_iter->color==1)) continue; p1 = *p_iter; P2.push_back(p1); // keep production } P = P2; sort(P.begin(), P.end(), production::smaller ); if(verbose>=2) { cout << "after eliminate, Variables:" << endl; for(v_iter=V.begin(); v_iter!=V.end(); v_iter++) cout << *v_iter << " "; cout << endl; cout << "after eliminate, Terminals:" << endl; for(t_iter=T.begin(); t_iter!=T.end(); t_iter++) cout << *t_iter << " "; cout << endl; cout << "after eliminate, sorted productions:" << endl; for(p_iter=P.begin(); p_iter!=P.end(); p_iter++) { cout << p_iter->V << " -> " ; for(a_iter=p_iter->w.begin(); a_iter!=p_iter->w.end(); a_iter++) { cout << *a_iter << " " ; } cout << endl; } } // end verbose>=2 return; } // end cyk_eliminate