// myhill.cpp minimize a Deterministic Finite Automata // // Machine definition of a DFA M = (Q, Sigma, delta, q0, F) // where Q is set of states // Sigma is input tape alphabet // delta is transition table // q0 is the starting state // F is the set of final states // // Input Conventions: // // the file name extension is typically .dfa // free form plain text input file // typical execution is myhill output.file < your_input.dfa > info_output.out // // A comment line has "// " as the first three characters // A line may end with a comment using whitespace "//" // Anything after whitespace after last expected input is a comment // // There are only a few keywords: // start, final, halt, trace, limit, enddef and tape // any line not starting with these keywords is a transition // 'enddef' should be used // // there is no such thing as a continuation line // // A transition is a three tuple: // from-state input-symbol to-state // // special input-symbol #b for the blank character, // // at least one transition is required, // the typical dummy transition is phi #b phi // // Anything after the required field(s) is a comment // // Input tape data after enddef is passed on to the output file // // Method: Textbook Introduction to Automata Theory, Languages, // and Computation by Hopcroft and Ullman, Section 3.4 // // This is one of a series that also includes automata simulators // dfa deterministic finite automata (with epsilon transitions and halt) // nfa nondeterministic finite automata // tm Turing machine, deterministic (recognizer and function computation) // ntm nondeterministic Turing machine (language recognizer only) // // compile with GNU g++ 2.8.1 or later or Microsoft 6.0 or later // g++ -o myhill myhill.cpp or cl /GX /ML myhill.cpp // ANSI/ISO C++ // release JSS 3/12/00 v1.0 #include #include #include #include #include #include #include #include using namespace std; class transition // an entry in 'delta' transition_table { public: transition(string from_state, char input_symbol, string to_state) {from=from_state; input=input_symbol; to=to_state; color=0;} static bool smaller(transition a, transition b) { if(a.fromb.from) return false; else return a.input labels); static void mark(tlm &M, int p, int q, char x); static void marklist(tlm &M, int p, int q, char x); static void addlist(tlm &M, int r, int s, int p, int q); int main(int argc, char *argv[]) // myhill output-file < input-file > info-output { // primary objects set states; // Q =all named states set from_states; // source states for checking set to_states; // target states for checking string start = ""; // q0 = starting state set final; // F = set of final states set sigma; // Sigma = set of tape symbols set trace; // states to trace vector tape; // input tape vector t_table; // original delta transition table vector delta; // delta after remove unreachable typedef pair t_pair; // to make t_index typedef multimap > t_map; t_map t_index; // t_table index pair p_pair; // for t_index deque dfs_que; // states to search set Q; // Q set of states after remove unreachable set Q_F; // Q-F set of states after remove unreached set F; // F set of final states after remove unreached map > s_index; // state to matrix tlm M; // matrix for marking distinct states typedef pair sc_pair; // to make sig_index typedef map s_map;// sigma to integer map type for ndelta s_map sig_index; // sigma to integer map for ndelta ndl ndelta; // numeric delta for fast marking typedef pair st_pair; // string mapping pair typedef vector st_vect; // string mapping vector ofstream file_out; char line[256]; // for copying input to output char buf[256]; // for printing input file comments // primary iterators vector::iterator x; // for t_table int tx; // for t_table t_map::iterator pt; // for t_index set::iterator iter; // for sets set::const_iterator iter1; // for sets set::const_iterator iter2; // for sets set::const_iterator iterc; // for sigma // control variables bool no_minimize = false; // set true by various errors string state; // present state int i; // loop index int p; // matrix M or ndelta subscript int q; // matrix M subscript string prev_from = " "; // for detecting nondeterministic char prev_input = ' '; // for detecting nondeterministic bool nondeterministic = false; // use nfa simulator, not this dfa int color; // for DFS string next_state; // for DFS // input variables string keyword; // for inputting candidate keyword string from_state; // for inputting one from_state string input_symbol; // character extracted later string to_state; // for inputting one to-state string final_state; // for inputting one final or halt state string trace_state; // for inputting one trace_state string initial_tape =""; // check for #b (may read more than 1) string check; // check for // that ends data on a line string input_str; // temp string for #] bool halt_mode = false; // flag to pass along int limit = 10000; // limit to pass along // keywords and special symbols string key_start = "start"; // followed by a state string key_tape = "tape"; // followed by a string, may have #b string key_final = "final"; // followed by a final state name string key_halt = "halt"; // followed by a final state name, halt mode string key_trace = "trace"; // followed by "all" or a state string key_limit = "limit"; // followed by an integer string key_enddef= "enddef"; // only a "tape" may follow this string comm = "//"; // start comment string empty = ""; // empty string char epsilon = '\0'; // unprintable, #e epsilon cout << "myhill v1.0 starting" << endl; if(argc<2) { cout << "no output file given, using myhill_temp.dfa" << endl; file_out.open("myhill_temp.dfa"); } else { file_out.open(argv[1]); if(!file_out) { cout << "can not open output file, using myhill_temp.dfa" << endl; file_out.open("myhill_temp.dfa"); } cout << "writing minimized DFA to " << argv[1] << endl; } while(!cin.eof()) // main input loop { cin >> keyword; if(cin.eof()) break; if(keyword==key_start) { if(start!=empty) cout << "Over writing a previous start state." << endl; cin >> start; states.insert(start); if(start=="//") cout << "start looks like a comment?" << endl; cin.ignore(255,'\n'); continue; } else if(keyword==key_tape) { cin >> initial_tape; cin.ignore(255,'\n'); continue; } else if(keyword==key_final) { cin >> final_state; final.insert(final_state); if(final_state=="//") cout << "final looks like a comment?" << endl; cin.ignore(255,'\n'); continue; } else if(keyword==key_halt) { halt_mode = true; cin >> final_state; if(final_state!=comm && final_state!=empty) { // allow stand alone "halt" just to set flag final.insert(final_state); } cin.ignore(255,'\n'); continue; } else if(keyword==key_trace) { cin >> trace_state; trace.insert(trace_state); if(trace_state=="//") cout << "trace state looks like a comment?" << endl; cin.ignore(255,'\n'); continue; } else if(keyword==key_limit) { cin >> limit; cin.ignore(255,'\n'); continue; } else if(keyword==key_enddef) // stop inputting { // now may only read "tape" cin.ignore(255,'\n'); break; } else if(keyword=="//") // skip comment, now printing "// " { cin.getline(buf, 255, '\n'); cout << keyword << buf << endl; // cin.ignore(255,'\n'); continue; } else { from_state = keyword; states.insert(from_state); from_states.insert(from_state); } cin >> input_symbol; if(cin.eof()) break; if(input_symbol=="#b") input_symbol[0]=' '; // a space if(input_symbol=="#e") input_symbol[0]=epsilon; // epsilon cin >> to_state; if(cin.eof()) break; states.insert(to_state); to_states.insert(to_state); sigma.insert(input_symbol[0]); cin.ignore(255,'\n'); // delete trailing comments t_table.push_back(transition(from_state, input_symbol[0], to_state)); } // end main input loop cout << "reading input finished." << endl; // print primary data objects, check for errors and warnings if(states.size()==0) { cout << "No states input prevent minimization. Stopping." << endl; return 1; } cout << "start state " << start << endl; cout << "states:"; for(iter=states.begin(); iter!=states.end(); iter++) cout << *iter << " "; cout << endl; iter = from_states.find(start); // check that start is a legal state if(iter==states.end()) { cout << start << " is not a legal starting state." << endl; no_minimize=true; } cout << endl; cout << "final states:"; if(final.size()==0) { cout << " No final states" << endl; no_minimize=true; } else if(to_states.size()!=0) { for(iter=final.begin(); iter!=final.end(); iter++) cout << *iter << " "; cout << endl; for(iter=final.begin(); iter!=final.end(); iter++) // check final states { iter2 = to_states.find(*iter); if(iter2==to_states.end() && (*iter!=start)) { cout << *iter << " is not a valid final state." << endl; no_minimize=true; } } cout << endl; } if(t_table.size()!=0) { sort(t_table.begin(), t_table.end(), transition::smaller ); cout << "Sorted original transition table:" << endl; for(x=t_table.begin(); x!=t_table.end(); x++) { cout << *x << endl; if(x->from==prev_from && (x->input==prev_input || prev_input==epsilon)) { nondeterministic = true; cout << " duplicate transition" << endl; } prev_from=x->from; prev_input=x->input; } } else { cout << "No transitions (three tuples)" << endl; no_minimize=true; } if(nondeterministic) { cout << "nondeterministic 'delta' transition table" << endl; cout << "either eliminate duplicate transitions or use nfa to dfa" << endl; return 1; } if(t_table.size() != (from_states.size()*sigma.size())) { cout << "not a complete DFA, |sigma|=" << sigma.size() << " |states|=" << from_states.size() << " but only " << t_table.size() << " unique transitions." << endl; no_minimize=true; } if(no_minimize) { cout << "Errors in input prevent minimization. Stopping." << endl; return 1; } // build map for state to transition table index for(tx=0; txsecond; // process this group next_state = t_table[tx].to; color = t_table[tx].color; if(color==0 && Q.find(next_state)==Q.end()) { dfs_que.push_back(next_state); } t_table[tx].color = 1; } } cout << endl << "reachable states: "; for(iter=Q.begin(); iter!=Q.end(); iter++) cout << *iter << " "; cout << endl; // eliminate unusable transitions cout << "remaining transitions:" << endl; for(x=t_table.begin(); x!=t_table.end(); x++) { if(Q.find(x->to)!=Q.end() && Q.find(x->from)!=Q.end()) { cout << *x << endl; delta.push_back(*x); } } sort(delta.begin(), delta.end(), transition::smaller ); // eliminate non reachable final states F.clear(); cout << "remaining final states: "; for(iter=final.begin(); iter!=final.end(); iter++) { if(Q.find(*iter)!=Q.end()) { cout << *iter << " "; F.insert(*iter); } } cout << endl; if(F.size()<=0) { cout << "no final states remain, language is phi." << endl; return 0; } // compute Q - F Q_F.clear(); set_difference(Q.begin(), Q.end(), F.begin(), F.end(), inserter(Q_F,Q_F.begin())); cout << "Q - F : "; for(iter=Q_F.begin(); iter!=Q_F.end(); iter++) { cout << *iter << " "; } cout << endl; // build map for state to delta index t_index.clear(); for(tx=0; txsecond; q=s_index.find(delta[tx].to)->second; i=sig_index.find(delta[tx].input)->second; ndelta[p][i]=q; } // initialize matrix M for(p=0; psecond; for(iter2=Q_F.begin(); iter2!=Q_F.end(); iter2++) { q=s_index.find(*iter2)->second; mark(M, p, q, 'x'); } } cout << "matrix M after step 1)" << endl; printm(M, Q); // check for marking F x F state pairs distinguishable for(iter=F.begin(); iter!=F.end(); iter++) { p=s_index.find(*iter)->second; for(iter2=F.begin(); iter2!=F.end(); iter2++) { q=s_index.find(*iter2)->second; if(p>=q) continue; // not psecond; for(iter2=Q_F.begin(); iter2!=Q_F.end(); iter2++) { q=s_index.find(*iter2)->second; if(p>=q) continue; // not pq) {qq=p; pp=q;} M[pp][qq].s=x; } static void marklist(tlm &M, int p, int q, char x) { // recursively mark list at (p,q) and delete marked entries int pp=p; int qq=q; int r; int s; tl *link; // link to more (p,q) pairs from addlist if(p>q) {qq=p; pp=q;} r=M[pp][qq].p; s=M[pp][qq].q; if(r==s)return; // no list M[pp][qq].p=0; // must really walk list M[pp][qq].q=0; if(!marked(M, r, s, x)) { mark(M, r, s, x); marklist(M, r, s, x); } link=M[pp][qq].next; while(link!=0) { r=link->p; s=link->q; if(r==s)return; link->p=0; link->q=0; if(!marked(M, r, s, x)) { mark(M, r, s, x); marklist(M, r, s, x); } link=link->next; } } static void addlist(tlm &M, int r, int s, int p, int q) { int pp=r; int qq=s; tl *link; if(r>s) {qq=r; pp=s;} if(M[pp][qq].p==0 && M[pp][qq].q==0) { M[pp][qq].p=p; M[pp][qq].q=q; return; } else { if(M[pp][qq].p==p && M[pp][qq].q==q) return; if(M[pp][qq].next==0) { M[pp][qq].next=new tl(' ', p, q, 0); return; } link=M[pp][qq].next; while(link!=0) { if(link->p==0 && link->q==0) { link->p=p; link->q=q; return; } if(link->p==p && link->q==q) return; if(link->next==0) { link->next=new tl(' ', p, q, 0); return; } link=link->next; } } } static bool marked(tlm M, int p, int q, char x) { int pp=p; int qq=q; if(p>q) {qq=p; pp=q;} return M[pp][qq].s==x; } // for outputting objects of class transition ostream &operator<<(ostream &stream, transition trans) { string input_token=string(1,trans.input); if(trans.input=='\0') input_token=string("#e"); if(trans.input=='\1') input_token=string("#]"); if(trans.input=='\2') input_token=string("#["); stream << trans.from << '\t' << input_token << '\t' << trans.to << '\t' ; return stream; } static void printm(tlm M, set labels) { int size=labels.size(); set::const_iterator i; set::const_iterator j; int p; int q; cout << '\t'; for(j=labels.begin(); j!=labels.end(); j++) cout << *j << '\t'; cout << endl; p=0; for(i=labels.begin(); i!=labels.end(); i++) { cout << *i << '\t'; q=0; for(j=labels.begin(); j!=labels.end(); j++) { cout << M[p][q].s << '\t'; q++; } cout << endl; p++; } cout << endl; } // end print matrix // end myhill.cpp