// File : token.C // // Implementation of the Token and TokenStream classes #include #include #include #include #include #include "token.h" // overloaded << operator for Token ostream& operator << (ostream& ostr, const Token& tk) { switch(tk.kind) { case Token::UNDEF : ostr << "???" ; break ; case Token::NUMBER: ostr << tk.value ; break ; case Token::PLUS: ostr << '+' ; break ; case Token::MINUS: ostr << '-' ; break ; case Token::TIMES: ostr << '*' ; break ; case Token::DIVIDE: ostr << '/' ; break ; case Token::L_PAREN: ostr << '(' ; break ; case Token::R_PAREN: ostr << ')' ; break ; case Token::EOL: ostr << '\n' ; break ; default: cerr << "Internal Error: bad token kind" << endl ; exit(1) ; } return ostr ; } //=========================================================================== // TokenStream member functions // Constructor TokenStream::TokenStream(const char *input) { int length ; // make a copy of the input length = strlen(input) ; str = strdup(input) ; if (str == NULL) { cerr << "Could not duplicate string for tokenizer" << endl ; exit(1) ; } // check if string has trailing '\n' if (str[length - 1] == '\n') { length-- ; str[length] = '\0' ; } // initialize last = length - 1 ; skip_spaces(0) ; pos = lookahead_pos ; looked = 0 ; } // Destructor // TokenStream::~TokenStream() { free(str) ; } // Set lookahead position to next non-whitespace position void TokenStream::skip_spaces(int i) { while(isspace(str[i])) i++ ; lookahead_pos = i ; } // Return the next token in the input string. // Just looking, do not consume the token. // Assumptions: // pos is the index of the beginning of the next token // (leading spaces should have been skipped) // // last is the index of the last character of the input // string ('\0' is beyond the last character). // // looked says whether we've looked ahead before. // // lookahead_pos, if we've looked ahead, points to // the beginning of the token after the look ahead token // // lookahead is the lookahead token, if we've looked ahead // before. Token TokenStream::look() { Token tk ; int val, i ; if (looked) return lookahead ; // Previously looked ahead? // Look for the next token, mark that we looked ahead looked = 1 ; i = pos ; // Bogus input position ? if (i < 0) { tk.kind = Token::UNDEF ; lookahead = tk ; looked = 0 ; return tk ; } // Past end of input ? if (pos > last) { tk.kind = Token::EOL ; lookahead = tk ; lookahead_pos = i ; return tk ; } // it's a decimal number if (isdigit(str[i])) { val = 0 ; do { val = 10*val + str[i] - '0' ; i++ ; } while (isdigit(str[i])) ; tk.kind = Token::NUMBER ; tk.value = val ; lookahead = tk ; skip_spaces(i) ; return tk ; } // Single character cases switch(str[i]) { case '+' : tk.kind = Token::PLUS ; break ; case '-' : tk.kind = Token::MINUS ; break ; case '*' : tk.kind = Token::TIMES ; break ; case '/' : tk.kind = Token::DIVIDE ; break ; case '(' : tk.kind = Token::L_PAREN ; break ; case ')' : tk.kind = Token::R_PAREN ; break ; default : tk.kind = Token::UNDEF ; lookahead = tk ; lookahead_pos = i ; return tk ; } lookahead = tk ; skip_spaces(i+1) ; return tk ; } // Consume the next token. // Read documentation for LookAhead(). void TokenStream::eat() { // Weird case: Eat a token without having seen it. if ( !looked ) look() ; pos = lookahead_pos ; looked = 0 ; } // Print error message and point to where the syntax error occurred. void TokenStream::error() { int i ; cerr << str << endl ; for (i = 0 ; i < pos ; i++) { // print right number of spaces cerr << ' ' ; } cerr << "^--- syntax error" << endl ; }