// File : tokenizer.C // // Implementation of the Tokenizer class which supports parsing #include #include #include #include #include "token.h" #include "tokenizer.h" // Constructors // Since you can't set the string str later, the default constructor // is just about useless. Tokenizer::Tokenizer() { str = NULL ; } // Alternate constructor. Useful. // Tokenizer::Tokenizer(char *input) { int length ; length = strlen(input) ; str = strdup(input) ; if (str == NULL) { fprintf(stderr, "Could not duplicate string for tokenizer\n") ; exit(1) ; } // check if string has trailing '\n' if (str[length - 1] == '\n') { length-- ; str[length] = '\0' ; } last = length - 1 ; SkipSpaces(0) ; // initialize lookahead position pos = lookahead_pos ; looked = 0 ; lookahead.kind = UNDEF ; } // Destructor // Tokenizer::~Tokenizer() { free(str) ; } // Set lookahead position to next non-whitespace position void Tokenizer::SkipSpaces(int i) { while(isspace(str[i])) i++ ; lookahead_pos = i ; } /* Return the next token in the input string. Just looking, do not consume the token. Assumptions: pos is the index of the beginning of the next token (leading spaces should have been skipped) last is the index of the last character of the input string ('\0' is beyond the last character). looked says whether we've looked ahead before. lookahead_pos, if we've looked ahead, points to the beginning of the token after the look ahead token lookahead is the lookahead token, if we've looked ahead before. */ token_t Tokenizer::LookAhead() { token_t token ; int val, i ; if (looked) return lookahead ; // Previously looked ahead? // Look for the next token, mark that we looked ahead looked = 1 ; i = pos ; // Bogus input position ? if (i < 0) { token.kind = UNDEF ; lookahead = token ; looked = 0 ; return token ; } // Past end of input ? if (pos > last) { token.kind = EOL ; lookahead = token ; lookahead_pos = i ; return token ; } // it's a decimal number if (isdigit(str[i])) { val = 0 ; do { val = 10*val + str[i] - '0' ; i++ ; } while (isdigit(str[i])) ; token.kind = NUMBER ; token.value = val ; lookahead = token ; SkipSpaces(i) ; return token ; } /* Single character cases */ switch(str[i]) { case '+' : token.kind = PLUS ; break ; case '-' : token.kind = MINUS ; break ; case '*' : token.kind = TIMES ; break ; case '/' : token.kind = DIVIDE ; break ; case '(' : token.kind = L_PAREN ; break ; case ')' : token.kind = R_PAREN ; break ; default : token.kind = UNDEF ; lookahead = token ; lookahead_pos = i ; return token ; } lookahead = token ; SkipSpaces(i+1) ; return token ; } /* Consume the next token. Read documentation for LookAhead(). */ void Tokenizer::EatToken() { // Weird case: Eat a token without having seen it. if ( !looked) LookAhead() ; pos = lookahead_pos ; looked = 0 ; } // Print error message and point to where the syntax error occurred. void Tokenizer::PrintError() { int i ; printf("%s\n", str) ; for (i = 0 ; i < pos ; i++) { printf(" ") ; } printf("^--- syntax error\n") ; }