//  File : tokenizer.C
//  
//  Implementation of the Tokenizer class, which splits an arithmetic
//  expression string into tokens for a parser.

#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <limits.h>
#include <string.h>
#include "token.h"
#include "tokenizer.h"


// Constructors

// Since you can't set the string str later, the default constructor
// is just about useless.

Tokenizer::Tokenizer() {
   // There is no input string, so put every member into a defined state
   // that describes an empty input: with pos > last, LookAhead() takes
   // its "past end of input" branch instead of reading uninitialized
   // indices or dereferencing str.
   str = NULL ;
   last = -1 ;
   pos = 0 ;
   lookahead_pos = 0 ;
   looked = 0 ;
   lookahead.kind = UNDEF ;
}

// Alternate constructor.  Useful.
//
Tokenizer::Tokenizer(char *input) {
   // Builds a tokenizer over a private copy of input.
   //   input : NUL-terminated line to tokenize; one trailing '\n' is
   //           dropped.  A NULL pointer is treated like an empty line.
   // Exits the program with status 1 if the copy cannot be allocated.

   const char *source = (input != NULL) ? input : "" ;
   int length = static_cast<int>(strlen(source)) ;

   // Work on a duplicate: the newline is stripped in place below and
   // the destructor frees this buffer.
   str = strdup(source) ;
   if (str == NULL) {
      fprintf(stderr, "Could not duplicate string for tokenizer\n") ;
      exit(1) ;
   }

   // Strip a trailing '\n'.  The length test keeps an empty string
   // from indexing str[-1].
   if (length > 0 && str[length - 1] == '\n') {
      length-- ;
      str[length] = '\0' ;
   }

   last = length - 1 ;   // index of the final character; -1 when empty
   SkipSpaces(0) ;       // leaves lookahead_pos on the first non-space
   pos = lookahead_pos ;
   looked = 0 ;
   lookahead.kind = UNDEF ;
}


// Destructor
//
Tokenizer::~Tokenizer() {
   // str is either NULL (default constructor) or the strdup() copy made
   // by the string constructor; only the latter needs releasing.
   if (str != NULL) {
      free(str) ;
   }
}


// Set lookahead position to next non-whitespace position 

void Tokenizer::SkipSpaces(int i) {
   // Starting at index i of str, step over whitespace and store the
   // index of the first non-whitespace character in lookahead_pos.
   // The terminating '\0' is not whitespace, so the scan stops there
   // at the latest.
   //
   // The <ctype.h> classifiers require an argument representable as
   // unsigned char (or EOF).  A plain char can be negative for bytes
   // >= 0x80, so convert before classifying.
   while (isspace(static_cast<unsigned char>(str[i]))) {
      i++ ;
   }
   lookahead_pos = i ;
}


/* Return the next token in the input string.  
   Just looking, do not consume the token.
   Assumptions:
     pos is the index of the beginning of the next token
        (leading spaces should have been skipped)

     last is the index of the last character of the input
        string ('\0' is beyond the last character).

     looked says whether we've looked ahead before.

     lookahead_pos, if we've looked ahead, points to
        the beginning of the token after the look ahead token

     lookahead is the lookahead token, if we've looked ahead
        before.
*/

token_t Tokenizer::LookAhead() {
   token_t token ;
   int val, digit, i ;

   // Returns the token that starts at pos without consuming it.  The
   // result is cached in lookahead, and lookahead_pos is set to where
   // scanning should resume once EatToken() consumes it.

   if (looked) return lookahead ; // Previously looked ahead?

   // Only NUMBER tokens carry a value; give every other kind a defined
   // one so the token can be copied and cached safely.
   token.value = 0 ;

   // Look for the next token, mark that we looked ahead
   looked = 1 ;
   i = pos ;

   // Bogus input position?  Report UNDEF and leave it uncached.
   if (i < 0) {
      token.kind = UNDEF ;
      lookahead = token ;
      looked = 0 ;
      return token ;
   }

   // No input string at all, or past end of input?
   if (str == NULL || i > last) {
      token.kind = EOL ;
      lookahead = token ;
      lookahead_pos = i ;
      return token ;
   }

   // It's a decimal number.  The classifier argument is converted to
   // unsigned char because a negative plain char is not a valid
   // argument for <ctype.h> functions.
   if (isdigit(static_cast<unsigned char>(str[i]))) {
      val = 0 ;
      do {
         digit = str[i] - '0' ;
         // Saturate at INT_MAX rather than overflowing a signed int
         // when the digit run is too long to represent.
         if (val > (INT_MAX - digit) / 10) {
            val = INT_MAX ;
         } else {
            val = 10*val + digit ;
         }
         i++ ;
      } while (isdigit(static_cast<unsigned char>(str[i]))) ;

      token.kind = NUMBER ;
      token.value = val ;
      lookahead = token ;
      SkipSpaces(i) ;
      return token ;
   }

   /* Single character cases */
   switch(str[i]) {
      case '+' : token.kind = PLUS ; break ;

      case '-' : token.kind = MINUS ; break ;

      case '*' : token.kind = TIMES ; break  ;

      case '/' : token.kind = DIVIDE ; break ;

      case '(' : token.kind = L_PAREN ; break ;

      case ')' : token.kind = R_PAREN ; break ;

      default :
         // Unrecognized character: lookahead_pos stays on it, so
         // EatToken() does not move pos past the offending position.
         token.kind = UNDEF ;
         lookahead = token ;
         lookahead_pos = i ;
         return token ;
   }

   lookahead = token ;
   SkipSpaces(i+1) ;   // resume after the operator and any blanks
   return token ;

}


/* Consume the next token.
   Read documentation for LookAhead().
*/

void Tokenizer::EatToken() {
   // Consuming a token means moving pos to the resume position that
   // LookAhead() recorded.  If the caller never looked at the token,
   // scan it now so LookAhead() gets the chance to update lookahead_pos.
   if (looked == 0) {
      LookAhead() ;
   }

   // Invalidate the cached token and step to the next one.
   looked = 0 ;
   pos = lookahead_pos ;
}


// Print error message and point to where the syntax error occurred.

void Tokenizer::PrintError() {
   int i, len ;

   // Writes the input line to stdout, followed by a marker line whose
   // caret sits under the character at index pos.

   // No input string (default-constructed tokenizer): passing NULL to
   // "%s" is undefined, so show an empty line with the marker under it.
   if (str == NULL) {
      printf("\n^--- syntax error\n") ;
      return ;
   }

   printf("%s\n", str) ;

   // Pad out to column pos.  A tab in the input is echoed as a tab so
   // the caret lines up whatever the terminal's tab width is; every
   // other character takes one space.  The length test keeps the
   // lookup inside the string.
   len = static_cast<int>(strlen(str)) ;
   for (i = 0 ; i < pos ; i++) {
      putchar((i < len && str[i] == '\t') ? '\t' : ' ') ;
   }
   printf("^--- syntax error\n") ;
}