/* Justin Reilly
 * Spring 1996
 * 4/28/97
 * Project 4
 

To compile: cc vigenere.c -o vigenere

Description:

This program works by dividing the cipher text into substrings and
then doing all possible shifts on each of those substrings.  For
each of these shifts, the absolute values of the percent a letter
should appear and the percent it does appear are summed.  The lowest
score is the additive inverse of the key letter in mod 26.  It does
this for keys from length 1 to length MAX_KEY_LENGTH which is set to
15.  Then to see which key length is english it checks the number
of common digrams.  This program uses floating point numbers, which
are slower, but more accurate.  It is also capable of encrypting files
and treating non-alphabetic characters in different ways.

*/



#include <stdlib.h>
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <math.h>

#define MAX_SIZE 2000             /* Maximum input size */
#define MAX_KEY_SIZE 50           /* Maximum key sixe */
#define TRUE 1
#define LF 10                     /* Linefeed character */   



/* table of letter frequencies from Stinson (table 1.1) */

float letter_freq[]={.082,.015,.028,.043,.127,.022,.020,.061,.070,.002,
		     .008,.040,.024,.067,.075,.019,.001,.060,.063,.091,
		     .028,.010,.023,.001,.020,.001};



/* 30 most common digrams, used for recognizing english text when
 * choosing correct keylength
 */

char *common_digrams[30]={"th","he","in","er","an","re","ed","on","es",
                          "st","en","at","to","nt","ha","nd","ou","ea",
                          "ng","as","or","ti","is","et","it","ar","te",
                          "se","hi","of"};


int _SKIP=0;               
int _ENCRYPT=0;
int _SWITCH=0;
int _NOFILE=0;

void usage();
void check_switch(char *arg);
void encrypt(FILE *file1, FILE *file2);
void decrypt(FILE *file1, FILE *file2);
void getkey(char key[],int *keylen);
char *find_key(char key[],char new_text[],int keysize,int letters);  
int check_intervals(int start, char *text,int letters,int keylen);
int smallest_one(float array[],int size);
float freq_score(char *text, int shift);
int is_common_digram(char a, char b);
void _decrypt(char *key, int keylen,char *text,char *output); 
int count_digrams(char *text);
int y_or_n(char *prompt);  

main(int argc,char *argv[])  
{
 FILE *infile,*outfile;



  if(argc==1||argc>5) usage();          /* Check if arguments are correct */
  check_switch(argv[1]);

  if(!argv[1+_SWITCH]) usage();                       /* Open files */
  if((infile=fopen(argv[1+_SWITCH],"r"))==NULL){
    printf("Error opening input file.\n");
    exit(0);
  }

  if(argv[2+_SWITCH]){
    if((outfile=fopen(argv[2+_SWITCH],"w"))==NULL){
      printf("Error opening output file.\n");
      exit(0);
    }
  }else{
    outfile=stdout;  
    _NOFILE=1;
  }

  if(_ENCRYPT)
    encrypt(infile,outfile);
  else decrypt(infile,outfile);

  fclose(infile);
  fclose(outfile);

}







/* Prints error message and exits */

void usage()
{
  fprintf(stderr,"Usage: ");
  fprintf(stderr,"vigenere [-es] infile outfile\n\n");
  fprintf(stderr,"\t-e   encrypt infile and write to outfile (default \n");
  fprintf(stderr,"\t     mode is to decrypt infile)\n");
  fprintf(stderr,"\t-s   skips nonalphabetic characters but advances key\n");
  fprintf(stderr,"\t     position (default mode is to ignore nonalphabetic");
  fprintf(stderr," charcters)\n\n");
  exit(0);
}



/* Checks to see if the -e or -s options are used and sets global variables
 * _SKIP and _ENCRYPT 
 */

void check_switch(char *arg)
{
  if(arg[0]=='-'){
    _SWITCH=1;
    if(strchr(arg,'e')){
      _ENCRYPT=1;
      if(strlen(arg)==2) return;
     }
    if(strchr(arg,'s')){
      _SKIP=1;
     }
    else usage();
  }
}



/* encrypt encrypts file1 and outputs it to file2.  If file2 is not
 * specified the file is written to stdout.  This function prompts
 * the user for the keyword
 */


void encrypt(FILE *file1,FILE *file2)
{
  int nch,keylen,keypos=0,ch;
  char key[MAX_KEY_SIZE];
  

  getkey(key,&keylen);

  while((nch=getc(file1))!=EOF) { 
    if(isalpha(nch)){                   /* encrypt alphabetic characters */
     ch=tolower(nch)-'a';
     ch+=(tolower((key[keypos]))-'a');
     ch%=26;
     ch+='a';
     if(isupper(nch))
       putc(toupper(ch),file2);
     else putc(ch,file2);
     keypos++;
     
   }
    else{                            /* put other characters in ouput file */ 
      putc(nch,file2);
      if(_SKIP && nch!=LF) keypos++;
    }
    keypos%=keylen;
  }
}  
    


/* getkey prompts the user for a key.  It is rejected if it contains
 * nonalphabetic characters.  Keylen is set to whatever the length of
 * the key is
 */   

void getkey(char  key[],int *keylen)
{
  int i,valid_key=1;
  fflush(stdin);
  printf("Enter key: ");        /* get key and make sure that it is */
  while(TRUE){                  /* all letters                      */
    scanf("%sMAX_KEY_SIZE",key);
    *keylen=strlen(key);
    for(i=0;i<*keylen;i++)
      if(!isalpha(key[i])) valid_key=0;
    if(valid_key) break;
    printf("Key must be all letters\nRe-enter key: ");
    valid_key=1;
  }
}


/* decrypt is the main decryption loop.  It reads the ciphertext into a
 * string, then it gets a list of the best key for each key length.  It
 * picks the best one of these by which one has the most dcommon digrams
 */


void decrypt(FILE *file1,FILE *file2)
{
  int i=0,nch,letters=0,j=0,len;
  char cipher_text[MAX_SIZE],new_text[MAX_SIZE];
  char keylist[MAX_KEY_SIZE][MAX_KEY_SIZE];
  float digrams[MAX_KEY_SIZE];
  char output[MAX_SIZE];

  while((nch=getc(file1))!=EOF &&i<MAX_SIZE-1){  /* read text from file */ 
    cipher_text[i]=nch;    
    i++;               
  }
  cipher_text[i]='\0';

  i=0;

  /* copy original ciphertext to a new string.  This is the string that
   * calculations will be done with.  The original is kept so that output
   * will match the input exactly.
   */
  
  while(nch=cipher_text[i++]){
    if(isalpha(nch))
      new_text[j++]=tolower(nch);
    else if(_SKIP){
      if(nch!=LF)
	new_text[j++]=nch;
    }
  }
  new_text[j]='\0';
  letters=j-1;

  


  /* get a list of best keys for each keylength */

  for(i=0;i<MAX_KEY_SIZE;i++){         /* go through possible keys */
    find_key(keylist[i],new_text,i+1,letters);
    _decrypt(keylist[i],i+1,new_text,output);
    digrams[i]=200-(float)count_digrams(output);
  }


  /* the digram number is a float so that smallest_one will work with 
   * it.  It is subtracted from 200 so that the largest number will  
   * become the smallest.
   */
 
  i=smallest_one(digrams,MAX_KEY_SIZE); /*find one with most digrams */

  printf("Keylength: %d\n",i+1);
  printf("Key: %s\n",keylist[i]);

  _decrypt(keylist[i],i+1,cipher_text,output);

   printf("Decrypted message:\n\n%s\n",output);
  
   /* ask the user if any changes need to be made to the
    * key.  If the keylength is large in comparison to
    * the size of the cipher text letters will be wrong
    */ 
   
   while(TRUE){
     if(y_or_n("Change key (y/n):")){
       getkey(keylist[0],&len);
       /*       printf("%s %n",keylist[0],len);   */
       _decrypt(keylist[0],len,cipher_text,output);
       printf("%s\n",output);
     }else break;
   }

   if(y_or_n("Save decrypted file (y/n):")){
     if(_NOFILE) printf("No output file was specified, printing to stdout...\n\n");
     fprintf(file2,"%s\n",output);
   }
       

}



/* findkey finds the best key for a certain keylength */

char *find_key(char key[],char new_text[],int keysize,int letters)
{
  int i;
  
  for(i=0;i<keysize;i++){   
    key[i]=((26-check_intervals(i,new_text,letters,keysize))%26)+'a'; 
   }
  key[keysize]='\0';
  return(key);
}


/* check_intervals splits the string up into substrings and then
 * does frequency checks on all of them.  It returns an integer
 * between zero and 25
 */

int check_intervals(int start, char *text,int letters,int keylen)
{
  char temp[MAX_SIZE];
  float all_choices[26];
  int j=0,i=0;

  while(start<=letters){       /* temp is a string starting with start and */
    temp[j]=text[start];       /* ten taking each character keylen apart   */  
    start+=keylen;
    j++;
  }
  temp[j]='\0';


  for(j=0;j<26;j++){         /* get the frequency score for each shift */
    all_choices[j]=freq_score(temp,j);
    /*      printf("%c %f ",j+'a',all_choices[j]); */ 
  }

  return(smallest_one(all_choices,26));  

}


/* smallest_one just returns the smallest number from an array */

int smallest_one(float array[],int size)
{
  float smallest;
  int i=0,j;
 
  smallest=array[0];
  for(j=1;j<size;j++){
    if(array[j]<smallest){
      smallest=array[j];
      i=j;
    }
  }
  return(i);
}


/* frequency score takes a string and shifts each alphabetic
 * character by shift.  It subtracts the percent a letter
 * should occur from the percent it actually does
 */  

float freq_score(char *text, int shift)
{
        int i=0,ch,total_letters=0,freq_array[26];
        float sum=0,partial,letters=0;
 
 
        for(i=0;i<26;i++) freq_array[i]=0;
	i=0;
        
        while(ch=text[i++]){
	  if(isalpha(ch)){
	    freq_array[(((ch-'a')+shift)%26)]++;
	    letters++;
	  }
	}
 
        for(i=0;i<26;i++){
          partial=fabs(letter_freq[i]-(freq_array[i]/letters));
          sum+=partial;
        }
 
 
        return(sum);
}



/* is_common_digram checks if two characters, a and b, are a common 
 * digram
 */

int is_common_digram(char a, char b)
{
  int i;
  for(i=0;i<30;i++){
    if(a==common_digrams[i][0] && b==common_digrams[i][1])
      return(1);
  }
  return(0);
}



/* count_digrams counts the common digrams in a string */

int count_digrams(char *text)
{
  int count=0,i=1;
 
  while(text[i]){
    if(is_common_digram(text[i-1],text[i]))
       count++;
       i++;
  }
  return(count);
 
}
 

/* _decrypt decrypts a string with a certain key.   The decrypted
 * string is in output
 */

void _decrypt(char *key, int keylen,char *text,char *output)
{
  int i=0,keypos=0,keych;
  char ch,tmp,nch,kch;

  while((ch=text[i])!=0){
    if(isalpha(ch)){
      nch=tolower(ch)-'a';
      kch=(26-(key[keypos]-'a'))%26;
      tmp=((nch+kch))%26+ 'a';
      if(isupper(ch)) 
	output[i]=toupper(tmp);
      else output[i]=tmp;
      
      keypos++;
    }else{
      output[i]=ch;

      if(_SKIP && ch!=LF) keypos++;
    }
    keypos%=keylen;
    i++;
  }
  output[i]='\0';


 
}

/* y_or_n prompts the user for a yes or no response */

int y_or_n(char *prompt)
{
  char ch;
  while(TRUE){
    fflush(stdin);
    printf("%s ",prompt);
    ch=getc(stdin);
    if(tolower(ch)=='y') return(1);
    if(tolower(ch)=='n') return(0);
  }
}
























