/*--------------------------------------------------------------------------
Fred Engel, Spring 1996
CMSC443 Cryptology
Project 4 (Break a Vigenere cipher)

file: oscar.c

purpose: Take a text file that was encrypted using the Vigenere method and
  produce the corresponding plaintext.

compilation: requires 'cc oscar.c -o oscar -lm' to include the math.h file

use: 

  User types oscar to start the program.  He is prompted for the input
file, the maximum keylength to try (it can be changed later if not correct),
and method for shift a key character (on alphas only, printables only, or
all chars).  The user then has a menu to choose operations from.  Usually,
the user will select autosolve, which will automatically call menu items
2 (find keylength), 3 (find relative key), 4 (find actual key), and 5
(display plaintext) in order. Then the user might select 7 to save the 
plaintext to a file.

  If the plaintext does not look correct, the user can change the options to
try for a larger keylength or possibly to see if another key-shift method
was used.

  Alternatively, the user can select 2, 3, and 4 seperately to change what
the program has determined to be the correct answer.

limitations:

The maximum keylength the program can try is 300. If the user enters a max
to try that is higher than 300, the program sets it to 300. Also, the max
that the user selects helps to speed up the search, but the program will 
still statically allocate arrays based on 300.

There are only three key-shift methods provided: shift on alpha only,
printable (does not include spaces, tabs, etc.) only, or all (except newline)
characters.  A new method would have to be added to the program if the user 
wanted a different method.

The ciphertext to decrypt must have a ratio of about 40 to 50 character for
each key character in order for the program to get usable values out of the
statistics it collects on the ciphertext.  Ratio's close to that but still
too small may still provide some useful information.

---------------------------------------------------------------------------*/

#define MAXKEYLENGTH 300 /* used to allocate arrays */
#define COLTOTAL 26      /* used to reference column totals
                            in char_freq array */

#define AUTO 0   /* indicates if user selected autosolve */
#define MANUAL 1 /* indicates if user did not select autosolve */

#include <stdio.h>
#include <stdlib.h>
#include <math.h>

/* function declarations - see definitions for details */
int get_options();               
int initialize(int[], int[], int*);
int get_selection();
int get_keylength(int);
void sort(float[], int[]);
void get_keyshift(int, int[]);
int get_the_key(int, int[], int[]);
int get_conv_char(int, int, int, int);
void display_ciphertext();
void display_plaintext(int, int[]);
void save_plaintext(int, int[]);

/* holds the known English character frequencies */
int norm_char_freq[26] = {82, 15, 28, 43, 127, 22, 20, 61, 70, 2, 8, 40, 24,
			  67, 75, 19, 1, 60, 63, 91, 28, 10, 23, 1, 20, 1};

FILE* infile;           /* the input file pointer */
char infilename[40];    /* the input file name */
int when_to_shift = 1;  /* indicate key-shift-method */
int max_keylength = 50; /* indicate user-selected maximum keylength */

/*---------------------------------------------------------------------------
function: void main()
purpose:  control flow of the program
given:    no data passed in.
returns:  no value returned.
---------------------------------------------------------------------------*/
int main()
{
  int selection;             /* indicates user's menu selection */
  int keylength = 0;         /* the keylength */
  int rel_key[MAXKEYLENGTH]; /* the relative key */
  int key[MAXKEYLENGTH];     /* the actual key */
  int i;                     /* used for loop control */
  
  printf("\nStarting Oscar - an automated Vigenere decryption tool.\n");
  keylength = initialize(rel_key, key, &max_keylength);
  while((selection = get_selection()) != 0) {
    switch(selection) {
    case 1:
      keylength = get_keylength(AUTO);
      get_keyshift(keylength, rel_key);
      keylength = get_the_key(keylength, rel_key, key);
      display_plaintext(keylength, key);
      break;
    case 2: 
      keylength = get_keylength(MANUAL); 
      break;
    case 3: 
      get_keyshift(keylength, rel_key); 
      break;
    case 4: 
      keylength = get_the_key(keylength, rel_key, key); 
      break;
    case 5: display_plaintext(keylength, key); break;
    case 6: save_plaintext(keylength, key); break;
    case 7: keylength = initialize(rel_key, key, &keylength); break;
    case 8: display_ciphertext(); break;
    case 9: get_options(); break;
    default: break;
    }
    /* display current values to user */
    printf("\n\nKeylength currently set to: %d\n", keylength);
    printf("\nRelative key currently set to: ");
    for(i = 0; i < keylength; i++)
      if(rel_key[i] < 0)
	printf("_");
      else
	printf("%c", (char)(((rel_key[i]) % 26) + 65));
    printf("\n");
    printf("\nEncryption key currently set to: ");
    for(i = 0; i < keylength; i++)
      if(key[i] < 0)
	printf("_");
      else
	printf("%c", (char)(((26 - key[i]) % 26) + 65));
    printf("\n");
    printf("\nDecryption key currently set to: ");
    for(i = 0; i < keylength; i++)
      if(key[i] < 0)
	printf("_");
      else
	printf("%c", (char)((key[i] % 26) + 65));
    printf("\n");
  }
  printf("\nOscar Terminated\n");
  return 0;
}

/*---------------------------------------------------------------------------
function: int get_options()
purpose:  allow the user to set two options: maximum keylength to try and
  key-shift method.
given:    no data passed in.
returns:  no value returned.
---------------------------------------------------------------------------*/
get_options()
{
  int input;

  printf("\nWhat is the maximum keylength to try (up to 300): ");
  scanf("%d", &max_keylength);
  /* max_keylength must between between 1 and 300 */
  if(max_keylength > 300)
    max_keylength = 300;
  else 
    if(max_keylength < 1)
      max_keylength = 10;
  printf("\nWhen should a key character be shifted?\n\n");
  printf("1. Alpha characters only\n");
  printf("2. All printable characters\n");
  printf("3. All characters except new-line\n");
  printf("\nCurrent setting: %d, Enter a new setting: ", when_to_shift);
  scanf("%d", &when_to_shift);
  /* default to 1 if invalid input */
  if((when_to_shift < 1) || (when_to_shift > 3))
    when_to_shift = 1;
}

/*---------------------------------------------------------------------------
function: int initialize(int rel_key[], int key[], int* keylength)
purpose:  Get a new ciphertext file to process and get new options for it.
given:    int rel_key[] - relative key array gets initialized to -1s
          int key[] - key array gets initialized to -1s
          int* keylength - keylength gets initialized to 0.
---------------------------------------------------------------------------*/
int initialize(int rel_key[], int key[], int* keylength)
{
  int i;
  
  printf("\nEnter the filename of the ciphertext: ");
  scanf("%s", infilename);
  if((infile = fopen(infilename, "r")) == NULL) {
    printf("\nERROR: unable to open %s\n", infilename);
  }
  fclose(infile);
  for(i = 0; i < MAXKEYLENGTH; i++) {
    rel_key[i] = -1;
    key[i] = -1;
  }
  *keylength = 0;
  get_options();
  return 0;
}

/*--------------------------------------------------------------------------
function: int get_selection()
purpose:  get user input for the main menu
given:    no data passed in
returns:  the user selection
--------------------------------------------------------------------------*/
int get_selection()
{
  char input[80];
  int selection;
  int input_not_valid = 1;
  
  while(input_not_valid) {
    printf("\n\t1. Auto Solve (recommended first)\n");
    printf("\t2. Find Keylength\n");
    printf("\t3. Find Relative Key\n");
    printf("\t4. Find Actual Key\n");
    printf("\t5. Display Plaintext\n");
    printf("\t6. Save Plaintext\n");
    printf("\t7. Load New Ciphertext\n");
    printf("\t8. Display Ciphertext\n");
    printf("\t9. Set Options\n");
    printf("\tX. Exit Oscar\n\n");
    printf("Enter selection: ");
    scanf("%s", input);
    if((input[0] == 'x') || (input[0] == 'X')) {
      printf("\nEXIT OSCAR - Are you sure (y/n)? ");
      scanf("%s", input);
      if ((input[0] == 'y') || (input[0] == 'Y')) {
	selection = 0;
	input_not_valid = 0;
      }
      else
	input_not_valid = 1;
    }
    else
      if(((int)input[0] > 48) && ((int)input[0] < 58)) {
	selection = ((int)input[0]) - 48;
	input_not_valid = 0;
      }
      else {
	printf("\nERROR ON INPUT: Enter 1-7 or X.\n");
	input_not_valid = 1;
      }
  }
  return selection;
}

/*--------------------------------------------------------------------------
function: void display_ciphertext()
purpose:  display the ciphertext input file
given:    no data passed in
returns:  no value returned
--------------------------------------------------------------------------*/
void display_ciphertext()
{
  char c;
  int ch;
  char input[80];
  int all_count = 0;
  int printable_count = 0;
  int alpha_count = 0;
  
  if((infile = fopen(infilename, "r")) == NULL) {
    printf("\nOSCAR ERROR - unable to open input file\n");
    fclose(infile);
    return;
  }
  printf("\nThe ciphertext is:\n");
  while(fscanf(infile, "%c", &c) != EOF) {
    printf("%c", c);
    ch = (int)c;
    if(c != '\n')
      all_count++;
    if(ch > 32)
      printable_count++;
    if( ((ch > 64) && (ch < 91)) || ((ch > 96) && (ch < 123)) )
      alpha_count++;  
  }
  fclose(infile);
  printf("\n\n%d total chars, %d printable, %d alpha\n",
         all_count, printable_count, alpha_count);
  printf("\nEnter 'c' to continue: ");
  scanf("%s", input);
}

/*--------------------------------------------------------------------------
function: void display_plaintext(int keylength, int key[])
purpose:  display the plaintext on the screen
given:    int keylength - length of the key
          int key[] - the key itself
returns:  no value returned.
--------------------------------------------------------------------------*/
void display_plaintext(int keylength, int key[])
{
  char c;
  int ch;
  int current_pos;
  int i;
  char input[80];
  int index;
  
  if(keylength == 0) {
    printf("\nCANNOT DISPLAY - find a keylength first.\n");
    return;
  }
  else
    for(i = 0; i < keylength; i++)
      if(key[i] == -1) {
	printf("\nTHE KEY IS NOT COMPLETE - an underscore (_)\n");
	printf("will be displayed where a blank key appears\n");
	printf("\nEnter 'c' to continue: ");
	scanf("%s", input);
	break;
      }
  if((infile = fopen(infilename, "r")) == NULL) {
    printf("\nOSCAR ERROR - unable to open input file!\n");
    fclose(infile);
    return;
  }
  current_pos = 0;
  printf("\nThe plaintext is:\n");
  while(fscanf(infile, "%c", &c) != EOF) {
    ch = (int)c;
    if((key[current_pos] == -1) && (c != '\n'))
      printf("_");
    else
      if((ch > 64) && (ch < 91)) {
	index = (((ch - 65) + key[current_pos]) % 26) + 65;
	printf("%c", (char)(index));
	current_pos = (current_pos + 1) % keylength;
      }
      else
	if((ch > 96) && (ch < 123)) {
	  index = (((ch - 97) + key[current_pos]) % 26) + 97;
	  printf("%c", (char)(index));
	  current_pos = (current_pos + 1) % keylength;
	}
	else {
	  printf("%c", c);
          if( ((when_to_shift == 3) && (c != '\n')) ||
              ((when_to_shift == 2) && ((int)c > 32)) ) 
	    current_pos = (current_pos + 1) % keylength;
	}
  }
  fclose(infile);
  printf("\n\nEnter 'c' to continue: ");
  scanf("%s", input);
}

/*--------------------------------------------------------------------------
function: void save_plaintext(int keylength, int key[])
purpose:  Save the plaintext to a file
given:    int keylength - the length of the key
          int key[] - the key itself
returns:  no value returned.
--------------------------------------------------------------------------*/
void save_plaintext(int keylength, int key[])
{
  char c;
  int ch;
  int current_pos;
  int i;
  char input[80];
  int index;
  char outfilename[80];
  FILE* outfile;
  
  if(keylength == 0) {
    printf("\nCANNOT SAVE - find a keylength first.\n");
    return;
  }
  else
    for(i = 0; i < keylength; i++)
      if(key[i] == -1) {
	printf("\nTHE KEY IS NOT COMPLETE - an underscore (_)\n");
	printf("will be displayed where a blank key appears\n");
	printf("\nEnter 'c' to continue: ");
	scanf("%s", input);
	break;
      }
  printf("\nEnter the name of the output file: ");
  scanf("%s", outfilename);
  if((outfile = fopen(outfilename, "w")) == NULL) {
    printf("OSCAR ERROR - unable to open output file\n");
    fclose(outfile);
    return;
  }
  if((infile = fopen(infilename, "r")) == NULL) {
    printf("\nOSCAR ERROR - unable to open input file\n");
    fclose(infile);
    return;
  }
  current_pos = 0;
  printf("Saving %s...\n", outfilename);
  while(fscanf(infile, "%c", &c) != EOF) {
    ch = (int)c;
    if((key[current_pos] == -1) && (c != '\n'))
      fprintf(outfile, "_");
    else
      if((ch > 64) && (ch < 91)) {
	index = (((ch - 65) + key[current_pos]) % 26) + 65;
	fprintf(outfile, "%c", (char)(index));
	current_pos = (current_pos + 1) % keylength;
      }
      else
	if((ch > 96) && (ch < 123)) {
	  index = (((ch - 97) + key[current_pos]) % 26) + 97;
	  fprintf(outfile, "%c", (char)(index));
	  current_pos = (current_pos + 1) % keylength;
	}
	else {
	  fprintf(outfile, "%c", c);
	  if ( ((when_to_shift == 3) && (c != '\n')) ||
	       ((when_to_shift == 2) && (ch > 32)) )
	    current_pos = (current_pos + 1) % keylength;
	}
  }
  fclose(infile);
  fclose(outfile);
  printf("\tSave complete\n");
}

/*--------------------------------------------------------------------------
function: int get_the_key(int keylength, int rel_key[], int key[])
purpose:  determine the actual key given the relative key
given:    int keylength - the length of the key
          int rel_key[] - the relative key
          int key[] - where to put the computed key
--------------------------------------------------------------------------*/
int get_the_key(int keylength, int rel_key[], int key[])
{
  int this_char_freq[26];
  int current_pos;
  int i, j;
  char c;
  int ch;
  int count;
  int index;
  float temp;
  float variance;
  float char_dev;
  float best_char_dev = 99;
  int best_shift;

  if(keylength == 0) {
    printf("\nCANNOT COMPUTE KEY - find a keylength first!\n");
    return 0;
  }
  printf("\nOscar is building the key based on relative key: ");
  for(i = 0; i < keylength; i++)
    printf("%c", (char)(rel_key[i] + 65));
  printf("...\n");
  infile = fopen(infilename, "r");
  for(i = 0; i < 26; i++) {
    for(j = 0; j < 26; j++)
      this_char_freq[j] = 0;
    current_pos = 0;
    /* read the file */
    fseek(infile, 0, SEEK_SET);
    count = 0;
    while(fscanf(infile, "%c", &c) != EOF) {
      ch = (int)c;
      if((ch > 64) && (ch < 91)) {
	index = get_conv_char(ch, 65, rel_key[current_pos], i);
	this_char_freq[index]++;
	count++;
	current_pos = (current_pos + 1) % keylength;
      }
      else
	if((ch > 96) && (ch < 123)) {
	  index = get_conv_char(ch, 97, rel_key[current_pos], i);
	  this_char_freq[index]++;
	  count++;
	  current_pos = (current_pos + 1) % keylength;
	}
	else
	  if( ((when_to_shift == 3) && (c != '\n')) ||
              ((when_to_shift == 2) && (ch > 32)) )
	    current_pos = (current_pos + 1) % keylength;
    }
    /* compute the standard deviation on character frequencies */
    variance = 0;
    for(j = 0; j < 26; j++) {
      temp = (((float)norm_char_freq[j] / 1000) -
	      ((float)this_char_freq[j] / count));
      variance += (temp * temp);
    }
    char_dev = sqrtf(variance);
    /* save the best standard deviation */
    if(char_dev < best_char_dev) {
      best_char_dev = char_dev;
      best_shift = i;
    }
  }
  fclose(infile);
  /* set the key */
  for(i = 0; i < keylength; i++) {
    key[i] = (rel_key[i] + (26 - best_shift)) % 26;
  }
  for(i = 0; i < keylength; i++) {
    key[i] = (26 - key[i]) % 26;
  }
  printf("\tKey complete\n");  
  return keylength;
}

/*--------------------------------------------------------------------------
function: int get_conv_char(int ch, int offset, int rel_key, int shift)
purpose:  convert a character based on an offsest, relative key, and shift
given:    int ch - the character to convert
          int offset - 65 (uppercase) or 97 (lowercase)
          int rel_key - the relative key character
          int shift - the current shift try on the relative key character
returns:  the converted character
--------------------------------------------------------------------------*/
int get_conv_char(int ch, int offset, int rel_key, int shift)
{
  int orig_char;
  int key_char;

  orig_char = ch - offset;
  key_char = 26 - rel_key + shift;
  return (orig_char + key_char) % 26;
}
  

/*--------------------------------------------------------------------------
function: void get_keyshift(int keylength, int key[])
purpose:  determine the relative keyshift given an accurate keylength
given:    int keylength - the length of the key
          int key[] - where to put the relative key
returns:  no value returned.
--------------------------------------------------------------------------*/
void get_keyshift(int keylength, int key[])
{
  float mic;
  int rel_shift[MAXKEYLENGTH][MAXKEYLENGTH];
  int char_freq[27][MAXKEYLENGTH];
  int current_col;
  char c;
  int ch;
  int i, j, k, g;
  
  if(keylength == 0) {
    printf("\nCANNOT COMPUTE RELATIVE KEY - find a keylength first.\n");
    return;
  }
  printf("\nOscar is building a relative key based on keylength %d...\n",
	 keylength);
  for(i = 0; i < 27; i++)
    for(j = 0; j < keylength; j++)
      char_freq[i][j] = 0;
  /* read the file */
  infile = fopen(infilename, "r");
  current_col = 0;
  while(fscanf(infile, "%c", &c) != EOF) {
    ch = (int)c;
    if((ch > 64) && (ch < 91)) {
      char_freq[ch - 65][current_col]++;
      current_col = (current_col + 1) % keylength;
    }
    else
      if((ch > 96) && (ch < 123)) {
	char_freq[ch - 97][current_col]++;
	current_col = (current_col + 1) % keylength;
      }
      else
        if( ((when_to_shift == 3) && (c != '\n')) ||
            ((when_to_shift == 2) && (ch > 32)) )
	  current_col = (current_col + 1) % keylength;
  }
  fclose(infile);
  /* calculate the column totals */
  for(i = 0; i < 26; i++)
    for(j = 0; j < keylength; j++)
      char_freq[COLTOTAL][j] += char_freq[i][j];
  /* initialize the rel_key array */
  for(i = 0; i < keylength; i++) {
    for(j = 0; j < keylength; j++)
      rel_shift[i][j] = -1;
    key[i] = -1;
  }
  /* compute the mutual ic's */
  for(i = 0; i < keylength - 1; i++)
    for(j = i + 1; j < keylength ; j++)
      for(g = 26; g > 0; g--) {
	mic = 0;
	for(k = 0; k < 26; k++) {
	  mic += (float)char_freq[k][i] * (float)(char_freq[(k+g)%26][j]);
	}
	mic = mic / ((float)char_freq[COLTOTAL][i] *
		     (float)char_freq[COLTOTAL][j]);
	if(mic > .06) {
	  if(rel_shift[i][j] == -1)
	    rel_shift[i][j] = g;
	  else
	    if(rel_shift[i][j] != -2)
	      rel_shift[i][j] = -2;
	}
      }
  /* check to see which mutual ic's are useful */
  key[0] = 0;
  for(i = 0; i < keylength - 1; i++)
    for(j = i + 1; j < keylength; j++) {
      if(rel_shift[i][j] > -1) {
	if((key[i] != -1) && (key[j] == -1)) {
	  key[j] = (key[i] + rel_shift[i][j]) % 26;
	}
	else
	  if((key[i] == -1) && (key[j] != -1)) {
	    key[i] = (key[j] - rel_shift[i][j] + 26) % 26;
	  }
      }
    }
  printf("\tRelative Key complete\n");
}

/*--------------------------------------------------------------------------
function: int get_keylength(int methdo)
purpose:  determine the keylength that was used on the ciphertext
given:    int method - was AUTO or MANUAL (allows user to select) selected
returns:  int represented the keylength
--------------------------------------------------------------------------*/
int get_keylength(int method)
{
  float ic[MAXKEYLENGTH];
  int char_freq[27][MAXKEYLENGTH];
  float lowest_ics[MAXKEYLENGTH];
  int lowest_ics_keylength[MAXKEYLENGTH];
  float lowest_ic;
  int keylength = 0;
  int found_length = 0;
  int current_col;
  char c;
  int ch;
  char input[80];
  int i, j;

  if((infile = fopen(infilename, "r")) == NULL) {
    printf("\nOSCAR ERROR - unable to open input file\n");
    fclose(infile);
    return 0;
  }
  printf("\nOscar is checking for the probable keylength...\n");
  for(keylength = 1; keylength <= max_keylength; keylength++) {
    if((keylength % 50) == 0)
      printf("\tOscar has checked up to keylength %d\n", keylength);
    for(i = 0; i < 27; i++)
      for(j = 0; j < keylength; j++) {
	char_freq[i][j] = 0;
	ic[j] = 0;
      }
    /* read the file */
    fseek(infile, 0, SEEK_SET);
    current_col = 0;
    while(fscanf(infile, "%c", &c) != EOF) {
      ch = (int)c;
      if((ch > 64) && (ch < 91)) {
	char_freq[ch - 65][current_col]++;
	current_col = (current_col + 1) % keylength;
      }
      else
	if((ch > 96) && (ch < 123)) {
	  char_freq[ch - 97][current_col]++;
	  current_col = (current_col + 1) % keylength;
	}
        else
          if( ((when_to_shift == 3) && (c != '\n')) ||
              ((when_to_shift == 2) && (ch > 32)) )
	    current_col = (current_col + 1) % keylength;
    }
    /* compute the column totals */
    for(i = 0; i < 26; i++)
      for(j = 0; j < keylength; j++)
	char_freq[COLTOTAL][j] += char_freq[i][j];
    /* compute the ic numerators */
    for(i = 0; i < 26; i++)
      for(j = 0; j < keylength; j++)
	if(char_freq[i][j] > 1)
	  ic[j] += (float)char_freq[i][j] * (float)(char_freq[i][j] - 1);
    /* compute the ic's and store the lowest ic of the group */
    lowest_ic = 1;
    for(i = 0; i < keylength; i++) {
      ic[i] = ic[i] / ((float)char_freq[COLTOTAL][i] *
		       (float)(char_freq[COLTOTAL][i] - 1));
      if(ic[i] < lowest_ic)
	lowest_ic = ic[i];
    }
    lowest_ics[keylength-1] = lowest_ic;
    lowest_ics_keylength[keylength-1] = keylength;
  }
  fclose(infile);
  /* sort the lowest ics to find the largest of the lowest one */
  sort(lowest_ics, lowest_ics_keylength);
  /* if AUTO then return the keylength with best ic */
  if(method == AUTO) {
    printf("\tKeylength complete\n");
    return lowest_ics_keylength[0];
  }
  /* if MANUAL - report findings to user and let user select */
  printf("\nA valid IC should be near .065\n");
  printf("These are the best from this ciphertext:\n\n");
  printf("Keylength      Lowest IC in Group\n");
  for(i = 0; i < 10; i++)
    printf("%5d\t\t  %.4f\n", lowest_ics_keylength[i], lowest_ics[i]);
  if(lowest_ics[0] < .05)
    printf("\nNo recommended keylength!\n");
  else
    printf("\nRecommended keylength is %d\n", lowest_ics_keylength[0]);
  printf("\nEnter desired keylength: ");
  scanf("%d", &keylength);
  return keylength;
}

/*--------------------------------------------------------------------------
function: void sort(float lowest_ics[], int lowest_ics_keylength[])
purpose:  sort the ICs in descending order
given:    float lowest_ics[] - the ic's
          int lowest_ics_keylength - the corresponding keylengths for ic's
returns:  no value returned.
--------------------------------------------------------------------------*/
void sort(float lowest_ics[], int lowest_ics_keylength[])
{
  int temp;
  float tempf;
  int i, j;

  for(i = 0; i < max_keylength - 1; i++)
    for(j = i + 1; j < max_keylength; j++)
      if(lowest_ics[j] > lowest_ics[i]) {
	tempf = lowest_ics[j];
	lowest_ics[j] = lowest_ics[i];
	lowest_ics[i] = tempf;
	temp = lowest_ics_keylength[j];
	lowest_ics_keylength[j] = lowest_ics_keylength[i];
	lowest_ics_keylength[i] = temp;
      }
}



