
/************************************************************************
 *                      Myron Kennedy                                   *
 *                                                                      *
 *                      CMSC443                                         *
 *                                                                      *
 *                                                                      *
 *  This program uses Friedman's method to approximate the key length   *
 *  of a running key cipher.  It reads from an input file, gets a total *
 *  letter count, calculates the frequencies of the letters, finds the  *
 *  index of coincidence, and gives an approximate value for the key    *
 *  length (usually within +1 or -1 of actual keylength value).  Use    *
 *  "cc" to compile.                                                    * 
 *                                                                      *
 *         run format:  executable_file  input_file output_file         *
 *                                                                      *
 *  Results of the program are found in the output_file; the user does  *
 *  not have to provide any inputs for an interactive session.          *
 ************************************************************************/

#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
 
#define ALPHA_SIZE  26           /* # of letters in English alphabet */
   
  /* function prototypes */
  /* InitializeArrays: sets every element of both arrays to 0. */
void InitializeArrays (double *, double *);
  /* CharacterCount: tallies the letters read from the first stream into
     the array and copies the input to the second stream. */
void CharacterCount (FILE *, FILE *, double *);
  /* ShowFrequencies: writes each letter's count (first array) and
     relative frequency (second array) to the stream. */
void ShowFrequencies (FILE *, double *, double *);
  /* IndexOfCoincidence: writes and returns the index of coincidence for
     a total letter count and the per-letter counts. */
double IndexOfCoincidence (FILE *, double, double *);
  /* GetKeyLength: writes the key-length estimate derived from the total
     letter count and the index of coincidence. */
void GetKeyLength (FILE *, double, double);

/*****************************  main  ***********************************
 Program entry point.  Expects exactly two arguments: the file to analyse
 and the file that receives the report.  Counts the letters of the input,
 writes the per-letter statistics, and, when at least two letters were
 read, the index of coincidence and the key-length estimate.

 Returns EXIT_SUCCESS when the report was written, EXIT_FAILURE on a usage
 error or when a file cannot be opened or the report cannot be flushed.
 ************************************************************************/
int main (int argc, char **argv)
{
  FILE  *Input, *Output;

  int j;
  double ltr_cnt,                /* total number of letters     */
         IC,                     /* the index of coincidence    */
         ch_count[ALPHA_SIZE],   /* stores count of each letter */
         freq[ALPHA_SIZE];       /* stores relative frequencies */

    /* check for correct command line format */
  if (argc != 3)
    {
       /* argv[0] is a null pointer when argc is 0 */
     printf("\n\t Usage: %s input_file output_file\n",
            (argc > 0) ? argv[0] : "program");
     return EXIT_FAILURE;
    }

    /* open the input and output files; stop if either is unavailable */
  Input = fopen(argv[1], "r");
  if (Input == NULL)
    {
     perror(argv[1]);
     return EXIT_FAILURE;
    }
  Output = fopen(argv[2], "w");
  if (Output == NULL)
    {
     perror(argv[2]);
     fclose(Input);
     return EXIT_FAILURE;
    }

  InitializeArrays(ch_count, freq);
  CharacterCount(Input, Output, ch_count);

    /* get total number of letters in file */
  ltr_cnt = 0;
  for (j = 0; j < ALPHA_SIZE; j++)
     ltr_cnt = ltr_cnt + ch_count[j];
  fprintf(Output, "\n\n\t *** Total number of letters = %0.0lf ***",
                   ltr_cnt);

    /* get a frequency count for each letter; with no letters at all the
       frequencies stay at the 0 set by InitializeArrays instead of 0/0 */
  if (ltr_cnt > 0)
    {
     for (j = 0; j < ALPHA_SIZE; j++)
       freq[j] = ch_count[j]/ltr_cnt;
    }
  ShowFrequencies(Output, ch_count, freq);

    /* the index of coincidence divides by n*(n-1), so it needs n >= 2 */
  if (ltr_cnt < 2)
    {
     fprintf(Output,
             "\n\n\t *** Too few letters to estimate a key length ***\n\n");
    }
  else
    {
     IC = IndexOfCoincidence(Output, ltr_cnt, ch_count);
     GetKeyLength(Output, ltr_cnt, IC);
    }

    /* close the input and output files; a failed close of the report
       means buffered output was lost */
  fclose(Input);
  if (fclose(Output) != 0)
    {
     perror(argv[2]);
     return EXIT_FAILURE;
    }
  return EXIT_SUCCESS;
}


/***********************  InitializeArrays  *****************************
 Clears the per-letter tables: the first 26 elements of both cnt and freq
 are set to 0.  Both arrays must hold at least 26 doubles.
 ************************************************************************/
void InitializeArrays (double *cnt, double *freq)
{
  double *c = cnt;
  double *f = freq;
  const double *stop = cnt + 26;   /* one past the last letter slot */

  while (c != stop)
    {
     *c++ = 0;
     *f++ = 0;
    }
}


/**********************  CharacterCount  ********************************
 Reads Input one character at a time until end of file.  Each letter,
 upper or lower case, adds 1 to its slot in count (slot 0 is A, slot 25
 is Z); every other character is ignored for counting.  A heading and
 then every character read, letter or not, are written to Output.
 ************************************************************************/
void CharacterCount (FILE *Input, FILE *Output, double *count)
{
    /* slot i of count belongs to lower[i] and upper[i] */
  static const char lower[] = "abcdefghijklmnopqrstuvwxyz";
  static const char upper[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
  const int letters = (int)(sizeof lower - 1);
  int c, slot;

  fputs("\n\n This is the original input, read from a file:\n\n", Output);

  for (c = getc(Input); c != EOF; c = getc(Input))
    {
      for (slot = 0; slot < letters; slot++)
        {
          if (c == lower[slot] || c == upper[slot])
            {
              count[slot] += 1;
              break;             /* a character matches at most one slot */
            }
        }

      putc(c, Output);           /* echo the character just read */
    }
}


/*************************  ShowFrequencies  ****************************
 Writes a two-column table to Output: for each letter A through Z, the
 number of occurrences taken from count and the relative frequency taken
 from frequency.  Both arrays are read at indices 0 .. ALPHA_SIZE-1.
 ************************************************************************/
void ShowFrequencies (FILE *Output, double *count, double *frequency)
{
  int idx = 0;
  char letter = 'A';             /* label printed beside slot idx */

  fputs("\n\n\t *** Probabilities of letter frequencies *** ", Output);
  fputs("\n\n\t\t Occurrences      Frequency", Output);

  while (idx < ALPHA_SIZE)
    {
     fprintf(Output, "\n\t\t   %c = %4.0lf" "        %lf",
                     letter, count[idx], frequency[idx]);
     letter++;
     idx++;
    }
}


/**********************  IndexOfCoincidence *****************************
 Computes the index of coincidence of a text from its letter counts:

     IC = sum over letters of  count[i] * (count[i] - 1)
          ------------------------------------------------
                         ch * (ch - 1)

 Output - stream that receives the "Index of Coincidence" line
 ch     - total number of letters in the text
 count  - per-letter counts, indices 0 .. ALPHA_SIZE-1

 Returns the index of coincidence.  When ch * (ch - 1) is not positive
 (fewer than two letters) the index is undefined, so 0 is written and
 returned instead of dividing by zero.
 ************************************************************************/
double IndexOfCoincidence (FILE *Output, double ch, double count[])
{
  int i;
  double IC = 0.0;                /* must start at 0: it is accumulated */
  double pairs = ch * (ch - 1);   /* ordered letter pairs in the text   */

  if (pairs > 0)
    {
     for (i = 0; i < ALPHA_SIZE; i++)
       IC += (count[i] * (count[i] - 1))/pairs;
    }

  fprintf(Output, "\n\n\t *** Index of Coincidence = %lf", IC);
  return IC;
}


/**************************  GetKeyLength  ******************************
 Evaluates Friedman's formula

     len = 0.027 * ch / ((ch - 1) * IC - 0.038 * ch + 0.065)

 and writes both the raw value and the integer key-length guess to Output.

 Output - stream that receives the result
 ch     - total number of letters in the text
 IC     - index of coincidence of the text

 The integer guess is the raw value truncated toward zero plus one.  When
 the raw value cannot be converted to an int (it is NaN, infinite, or out
 of int range, which happens when the denominator is at or near zero) the
 key length is reported as "undetermined" instead of converting it.
 ************************************************************************/
void GetKeyLength (FILE *Output, double ch, double IC)
{
  double len;
  int key_length;

    /* a zero denominator yields an infinity or NaN under IEEE 754
       arithmetic; the range check below catches both */
  len = (0.027 * ch)/(((ch - 1) * IC) - (0.038 * ch) + 0.065);

  fprintf(Output, "\n\n\t Length using Friedman's formula = %lf", len);
  fprintf(Output, "\n\t ******************************************");

    /* both comparisons are false for NaN, so NaN takes the else branch;
       staying strictly below INT_MAX keeps the + 1 from overflowing */
  if (len > (double)INT_MIN && len < (double)INT_MAX)
    {
       /* truncate toward zero, then add one */
     key_length = (int)(len) + 1;
     fprintf(Output, "\n\t *             KEY LENGTH = %d             *",
                       key_length);
    }
  else
    {
     fprintf(Output, "\n\t *       KEY LENGTH = undetermined        *");
    }

  fprintf(Output, "\n\t ******************************************");
  fprintf(Output, "\n\n");
}


