// mipsasm.cpp  a very simple MIPS architecture assembler (incomplete)
// v1.1 opcode for cmpl and sll changed for cs411
// v1.2 opcode for beq, addi, cmpl changed for cs411
// v1.3 opcode for cmpl, and, addi changed for cs411
// v1.31 fix istrstream to istringstream, no longer compiled
// v1.4 opcode for cmpl, sll, srl changed for cs411
// v1.5 opcode change for fall05
// v1.5 opcode change for fall06
// v1.6 opcode change for spring07
// v1.7 opcode change for fall07
// v1.8 opcode change for spring08
// v1.9 opcode change for fall08 add div
// v1.10 opcode change for spring09 mul23,or12,beq33 use or for and
// v1.11 opcode change for fall09  use "and" for "or" in ALU
// v1.12 opcode change for spring 2010
// v1.13 opcode change for fall 2010
//
// mipsasm input_file output_file
//         input_file - typically *.asm is free format except labels must
//                      start in column one
//         output_file - typically *.abs is hex_address hex_content source_line
//
//  label: op reg,offs(xreg)  one of many formats
//         op reg,reg,reg     another format
//
//            op  is a mips opcode
//            reg can be simple number 0 to 31, $0 to $31, $sp, $t7 etc
//            ":"  ","  "("  ")"  "+"  "-"  "/"  "*"  ";"  "\t" are all
//            converted to space, so you can enter any line with the
//            fields separated just by spaces (a "-" in front of a
//            number is therefore dropped)
//
//            text after the last possible field, separated from it by a
//            blank, is ignored as a comment
//            //  /*  --  #  ;   each make the rest of the line a comment
//
// compile  g++ -o mipsasm  mipsasm.cpp
//      or  cl /GX /ML mipsasm.cpp

#include <cctype>                    // isdigit
#include <cstring>                   // memcpy
#include <fstream>                   // for file I/O
#include <iostream>                  // for basic cout
#include <set>                       // for lookup tables
#include <sstream>                   // for string streams (istringstream)
#include <string>                    // for input buffer
using namespace std;                 // bring standard names into scope

// Forward declarations of the file-local helpers defined after main().
static void tblbuild();              // build regtab and optab
// write val to out_file as eight upper-case hex digits
static void hexout(fstream &out_file, int &val);
// value of a numeric literal, or the symtab location of a symbol
// (0 when the symbol is not defined)
static int  get_addr(string &field);
// bind symbol lab to loc in symtab, replacing any earlier binding
static void set_addr(string &lab, int loc);
// Trace level. Changed by the "debug" directive (main() forces values
// outside 0..2 to 2). main() prints trace output while debug>=pass and
// dumps the symbol table at the end of the run when debug is nonzero.
static int debug=0;

// One name -> number binding; the element type of symtab and regtab.
// Ordering and equality look at the name only, so a set of symloc holds
// at most one entry per name and can be searched with a dummy loc.
class symloc
{
  public:
    symloc(string aname, int aloc) : name(aname), loc(aloc) {}

    // strict weak ordering on the name alone (loc is ignored)
    bool operator< (const symloc &a) const { return name < a.name; }

    // two entries are "the same symbol" when their names match
    bool operator== (const symloc &a) const { return name == a.name; }

    // rebind this name to a new number
    void setloc(int aloc) { loc = aloc; }

  public:
    string name;  // spelling of the symbol or register (the lookup key)
    int loc;      // number bound to name
};

// One row of the opcode table (optab): a mnemonic or directive name plus
// the numbers main() needs to encode it. Ordering and equality look at
// the name only, so optab can be searched with a dummy row.
class optype
{
  public:
    // Plain assignments are used instead of an initializer list on
    // purpose: some platforms' system headers define function-like
    // macros major(x)/minor(x), which "major(amajor)" would trigger.
    optype(string aname, int aopfmt, int amajor, int aminor,
           int afields,  int afix)
    {
      name   = aname;
      opfmt  = aopfmt;
      major  = amajor;
      minor  = aminor;
      fields = afields;
      fix    = afix;
    }

    // strict weak ordering on the name alone
    bool operator< (const optype &a) const { return name < a.name; }

    // two rows describe the same operation when their names match
    bool operator== (const optype &a) const { return name == a.name; }

  public:
    string name;  // mnemonic or directive spelling (the lookup key)
    int opfmt;    // 0=reg, 1=mem, 2=beq, 3=jump, 4=shift
                  // 5=float and double, 6=word, 6=data,
                  // 7=org, 8=set, 9=debug
    int major;    // bits 31 downto 26
    int minor;    // bits 5 downto 0
    int fields;   // registers + addresses
    int fix;      // other
};
 
// Global lookup tables and the iterators used to search them.
// symtab: symbols defined by the source being assembled; written by
//         set_addr() and read by get_addr().
set<symloc, less<symloc> > symtab;
// regtab: register spellings and their numbers; filled once by tblbuild().
set<symloc, less<symloc> > regtab;
// p_sym: scratch iterator shared by main(), set_addr() and get_addr() for
//        lookups in symtab and regtab.
set<symloc, less<symloc> >::iterator p_sym;
// optab: mnemonics and directives with their encoding data; filled once
//        by tblbuild().
set<optype, less<optype> > optab;
// p_op: result of the opcode lookup main() does for each source line.
set<optype, less<optype> >::const_iterator p_op;

int main(int argc, char* argv[])
{
  fstream my_input;        // assembly language input file handle
  fstream my_output;       // hex absolute memory image output
  string  line;            // just a place for inputting
  string  parse_line;      // for parsing
  char    basic_line[128]; // raw input
  char    tab = '\t';
  char    space = ' ';
  bool    do_blank;
  string  lab, op, field[3];          // parsed input line
  int     regval[3];                  // values of registers
  int     opkind, opfields, opmajor, opminor, opfix;
  int     loc=0;
  int     val=0;
  int     i;
  int     pass;
  
  tblbuild();

  if(argc != 3)
  {
    cout << "mipsasm  input_file  output_file" << endl;
    return 1;
  } 

  //                                   xx.asm                 xx.abs
  cout << "mipsasm v1.13 assembling " << argv[1] << " into " <<
          argv[2] << endl;

  my_input.open(argv[1], ios::in);   // open input file
  if(!my_input)                      // believe it!, anything can go wrong
  {
    cout << "can not open " << argv[1] << " for reading" << endl;
    return 1;
  }

  my_output.open(argv[2], ios::out);  // creates file if necessary and opens
  if(!my_output)                      // believe it!, anything can go wrong
  {
    cout << "can not open " << argv[2] << " for writing" << endl;
    return 1;
  }
  
  for(pass=1; pass<=2; pass++) // two pass assembler
  {
    if(pass==2)
    {  
      my_input.close();
      my_input.open(argv[1], ios::in);
      loc = 0; // reset location counter
    }
    while(!my_input.eof()) // main loop over source code
    {
      lab="";
      op="";
      field[0]="";
      field[1]="";
      field[2]=""; // parsed input line

      my_input.getline(basic_line, 128, '\n');
      line = string(basic_line); // save for output
      if(line.length()<1) continue; // blank line
      parse_line = line; // modify for C++ ">>" reading
      if(debug>=pass) cout << "input=" << line << endl;

      // blank comments
      do_blank = false;
      for(i=0; i<parse_line.length(); i++)
      {
        if(do_blank) {parse_line[i]=space; continue;}
        if(parse_line[i]=='#') {parse_line[i]=space; do_blank=true;}
        if(parse_line[i]==';') {parse_line[i]=space; do_blank=true;}
        if(i+1 >= parse_line.length()) continue;
        if(parse_line[i]=='/' && (parse_line[i+1]=='/' ||
          parse_line[i+1]=='*')) {parse_line[i]=space; do_blank=true;}
        if(parse_line[i]=='-' && parse_line[i+1]=='-')
          {parse_line[i]=space; do_blank=true;}
      }

      // eliminate punctuation
      for(i=0; i<parse_line.length(); i++)
      {
        if(parse_line[i]==':' || parse_line[i]==',' || parse_line[i]=='(' ||
           parse_line[i]=='/' || parse_line[i]=='*' || parse_line[i]==')' ||
           parse_line[i]=='+' || parse_line[i]=='-' || parse_line[i]==';' ||
           parse_line[i]==tab) parse_line[i]=space;
      }
      if(debug>=pass) cout << "parse=" << parse_line << endl;

      istringstream parse(parse_line);
      // check for label (first column not space or tab)
      if(parse_line[0]!=space)
      {
        parse >> lab;
        set_addr(lab, loc);
      }

      parse >> op;
      if(op=="end") break; // out of while loop

      // look up opcode, determine format, fill in fields    
      p_op = optab.find(optype(op,0,0,0,0,0));
      if(p_op==optab.end()) continue; // op not found, ignore line
      opkind   = p_op->opfmt;
      opfields = p_op->fields;
      opmajor  = p_op->major;
      opminor  = p_op->minor;
      opfix    = p_op->fix;
      regval[0]= 0;
      regval[1]= 0;
      regval[2]= 0;
   
      for(i=0; i<opfields; i++)
      {
        parse >> field[i];
        p_sym = regtab.find(symloc(field[i],0));
        if(p_sym==regtab.end()) regval[i]=0;
        else regval[i] = p_sym->loc;
      }

      if(debug>=pass)
      {
        cout << "lab=" << lab << "  op=" << op << "  f[0]=" <<
          field[0] << "  f[1]=" << field[1] << "  f[2]=" <<
          field[2] << endl;
        cout << "opkind=" << opkind << "  opmajor=" << opmajor <<
          "  opminor=" << opminor << "  opfields=" << opfields <<
          "  opfix=" << opfix << endl;
        cout << "regval[0]=" << regval[0] << "  regval[1]=" <<
          regval[1] << "  regval[2]=" << regval[2] << 
          "  loc=" << loc << endl;
      }
      switch(opkind)
      {
        case 0: // op rd rs rt
          val = opmajor<<26;
          val = val | opminor;
          val = val | (regval[0]<<11);
          val = val | (regval[1]<<21);
          val = val | (regval[2]<<16);
        break;
        case 1: // op rd adr rx
          val = opmajor<<26;
          val = val | (regval[0]<<16);
          val = val | (regval[2]<<21);
          val = val | (get_addr(field[1])&65535);
        break;
        case 2: // beq bne  rs rt
          val = opmajor<<26;
          val = val | (regval[0]<<21);
          val = val | (regval[1]<<16);
          val = val | (((get_addr(field[2])-(loc+4))/4)&65535); // does offset
        break;
        case 3: // jump
          val = opmajor<<26;
          val = val | (get_addr(field[0])/4);
        break;
        case 4: // op rd rs shf
          val = opmajor<<26;
          val = val | opminor;
          val = val | (regval[0]<<11);
          val = val | (regval[1]<<16);
          val = val | (regval[2]<<6); // actually shift count
        break;
        case 5: // float and double
          val = opmajor<<26;
          val = val | opminor;
          val = val | (opfix<<21);
          if(opfields==3)
          {
            val = val | (regval[0]<<6);
            val = val | (regval[1]<<16);
            val = val | (regval[2]<<11);
          }
          else
          {
            val = val | (regval[0]<<6);
            val = val | (regval[1]<<11);
          }
        break;
        case 6: // word or data
          val = get_addr(field[0]);
        break;
        case 7: // org
          loc = get_addr(field[0]);
          continue; // skip increment at end of loop
        break;
        case 8: // set
          set_addr(lab, get_addr(field[0]));
          continue;
        break;
        case 9: // debug 0=off, 1=pass1 and 2. 2=just pass2
          debug = get_addr(field[0]);
          if(debug>2 || debug<0) debug=2;
          continue;
        break;
        default:
          continue;
      }
      if(pass==2)
      {
        // build output (hex)
        hexout(my_output, loc);
        my_output << " ";
        hexout(my_output, val);
        my_output << " " << line << endl;
        if(debug>=pass) cout << hex << loc << "=loc, " << hex << val <<
                          "=val" << endl << endl;
      }
      loc=loc+4; // increment location counter
      if(my_input.eof()) break;
    }  // end while loop
} // end pass loop

  my_output.flush();      // force file to disk.
  my_output.close();      // close the file
  
  cout << argv[2] << " written" << endl;
  if(debug)
  {
    cout << "Symbol table" << endl;
    for(p_sym=symtab.begin(); p_sym!=symtab.end(); p_sym++)
      cout << p_sym->name << " " << p_sym->loc << endl;
  }
  return 0;
} // end main

static void set_addr(string &lab, int loc)
{
  p_sym = symtab.find(symloc(lab,0));
  if(p_sym==symtab.end()) symtab.insert(symloc(lab,loc));
  else{symtab.erase(p_sym); symtab.insert(symloc(lab,loc));}
      // p_sym->setloc(loc); // replace .erase and .insert
  return;
}

// Turn one operand field into a 32-bit value.
//   field starting with a digit:
//     "0x..." / "0X..."  hexadecimal, full 32-bit range (0xFFFFFFFF is
//                        accepted and returned as the same bit pattern)
//     "d.ddd"            second character is a point: read as a float and
//                        return the bit pattern of that float
//     anything else      unsigned decimal, full 32-bit range
//   any other field:     looked up in symtab as a symbol
// Returns 0 for a number that can not be read and for an undefined
// symbol. Uses the global iterator p_sym as scratch for the lookup.
static int get_addr(string &field)
{
  int result = 0;
  // guarded reads: the field may be empty or a single character
  const char first  = field.empty()      ? '\0' : field[0];
  const char second = field.length() > 1 ? field[1] : '\0';

  // isdigit is only defined for unsigned char values (and EOF)
  if(isdigit(static_cast<unsigned char>(first)))
  {
    istringstream buf(field);
    if(first=='0' && (second=='x' || second=='X'))
    {
      unsigned int bits = 0;   // unsigned so bit 31 may be set
      char junk;
      buf >> junk >> junk;     // step over the "0x" prefix
      buf >> hex >> bits;
      result = static_cast<int>(bits);
    }
    else if(second=='.') // second character must be a point
    {
      float fval = 0.0f;
      buf >> fval;
      // copy the float's bits; assumes float and int have the same size
      memcpy(&result, &fval, sizeof(result));
    }
    else
    {
      unsigned int bits = 0;   // unsigned so values above INT_MAX fit
      buf >> bits;
      result = static_cast<int>(bits);
    }
  }
  else
  {
    p_sym = symtab.find(symloc(field,0));
    if(p_sym != symtab.end()) result = p_sym->loc;
  }
  return result;
}

// Write val to out_file as exactly eight upper-case hexadecimal digits,
// most significant digit first, with no prefix and no separator.
static void hexout(fstream &out_file, int &val)
{
  static const char digit[] = "0123456789ABCDEF";

  // walk the eight 4-bit groups from bits 31..28 down to bits 3..0
  for(int shift=28; shift>=0; shift-=4)
    out_file << digit[(val >> shift) & 0xf];
}


// no string my_input.get(word, 70);
// no my_input.getline(word);

// Add the 32 registers named <prefix>0 .. <prefix>31 to regtab, each
// bound to its own number.
static void add_numbered_regs(const string &prefix)
{
  for(int n=0; n<32; n++)
  {
    ostringstream name;
    name << prefix << n;
    regtab.insert(symloc(name.str(), n));
  }
}

// Fill the two fixed lookup tables; called once at the start of main().
//   optab  - every mnemonic and directive with its format and opcode
//            numbers (the opcode values follow the course-specific
//            history noted at the top of the file)
//   regtab - every accepted register spelling with its number:
//            $0..$31, $f0..$f31, plain 0..31 and the symbolic names.
//            The symbolic names include the standard $zero, $at, $k0,
//            $k1, $gp and $sp; the older spellings $t10, $t11, $tgp and
//            $tsp for registers 26..29 are kept so existing sources
//            still assemble.
static void tblbuild()
{
  //                  opname, typ, maj, min, #f, fix
  optab.insert(optype("nop",    0,   0,   0,  0,  0));
  optab.insert(optype("break",  0,   0,  13,  0,  0));
  optab.insert(optype("add",    0,   0,  32,  3,  0));
  optab.insert(optype("sub",    0,   0,  34,  3,  0));
  optab.insert(optype("and",    0,   0,  10,  3,  0)); //f03,05,f07,s09
  optab.insert(optype("or",     0,   0,  14,  3,  0)); //s07,s09,s10
  optab.insert(optype("mul",    0,   0,  26,  3,  0)); //f08,s09,f09,f10
  optab.insert(optype("div",    0,   0,  24,  3,  0));
  optab.insert(optype("lw",     1,  35,   0,  3,  0));
  optab.insert(optype("sw",     1,  43,   0,  3,  0));
  optab.insert(optype("lwc1",   1,  49,   0,  3,  0));
  optab.insert(optype("swc1",   1,  57,   0,  3,  0));
  optab.insert(optype("ldc1",   1,  53,   0,  3,  0));
  optab.insert(optype("sdc1",   1,  61,   0,  3,  0));
  optab.insert(optype("addi",   1,  15,   0,  3,  0)); // s03,f03,f10
  optab.insert(optype("beq",    2,  39,   0,  3,  0)); // s03,f05,f06,s07,f07,s08,f08,s09,f09,s10,f10
  optab.insert(optype("bne",    2,   5,   0,  3,  0));
  optab.insert(optype("j",      3,   2,   0,  1,  0));
  optab.insert(optype("sll",    4,   0,   2,  3,  0)); // f04,f05,f06
  optab.insert(optype("srl",    4,   0,   5,  3,  0)); // f04,f05,f06,s10,f10
  optab.insert(optype("cmpl",   4,   0,  15,  2,  0)); // s03,f03, f04,s07,s10
  optab.insert(optype("add.s",  5,  17,   0,  3, 16));
  optab.insert(optype("add.d",  5,  17,   0,  3, 17));
  optab.insert(optype("sub.s",  5,  17,   1,  3, 16));
  optab.insert(optype("sub.d",  5,  17,   1,  3, 17));
  optab.insert(optype("mul.s",  5,  17,   2,  3, 16));
  optab.insert(optype("mul.d",  5,  17,   2,  3, 17));
  optab.insert(optype("div.s",  5,  17,   3,  3, 16));
  optab.insert(optype("div.d",  5,  17,   3,  3, 17));
  optab.insert(optype("abs.s",  5,  17,   5,  2, 16));
  optab.insert(optype("abs.d",  5,  17,   5,  2, 17));
  optab.insert(optype("mov.s",  5,  17,   6,  2, 16));
  optab.insert(optype("mov.d",  5,  17,   6,  2, 17));
  optab.insert(optype("neg.s",  5,  17,   7,  2, 16));
  optab.insert(optype("neg.d",  5,  17,   7,  2, 17));
  optab.insert(optype("word",   6,   0,   0,  1,  0));
  optab.insert(optype("data",   6,   0,   0,  1,  0));
  optab.insert(optype("org",    7,   0,   0,  1,  0));
  optab.insert(optype("set",    8,   0,   0,  1,  0));
  optab.insert(optype("debug",  9,   0,   0,  1,  0));

  // numbered spellings: $0..$31, $f0..$f31 and bare 0..31 (the bare
  // numbers also serve as shift counts for the shift format)
  add_numbered_regs("$");
  add_numbered_regs("$f");
  add_numbered_regs("");

  // symbolic spellings
  static const struct { const char *name; int num; } alias[] =
  {
    {"$zero", 0}, {"$at",   1},
    {"$v0",   2}, {"$v1",   3},
    {"$a0",   4}, {"$a1",   5}, {"$a2",   6}, {"$a3",   7},
    {"$t0",   8}, {"$t1",   9}, {"$t2",  10}, {"$t3",  11},
    {"$t4",  12}, {"$t5",  13}, {"$t6",  14}, {"$t7",  15},
    {"$s0",  16}, {"$s1",  17}, {"$s2",  18}, {"$s3",  19},
    {"$s4",  20}, {"$s5",  21}, {"$s6",  22}, {"$s7",  23},
    {"$t8",  24}, {"$t9",  25},
    {"$k0",  26}, {"$k1",  27}, {"$gp",  28}, {"$sp",  29},
    {"$t10", 26}, {"$t11", 27}, {"$tgp", 28}, {"$tsp", 29}, // legacy names
    {"$fp",  30}, {"$ra",  31}
  };
  const int alias_count = sizeof(alias)/sizeof(alias[0]);
  for(int k=0; k<alias_count; k++)
    regtab.insert(symloc(alias[k].name, alias[k].num));
  return;
}


