// mipsasm.cpp  a very simple MIPS architecture assembler (incomplete)
// v1.1  opcode for cmpl and sll changed for cs411
// v1.2  opcode for beq, addi, cmpl changed for cs411
//
// usage:  mipsasm input_file output_file
//   input_file  - typically *.asm, free format except labels must
//                 start in column one
//   output_file - typically *.abs, each line is
//                 hex_address hex_content source_line
//
//   label: op reg,offs(xreg)    one of many formats
//          op reg,reg,reg       another format
//
//   op  is a mips opcode (see tblbuild for the supported set)
//   reg can be a simple number 0 to 31, $0 to $31, $f0 to $f31,
//       or a symbolic name such as $sp, $t7
//   ":" "," "(" ")" "\t" are all converted to space so you
//       can enter any line just space separating fields
//
//   comments are OK after a blank after the last possible field;
//   any of   //   /*   --   #   ;   makes the rest of the line a comment
//
//   numeric operands: decimal (optionally signed), 0x hex, or a float
//   whose second character is '.' (stored as its raw bit pattern)
//
//   directives: word/data value, org address, set value (needs a label),
//               debug level, end
//
// compile  g++ -o mipsasm mipsasm.cpp
//     or   cl /GX /ML mipsasm.cpp
//
// NOTE(review): the archived copy of this file had lost every "<...>" span
// (include names, template arguments, class optype, three loop headers).
// Those parts were rebuilt from how they are used below and from the
// description above - confirm against an intact copy if one exists.

#include <fstream>   // for file I/O
#include <string>    // for strings
#include <iostream>  // for basic cout
#include <sstream>   // for splitting a line into fields
#include <set>       // for lookup tables
#include <cctype>    // isdigit
#include <cstdlib>   // strtol, strtoul, strtod
#include <cstring>   // memcpy
using namespace std; // bring standard names into scope

static void tblbuild(); // build regtab and optab
static void hexout(ostream &out_file, int val);
static int  get_addr(const string &field);
static void set_addr(const string &lab, int loc);
static void blank_comments(string &text);
static void blank_punctuation(string &text);
static int  debug=0;    // trace level, changed by the "debug" directive

// One name -> value pair; used for both the symbol table and register table.
class symloc
{
public:
  symloc(string aname, int aloc){name=aname; loc=aloc;}
  bool operator< (const symloc &a) const {return name < a.name;}
  string name; // lookup key
  int    loc;  // address (symtab) or register number (regtab)
};

// One opcode or directive description, keyed by mnemonic.
class optype
{
public:
  // Members are assigned in the body rather than an initializer list because
  // some older C libraries define function-like macros major() and minor().
  optype(string aname, int aopfmt, int amajor, int aminor,
         int afields, int afix)
  {
    name=aname; opfmt=aopfmt; major=amajor; minor=aminor;
    fields=afields; fix=afix;
  }
  bool operator< (const optype &a) const {return name < a.name;}
  string name;   // mnemonic, lookup key
  int    opfmt;  // encoding format selector, see the switch in main
  int    major;  // bits 31..26
  int    minor;  // low-order function bits
  int    fields; // number of operand fields to read from the line
  int    fix;    // bits 25..21 for format 5 (float/double selector)
};

typedef set<symloc> symset;
typedef set<optype> opset;

symset symtab; // labels and "set" symbols
symset regtab; // register names
opset  optab;  // opcodes and directives


int main(int argc, char* argv[])
{
  fstream my_input;  // assembly language input file handle
  fstream my_output; // hex absolute memory image output
  string line;       // raw input line, echoed to the output file
  string parse_line; // copy of line, modified for ">>" reading
  const char space = ' ';
  string lab, op, field[3]; // parsed input line
  int regval[3];            // register numbers found for each field
  int opkind, opfields, opmajor, opminor, opfix;
  int loc=0;                // location counter
  int val=0;                // assembled word
  int i;
  int pass;

  tblbuild();

  if(argc != 3)
  {
    cout << "mipsasm input_file output_file" << endl;
    return 1;
  }
  cout << "mipsasm v1.2 assembling " << argv[1] << " into "
       << argv[2] << endl;

  my_input.open(argv[1], ios::in); // open input file
  if(!my_input) // believe it!, anything can go wrong
  {
    cout << "can not open " << argv[1] << " for reading" << endl;
    return 1;
  }

  my_output.open(argv[2], ios::out); // creates file if necessary and opens
  if(!my_output) // believe it!, anything can go wrong
  {
    cout << "can not open " << argv[2] << " for writing" << endl;
    return 1;
  }

  // Pass 1 only collects label addresses; pass 2 repeats the same walk
  // with all symbols known and writes the output file.
  for(pass=1; pass<=2; pass++)
  {
    if(pass==2)
    {
      my_input.close();
      my_input.clear(); // drop eof/fail left over from pass 1
      my_input.open(argv[1], ios::in);
      loc = 0; // reset location counter
    }

    // std::getline has no fixed buffer, so an over-long source line can no
    // longer wedge the stream in a failed state that never reaches eof.
    while(getline(my_input, line)) // main loop over source code
    {
      lab=""; op=""; field[0]=""; field[1]=""; field[2]="";
      if(line.length()<1) continue; // blank line

      parse_line = line;
      if(debug>=pass) cout << "input=" << line << endl;

      blank_comments(parse_line);
      blank_punctuation(parse_line);
      if(debug>=pass) cout << "parse=" << parse_line << endl;

      istringstream parse(parse_line);

      // check for label (first column not space or tab; tab is already space)
      if(parse_line[0]!=space)
      {
        parse >> lab;
        set_addr(lab, loc);
      }
      parse >> op;
      if(op=="end") break; // out of while loop

      // look up opcode, determine format, fill in fields
      const opset::const_iterator p_op = optab.find(optype(op,0,0,0,0,0));
      if(p_op==optab.end()) continue; // op not found, ignore line
      opkind   = p_op->opfmt;
      opfields = p_op->fields;
      opmajor  = p_op->major;
      opminor  = p_op->minor;
      opfix    = p_op->fix;
      regval[0]= 0;
      regval[1]= 0;
      regval[2]= 0;
      for(i=0; i<opfields; i++)
      {
        parse >> field[i];
        // every field is tried as a register; non-registers yield 0 here
        // and are resolved through get_addr where the format needs it
        const symset::const_iterator p_reg = regtab.find(symloc(field[i],0));
        if(p_reg==regtab.end()) regval[i]=0;
        else                    regval[i] = p_reg->loc;
      }

      if(debug>=pass)
      {
        cout << "lab=" << lab << " op=" << op << " f[0]=" << field[0]
             << " f[1]=" << field[1] << " f[2]=" << field[2] << endl;
        cout << "opkind=" << opkind << " opmajor=" << opmajor
             << " opminor=" << opminor << " opfields=" << opfields
             << " opfix=" << opfix << endl;
        cout << "regval[0]=" << regval[0] << " regval[1]=" << regval[1]
             << " regval[2]=" << regval[2] << " loc=" << loc << endl;
      }

      // Build the word in unsigned arithmetic: opmajor<<26 overflows a
      // signed int for any opcode of 32 or more (lw, sw, lwc1, ...).
      const unsigned int r0 = static_cast<unsigned int>(regval[0]);
      const unsigned int r1 = static_cast<unsigned int>(regval[1]);
      const unsigned int r2 = static_cast<unsigned int>(regval[2]);
      unsigned int word = static_cast<unsigned int>(opmajor) << 26;

      switch(opkind)
      {
        case 0: // op rd rs rt
          word |= static_cast<unsigned int>(opminor);
          word |= r0<<11;
          word |= r1<<21;
          word |= r2<<16;
          break;
        case 1: // op rd adr rx
          word |= r0<<16;
          word |= r2<<21;
          word |= static_cast<unsigned int>(get_addr(field[1])&65535);
          break;
        case 2: // beq bne rs rt
          word |= r0<<21;
          word |= r1<<16;
          // offset is in words, relative to the instruction after the branch
          word |= static_cast<unsigned int>(
                    ((get_addr(field[2])-(loc+4))/4)&65535);
          break;
        case 3: // jump
          // keep the word address inside its 26 bit field so a large
          // target cannot spill into the opcode bits
          word |= static_cast<unsigned int>(get_addr(field[0])/4) & 0x3FFFFFFu;
          break;
        case 4: // op rd rs shf
          word |= static_cast<unsigned int>(opminor);
          word |= r0<<11;
          word |= r1<<16;
          word |= r2<<6; // actually shift count
          break;
        case 5: // float and double
          word |= static_cast<unsigned int>(opminor);
          word |= static_cast<unsigned int>(opfix)<<21;
          word |= r0<<6;
          if(opfields==3)
          {
            word |= r1<<16;
            word |= r2<<11;
          }
          else
          {
            word |= r1<<11;
          }
          break;
        case 6: // word or data
          word = static_cast<unsigned int>(get_addr(field[0]));
          break;
        case 7: // org
          loc = get_addr(field[0]);
          continue; // skip increment at end of loop
        case 8: // set
          set_addr(lab, get_addr(field[0]));
          continue;
        case 9: // debug: tracing is printed while debug>=pass, so
                // 0=off, 1=trace pass 1 only, 2=trace both passes
          debug = get_addr(field[0]);
          if(debug>2 || debug<0) debug=2;
          continue;
        default:
          continue;
      }
      val = static_cast<int>(word);

      if(pass==2)
      {
        // build output (hex)
        hexout(my_output, loc);
        my_output << " ";
        hexout(my_output, val);
        my_output << " " << line << endl;
        // restore dec afterwards; hex is sticky on cout and would otherwise
        // change every later debug number and the symbol table listing
        if(debug>=pass) cout << hex << loc << "=loc, "
                             << val << "=val" << dec << endl << endl;
      }
      loc=loc+4; // increment location counter
    } // end while loop
  } // end pass loop

  my_output.flush(); // force file to disk.
  my_output.close(); // close the file
  cout << argv[2] << " written" << endl;

  if(debug)
  {
    cout << "Symbol table" << endl;
    for(symset::const_iterator p_sym=symtab.begin();
        p_sym!=symtab.end(); ++p_sym)
      cout << p_sym->name << " " << p_sym->loc << endl;
  }
  return 0;
} // end main


// Replace a comment with spaces.  A comment starts at '#', ';', "//", "/*"
// or "--" and always runs to the end of the line.
static void blank_comments(string &text)
{
  bool in_comment = false;
  for(string::size_type i=0; i<text.length(); i++)
  {
    if(in_comment) {text[i]=' '; continue;}
    const char here = text[i];
    const char next = (i+1<text.length()) ? text[i+1] : '\0';
    const bool starts = here=='#' || here==';' ||
                        (here=='/' && (next=='/' || next=='*')) ||
                        (here=='-' && next=='-');
    if(starts) {text[i]=' '; in_comment=true;}
  }
}


// Turn the separators ":" "," "(" ")" and tab into spaces so the line can
// be split into fields with plain ">>" extraction.
static void blank_punctuation(string &text)
{
  for(string::size_type i=0; i<text.length(); i++)
  {
    const char c = text[i];
    if(c==':' || c==',' || c=='(' || c==')' || c=='\t') text[i]=' ';
  }
}


// Define symbol lab with value loc, replacing any earlier definition.
static void set_addr(const string &lab, int loc)
{
  // set elements are immutable, so a redefinition is erase + insert
  const symset::iterator p_sym = symtab.find(symloc(lab,0));
  if(p_sym!=symtab.end()) symtab.erase(p_sym);
  symtab.insert(symloc(lab,loc));
}


// Convert an operand field to an integer value.
//   digit first (optionally after one '+' or '-'):
//     0x... / 0X...            hexadecimal, full 32 bit range
//     second character is '.'  float, returned as its raw bit pattern
//     otherwise                decimal
//   anything else: looked up in symtab; an unknown symbol yields 0
static int get_addr(const string &field)
{
  // a leading sign counts as numeric only when a digit follows it
  string::size_type first = 0;
  if(field.length()>1 && (field[0]=='-' || field[0]=='+') &&
     isdigit(static_cast<unsigned char>(field[1]))) first = 1;

  if(field.length()<=first ||
     !isdigit(static_cast<unsigned char>(field[first])))
  {
    const symset::const_iterator p_sym = symtab.find(symloc(field,0));
    if(p_sym==symtab.end()) return 0;
    return p_sym->loc;
  }

  const char next = (first+1<field.length()) ? field[first+1] : '\0';

  if(field[first]=='0' && (next=='x' || next=='X'))
  {
    // strtoul accepts the sign and the 0x prefix itself; going through
    // unsigned keeps values of 0x80000000 and above intact
    return static_cast<int>(
             static_cast<unsigned int>(strtoul(field.c_str(), NULL, 16)));
  }

  if(next=='.') // second character must be a point
  {
    const float fval = static_cast<float>(strtod(field.c_str(), NULL));
    int bits = 0;
    // assumes float and int have the same size - TODO confirm per target
    memcpy(&bits, &fval, sizeof(fval));
    return bits;
  }

  return static_cast<int>(strtol(field.c_str(), NULL, 10));
}


// Write val as exactly eight upper-case hex digits, most significant first.
static void hexout(ostream &out_file, int val)
{
  static const char digits[] = "0123456789ABCDEF";
  const unsigned int bits = static_cast<unsigned int>(val);
  for(int shift=28; shift>=0; shift-=4)
    out_file << digits[(bits>>shift)&0xf];
}


// Fill optab with the supported opcodes/directives and regtab with every
// accepted register spelling.
static void tblbuild()
{
  struct opdef {const char *name; int typ, maj, mnr, nfields, fix;};
  static const opdef ops[] =
  {
    // opname, typ, maj, min, #f, fix
    {"nop",    0,  0,  0, 0,  0},
    {"break",  0,  0, 13, 0,  0},
    {"add",    0,  0, 32, 3,  0},
    {"sub",    0,  0, 34, 3,  0},
    {"mult",   0,  0, 24, 3,  0},
    {"mut",    0,  0, 24, 3,  0}, // NOTE(review): same encoding as mult,
                                  // looks like a kept misspelling
    {"div",    0,  0, 26, 3,  0},
    {"lw",     1, 35,  0, 3,  0},
    {"sw",     1, 43,  0, 3,  0},
    {"lwc1",   1, 49,  0, 3,  0},
    {"swc1",   1, 57,  0, 3,  0},
    {"ldc1",   1, 53,  0, 3,  0},
    {"sdc1",   1, 61,  0, 3,  0},
    {"addi",   1,  4,  0, 3,  0}, // f02
    {"beq",    2,  8,  0, 3,  0}, // f02
    {"bne",    2,  5,  0, 3,  0},
    {"j",      3,  2,  0, 1,  0},
    {"sll",    4,  0,  1, 3,  0},
    {"srl",    4,  0,  2, 3,  0},
    {"cmpl",   4,  0, 36, 2,  0}, // f02
    {"add.s",  5, 17,  0, 3, 16},
    {"add.d",  5, 17,  0, 3, 17},
    {"sub.s",  5, 17,  1, 3, 16},
    {"sub.d",  5, 17,  1, 3, 17},
    {"mul.s",  5, 17,  2, 3, 16},
    {"mul.d",  5, 17,  2, 3, 17},
    {"div.s",  5, 17,  3, 3, 16},
    {"div.d",  5, 17,  3, 3, 17},
    {"abs.s",  5, 17,  5, 2, 16},
    {"abs.d",  5, 17,  5, 2, 17},
    {"mov.s",  5, 17,  6, 2, 16},
    {"mov.d",  5, 17,  6, 2, 17},
    {"neg.s",  5, 17,  7, 2, 16},
    {"neg.d",  5, 17,  7, 2, 17},
    {"word",   6,  0,  0, 1,  0},
    {"data",   6,  0,  0, 1,  0},
    {"org",    7,  0,  0, 1,  0},
    {"set",    8,  0,  0, 1,  0},
    {"debug",  9,  0,  0, 1,  0}
  };
  for(unsigned int k=0; k<sizeof(ops)/sizeof(ops[0]); k++)
    optab.insert(optype(ops[k].name, ops[k].typ, ops[k].maj, ops[k].mnr,
                        ops[k].nfields, ops[k].fix));

  // numbered spellings: $n, $fn and bare n for n = 0..31
  for(int n=0; n<32; n++)
  {
    ostringstream digits;
    digits << n;
    regtab.insert(symloc("$"  + digits.str(), n));
    regtab.insert(symloc("$f" + digits.str(), n));
    regtab.insert(symloc(digits.str(), n));
  }

  struct regdef {const char *name; int num;};
  static const regdef named[] =
  {
    {"$zero",0}, {"$at",1},
    {"$v0",2},  {"$v1",3},
    {"$a0",4},  {"$a1",5},  {"$a2",6},  {"$a3",7},
    {"$t0",8},  {"$t1",9},  {"$t2",10}, {"$t3",11},
    {"$t4",12}, {"$t5",13}, {"$t6",14}, {"$t7",15},
    {"$s0",16}, {"$s1",17}, {"$s2",18}, {"$s3",19},
    {"$s4",20}, {"$s5",21}, {"$s6",22}, {"$s7",23},
    {"$t8",24}, {"$t9",25},
    {"$k0",26}, {"$k1",27}, {"$gp",28}, {"$sp",29},
    {"$fp",30}, {"$ra",31},
    // spellings accepted by earlier versions, kept so old sources assemble
    {"$t10",26}, {"$t11",27}, {"$tgp",28}, {"$tsp",29}
  };
  for(unsigned int k=0; k<sizeof(named)/sizeof(named[0]); k++)
    regtab.insert(symloc(named[k].name, named[k].num));
}