<- previous    index    next ->

Lecture 6 Branching and loops

Note: "<" and ">" are interpreted by HTML, so source code that is physically
included in this page may show "&lt;" and "&gt;" (or lose text) instead of the symbols.
Be sure to download the source files from the links; do not copy them from the HTML.

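The basic integer compare instruction is  "cmp"
      for float   compare instruction is  "fcomip st0,st1"
      (fcomip sets ZF, PF and CF like an unsigned compare, so after it use
       JB, JBE, JA, JAE, JE, JNE instead of JL, JLE, JG, JGE)
Following the integer "cmp" instruction is typically one of: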
  JL  label  ; jump on less than  "<"
  JLE label  ; jump on less than or equal "<="
  JG  label  ; jump on greater than ">"
  JGE label  ; jump on greater than or equal ">="
  JE  label  ; jump on equal "=="
  JNE label  ; jump on not equal "!="

After many integer arithmetic instructions
  JZ  label  ; jump on zero
  JNZ label  ; jump on non zero
  JS  label  ; jump on sign flag set (result is negative)
  JNS label  ; jump on sign flag not set (result is zero or positive)

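Note: Use 'cmp' followed by JL/JLE/JG/JGE for comparison, rather than 'sub' followed by JS/JNS.
Overflow can occur on subtraction, inverting the sign of the result; JL/JLE/JG/JGE
test the overflow flag together with the sign flag, and 'cmp' does not change its operands.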

if-then-else in assembly language

Convert a "C" 'if' statement to nasm assembly ifint_64.asm
The significant features are:
1) use a compare instruction for the test
2) put a label on the start of the false branch (e.g. false1:)
3) put a label after the end of the 'if' statement (e.g. exit1:)
4) choose a conditional jump that goes to the false part
5) put an unconditional jump to (e.g. exit1:) at the end of the true part

source code ifint_64.asm

; ifint_64.asm  code ifint_64.c for nasm
; /* ifint_64.c an 'if' statement that will be coded for nasm */
; #include <stdio.h>
; int main()
; {
;   long int a=1;
;   long int b=2;
;   long int c=3;
;   long int xyz=4;
;   if(a < b)
;     printf("true a < b \n");
;   else
;     printf("wrong on a < b \n");
;   if(b > c)
;     printf("wrong on b > c \n");
;   else
;     printf("false b > c \n");
;
;   if(4==xyz) goto label1e;
;     printf("failed 4==xyz\n");
;label1e: printf("passed 4==xyz\n");
;
;   if(5>xyz) goto label1g;
;     printf("failed 5>xyz\n");
;label1g: printf("passed 5>xyz\n");
;
;   if(3 < xyz) goto label1l;
;     printf("failed 3 < xyz\n");
;label1l: printf("passed 3<xyz\n");
;
;   return 0;
;}
; result of executing both "C" and assembly is:
; true a < b
; false b > c
; passed 4==xyz
; passed 5>xyz
; passed 3<xyz
;
; Syntax: NASM (Intel operand order).  ABI: System V AMD64.
; printf is variadic, so al must hold the number of vector registers
; used (0 here) before every call.

        global  main                    ; define for linker
        extern  printf                  ; tell linker we need this C function

        section .data                   ; Data section, initialized variables
a:      dq      1
b:      dq      2
c:      dq      3
xyz:    dq      4
fmt1:   db      "true a < b ",10,0
fmt2:   db      "wrong on a < b ",10,0
fmt3:   db      "wrong on b > c ",10,0
fmt4:   db      "false b > c ",10,0
fmt5:   db      "failed 4==xyz ",10,0
fmt6:   db      "passed 4==xyz ",10,0
fmt7:   db      "failed 5>xyz ",10,0
fmt8:   db      "passed 5>xyz ",10,0
fmt9:   db      "failed 3<xyz ",10,0
fmt10:  db      "passed 3<xyz ",10,0

        section .text
main:   push    rbp                     ; set up stack (keeps rsp 16-byte aligned at calls)

        ; if(a < b) ... else ...
        mov     rax,[a]                 ; a
        cmp     rax,[b]                 ; compare a to b
        jge     false1                  ; a >= b : go to the false part
        mov     rdi, fmt1               ; printf("true a < b \n");
        mov     rax,0                   ; no xmm registers used
        call    printf
        jmp     exit1                   ; jump over false part
false1:                                 ; a < b is false
        mov     rdi, fmt2               ; printf("wrong on a < b \n");
        mov     rax,0
        call    printf
exit1:                                  ; finished 'if' statement

        ; if(b > c) ... else ...
        mov     rax,[b]                 ; b
        cmp     rax,[c]                 ; compare b to c
        jle     false2                  ; b <= c : go to the false part
        mov     rdi, fmt3               ; printf("wrong on b > c \n");
        mov     rax,0
        call    printf
        jmp     exit2                   ; jump over false part
false2:                                 ; b > c is false
        mov     rdi, fmt4               ; printf("false b > c \n");
        mov     rax,0
        call    printf
exit2:                                  ; finished 'if' statement

        mov     rax,4
        cmp     rax,[xyz]               ; if(4==xyz) goto label1e;
        je      label1e
        mov     rdi, fmt5               ; printf("failed 4==xyz\n");
        mov     rax,0
        call    printf
label1e:
        mov     rdi, fmt6               ; printf("passed 4==xyz\n");
        mov     rax,0
        call    printf

        mov     rax,5
        cmp     rax,[xyz]               ; if(5 > xyz) goto label1g;
        jg      label1g
        mov     rdi, fmt7               ; printf("failed 5>xyz\n");
        mov     rax,0
        call    printf
label1g:
        mov     rdi, fmt8               ; printf("passed 5>xyz\n");
        mov     rax,0
        call    printf

        mov     rax,3
        cmp     rax,[xyz]               ; if(3 < xyz) goto label1l;
        jl      label1l
        mov     rdi, fmt9               ; printf("failed 3<xyz\n");
        mov     rax,0
        call    printf
label1l:
        mov     rdi, fmt10              ; printf("passed 3<xyz\n");
        mov     rax,0
        call    printf

        pop     rbp                     ; restore stack
        mov     rax,0                   ; normal, no error, return value
        ret                             ; return 0;

output ifint_64.out

true a < b
false b > c
passed 4==xyz
passed 5>xyz
passed 3<xyz

source code ifflt_64.asm

; ifflt_64.asm  code ifflt_64.c for nasm
; /* ifflt_64.c an 'if' statement that will be coded for nasm */
; #include <stdio.h>
; int main()
; {
;   double a=1.0;
;   double b=2.0;
;   double c=3.0;
;   if(a < b)
;     printf("true a < b \n");
;   else
;     printf("wrong on a < b \n");
;   if(b > c)
;     printf("wrong on b > c \n");
;   else
;     printf("false b > c \n");
;   return 0;
;}
; result of executing both "C" and assembly is:
; true a < b
; false b > c
;
; Syntax: NASM (Intel operand order).  ABI: System V AMD64.
;
; Floating point compare: 'fcomip st0,st1' compares st0 with st1, writes
; the result to ZF, PF and CF in EFLAGS and pops st0.
;     st0 >  st1 : ZF=0 PF=0 CF=0
;     st0 <  st1 : ZF=0 PF=0 CF=1
;     st0 == st1 : ZF=1 PF=0 CF=0
;     unordered  : ZF=1 PF=1 CF=1      (a NaN operand)
; Use the "unsigned" jumps JB, JBE, JA, JAE (and JP for unordered) after it.
; 'fcompp' only sets the x87 status word, so a conditional jump placed
; directly after 'fcompp' tests stale EFLAGS.

        global  main                    ; define for linker
        extern  printf                  ; tell linker we need this C function

        section .data                   ; Data section, initialized variables
a:      dt      1.0                     ; 80-bit extended precision
b:      dt      2.0
c:      dt      3.0
fmt1:   db      "true a < b ",10,0
fmt2:   db      "wrong on a < b ",10,0
fmt3:   db      "wrong on b > c ",10,0
fmt4:   db      "false b > c ",10,0

        section .text
main:   push    rbp                     ; set up stack (keeps rsp 16-byte aligned at calls)

        ; if(a < b) ... else ...
        fld     tword [b]               ; b into st0
        fld     tword [a]               ; a into st0, pushes b into st1
        fcomip  st0,st1                 ; compare a to b, set EFLAGS, pop a
        fstp    st0                     ; pop b, x87 stack is empty again
        jp      false1                  ; unordered (NaN) : a < b is false
        jae     false1                  ; a >= b : go to the false part
        mov     rdi, fmt1               ; printf("true a < b \n");
        mov     rax,0                   ; no xmm registers used
        call    printf
        jmp     exit1                   ; jump over false part
false1:                                 ; a < b is false
        mov     rdi, fmt2               ; printf("wrong on a < b \n");
        mov     rax,0
        call    printf
exit1:                                  ; finished 'if' statement

        ; if(b > c) ... else ...
        fld     tword [c]               ; c into st0
        fld     tword [b]               ; b into st0, pushes c into st1
        fcomip  st0,st1                 ; compare b to c, set EFLAGS, pop b
        fstp    st0                     ; pop c
        jbe     false2                  ; b <= c or unordered : go to the false part
        mov     rdi, fmt3               ; printf("wrong on b > c \n");
        mov     rax,0
        call    printf
        jmp     exit2                   ; jump over false part
false2:                                 ; b > c is false
        mov     rdi, fmt4               ; printf("false b > c \n");
        mov     rax,0
        call    printf
exit2:                                  ; finished 'if' statement

        pop     rbp                     ; restore stack
        mov     rax,0                   ; normal, no error, return value
        ret                             ; return 0;

output ifflt_64.out

true a < b
false b > c

loop in assembly language

Convert a "C" loop to nasm assembly loopint_64.asm
The significant features are:
1) "C" long int is 8-bytes, thus dd1[1] becomes qword [dd1+8]
   dd1[99] becomes qword [dd1+8*99]
2) "C" long int is 8-bytes, thus dd1[i]; i++; becomes add rdi,8
   since "i" is never stored, the register rdi holds "i" (as a byte offset, 8*i)
3) the 'cmp' instruction sets flags that control the jump instruction.
   cmp rdi,8*99 is like i<99 in "C"
   jl loop1 jumps while register rdi is less than 8*99

; loopint_64.asm  code loopint.c for nasm
; /* loopint_64.c a very simple loop that will be coded for nasm */
; #include <stdio.h>
; int main()
; {
;   long int dd1[100]; // 100 could be 3 gigabytes
;   long int i;        // must be long for more than 2 gigabytes
;   dd1[0]=5;          /* be sure loop stays 1..98 */
;   dd1[99]=9;
;   for(i=1; i<99; i++) dd1[i]=7;
;   printf("dd1[0]=%ld, dd1[1]=%ld, dd1[98]=%ld, dd1[99]=%ld\n",
;           dd1[0], dd1[1], dd1[98],dd1[99]);
;   return 0;
;}
; execution output is dd1[0]=5, dd1[1]=7, dd1[98]=7, dd1[99]=9
;
; Syntax: NASM (Intel operand order).  ABI: System V AMD64.

        extern  printf                  ; the C function, to be called

        section .bss
dd1:    resq    100                     ; reserve 100 long int
i:      resq    1                       ; actually unused, kept in register

        section .data                   ; Data section, initialized variables
fmt:    db      "dd1[0]=%ld, dd1[1]=%ld, dd1[98]=%ld, dd1[99]=%ld",10,0

        section .text
        global  main
main:   push    rbp                     ; set up stack (keeps rsp 16-byte aligned at calls)

        mov     qword [dd1],5           ; dd1[0]=5;   immediate to memory
        mov     qword [dd1+99*8],9      ; dd1[99]=9;  indexed 99 qword

        mov     rdi, 1*8                ; i=1; rdi is the byte offset 8*i
loop1:  mov     qword [dd1+rdi],7       ; dd1[i]=7;
        add     rdi, 8                  ; i++;  8 bytes per element
        cmp     rdi, 8*99               ; i<99
        jl      loop1                   ; keep looping while i<99

        mov     rdi, fmt                ; pass address of format
        mov     rsi, qword [dd1]        ; dd1[0]   first list parameter
        mov     rdx, qword [dd1+1*8]    ; dd1[1]   second list parameter
        mov     rcx, qword [dd1+98*8]   ; dd1[98]  third list parameter
        mov     r8,  qword [dd1+99*8]   ; dd1[99]  fourth list parameter
        mov     rax, 0                  ; no xmm used
        call    printf                  ; Call C function

        pop     rbp                     ; restore stack
        mov     rax,0                   ; normal, no error, return value
        ret                             ; return 0;

output loopint_64.out

dd1[0]=5, dd1[1]=7, dd1[98]=7, dd1[99]=9

logic operations in assembly language

Previously, integer arithmetic in "C" was converted to NASM assembly language.
The following is very similar (cut and paste) of intarith_64.asm to intlogic_64.asm
that shows the "C" operators "&" and, "|" or, "^" xor, "~" not.

intlogic_64.asm

; intlogic_64.asm  show some simple C code and corresponding nasm code
;                  the nasm code is one sample, not unique
;
; compile: nasm -f elf64 -l intlogic_64.lst intlogic_64.asm
; link:    gcc -m64 -o intlogic_64 intlogic_64.o
; run:     ./intlogic_64 > intlogic_64.out
;
; the output from running intlogic_64.asm and intlogic.c is
; c=5  , a=3, b=5, c=15
; c=a&b, a=3, b=5, c=1
; c=a|b, a=3, b=5, c=7
; c=a^b, a=3, b=5, c=6
; c=~a , a=3, b=5, c=-4
;
; (the first line's tag is the literal text "c=5 "; the value stored is 15)
;
;The file intlogic.c is:
;  #include <stdio.h>
;  int main()
;  {
;    long int a=3, b=5, c;
;
;    c=15;
;    printf("%s, a=%ld, b=%ld, c=%ld\n","c=5  ", a, b, c);
;    c=a&b; /* and */
;    printf("%s, a=%ld, b=%ld, c=%ld\n","c=a&b", a, b, c);
;    c=a|b; /* or */
;    printf("%s, a=%ld, b=%ld, c=%ld\n","c=a|b", a, b, c);
;    c=a^b; /* xor */
;    printf("%s, a=%ld, b=%ld, c=%ld\n","c=a^b", a, b, c);
;    c=~a;  /* not */
;    printf("%s, a=%ld, b=%ld, c=%ld\n","c=~a", a, b, c);
;    return 0;
; }
;
; Syntax: NASM (Intel operand order).  ABI: System V AMD64.

        extern  printf                  ; the C function to be called

; pabc "tag" : printf(fmt, "tag", a, b, c)
; Reads a, b, c from memory.  Overwrites rdi, rsi, rdx, rcx, r8, rax and
; whatever printf overwrites.  Leaves the assembler in section .text.
; %%str is a macro-local label, unique for every invocation.
%macro  pabc 1                          ; a "simple" print macro
        section .data
%%str:  db      %1,0                    ; %1 is first actual in macro call
        section .text
        mov     rdi, fmt                ; address of format string
        mov     rsi, %%str              ; users string
        mov     rdx, [a]                ; long int a
        mov     rcx, [b]                ; long int b
        mov     r8, [c]                 ; long int c
        mov     rax, 0                  ; no xmm used
        call    printf                  ; Call C function
%endmacro

        section .data                   ; preset constants, writeable
a:      dq      3                       ; 64-bit variable a initialized to 3
b:      dq      5                       ; 64-bit variable b initialized to 5
fmt:    db      "%s, a=%ld, b=%ld, c=%ld",10,0 ; format string for printf

        section .bss                    ; uninitialized space
c:      resq    1                       ; reserve a 64-bit word

        section .text                   ; instructions, code segment
        global  main                    ; for gcc standard linking
main:                                   ; label
        push    rbp                     ; set up stack (keeps rsp 16-byte aligned at calls)

lit5:                                   ; c=15;
        mov     rax,15                  ; 15 is a literal constant
        mov     [c],rax                 ; store into c
        pabc    "c=5  "                 ; invoke the print macro

andb:                                   ; c=a&b;
        mov     rax,[a]                 ; load a
        and     rax,[b]                 ; and with b
        mov     [c],rax                 ; store into c
        pabc    "c=a&b"                 ; invoke the print macro

orw:                                    ; c=a|b;
        mov     rax,[a]                 ; load a
        or      rax,[b]                 ; logical or with b
        mov     [c],rax                 ; store into c
        pabc    "c=a|b"                 ; invoke the print macro

xorw:                                   ; c=a^b;
        mov     rax,[a]                 ; load a
        xor     rax,[b]                 ; exclusive or with b
        mov     [c],rax                 ; store result in c
        pabc    "c=a^b"                 ; invoke the print macro

notw:                                   ; c=~a;
        mov     rax,[a]                 ; load a
        not     rax                     ; not, complement
        mov     [c],rax                 ; store result into c
        pabc    "c=~a "                 ; invoke the print macro

        pop     rbp                     ; restore stack
        mov     rax,0                   ; exit code, 0=normal
        ret                             ; main returns to operating system

output intlogic_64.out

c=5  , a=3, b=5, c=15
c=a&b, a=3, b=5, c=1
c=a|b, a=3, b=5, c=7
c=a^b, a=3, b=5, c=6
c=~a , a=3, b=5, c=-4

note: not 3 (complement of 3) becomes -4 in twos complement

loops in assembly language

One significant use of loops is to evaluate polynomials and convert numbers from one base to another. (Yes, this is related to project 1 for CMSC 313) The following program has three loops. Loop3 (h3loop) uses Horners method to evaluate a polynomial, using 'rdi' as an index, 'rcx' and 'loop' to do the loop. a_4 (a_n) is first in the array, n=4. Loop4 (h4loop) uses Horners method, with data order optimized, using 'rcx' as both index and loop counter, to get a three instruction loop. a_0 is first in the array, n=4. Loop5 (h5loop) uses Horners method to evaluate a polynomial using double precision floating point, with a_0 first in the array. Note the 8 byte increment, and that the result is passed to printf as a quad word in xmm0.

Horners method to evaluate polynomials in assembly language

Study horner_64.asm to understand the NASM coding of the loops.

; horner_64.asm  Horners method of evaluating polynomials
;
; given a polynomial  Y = a_n X^n + a_n-1 X^n-1 + ... a_1 X + a_0
; a_n is the coefficient 'a' with subscript n. X^n is X to nth power
; compute y_1 = a_n * X + a_n-1
; compute y_2 = y_1 * X + a_n-2
; compute y_i = y_i-1 * X + a_n-i   i=3..n
; thus    y_n = Y = value of polynomial
;
; in assembly language:
;   load some register with a_n, multiply by X
;   add a_n-1, multiply by X, add a_n-2, multiply by X, ...
;   finishing with the add  a_0
;
; output from execution:
; a 6319
; aa 6319
; af 6.319000e+03
;
; Syntax: NASM (Intel operand order).  ABI: System V AMD64.
; Every value loaded into a 64-bit register is declared with dq, so the
; 8-byte load reads exactly the bytes that were defined.

        extern  printf
        global  main

        section .data
fmta:   db      "a %ld",10,0
fmtaa:  db      "aa %ld",10,0
fmtflt: db      "af %e",10,0

        section .text
main:   push    rbp                     ; set up stack (keeps rsp 16-byte aligned at calls)

; evaluate an integer polynomial, X=7, using a count

        section .data
a:      dq      2,5,-7,22,-9            ; coefficients of polynomial, a_n first
X:      dq      7                       ; X = 7
                                        ; n=4, 8 bytes per coefficient
        section .text
        mov     rax,[a]                 ; accumulate value here, get coefficient a_n
        mov     rdi,1                   ; subscript initialization
        mov     rcx,4                   ; loop iteration count initialization, n
h3loop: imul    rax,[X]                 ; * X   (two operand form, rdx not used)
        add     rax,[a+8*rdi]           ; + a_n-i
        inc     rdi                     ; increment subscript
        loop    h3loop                  ; decrement rcx, jump on non zero

        mov     rsi, rax                ; print rax
        mov     rdi, fmta               ; format
        mov     rax, 0                  ; no float
        call    printf

; evaluate an integer polynomial, X=7, using a count as index
; optimal organization of data allows a three instruction loop

        section .data
aa:     dq      -9,22,-7,5,2            ; coefficients of polynomial, a_0 first
n:      dq      4                       ; n=4, 8 bytes per coefficient

        section .text
        mov     rax,[aa+4*8]            ; accumulate value here, get coefficient a_n
        mov     rcx,[n]                 ; loop iteration count initialization, n
h4loop: imul    rax,[X]                 ; * X   (two operand form, rdx not used)
        add     rax,[aa+8*rcx-8]        ; + aa_n-i
        loop    h4loop                  ; decrement rcx, jump on non zero

        mov     rsi, rax                ; print rax
        mov     rdi, fmtaa              ; format
        mov     rax, 0                  ; no float
        call    printf

; evaluate a double floating polynomial, X=7.0, using a count as index
; optimal organization of data allows a three instruction loop

        section .data
af:     dq      -9.0,22.0,-7.0,5.0,2.0  ; coefficients of polynomial, a_0 first
XF:     dq      7.0
Y:      dq      0.0
N:      dq      4                       ; dq: loaded below with a 64-bit mov

        section .text
        mov     rcx,[N]                 ; loop iteration count initialization, n
        fld     qword [af+8*rcx]        ; accumulate value here, get coefficient a_n
h5loop: fmul    qword [XF]              ; * XF
        fadd    qword [af+8*rcx-8]      ; + af_n-i
        loop    h5loop                  ; decrement rcx, jump on non zero

        fstp    qword [Y]               ; store Y in order to print Y (pops x87 stack)
        movq    xmm0, qword [Y]         ; first floating point argument
        mov     rdi, fmtflt             ; format
        mov     rax, 1                  ; one float
        call    printf

        pop     rbp                     ; restore stack
        mov     rax,0                   ; normal return
        ret                             ; return

output horner_64.out

a 6319
aa 6319
af 6.319000e+03

A "C" version with same data, slightly different code sequence.

// horner_64.c  long integer and double Horners method of evaluating polynomials
//              everything 64-bit
// given a polynomial  Y = a_n X^n + a_n-1 X^n-1 + ... a_1 X + a_0
// a_n is the coefficient 'a' with subscript n. X^n is X to nth power
// compute y_1 = a_n * X + a_n-1
// compute y_2 = y_1 * X + a_n-2
// compute y_i = y_i-1 * X + a_n-i   i=3..n
// thus    y_n = Y = value of polynomial
#include <stdio.h>
int main(int argc, char *argv[])
{
  long int a[]  = {2, 5, -7, 22, -9};            // a_n first
  long int aa[] = {-9, 22, -7, 5, 2};            // aa_0 first
  double   af[] = {-9.0, 22.0, -7.0, 5.0, 2.0};  // af_0 first
  long int n = 4;
  long int X, Y;
  double   XF, YF;
  long int i;

  // evaluate an integer polynomial a, X=7, using a_n first, count n
  X = 7;
  Y = a[0]*X + a[1];
  for(i=2; i<=n; i++) Y = Y*X + a[i];
  printf("a %ld\n", Y);

  // evaluate an integer polynomial aa , X=7, using a_0 first, count n
  X = 7;
  Y = aa[n]*X + aa[n-1];
  for(i=n-2; i>=0; i--) Y = Y*X + aa[i];
  printf("aa %ld\n", Y);

  // evaluate a double floating polynomial, X=7.0, using af_0 first, n
  XF = 7.0;
  YF = af[n]*XF + af[n-1];
  for(i=n-2; i>=0; i--) YF = YF*XF + af[i];
  printf("af %e\n", YF);

  return 0;
}

Same output:

a 6319
aa 6319
af 6.319000e+03

The first matrix program, to learn indexing, is print matrix

Study prtmat.asm to understand the NASM indexing of two matrix, m1 and m2

Output prtmat.out   see output

prtmat.asm runnung
using macro to print
m1[0][0]=1.100000e+00
m1[0][1]=1.200000e+00
m1[1][0]=1.300000e+00
m1[1][1]=2.100000e+00
m1[2][0]=2.200000e+00
m1[2][1]=2.300000e+00

development with debug print
i=0, j=0, k=0, l=0
m1[0][0]=1.100000e+00
i=0, j=1, k=1, l=8
m1[0][1]=1.200000e+00
i=1, j=0, k=2, l=16
m1[1][0]=1.300000e+00
i=1, j=1, k=3, l=24
m1[1][1]=2.100000e+00
i=2, j=0, k=4, l=32
m1[2][0]=2.200000e+00
i=2, j=1, k=5, l=40
m1[2][1]=2.300000e+00

m2[0][0]=1.100000e+00
m2[0][1]=2.100000e+00
m2[0][2]=3.100000e+00
m2[1][0]=1.200000e+00
m2[1][1]=2.200000e+00
m2[1][2]=3.200000e+00

prtmat.asm finished

; prtmat.asm
;
; compile  nasm -f elf64 -l prtmat.lst  prtmat.asm
; link     gcc -m64 -o prtmat  prtmat.o
; run      ./prtmat
;
; Syntax: NASM (Intel operand order).  ABI: System V AMD64.
; Element [i][j] of a matrix stored with ncol items per outer index is at
; byte offset (i*ncol+j)*8.  The loop variables i, j, k, l are kept in
; memory and reloaded after every printf call; only caller-saved registers
; are used as scratch, so no callee-saved register has to be preserved.

        extern  printf                  ; the C function to be called

; prtm "name" : printf("%s[%ld][%ld]=%e\n", "name", i, j, x)
; Reads i, j, x from memory.  Overwrites rdi, rsi, rdx, rcx, xmm0, rax and
; whatever printf overwrites.  Leaves the assembler in section .text.
; %%fmt and %%str are macro-local labels, unique for every invocation.
%macro  prtm 1                          ; print macro, arg1 is string, name of matrix
        section .data                   ; for inside macro
%%fmt:  db      "%s[%ld][%ld]=%e", 10, 0
%%str:  db      %1,0                    ; %1 is macro call first actual parameter
        section .text
        mov     rdi, %%fmt              ; address of format string
        mov     rsi, %%str              ; string passed to macro
        mov     rdx, [i]
        mov     rcx, [j]
        movq    xmm0, qword [x]         ; first floating point in fmt
        mov     rax, 1                  ; 1 floating point arguments to printf
        call    printf                  ; Call C function
%endmacro

        section .data                   ; initialized
msg:    db      "prtmat.asm runnung", 0
msg2:   db      "prtmat.asm finished", 0
msg3:   db      "using macro to print", 0
msg4:   db      "development with debug print", 0
fmt:    db      "%s", 10, 0
fmtbl:  db      " ", 10, 0              ; for blank line

m1:     dq      1.1, 1.2, 1.3, 2.1, 2.2, 2.3 ; printed as nrow1=3 rows of ncol1=2 items
m1name: db      "m1", 0
ncol1:  dq      2
nrow1:  dq      3

m2:     dq      1.1, 2.1, 3.1, 1.2, 2.2, 3.2 ; printed in storage order as nrow2=2 groups
                                        ; of ncol2=3 items; if the data is read as a
                                        ; column major 3 by 2 matrix, i is the column
m2name: db      "m2", 0
ncol2:  dq      3
nrow2:  dq      2

i8:     dq      8                       ; 8 bytes in qword
fmtm:   db      "%s[%ld][%ld]=%e", 10, 0
fijkl:  db      "i=%ld, j=%ld, k=%ld, l=%ld", 10, 0

        section .bss
i:      resq    1                       ; reserve space, 1 is one quad word
j:      resq    1
k:      resq    1                       ; item index  i*ncol+j
l:      resq    1                       ; byte offset (i*ncol+j)*8
lm:     resq    1                       ; address of matrix item (unused)
x:      resq    1                       ; float type or any type

        section .text                   ; Code section.
        global  main
main:   push    rbp                     ; set up stack frame, must be aligned

        mov     rdi,fmt                 ; address of format, required register rdi
        mov     rsi,msg                 ; address of data
        mov     rax,0                   ; no float to print
        call    printf                  ; Call C function

        mov     rdi,fmt                 ; address of format, required register rdi
        mov     rsi,msg3
        mov     rax,0                   ; no float to print
        call    printf                  ; Call C function

; first print matrix m1, using the macro

        mov     qword [i],0             ; i=0
loopi:  mov     qword [j],0             ; j=0
loopj:  mov     rax,[i]
        imul    rax,[ncol1]             ; i*ncol1
        add     rax,[j]                 ; i*ncol1+j   [i][j]
        mov     [k],rax
        imul    rax,[i8]                ; byte address offset
        mov     [l],rax                 ; (i*ncol1+j)*8
        movq    xmm0,qword [m1+rax]     ; x = m1[i][j]
        movq    qword [x],xmm0
        prtm    "m1"

        mov     rax,[j]
        inc     rax
        mov     [j],rax
        cmp     rax,[ncol1]             ; j<ncol1
        jne     loopj

        mov     rax,[i]
        inc     rax
        mov     [i],rax
        cmp     rax,[nrow1]             ; i<nrow1
        jne     loopi

        mov     rdi,fmtbl               ; print blank line
        mov     rax,0                   ; no float to print
        call    printf                  ; Call C function

        mov     rdi,fmt                 ; address of format, required register rdi
        mov     rsi,msg4                ; with debug
        mov     rax,0                   ; no float to print
        call    printf                  ; Call C function

; print matrix m1 again, without the macro and with added debug print

        mov     qword [i],0             ; i=0
loopi2: mov     qword [j],0             ; j=0
loopj2: mov     rax,[i]
        imul    rax,[ncol1]             ; i*ncol1
        add     rax,[j]                 ; i*ncol1+j   [i][j]
        mov     [k],rax
        imul    rax,[i8]                ; byte address offset
        mov     [l],rax                 ; (i*ncol1+j)*8

        mov     rdi,fijkl               ; debug print needed to develop indexing
        mov     rsi,[i]
        mov     rdx,[j]
        mov     rcx,[k]                 ; i*ncol1+j
        mov     r8,[l]                  ; byte offset used in qword [m1+offset]
        mov     rax,0                   ; no float to print
        call    printf

        mov     rdi,fmtm
        mov     rsi,m1name
        mov     rdx,[i]
        mov     rcx,[j]
        mov     rax,[l]                 ; reload offset, printf may change registers
        movq    xmm0,qword [m1+rax]     ; m1[i][j]
        mov     rax,1                   ; 1 floating point argument
        call    printf

        mov     rax,[j]
        inc     rax
        mov     [j],rax
        cmp     rax,[ncol1]             ; j<ncol1
        jne     loopj2

        mov     rax,[i]
        inc     rax
        mov     [i],rax
        cmp     rax,[nrow1]             ; i<nrow1
        jne     loopi2

        mov     rdi,fmtbl               ; print blank line
        mov     rax,0                   ; no float to print
        call    printf                  ; Call C function

; print matrix m2, using the macro

        mov     qword [i],0             ; i=0
loopi3: mov     qword [j],0             ; j=0
loopj3: mov     rax,[i]
        imul    rax,[ncol2]             ; i*ncol2
        add     rax,[j]                 ; i*ncol2+j   [i][j]
        mov     [k],rax
        imul    rax,[i8]                ; byte address offset
        mov     [l],rax                 ; (i*ncol2+j)*8
        movq    xmm0,qword [m2+rax]     ; x = m2 item, read from m2 (not m1)
        movq    qword [x],xmm0
        prtm    "m2"

        mov     rax,[j]
        inc     rax
        mov     [j],rax
        cmp     rax,[ncol2]             ; j<ncol2
        jne     loopj3

        mov     rax,[i]
        inc     rax
        mov     [i],rax
        cmp     rax,[nrow2]             ; i<nrow2
        jne     loopi3

        mov     rdi,fmtbl               ; print blank line
        mov     rax,0                   ; no float to print
        call    printf                  ; Call C function

        mov     rdi,fmt                 ; final print
        mov     rsi,msg2
        mov     rax,0                   ; no float to print
        call    printf

        pop     rbp                     ; restore stack
        mov     rax,0                   ; normal, no error, return value
        ret                             ; return
    <- previous    index    next ->

Other links

Go to top