CMSC 313 Lecture 6,

    <- previous    index    next ->

Lecture 6 Branching and loops

Note that < and > are interpreted by HTML, so source code that is
physically included in this page may show &gt; and &lt; (or lose text)
instead of the symbols.
Be sure to download the source from the link; do not copy it from the HTML.

The basic integer compare instruction is  "cmp"
      for float   compare instruction is  "fcomip st0,st1"
Following this instruction is typically one of:
  JL  label  ; jump on less than  "<"
  JLE label  ; jump on less than or equal "<="
  JG  label  ; jump on greater than ">"
  JGE label  ; jump on greater than or equal ">="
  JE  label  ; jump on equal "=="
  JNE label  ; jump on not equal "!="

After many integer arithmetic instructions
  JZ  label  ; jump on zero
  JNZ label  ; jump on non zero
  JS  label  ; jump on sign flag set   (result negative)
  JNS label  ; jump on sign flag clear (result zero or positive)

Note: Use 'cmp' rather than 'sub' for comparison.
Overflow can occur on subtraction resulting in sign inversion.

if-then-else in assembly language
Convert a "C" 'if' statement to nasm assembly ifint_64.asm
The significant features are:
1) use a compare instruction for the test
2) put a label on the start of the false branch (e.g. false1:)
3) put a label after the end of the 'if' statement (e.g. exit1:)
4) choose a conditional jump that goes to the false part
5) put an unconditional jump to (e.g. exit1:) at the end of the true part

source code  ifint_64.asm

; ifint_64.asm  nasm translation of the 'if' statements in ifint_64.c
;
; /* ifint_64.c an 'if' statement that will be coded for nasm */
; #include <stdio.h>
; int main()
; {
;   long int a=1;
;   long int b=2;
;   long int c=3;
;   long int xyz=4;
;   if(a < b)
;     printf("true a < b \n");
;   else
;     printf("wrong on a < b \n");
;   if(b > c)
;     printf("wrong on b > c \n");
;   else
;     printf("false b > c \n");
;
;   if(4==xyz) goto label1e;
;   printf("failed 4==xyz\n");
;label1e: printf("passed 4==xyz\n");
;
;   if(5>xyz) goto label1g;
;   printf("failed 5>xyz\n");
;label1g: printf("passed 5>xyz\n");
;
;   if(3 < xyz) goto label1l;
;   printf("failed 3 < xyz\n");
;label1l: printf("passed 3<xyz\n");
;
;   return 0;
;}
;
; Lines printed by the assembly below (taken from its format strings):
; true a < b
; false b > c
; passed 4==xyz
; passed 5>xyz
; passed 3<xyz

        global  main                    ; entry point, resolved by the linker
        extern  printf                  ; C library routine used for all output

        section .data                   ; initialized variables and format strings
a:      dq      1
b:      dq      2
c:      dq      3
xyz:    dq      4
fmt1:   db      "true a < b ",10,0
fmt2:   db      "wrong on a < b ",10,0
fmt3:   db      "wrong on b > c ",10,0
fmt4:   db      "false b > c ",10,0
fmt5:   db      "failed 4==xyz ",10,0
fmt6:   db      "passed 4==xyz ",10,0
fmt7:   db      "failed 5>xyz ",10,0
fmt8:   db      "passed 5>xyz ",10,0
fmt9:   db      "failed 3<xyz ",10,0
fmt10:  db      "passed 3<xyz ",10,0

        section .text
main:
        push    rbp                     ; single 8-byte push, undone before ret

        ; ---- if(a < b) ... else ... ----
        mov     rax, [a]
        cmp     rax, [b]                ; flags describe a - b (signed compare)
        jge     false1                  ; a >= b  ->  else part
        mov     rdi, fmt1               ; true part: printf("true a < b \n");
        xor     eax, eax                ; rax = 0
        call    printf
        jmp     exit1                   ; skip the else part
false1:
        mov     rdi, fmt2               ; else part: printf("wrong on a < b \n");
        xor     eax, eax
        call    printf
exit1:

        ; ---- if(b > c) ... else ... ----
        mov     rax, [b]
        cmp     rax, [c]                ; flags describe b - c
        jle     false2                  ; b <= c  ->  else part
        mov     rdi, fmt3               ; true part: printf("wrong on b > c \n");
        xor     eax, eax
        call    printf
        jmp     exit2
false2:
        mov     rdi, fmt4               ; else part: printf("false b > c \n");
        xor     eax, eax
        call    printf
exit2:

        ; ---- if(4==xyz) goto label1e; ----
        mov     rax, 4
        cmp     rax, [xyz]
        je      label1e
        mov     rdi, fmt5               ; only reached when 4 != xyz
        xor     eax, eax
        call    printf
label1e:
        mov     rdi, fmt6               ; printed on both paths, as in the C code
        xor     eax, eax
        call    printf

        ; ---- if(5>xyz) goto label1g; ----
        mov     rax, 5
        cmp     rax, [xyz]
        jg      label1g
        mov     rdi, fmt7
        xor     eax, eax
        call    printf
label1g:
        mov     rdi, fmt8
        xor     eax, eax
        call    printf

        ; ---- if(3 < xyz) goto label1l; ----
        mov     rax, 3
        cmp     rax, [xyz]
        jl      label1l
        mov     rdi, fmt9
        xor     eax, eax
        call    printf
label1l:
        mov     rdi, fmt10
        xor     eax, eax
        call    printf

        pop     rbp                     ; undo the push at entry
        xor     eax, eax                ; return 0;
        ret


output  ifint_64.out
true a < b 
false b > c 
passed 4==xyz 
passed 5>xyz 
passed 3<xyz 

source code  ifflt_64.asm

; ifflt_64.asm  code ifflt_64.c for nasm
; /* ifflt_64.c an 'if' statement that will be coded for nasm */
; #include <stdio.h>
; int main()
; {
;   double a=1.0;
;   double b=2.0;
;   double c=3.0;
;   if(a < b)
;     printf("true a < b \n");
;   else
;     printf("wrong on a < b \n");
;   if(b > c)
;     printf("wrong on b > c \n");
;   else
;     printf("false b > c \n");
;   return 0;
;}
; result of executing both "C" and assembly is:
; true a < b
; false b > c
;
; Floating point compare notes:
;   fcomip compares st0 with st(i), writes the result to ZF, PF and CF in
;   EFLAGS and pops st0.  SF and OF are NOT written, so the conditional
;   jumps that follow must be the "unsigned" family:
;     st0 <  st(i) : CF=1            -> jb  / jae for the opposite
;     st0 >  st(i) : CF=0 and ZF=0   -> ja  / jbe for the opposite
;     unordered (a NaN operand)      : ZF=PF=CF=1 -> jp
;   fcom/fcomp/fcompp only set the x87 status word and leave EFLAGS
;   untouched, so they cannot be followed directly by a conditional jump.

	global	main		; define for linker
        extern	printf		; tell linker we need this C function
        section .data		; Data section, initialized variables
a:	dt 1.0			; 80-bit extended precision values
b:	dt 2.0
c:	dt 3.0
fmt1:   db "true a < b ",10,0
fmt2:   db "wrong on a < b ",10,0
fmt3:   db "wrong on b > c ",10,0
fmt4:   db "false b > c ",10,0

	section .text
main:	push	rbp		; set up stack

	; ---- if(a < b) ... else ... ----
	fld	tword [b]	; b into st0
	fld	tword [a]	; a into st0, pushes b into st1
	fcomip	st0,st1		; compare a to b into EFLAGS, pop a
	fstp	st0		; pop b, x87 stack is empty again (EFLAGS kept)
	jp	false1		; unordered: a < b is false
	jae	false1		; a >= b, choose jump to false part
	; a < b, carry flag is set
        mov	rdi, fmt1	; printf("true a < b \n");
	mov	rax,0		; no xmm registers used by this variadic call
        call    printf
        jmp	exit1		; jump over false part
false1:	;  a < b is false
        mov	rdi, fmt2	; printf("wrong on a < b \n");
	mov	rax,0
        call    printf
exit1:				; finished 'if' statement

	; ---- if(b > c) ... else ... ----
	fld	tword [c]	; c into st0
	fld	tword [b]	; b into st0, pushes c into st1
	fcomip	st0,st1		; compare b to c into EFLAGS, pop b
	fstp	st0		; pop c
	jbe	false2		; b <= c (or unordered, CF=1), jump to false part
	; b > c, carry and zero flags are clear
        mov	rdi, fmt3	; printf("wrong on b > c \n");
	mov	rax,0
        call    printf
        jmp	exit2		; jump over false part
false2:	;  b > c is false
        mov	rdi, fmt4	; printf("false b > c \n");
	mov	rax,0
        call    printf
exit2:				; finished 'if' statement

	pop	rbp		; restore stack
	mov	rax,0		; normal, no error, return value
	ret			; return 0;

output  ifflt_64.out

true a < b 
false b > c 


loop in assembly language
Convert a "C" loop to nasm assembly  loopint_64.asm
The significant features are:
1) "C" long int  is 8-bytes, thus  dd1[1] becomes  qword [dd1+8]
                              dd1[99] becomes  qword [dd1+8*99]

2) "C" long int  is 8-bytes, thus  dd1[i]; i++; becomes  add rdi,8
   since "i" is never stored, the register  rdi  holds "i" (as a byte offset, 8*i)

3) the 'cmp' instruction sets flags that control the jump instruction.
   cmp  rdi,8*99   is like  i<99 in "C"
   jne  loop1      jumps if register  rdi  is not  8*99  (jnz is the same instruction)

; loopint_64.asm  nasm version of the loop in loopint_64.c
;
; /* loopint_64.c a very simple loop that will be coded for nasm */
; #include <stdio.h>
; int main()
; {
;   long int dd1[100]; // 100 could be 3 gigabytes
;   long int i;        // must be long for more than 2 gigabytes
;   dd1[0]=5; /* be sure loop stays 1..98 */
;   dd1[99]=9;
;   for(i=1; i<99; i++) dd1[i]=7;
;   printf("dd1[0]=%ld, dd1[1]=%ld, dd1[98]=%ld, dd1[99]=%ld\n",
;           dd1[0], dd1[1], dd1[98],dd1[99]);
;   return 0;
;}
; execution output is dd1[0]=5, dd1[1]=7, dd1[98]=7, dd1[99]=9

        extern  printf                  ; C library routine used for output

        section .bss
dd1:    resq    100                     ; long int dd1[100], 8 bytes per element
i:      resq    1                       ; never referenced: i lives in rdi

        section .data
fmt:    db "dd1[0]=%ld, dd1[1]=%ld, dd1[98]=%ld, dd1[99]=%ld",10,0

        section .text
        global  main
main:
        push    rbp                     ; undone before ret

        ; the two sentinels on either side of the range the loop fills
        mov     qword [dd1 + 0*8], 5    ; dd1[0]=5;   immediate stored to memory
        mov     qword [dd1 + 99*8], 9   ; dd1[99]=9;

        ; for(i=1; i<99; i++) dd1[i]=7;   rdi holds the byte offset 8*i
        mov     rdi, 1*8                ; i=1
loop1:
        mov     qword [dd1 + rdi], 7    ; dd1[i]=7;
        add     rdi, 8                  ; i++  (one element is 8 bytes)
        cmp     rdi, 99*8
        jne     loop1                   ; repeat until the offset reaches 8*99

        ; printf(fmt, dd1[0], dd1[1], dd1[98], dd1[99]);
        mov     r8,  qword [dd1 + 99*8] ; fourth value
        mov     rcx, qword [dd1 + 98*8] ; third value
        mov     rdx, qword [dd1 + 1*8]  ; second value
        mov     rsi, qword [dd1 + 0*8]  ; first value
        mov     rdi, fmt                ; format string
        xor     eax, eax                ; rax = 0, no xmm registers used
        call    printf

        pop     rbp
        xor     eax, eax                ; return 0;
        ret

output  loopint_64.out
dd1[0]=5, dd1[1]=7, dd1[98]=7, dd1[99]=9

	
logic operations in assembly language
Previously, integer arithmetic in "C" was converted to
NASM assembly language. The following is very similar
(cut and paste) of intarith_64.asm to intlogic_64.asm that
shows the "C" operators "&" and, "|" or, "^" xor, "~" not.

intlogic_64.asm

; intlogic_64.asm    show some simple C code and corresponding nasm code
;                    the nasm code is one sample, not unique
;
; compile:	nasm -f elf64 -l intlogic_64.lst  intlogic_64.asm
; link:		gcc -m64 -o intlogic_64  intlogic_64.o
; run:		./intlogic_64 > intlogic_64.out
;
; the output from running intlogic_64.asm and intlogic.c is
; c=5  , a=3, b=5, c=15
; c=a&b, a=3, b=5, c=1
; c=a|b, a=3, b=5, c=7
; c=a^b, a=3, b=5, c=6
; c=~a , a=3, b=5, c=-4
;
;The file  intlogic.c  is:
;  #include <stdio.h>
;  int main()
;  { 
;    long int a=3, b=5, c;
;
;    c=15;
;    printf("%s, a=%d, b=%d, c=%d\n","c=5  ", a, b, c);
;    c=a&b; /* and */
;    printf("%s, a=%d, b=%d, c=%d\n","c=a&b", a, b, c);
;    c=a|b; /* or */
;    printf("%s, a=%d, b=%d, c=%d\n","c=a|b", a, b, c);
;    c=a^b; /* xor */
;    printf("%s, a=%d, b=%d, c=%d\n","c=a^b", a, b, c);
;    c=~a;  /* not */
;    printf("%s, a=%d, b=%d, c=%d\n","c=~a", a, b, c);
;    return 0;
; }

        extern printf		; the C function to be called

; pabc "text"
;   Prints  text, a=<a>, b=<b>, c=<c>  using fmt and the variables a, b, c.
;   The string is stored in .data under a macro-local label (%%str), so the
;   macro may be invoked any number of times, with or without a new
;   non-local label between invocations.
;   Writes rdi, rsi, rdx, rcx, r8 and rax, then calls printf.
%macro	pabc 1			; a "simple" print macro
	section .data
%%str:	db	%1,0		; %1 is first actual in macro call
	section .text
        mov	rdi, fmt        ; address of format string
	mov	rsi, %%str 	; users string
	mov	rdx, [a]	; long int a
	mov	rcx, [b]	; long int b 
	mov	r8, [c]		; long int c
	mov     rax, 0	        ; no xmm used
        call    printf          ; Call C function
%endmacro
	
	section .data  		; preset constants, writeable
a:	dq	3		; 64-bit variable a initialized to 3
b:	dq	5		; 64-bit variable b initialized to 5
fmt:    db "%s, a=%ld, b=%ld, c=%ld",10,0 ; format string for printf
	
	section .bss 		; uninitialized space
c:	resq	1		; reserve a 64-bit word
	
	section .text		; instructions, code segment
	global	 main		; for gcc standard linking
main:				; label
	push	rbp		; set up stack
	
lit5:				; c=15;  (the printed label text is "c=5  ")
	mov	rax,15	 	; 15 is a literal constant
	mov	[c],rax		; store into c
	pabc	"c=5  "		; invoke the print macro
	
andb:				; c=a&b;
	mov	rax,[a]	 	; load a
	and	rax,[b]		; and with b
	mov	[c],rax		; store into c
	pabc	"c=a&b"		; invoke the print macro
	
orw:				; c=a|b;
	mov	rax,[a]	 	; load a
	or	rax,[b]		; bitwise or with b
	mov	[c],rax		; store into c
	pabc	"c=a|b"		; invoke the print macro
	
xorw:				; c=a^b;
	mov	rax,[a]	 	; load a
	xor	rax,[b] 	; exclusive or with b
	mov	[c],rax		; store result in c
	pabc	"c=a^b"		; invoke the print macro
	
notw:				; c=~a;
	mov	rax,[a]	 	; load a
	not	rax	 	; not, ones complement of every bit
	mov	[c],rax		; store result into c
	pabc	"c=~a "		; invoke the print macro

	pop	rbp		; restore stack
	mov     rax,0           ; exit code, 0=normal
	ret			; main returns to operating system

output  intlogic_64.out

c=5  , a=3, b=5, c=15
c=a&b, a=3, b=5, c=1
c=a|b, a=3, b=5, c=7
c=a^b, a=3, b=5, c=6
c=~a , a=3, b=5, c=-4

note: "not" inverts every bit (ones complement); read as a twos complement
      number, ~3 is -4  (in general ~a = -a - 1)

loops in assembly language
One significant use of loops is to evaluate polynomials and
convert numbers from one base to another.
(Yes, this is related to project 1 for CMSC 313)

The following program has three loops.

Loop3 (h3loop) uses Horners method to evaluate a polynomial,
       using 'rdi' as an index, 'rcx' and 'loop' to do the loop.
       a_n (here a_4) is first in the array, n=4.

Loop4 (h4loop) uses Horners method, with data order optimized,
      using 'rcx' as both index and loop counter, to get a
      three instruction loop.
      a_0 is first in the array, n=4.

Loop5 (h5loop) uses Horners method to evaluate a polynomial
      using double precision floating point. Note 8 byte
      increment and quad word to xmm0, to printf.


Horners method to evaluate polynomials in assembly language
Study horner_64.asm to understand
the NASM coding of the loops.

; horner_64.asm  Horners method of evaluating polynomials
;
; given a polynomial  Y = a_n X^n + a_n-1 X^n-1 + ... a_1 X + a_0
; a_n is the coefficient 'a' with subscript n. X^n is X to nth power
; compute y_1 = a_n * X + a_n-1
; compute y_2 = y_1 * X + a_n-2
; compute y_i = y_i-1 * X + a_n-i   i=3..n
; thus    y_n = Y = value of polynomial 
;
; in assembly language:
;   load some register with a_n, multiply by X
;   add a_n-1, multiply by X, add a_n-2, multiply by X, ...
;   finishing with the add  a_0
;
; output from execution:
; a  6319
; aa 6319
; af 6.319000e+03
;
; Data for each loop is declared next to the loop that uses it by
; switching between section .data and section .text.

	extern	printf
	global	main

	section	.data
fmta:	db	"a  %ld",10,0
fmtaa:	db	"aa %ld",10,0
fmtflt:	db	"af %e",10,0

	section	.text
main:	push	rbp		; set up stack

; evaluate an integer polynomial, X=7, using a count

	section	.data
a:	dq	2,5,-7,22,-9	; coefficients of polynomial, a_n first
X:	dq	7		; X = 7
				; n=4, 8 bytes per coefficient
	section	.text
	mov	rax,[a]		; accumulate value here, get coefficient a_n
	mov	rdi,1		; subscript initialization
	mov	rcx,4		; loop iteration count initialization, n
h3loop:	imul	rax,[X]		; * X     (two operand imul, rdx not used)
	add	rax,[a+8*rdi]	; + a_n-i
	inc	rdi		; increment subscript
	loop	h3loop		; decrement rcx, jump on non zero

	mov	rsi, rax	; print rax
	mov	rdi, fmta	; format
	mov	rax, 0		; no float
	call	printf


; evaluate an integer polynomial, X=7, using a count as index
; optimal organization of data allows a three instruction loop
	
	section	.data
aa:	dq	-9,22,-7,5,2	; coefficients of polynomial, a_0 first
n:	dq	4		; n=4, 8 bytes per coefficient
	section	.text
	mov	rax,[aa+4*8]	; accumulate value here, get coefficient a_n
	mov	rcx,[n]		; loop iteration count initialization, n
h4loop:	imul	rax,[X]		; * X
	add	rax,[aa+8*rcx-8]; + aa_n-i  (rcx runs n..1, so index rcx-1)
	loop	h4loop		; decrement rcx, jump on non zero

	mov	rsi, rax	; print rax
	mov	rdi, fmtaa	; format
	mov	rax, 0		; no float
	call	printf

; evaluate a double floating polynomial, X=7.0, using a count as index
; optimal organization of data allows a three instruction loop
	
	section	.data
af:	dq	-9.0,22.0,-7.0,5.0,2.0	; coefficients of polynomial, a_0 first
XF:	dq	7.0
Y:	dq	0.0
N:	dq	4		; 64-bit, because it is loaded into rcx below

	section	.text
	mov	rcx,[N]		; loop iteration count initialization, n
	fld	qword [af+8*rcx]; accumulate value in st0, get coefficient a_n
h5loop:	fmul	qword [XF]	; * XF
	fadd	qword [af+8*rcx-8] ; + af_n-i
	loop	h5loop		; decrement rcx, jump on non zero

	fstp	qword [Y]	; store Y (and pop st0) in order to print Y
	movq	xmm0, qword [Y]	; printf takes the double in xmm0
	mov	rdi, fmtflt	; format
	mov	rax, 1		; one float
	call	printf

	pop	rbp		; restore stack
	mov	rax,0		; normal return
	ret			; return

output  horner_64.out

a  6319
aa 6319
af 6.319000e+03


A "C" version with same data, slightly different code sequence.

// horner_64.c long integer and double Horners method of evaluating polynomials
//             everything 64-bit
// given a polynomial  Y = a_n X^n + a_n-1 X^n-1 + ... a_1 X + a_0
// a_n is the coefficient 'a' with subscript n. X^n is X to nth power
// compute y_1 = a_n * X + a_n-1
// compute y_2 = y_1 * X + a_n-2
// compute y_i = y_i-1 * X + a_n-i   i=3..n
// thus    y_n = Y = value of polynomial 

 #include <stdio.h>
/* Evaluates the same degree-4 polynomial three times with Horners method
 * and prints each result: once from an integer array stored a_n first,
 * once from an integer array stored a_0 first, and once in double
 * precision from an array stored a_0 first.  Always returns 0.
 * argc and argv are not used.
 */
int main(int argc, char *argv[])
{
  long int a[]  = {2, 5, -7, 22, -9}; // a_n first
  long int aa[] = {-9, 22, -7, 5, 2}; // aa_0 first
  double af[]   = {-9.0, 22.0, -7.0, 5.0, 2.0}; // af_0 first
  long int n    = 4;
  long int X, Y;
  double XF, YF; 
  long int i;

  // evaluate an integer polynomial a, X=7, using a_n first, count n
  X = 7;
  Y = a[0]*X + a[1];
  for(i=2; i<=n; i++) Y = Y*X + a[i];
  printf("a  %ld\n", Y);

  // evaluate an integer polynomial aa , X=7, using a_0 first, count n
  X = 7;
  Y = aa[n]*X + aa[n-1];
  for(i=n-2; i>=0; i--) Y = Y*X + aa[i];
  printf("aa %ld\n", Y);

  // evaluate a double floating polynomial, X=7.0, using af_0 first, n
  XF = 7.0;
  YF = af[n]*XF + af[n-1];  // use the double XF here, not the integer X
  for(i=n-2; i>=0; i--) YF = YF*XF + af[i];
  printf("af %e\n", YF);

  return 0;
}

Same output:
a  6319
aa 6319
af 6.319000e+03



The first matrix program, to learn indexing, is print matrix
Study prtmat.asm to understand
the NASM indexing of two matrices, m1 and m2
Output prtmat.out see output
prtmat.asm runnung
using macro to print
m1[0][0]=1.100000e+00
m1[0][1]=1.200000e+00
m1[1][0]=1.300000e+00
m1[1][1]=2.100000e+00
m1[2][0]=2.200000e+00
m1[2][1]=2.300000e+00
 
development with debug print
i=0, j=0, k=0, l=0
m1[0][0]=1.100000e+00
i=0, j=1, k=1, l=8
m1[0][1]=1.200000e+00
i=1, j=0, k=2, l=16
m1[1][0]=1.300000e+00
i=1, j=1, k=3, l=24
m1[1][1]=2.100000e+00
i=2, j=0, k=4, l=32
m1[2][0]=2.200000e+00
i=2, j=1, k=5, l=40
m1[2][1]=2.300000e+00
 
m2[0][0]=1.100000e+00
m2[0][1]=1.200000e+00
m2[0][2]=1.300000e+00
m2[1][0]=2.100000e+00
m2[1][1]=2.200000e+00
m2[1][2]=2.300000e+00
 
prtmat.asm finished

; prtmat.asm  print the elements of a matrix, to learn indexing
;
; compile  nasm -f elf64 -l prtmat.lst  prtmat.asm
; link     gcc -m64 -o prtmat  prtmat.o
; run      ./prtmat
;
; Element [i][j] is addressed as  base + (i*ncol + j)*8 .
; i, j, k (= i*ncol+j) and l (= k*8) are kept in memory and reloaded
; after every call, so only volatile scratch registers (rax, r10) are
; needed and no callee-saved register is modified.

        extern printf		; the C function to be called

; prtm "name"
;   Prints  name[i][j]=x  using the variables i, j and x.
;   The strings use macro-local labels (%%fmt, %%str) so the macro can be
;   invoked anywhere, any number of times.
;   Writes rdi, rsi, rdx, rcx, xmm0 and rax, then calls printf.
%macro  prtm 1			; print macro, arg1 is string, name of matrix
	section .data           ; for inside macro
%%fmt:	db	"%s[%d][%d]=%e", 10, 0
%%str:	db      %1,0		; %1 is macro call first actual parameter
	
	section .text
	mov     rdi, %%fmt	; address of format string
	mov     rsi, %%str	; string passed to macro
	mov	rdx, [i]
	mov	rcx, [j]
	movq    xmm0, qword [x] ; first floating point in fmt
	mov     rax, 1		; 1 floating point argument to printf
	call    printf		; Call C function
%endmacro
	
	section .data		; initialized
msg:	db "prtmat.asm runnung", 0
msg2:	db "prtmat.asm finished", 0
msg3:	db "using macro to print", 0
msg4:	db "development with debug print", 0
fmt:    db "%s", 10, 0
fmtbl:	db " ", 10, 0			; for blank line
m1:	dq 1.1, 1.2, 1.3, 2.1, 2.2, 2.3 ; six values, indexed below with
					; ncol1 columns and nrow1 rows
m1name:	db "m1", 0
ncol1:	dq 2
nrow1:	dq 3
m2:	dq 1.1, 2.1, 3.1, 1.2, 2.2, 3.2 ; column major 3 by 2 matrix
m2name:	db "m2", 0
ncol2:	dq 3
nrow2:	dq 2
i8:	dq 8				; 8 bytes in qword
fmtm:	db "%s[%d][%d]=%e", 10, 0
fijkl:	db "i=%d, j=%d, k=%d, l=%d", 10, 0

	section .bss
i:	resq 1			; reserve space, 1 is one quad word
j:	resq 1
k:	resq 1
l:	resq 1
lm:	resq 1			; address of matrix item (not referenced below)
x:	resq 1			; float type or any type
	
        section .text           ; Code section.
        global main
main:
        push    rbp		; set up stack frame, must be aligned
	
	mov	rdi,fmt         ; address of format, required register rdi
	mov	rsi,msg         ; address of data
	mov	rax,0		; no float to print
        call    printf		; Call C function
	
	mov	rdi,fmt         ; address of format, required register rdi
	mov	rsi,msg3
	mov	rax,0		; no float to print
        call    printf		; Call C function

; ---- first pass: print m1 with the macro ----
	mov	qword [i],0	; i=0
loopi:
	mov	qword [j],0	; j=0
loopj:	
	mov	rax,[i]
	mov	r10,[j]
	imul	rax,[ncol1] 	; i*ncol1
	add	rax,r10		; i*ncol1+j  [i][j]
	mov	[k],rax
	imul	rax,[i8]	; byte address offset
	mov	[l],rax		; (i*ncol1+j)*8

	mov	r10,[l]
	movq	xmm0,qword[m1+r10]
        movq	qword[x],xmm0	; the macro prints x
	prtm "m1"

	mov	r10,[j]		; reloaded: nothing is kept in registers over a call
	inc	r10
	mov	[j],r10
	cmp 	r10,[ncol1]	; j<ncol1
	jne	loopj

	mov	rax,[i]
	inc	rax
	mov	[i],rax
	cmp	rax,[nrow1]	; i<nrow1
	jne	loopi

	mov	rdi,fmtbl       ; print blank line
	mov	rax,0		; no float to print
        call    printf		; Call C function
	
	mov	rdi,fmt         ; address of format, required register rdi
	mov	rsi,msg4        ; with debug
	mov	rax,0		; no float to print
        call    printf		; Call C function

; ---- second pass: print m1 again, with a debug line before each element ----
	mov	qword [i],0	; i=0
loopi2:
	mov	qword [j],0	; j=0
loopj2:	
	mov	rax,[i]
	mov	r10,[j]
	imul	rax,[ncol1] 	; i*ncol1
	add	rax,r10		; i*ncol1+j  [i][j]
	mov	[k],rax
	imul	rax,[i8]	; byte address offset
	mov	[l],rax		; (i*ncol1+j)*8
	
	mov	rdi,fijkl	; debug print needed to develop indexing
	mov	rsi,[i]
	mov	rdx,[j]
	mov	rcx,[k]		; i*ncol1+j
	mov	r8,[l]		; byte offset used in qword[m1+offset]
	mov	rax,0
	call	printf

	mov	rdi,fmtm
	mov	rsi,m1name
	mov	rdx,[i]
	mov	rcx,[j]
	mov	r10,[l]
	movq	xmm0,qword[m1+r10]
	mov	rax,1
	call	printf

	mov	r10,[j]
	inc	r10
	mov	[j],r10
	cmp 	r10,[ncol1]	; j<ncol1
	jne	loopj2

	mov	rax,[i]
	inc	rax
	mov	[i],rax
	cmp	rax,[nrow1]	; i<nrow1
	jne	loopi2

	mov	rdi,fmtbl       ; print blank line
	mov	rax,0		; no float to print
        call    printf		; Call C function

; ---- third pass: printed under the name "m2", shaped by ncol2 and nrow2 ----
	mov	qword [i],0	; i=0
loopi3:
	mov	qword [j],0	; j=0
loopj3:	
	mov	rax,[i]
	mov	r10,[j]
	imul	rax,[ncol2] 	; i*ncol2
	add	rax,r10		; i*ncol2+j  [i][j]
	mov	[k],rax
	imul	rax,[i8]	; byte address offset
	mov	[l],rax		; (i*ncol2+j)*8

	mov	r10,[l]
	; NOTE(review): this load reads from m1, not m2, although the element
	; is printed as "m2".  The output recorded for this program matches
	; the m1 data, so the behavior is kept; confirm whether m2 was intended.
	movq	xmm0,qword[m1+r10]
        movq	qword[x],xmm0
	prtm "m2"

	mov	r10,[j]
	inc	r10
	mov	[j],r10
	cmp 	r10,[ncol2]	; j<ncol2
	jne	loopj3

	mov	rax,[i]
	inc	rax
	mov	[i],rax
	cmp	rax,[nrow2]	; i<nrow2
	jne	loopi3


	mov	rdi,fmtbl       ; print blank line
	mov	rax,0		; no float to print
        call    printf		; Call C function

	mov	rdi,fmt		; final print
	mov	rsi,msg2
	mov	rax,0
	call	printf
        pop     rbp		; restore stack 
	mov	rax,0		; normal, no error, return value
	ret			; return

    <- previous    index    next ->

Lecture 6 Branching and loops

if-then-else in assembly language

loop in assembly language

logic operations in assembly language

loops in assembly language

Horners method to evaluate polynomials in assembly language

The first matrix program, to learn indexing, is print matrix

Other links

Go to top