1 ; horner_64.asm Horners method of evaluating polynomials 2 ; 3 ; given a polynomial Y = a_n X^n + a_n-1 X^n-1 + ... a_1 X + a_0 4 ; a_n is the coefficient 'a' with subscript n. X^n is X to nth power 5 ; compute y_1 = a_n * X + a_n-1 6 ; compute y_2 = y_1 * X + a_n-2 7 ; compute y_i = y_i-1 * X + a_n-i i=3..n 8 ; thus y_n = Y = value of polynomial 9 ; 10 ; in assembly language: 11 ; load some register with a_n, multiply by X 12 ; add a_n-1, multiply by X, add a_n-2, multiply by X, ... 13 ; finishing with the add a_0 14 ; 15 ; output from execution: 16 ; a 6319 17 ; aa 6319 18 ; af 6.319000e+03 19 20 extern printf 21 section .data 22 global main 23 24 section .data 25 00000000 612020256C640A00 fmta: db "a %ld",10,0 26 00000008 616120256C640A00 fmtaa: db "aa %ld",10,0 27 00000010 61662025650A00 fmtflt: db "af %e",10,0 28 29 section .text 30 00000000 55 main: push rbp ; set up stack 31 32 ; evaluate an integer polynomial, X=7, using a count 33 34 section .data 35 00000017 020000000000000005- a: dq 2,5,-7,22,-9 ; coefficients of polynomial, a_n first 36 00000020 00000000000000F9FF- 37 00000029 FFFFFFFFFFFF160000- 38 00000032 0000000000F7FFFFFF- 39 0000003B FFFFFFFF 40 0000003F 0700000000000000 X: dq 7 ; X = 7 41 ; n=4, 8 bytes per coefficient 42 section .text 43 00000001 488B0425[17000000] mov rax,[a] ; accumulate value here, get coefficient a_n 44 00000009 BF01000000 mov rdi,1 ; subscript initialization 45 0000000E B904000000 mov rcx,4 ; loop iteration count initialization, n 46 00000013 480FAF0425- h3loop: imul rax,[X] ; * X (ignore edx) 47 00000018 [3F000000] 48 0000001C 480304FD[17000000] add rax,[a+8*rdi] ; + a_n-i 49 00000024 48FFC7 inc rdi ; increment subscript 50 00000027 E2EA loop h3loop ; decrement rcx, jump on non zero 51 52 00000029 4889C6 mov rsi, rax ; print rax 53 0000002C 48BF- mov rdi, fmta ; format 54 0000002E [0000000000000000] 55 00000036 B800000000 mov rax, 0 ; no float 56 0000003B E8(00000000) call printf 57 58 59 ; evaluate an integer polynomial, X=7, using a count as index 60 ; optimal organization of data allows a three instruction loop 61 62 section .data 63 00000047 F7FFFFFFFFFFFFFF16- aa: dq -9,22,-7,5,2 ; coefficients of polynomial, a_0 first 64 00000050 00000000000000F9FF- 65 00000059 FFFFFFFFFFFF050000- 66 00000062 000000000002000000- 67 0000006B 00000000 68 0000006F 0400000000000000 n: dq 4 ; n=4, 8 bytes per coefficient 69 section .text 70 00000040 488B0425[67000000] mov rax,[aa+4*8] ; accumulate value here, get coefficient a_n 71 00000048 488B0C25[6F000000] mov rcx,[n] ; loop iteration count initialization, n 72 00000050 480FAF0425- h4loop: imul rax,[X] ; * X (ignore edx) 73 00000055 [3F000000] 74 00000059 480304CD[3F000000] add rax,[aa+8*rcx-8]; + aa_n-i 75 00000061 E2ED loop h4loop ; decrement rcx, jump on non zero 76 77 00000063 4889C6 mov rsi, rax ; print rax 78 00000066 48BF- mov rdi, fmtaa ; format 79 00000068 [0800000000000000] 80 00000070 B800000000 mov rax, 0 ; no float 81 00000075 E8(00000000) call printf 82 83 ; evaluate a double floating polynomial, X=7.0, using a count as index 84 ; optimal organization of data allows a three instruction loop 85 86 section .data 87 00000077 00000000000022C000- af: dq -9.0,22.0,-7.0,5.0,2.0 ; coefficients of polynomial, a_0 first 88 00000080 000000000036400000- 89 00000089 000000001CC0000000- 90 00000092 000000144000000000- 91 0000009B 00000040 92 0000009F 0000000000001C40 XF: dq 7.0 93 000000A7 0000000000000000 Y: dq 0.0 94 000000AF 04000000 N: dd 4 95 96 section .text 97 0000007A 488B0C25[AF000000] mov rcx,[N] ; loop iteration count initialization, n 98 00000082 DD04CD[77000000] fld qword [af+8*rcx]; accumulate value here, get coefficient a_n 99 00000089 DC0C25[9F000000] h5loop: fmul qword [XF] ; * XF 100 00000090 DC04CD[6F000000] fadd qword [af+8*rcx-8] ; + aa_n-i 101 00000097 E2F0 loop h5loop ; decrement rcx, jump on non zero 102 103 00000099 DD1C25[A7000000] fstp qword [Y] ; store Y in order to print Y 104 000000A0 F30F7E0425- movq xmm0, qword [Y] ; well, may just mov reg 105 000000A5 [A7000000] 106 000000A9 48BF- mov rdi, fmtflt ; format 107 000000AB [1000000000000000] 108 000000B3 B801000000 mov rax, 1 ; one float 109 000000B8 E8(00000000) call printf 110 111 000000BD 5D pop rbp ; restore stack 112 000000BE B800000000 mov rax,0 ; normal return 113 000000C3 C3 ret ; return