-- least_square_fit.adb tailorable code, provide your input and setup with Ada.text_io; use Ada.text_io; with Ada.Numerics; use Ada.Numerics; with Ada.Numerics.Long_Elementary_Functions; use Ada.Numerics.Long_Elementary_Functions; with real_arrays; use real_arrays; -- The purpose of this package is to provide a reliable and convenient -- means for fitting existing data by a few coefficients. The companion -- package check_fit provides the means to use the coefficients for -- interpolation and limited extrapolation. -- -- This package implements the least square fit. -- -- The problem is stated as follows : -- Given measured data for values of Y based on values of X1,X2 and X3. e.g. -- -- Y_actual X1 X2 X3 -- -------- ----- ----- ----- -- 32.5 1.0 2.5 3.7 -- 7.2 2.0 2.5 3.6 -- 6.9 3.0 2.7 3.5 -- 22.4 2.2 2.1 3.1 -- 10.4 1.5 2.0 2.6 -- 11.3 1.6 2.0 3.1 -- -- Find a, b and c such that Y_approximate = a * X1 + b * X2 + c * X3 -- and such that the sum of (Y_actual - Y_approximate) squared is minimized. -- -- The method for determining the coefficients a, b and c follows directly -- form the problem definition and mathematical analysis. (See more below) -- -- Y is called the dependent variable and X1 .. Xn the independent variables. -- The procedures below implements a few special cases and the general case. -- The number of independent variables can vary. -- The approximation equation may use powers of the independent variables -- The user may create additional independent variables e.g. X2 = SIN(X1) -- with the restriction that the independent variables are linearly -- independent. e.g. Xi not equal p Xj + q for all i,j,p,q -- -- -- -- The mathematical derivation of the least square fit is as follows : -- -- Given data for the independent variable Y in terms of the dependent -- variables S,T,U and V consider that there exists a function F -- such that Y = F(S,T,U,V) -- The problem is to find coefficients a,b,c and d such that -- Y_approximate = a * S + b * T + c * U + d * V -- and such that the sum of ( Y - Y_approximate ) squared is minimized. -- -- Note: a, b, c, d are scalars. S, T, U, V, Y, Y_approximate are vectors. -- -- To find the minimum of SUM( Y - Y_approximate ) ** 2 -- the derivatives must be taken with respect to a,b,c and d and -- all must equal zero simultaneously. The steps follow : -- -- SUM( Y - Y_approximate ) ** 2 = SUM( Y - a*S - b*T - c*U - d*V ) ** 2 -- -- d/da = -2 * S * SUM( Y - a*S - b*T - c*U - d*V ) -- d/db = -2 * T * SUM( Y - a*S - b*T - c*U - d*V ) -- d/dc = -2 * U * SUM( Y - a*S - b*T - c*U - d*V ) -- d/dd = -2 * V * SUM( Y - a*S - b*T - c*U - d*V ) -- -- Setting each of the above equal to zero and putting constant term on left -- the -2 is factored out, -- the independent variable is moved inside the summation -- -- SUM( a*S*S + b*S*T + c*S*U + d*S*V = S*Y ) -- SUM( a*T*S + b*T*T + c*T*U + d*T*V = T*Y ) -- SUM( a*U*S + b*U*T + c*U*U + d*U*V = U*Y ) -- SUM( a*V*S + b*V*T + c*V*U + d*V*V = V*Y ) -- -- Distributing the SUM inside yields -- -- a * SUM(S*S) + b * SUM(S*T) + c * SUM(S*U) + d * SUM(S*V) = SUM(S*Y) -- a * SUM(T*S) + b * SUM(T*T) + c * SUM(T*U) + d * SUM(T*V) = SUM(T*Y) -- a * SUM(U*S) + b * SUM(U*T) + c * SUM(U*U) + d * SUM(U*V) = SUM(U*Y) -- a * SUM(V*S) + b * SUM(V*T) + c * SUM(V*U) + d * SUM(V*V) = SUM(V*Y) -- -- To find the coefficients a,b,c and d solve the linear system of equations -- -- | SUM(S*S) SUM(S*T) SUM(S*U) SUM(S*V) | | a | | SUM(S*Y) | -- | SUM(T*S) SUM(T*T) SUM(T*U) SUM(T*V) | x | b | = | SUM(T*Y) | -- | SUM(U*S) SUM(U*T) SUM(U*U) SUM(U*V) | | c | | SUM(U*Y) | -- | SUM(V*S) SUM(V*T) SUM(V*U) SUM(V*V) | | d | | SUM(V*Y) | -- -- Some observations : -- S,T,U and V must be linearly independent. -- There must be more data sets (Y, S, T, U, V) than variables. -- The analysis did not depend on the number of independent variables -- A polynomial fit results from the substitutions S=1, T=X, U=X**2, V=X**3 -- The general case for any order polynomial follows, fit_pn. -- Any substitution such as three variables to various powers may be used. procedure least_square_fit is n : integer; A : real_matrix(1..50,1..50); C : real_vector(1..50); Y : real_vector(1..50); rms_err, avg_err, max_err : real; idata : integer := 0; -- data pair index used by data_set have_data : boolean := true; -- returned by data_set package fltio is new float_io(long_float); use fltio; package intio is new integer_io(integer); use intio; procedure data_set(y : out real; -- a y value x : out real; -- an x value data : in out boolean) is -- returns true for data -- starts over if false k : integer := 100; -- data values -- idata static x1 : real := 0.0; dx1 : real := 0.0314; -- approx Pi xx : real; yy : real; begin idata := idata+1; if idata>k then idata := 0; data := false; return; -- ready for check end if; xx := x1 + real(idata) * dx1; yy := exp(xx)*sin(xx); x := xx; y := yy; end data_set; procedure simeq(n: Integer; -- tailored version for this code A : real_matrix; -- (1..,1..) array Y : real_vector; -- (1..) vector X : out real_vector) is B : real_matrix(1..n,1..n+1); -- working matrix row : array(1..n) of integer; -- row interchange indicies hold, i_pivot : integer; -- pivot indicies pivot : real; -- pivot element value abs_pivot : real; -- abs of pivot element norm1 : real := 0.0; -- 1 norm of matrix matrix_data_error : exception; begin for i in 1..n loop for j in 1..n loop B(i,j) := A(i,j); if abs B(i,j) > norm1 then norm1 := abs B(i,j); end if; end loop; B(i,n+1) := Y(i); end loop; for k in 1..n loop row(k) := k; end loop; for k in 1..n loop pivot := B(row(k),k); abs_pivot := abs pivot; i_pivot := k; for i in k..n loop if abs B(row(i),k) > abs_pivot then i_pivot := i; pivot := B(row(i),k); abs_pivot := abs pivot; end if; end loop; if abs_pivot < real'epsilon * norm1 then raise matrix_data_error; end if; hold := row(k); row(k) := row(i_pivot); row(i_pivot) := hold; for j in k+1..n+1 loop B(row(k),j) := B(row(k),j)/B(row(k),k); end loop; for i in 1..n loop if i /= k then for j in k+1..n+1 loop B(row(i),j) := B(row(i),j) - B(row(i),k)*B(row(k),j); end loop; end if; end loop; end loop; for i in 1..n loop X(i) := B(row(i),n+1); end loop; end simeq; procedure fit_pn(n : integer; A : in out real_matrix; Y : in out real_vector; C : in out real_vector) is xx, yy : real; pwr : real_vector(1..2*n); begin for i in 1..n loop for j in 1..n loop A(i,j) := 0.0; end loop; Y(i) := 0.0; end loop; have_data := true; while have_data loop data_set(yy, xx, have_data); if not have_data then exit; end if; pwr(1) := 1.0; for i in 2..2*n loop pwr(i) := pwr(i-1)*xx; end loop; for i in 1..n loop for j in 1..n loop A(i,j) := A(i,j) + pwr(i)*pwr(j); end loop; Y(i) := Y(i) + yy*pwr(i); end loop; end loop; -- on while simeq(n, A, Y, C); for i in 1..n loop put("C("); put(i,4); put(")="); put(C(i),3,4); new_line; end loop; end fit_pn; procedure check_pn(n : integer; C : real_vector; rms_err : out real; avg_err : out real; max_err : out real) is x, y, ya, diff : real; sumsq : real := 0.0; sum : real := 0.0; maxe : real := 0.0; xmin, xmax, ymin, ymax, xbad, ybad : real; k, imax : integer; begin k := 0; have_data := true; while have_data loop data_set(y, x, have_data); if not have_data then exit; end if; if k=0 then xmin := x; xmax := x; ymin := y; ymax := y; imax := 1; xbad := x; ybad := y; end if; if x>xmax then xmax := x; end if; if xymax then ymax := y; end if; if ymaxe then maxe := diff; imax := k; xbad := x; ybad := y; end if; sum := sum + diff; sumsq := sumsq + diff*diff; end loop; -- on while put("check_pn k="); put(k,3); new_line; put("xmin="); put(xmin,3,3); put(", xmax="); put(xmax,3,3); put(", ymin="); put(ymin,3,3); put(", ymax="); put(ymax,3,3); new_line; max_err := maxe; avg_err := sum/real(k); rms_err := sqrt(sumsq/real(k)); put("max="); put(maxe,3,4); put(" at "); put(imax,3); put(", xbad="); put(xbad,3,3); put(", ybad="); put(ybad,3,3); new_line; end check_pn; begin put_line("least_square_fit.adb"); -- sample polynomial least square fit, 3th power n := 3+1; -- need constant term and five powers 1,2,3,4,5 put("fit exp(x)*sin(x) 100 points 0.0 to Pi, "); put(n-1,3); put_line(" degree polynomial"); fit_pn(n, A, Y, C); check_pn(n, C, rms_err, avg_err, max_err); put("rms_err="); put(rms_Err,3,4); put(", avg_err="); put(avg_Err,3,4); put(", max_err="); put(max_Err,3,4); new_line; new_line; n := 4+1; -- need constant term and five powers 1,2,3,4,5 put("fit exp(x)*sin(x) 100 points 0.0 to Pi, "); put(n-1,3); put_line(" degree polynomial"); fit_pn(n, A, Y, C); check_pn(n, C, rms_err, avg_err, max_err); put("rms_err="); put(rms_Err,3,4); put(", avg_err="); put(avg_Err,3,4); put(", max_err="); put(max_Err,3,4); new_line; new_line; n := 5+1; put("fit exp(x)*sin(x) 100 points 0.0 to Pi, "); put(n-1,3); put_line(" degree polynomial"); fit_pn(n, A, Y, C); check_pn(n, C, rms_err, avg_err, max_err); put("rms_err="); put(rms_Err,3,4); put(", avg_err="); put(avg_Err,3,4); put(", max_err="); put(max_Err,3,4); new_line; new_line; n := 6+1; put("fit exp(x)*sin(x) 100 points 0.0 to Pi, "); put(n-1,3); put_line(" degree polynomial"); fit_pn(n, A, Y, C); check_pn(n, C, rms_err, avg_err, max_err); put("rms_err="); put(rms_Err,3,4); put(", avg_err="); put(avg_Err,3,4); put(", max_err="); put(max_Err,3,4); new_line; new_line; n := 7+1; put("fit exp(x)*sin(x) 100 points 0.0 to Pi, "); put(n-1,3); put_line(" degree polynomial"); fit_pn(n, A, Y, C); check_pn(n, C, rms_err, avg_err, max_err); put("rms_err="); put(rms_Err,3,4); put(", avg_err="); put(avg_Err,3,4); put(", max_err="); put(max_Err,3,4); new_line; end Least_Square_Fit;