-- least_square_fit2d.adb  tailorable code, provide your input and setup

-- The purpose of this package is to provide a reliable and convenient
-- means for fitting existing data by a few coefficients. The companion
-- package check_fit provides the means to use the coefficients for
-- interpolation and limited extrapolation.
--
-- This package implements the least square fit.
--
-- The problem is stated as follows :
--   Given measured data for values of Y based on values of X1,X2 and X3. e.g.
--
--    Y_actual         X1      X2     X3
--    --------       -----   -----  -----
--     32.5           1.0     2.5    3.7
--      7.2           2.0     2.5    3.6
--      6.9           3.0     2.7    3.5
--     22.4           2.2     2.1    3.1
--     10.4           1.5     2.0    2.6
--     11.3           1.6     2.0    3.1
--
--  Find A,B and C such that   Y_approximate =  A * X1 + B * X2 + C * X3
--  and such that the sum of(Y_actual - Y_approximate) squared is minimized.
--
-- The method for determining the coefficients A,B and C follows directly
-- form the problem definition and mathematical analysis.(See more below)
--
-- Y is called the dependent variable and X1 .. Xn the independent variables.
-- The procedures below implements a few special cases and the general case.
--    The number of independent variables can vary.
--    The approximation equation may use powers of the independent variables
--    The user may create additional independent variables e.g. X2 = SIN(X1)
--    with the restriction that the independent variables are linearly
--    independent.  e.g.  Xi not equal  p Xj + q  for all i,j,p,q
--
--
--
-- The mathematical derivation of the least square fit is as follows :
--
-- Given data for the independent variable Y in terms of the dependent
-- variables S,T,U and V  consider that there exists a function F
-- such that     Y = F(S,T,U,V)
-- The problem is to find coefficients A,B,C and D such that
--            Y_approximate = A * S + B * T + C * U + D * V
-- and such that the sum of( Y - Y_approximate ) squared is minimized.
--
-- To find the minimum of  SUM( Y - Y_approximate ) ** 2
-- the derivatives must be taken with respect to S,T,U and V and
-- all must equal zero simultaneously. The steps follow :
--
--  SUM( Y - Y_approximate ) ** 2 = SUM( Y - A*S - B*T - C*U - D*V ) ** 2
--
-- d/dS =  -2 * S * SUM( Y - A*S - B*T - C*U - D*V )
-- d/dT =  -2 * T * SUM( Y - A*S - B*T - C*U - D*V )
-- d/dU =  -2 * U * SUM( Y - A*S - B*T - C*U - D*V )
-- d/dV =  -2 * V * SUM( Y - A*S - B*T - C*U - D*V )
--
-- Setting each of the above equal to zero and putting constant term on left
--    the -2 is factored out,
--    the independent variable is moved inside the summation
--
--  SUM( A*S*S + B*S*T + C*S*U + D*S*V = S*Y )
--  SUM( A*T*S + B*T*T + C*T*U + D*T*V = T*Y )
--  SUM( A*U*S + B*U*T + C*U*U + D*U*V = U*Y )
--  SUM( A*V*S + B*V*T + C*V*U + D*V*V = V*Y )
--
-- Distributing the SUM inside yields
--
--  A * SUM(S*S) + B * SUM(S*T) + C * SUM(S*U) + D * SUM(S*V) = SUM(S*Y)
--  A * SUM(T*S) + B * SUM(T*T) + C * SUM(T*U) + D * SUM(T*V) = SUM(T*Y)
--  A * SUM(U*S) + B * SUM(U*T) + C * SUM(U*U) + D * SUM(U*V) = SUM(U*Y)
--  A * SUM(V*S) + B * SUM(V*T) + C * SUM(V*U) + D * SUM(V*V) = SUM(V*Y)
--
-- To find the coefficients A,B,C and D solve the linear system of equations
--
--    | SUM(S*S)  SUM(S*T)  SUM(S*U)  SUM(S*V) |   | A |   | SUM(S*Y) |
--    | SUM(T*S)  SUM(T*T)  SUM(T*U)  SUM(T*V) | x | B | = | SUM(T*Y) |
--    | SUM(U*S)  SUM(U*T)  SUM(U*U)  SUM(U*V) |   | C |   | SUM(U*Y) |
--    | SUM(V*S)  SUM(V*T)  SUM(V*U)  SUM(V*V) |   | D |   | SUM(V*Y) |
--
-- Some observations :
--     S,T,U and V must be linearly independent.
--     There must be more data sets(Y, S, T, U, V) than variables.
--     The analysis did not depend on the number of independent variables
--     A polynomial fit results from the substitutions S=1, T=X, U=X**2, V=X**3
--     The general case for any order 2d fit follows, fit_2d.
--     Any substitution such as three variables to various powers may be used.
--     Here S=1, T=x, U=y, V=x*x, Y=y*y etc

with Ada.text_io;
use  Ada.text_io;
with Ada.Numerics;
use  Ada.Numerics;
with Ada.Numerics.Long_Elementary_Functions;
use  Ada.Numerics.Long_Elementary_Functions;
with real_arrays;
use  real_arrays;

procedure least_square_fit2d is
  n : integer;
  A : real_matrix(1..100,1..100);
  C : real_vector(1..100);
  Y : real_vector(1..100);
  type integer_vector is array(integer range <>) of integer;
  Apwr : integer_vector(1..100); -- 1..nnn, contains 0..n
  Bpwr : integer_vector(1..100);
  nnn : Integer;

  rms_err, avg_err, max_err : real;
  idata : integer := 0; -- data pair index used by data_set2d
  have_data : boolean := true; -- returned by data_set2d

  package fltio is new float_io(long_float);
  use fltio;
  package intio is new integer_io(integer);
  use intio;

  procedure data_set2d(z : out real;             -- a   z  value
                       y : out real;             -- a   y  value
                       x : out real;             -- an  x  value
                       data : in out boolean) is -- returns true for data
                                                 -- starts over if false
                       -- or read data z = f(x,y)
    k : integer := 25; -- data values
    -- idata  static
    hx : real := 0.5;
    hy : real := 0.2;
    xx : real;
    yy : real;
    zz : real;
  begin
    if idata>=k then
      idata := 0;
      data := false;
      return;     -- ready for check
    end if;
    xx := real(idata mod 5)*hx;
    yy := real(idata/5)*hy;
    yy := yy*(1.0+yy);
    zz := 1.0+2.0*xx+3.0*yy+4.0*xx*xx+5.0*xx*yy+6.0*yy*yy+7.0*xx*xx*yy;
    z := zz;
    y := yy;
    x := xx;
    idata := idata+1;
  end data_set2d;

  procedure gen_2d_powers(n : integer) is    -- highest sum of powers
                          -- nnn, number of terms generated
                          -- Apwr(1..nnn) powers of a (or x) generated
                          -- Bpwr(1..nnn) powers of b (or y) generated
    i, j : integer;
    nn : integer := 1; -- power being generated
  begin
    nnn := 1;
    Apwr(nnn) := 0;
    Bpwr(nnn) := 0;
    put_line("terms used to fit");
    put_line("x^0 y^0");
    i := nn;
    j := 0;
    while nn<=n loop
      nnn := nnn+1;
      Apwr(nnn) := i;
      Bpwr(nnn) := j;
      put_line("x^"&integer'image(i)&" y^"&integer'image(j));
      if i=0 and j=nn then
        nn := nn+1;
        i := nn;
        j := 0;
      else
        i := i-1;
        j := j+1;
      end if;
    end loop; -- while
  end gen_2d_powers;

  procedure simeq(n: Integer; -- tailored version for this code
                  A : real_matrix; -- (1..,1..) array
                  Y : real_vector; -- (1..) vector
                  X : out real_vector) is
    B : real_matrix(1..n,1..n+1);  -- working matrix
    row : array(1..n) of integer;  -- row interchange indicies
    hold, i_pivot : integer;       -- pivot indicies
    pivot : real;                  -- pivot element value
    abs_pivot : real;              -- abs of pivot element
    norm1 : real := 0.0;           -- 1 norm of matrix
    matrix_data_error : exception;
  begin
    for i in 1..n loop
      for j in 1..n loop
        B(i,j) := A(i,j);
        if abs B(i,j) > norm1 then
          norm1 := abs B(i,j);
        end if;
      end loop;
      B(i,n+1) := Y(i);
    end loop;
    for k in 1..n loop
      row(k) := k;
    end loop;
    for k in 1..n loop
      pivot := B(row(k),k);
      abs_pivot := abs pivot;
      i_pivot := k;
      for i in k..n loop
        if abs B(row(i),k) > abs_pivot then
          i_pivot := i;
          pivot := B(row(i),k);
          abs_pivot := abs pivot;
        end if;
      end loop;
      if abs_pivot < real'epsilon * norm1 then
        raise matrix_data_error;
      end if;
      hold := row(k);
      row(k) := row(i_pivot);
      row(i_pivot) := hold;
      for j in k+1..n+1 loop
        B(row(k),j) := B(row(k),j)/B(row(k),k);
      end loop;
      for i in 1..n loop
        if i /= k then
          for j in k+1..n+1 loop
            B(row(i),j) := B(row(i),j) - B(row(i),k)*B(row(k),j);
          end loop;
        end if;
      end loop;
    end loop;
    for i in 1..n loop
      X(i) := B(row(i),n+1);
    end loop;
  end simeq;

  procedure fit_2d(n : integer; A : in out real_matrix;
                                Y : in out real_vector;
                                C : in out real_vector) is
    xx, yy, zz : real;
    termi, termj : real;
    Av : real_vector(0..n);
    Bv : real_vector(0..n);
  begin
    gen_2d_powers(n); -- sets nnn and Apwr, Bpwr
    for i in 1..nnn loop
      for j in 1..nnn loop
        A(i,j) := 0.0;
      end loop;
      Y(i) := 0.0;
    end loop;
    have_data := true;
    while have_data loop
      data_set2d(zz, yy, xx, have_data);
      if not have_data then exit; end if;
      Av(0) := 1.0;
      Bv(0) := 1.0;
      for i in 1..n loop
        Av(i) := Av(i-1)*xx;
        Bv(i) := Bv(i-1)*yy;
      end loop;
      for i in 1..nnn loop
        termi := Av(Apwr(i))*Bv(Bpwr(i));
        for j in 1..nnn loop
          termj := Av(Apwr(j))*Bv(Bpwr(j));
          A(i,j) := A(i,j) + termi*termj;
        end loop;
        Y(i) := Y(i) + zz*termi;
      end loop;
    end loop; -- on while
    simeq(nnn, A, Y, C);
    for i in 1..nnn loop
      put("C(");
      put(i,3);
      put(")=");
      put(C(i),3,4);
      new_line;
    end loop;
  end fit_2d;

  procedure check_2d(n : integer;
                     C : real_vector;
                     rms_err : out real;
                     avg_err : out real;
                     max_err : out real) is
    Av : real_vector(0..n);
    Bv : real_vector(0..n);
    x, y, z, za, diff : real;
    sumsq : real := 0.0;
    sum : real := 0.0;
    maxe : real := 0.0;
    xmin, xmax, ymin, ymax, zmin, zmax, xbad, ybad, zbad : real;
    k, imax : integer;
  begin
    k := 0;
    have_data := true;
    while have_data loop
      data_set2d(z, y, x, have_data);
      if not have_data then exit; end if;
      if k=0 then
        xmin := x;
        xmax := x;
        ymin := y;
        ymax := y;
        zmin := z;
        zmax := z;
        imax := 1;
        xbad := x;
        ybad := y;
        zbad := z;
      end if;
      if x>xmax then xmax := x; end if;
      if x<xmin then xmin := x; end if;
      if y>ymax then ymax := y; end if;
      if y<ymin then ymin := y; end if;
      if z>zmax then zmax := z; end if;
      if z<zmin then zmin := z; end if;
      k := k + 1;
      za := 0.0;
      Av(0) := 1.0;
      Bv(0) := 1.0;
      for i in 1..n loop
        Av(i) := Av(i-1)*x;
        Bv(i) := Bv(i-1)*y;
      end loop;
      for i in 1..nnn loop
        za := za + C(I)*Av(Apwr(i))*Bv(Bpwr(i));
      end loop;
      diff := abs(z-za);
      if diff>maxe then
        maxe := diff;
        imax := k;
        xbad := x;
        ybad := y;
        zbad := z;
      end if;
      sum := sum + diff;
      sumsq := sumsq + diff*diff;
    end loop; -- on while
    put("check_2d k=");
    put(k,3);
    put(", xmin=");
    put(xmin,3,3);
    put(", xmax=");
    put(xmax,3,3);
    new_line;
    put("ymin=");
    put(ymin,3,3);
    put(", ymax=");
    put(ymax,3,3);
    put(", zmin=");
    put(zmin,3,3);
    put(", zmax=");
    put(zmax,3,3);
    new_line;
    max_err := maxe;
    avg_err := sum/real(k);
    rms_err := sqrt(sumsq/real(k));
    put("max=");
    put(maxe,2,4);
    put(" at ");
    put(imax,3);
    put(", xbad=");
    put(xbad,2,3);
    put(", ybad=");
    put(ybad,2,3);
    put(", zbad=");
    put(zbad,2,3);
    new_line;
  end check_2d;

begin
  put_line("least_square_fit2d.adb");

  -- sample 2d least square fit, first power
  n := 1;
  put("fit 1+2x+3y+4x^2+5xy+6y^2+7x^2y, to ");
  put(n,2);
  put_line(" degree polynomial");
  fit_2d(n, A, Y, C);
  check_2d(n, C, rms_err, avg_err, max_err);
  put("rms_err=");
  put(rms_Err,3,4);
  put(", avg_err=");
  put(avg_Err,3,4);
  put(", max_err=");
  put(max_Err,3,4);
  new_line;
  new_line;

  n := 2; -- need constant term and five powers 1,2,3,4,5
  put("fit 1+2x+3y+4x^2+5xy+6y^2+7x^2y, to ");
  put(n,2);
  put_line(" degree polynomial");
  fit_2d(n, A, Y, C);
  check_2d(n, C, rms_err, avg_err, max_err);
  put("rms_err=");
  put(rms_Err,3,4);
  put(", avg_err=");
  put(avg_Err,3,4);
  put(", max_err=");
  put(max_Err,3,4);
  new_line;
  new_line;

  n := 3;
  put("fit 1+2x+3y+4x^2+5xy+6y^2+7x^2y, to ");
  put(n,2);
  put_line(" degree polynomial");
  fit_2d(n, A, Y, C);
  check_2d(n, C, rms_err, avg_err, max_err);
  put("rms_err=");
  put(rms_Err,3,4);
  put(", avg_err=");
  put(avg_Err,3,4);
  put(", max_err=");
  put(max_Err,3,4);
  new_line;
  new_line;

end Least_Square_Fit2d;