-- least_square_fit_4d.adb  tailorable code, provide your input and setup

-- The purpose of this package is to provide a reliable and convenient
-- means for fitting existing data by a few coefficients. The companion
-- package check_fit provides the means to use the coefficients for
-- interpolation and limited extrapolation.
--
-- This package implements the least square fit.
--
-- The problem is stated as follows :
--   Given measured data for values of Y based on values of X1,X2 and X3. e.g.
--
--    Y_actual         X1      X2     X3
--    --------       -----   -----  -----
--     32.5           1.0     2.5    3.7
--      7.2           2.0     2.5    3.6
--      6.9           3.0     2.7    3.5
--     22.4           2.2     2.1    3.1
--     10.4           1.5     2.0    2.6
--     11.3           1.6     2.0    3.1
--
--  Find a, b and c such that   Y_approximate =  a * X1 + b * X2 + c * X3
--  and such that the sum of (Y_actual - Y_approximate) squared is minimized.
--
-- The method for determining the coefficients a, b and c follows directly
-- form the problem definition and mathematical analysis. (See more below)
--
-- Y is called the dependent variable and X1 .. Xn the independent variables.
-- The procedures below implements a few special cases and the general case.
--    The number of independent variables can vary.
--    The approximation equation may use powers of the independent variables
--    The user may create additional independent variables e.g. X2 = SIN(X1)
--    with the restriction that the independent variables are linearly
--    independent.  e.g.  Xi not equal  p Xj + q  for all i,j,p,q
--
--
--
-- The mathematical derivation of the least square fit is as follows :
--
-- Given data for the independent variable Y in terms of the dependent
-- variables S,T,U and V  consider that there exists a function F
-- such that     Y = F(S,T,U,V)
-- The problem is to find coefficients a,b,c and d such that
--            Y_approximate = a * S + b * T + c * U + d * V
-- and such that the sum of ( Y - Y_approximate ) squared is minimized.
--
-- Note: a, b, c, d are scalars. S, T, U, V, Y, Y_approximate are vectors.
--
-- To find the minimum of  SUM( Y - Y_approximate ) ** 2
-- the derivatives must be taken with respect to a,b,c and d and
-- all must equal zero simultaneously. The steps follow :
--
--  SUM( Y - Y_approximate ) ** 2 = SUM( Y - a*S - b*T - c*U - d*V ) ** 2
--
-- d/da =  -2 * S * SUM( Y - a*S - b*T - c*U - d*V )
-- d/db =  -2 * T * SUM( Y - a*S - b*T - c*U - d*V )
-- d/dc =  -2 * U * SUM( Y - a*S - b*T - c*U - d*V )
-- d/dd =  -2 * V * SUM( Y - a*S - b*T - c*U - d*V )
--
-- Setting each of the above equal to zero and putting constant term on left
--    the -2 is factored out,
--    the independent variable is moved inside the summation
--
--  SUM( a*S*S + b*S*T + c*S*U + d*S*V = S*Y )
--  SUM( a*T*S + b*T*T + c*T*U + d*T*V = T*Y )
--  SUM( a*U*S + b*U*T + c*U*U + d*U*V = U*Y )
--  SUM( a*V*S + b*V*T + c*V*U + d*V*V = V*Y )
--
-- Distributing the SUM inside yields
--
--  a * SUM(S*S) + b * SUM(S*T) + c * SUM(S*U) + d * SUM(S*V) = SUM(S*Y)
--  a * SUM(T*S) + b * SUM(T*T) + c * SUM(T*U) + d * SUM(T*V) = SUM(T*Y)
--  a * SUM(U*S) + b * SUM(U*T) + c * SUM(U*U) + d * SUM(U*V) = SUM(U*Y)
--  a * SUM(V*S) + b * SUM(V*T) + c * SUM(V*U) + d * SUM(V*V) = SUM(V*Y)
--
-- To find the coefficients a,b,c and d solve the linear system of equations
--
--    | SUM(S*S)  SUM(S*T)  SUM(S*U)  SUM(S*V) |   | a |   | SUM(S*Y) |
--    | SUM(T*S)  SUM(T*T)  SUM(T*U)  SUM(T*V) | x | b | = | SUM(T*Y) |
--    | SUM(U*S)  SUM(U*T)  SUM(U*U)  SUM(U*V) |   | c |   | SUM(U*Y) |
--    | SUM(V*S)  SUM(V*T)  SUM(V*U)  SUM(V*V) |   | d |   | SUM(V*Y) |
--
-- Some observations :
--     S,T,U and V must be linearly independent.
--     There must be more data sets (Y, S, T, U, V) than variables.
--     The analysis did not depend on the number of independent variables
--     A polynomial fit results from the substitutions S=1, T=X, U=X**2, V=X**3
--     The general case for any order polynomial follows, fit_pn.
--     Any substitution such as three variables to various powers may be used.


with Ada.text_io;
use  Ada.text_io;
with Ada.Numerics;
use  Ada.Numerics;
with Ada.Numerics.Long_Elementary_Functions;
use  Ada.Numerics.Long_Elementary_Functions;
with real_arrays;
use  real_arrays;

procedure least_square_fit_4d is
  type integer_vector is array(integer range <>) of integer;
  type integer_matrix is array(integer range <>,
                               integer range <>) of integer;
  n : integer;
  A : real_matrix(1..200,1..200);
  C : real_vector(1..200);
  Y : real_vector(1..200);
  Apwr : integer_vector(1..200); -- 1..nnn, contains 0..n
  Bpwr : integer_vector(1..200);
  Cpwr : integer_vector(1..200);
  Dpwr : integer_vector(1..200);
  nnn : Integer;

  rms_err, avg_err, max_err : real;
  idata : integer := 0; -- data pair index used by data_set4d
  have_data : boolean := true; -- returned by data_set4d

  package fltio is new float_io(long_float);
  use fltio;
  package intio is new integer_io(integer);
  use intio;

  procedure data_set4d(u : out real;             -- a   u  value
                       t : out real;             -- a   t  value
                       z : out real;             -- a   z  value
                       y : out real;             -- a   y  value
                       x : out real;             -- an  x  value
                       data : in out boolean) is -- returns true for data
                                                 -- starts over if false
                       -- or read data z = f(x,y)
    k : integer := 200; -- data values
    -- idata  static
    hx : real := 0.03;
    hy : real := 0.1;
    hz : real := 0.1;
    ht : real := 0.08;
    xx : real;
    yy : real;
    zz : real;
    tt : real;
    uu : real;
  begin
    if idata>=k then
      idata := 0;
      data := false;
      return;     -- ready for check
    end if;
    xx := real(idata-45)*hx;
    yy := sin(xx);
    zz := cos(xx); -- must be linearly independent
    tt := exp(xx);
    uu := 1.0+2.0*xx+3.0*yy+4.0*xx*xx+5.0*xx*yy+6.0*yy*yy+7.0*xx*xx*yy+
          8.0*zz+9.0*zz*zz*yy*yy*yy+10.0*xx*zz+11.0*yy*zz+
          12.0*xx*yy*zz*tt;
    u := uu;
    t := tt;
    z := zz;
    y := yy;
    x := xx;
    idata := idata+1;
  end data_set4d;

  procedure gen_4d_powers(n : integer; -- highest sum of powers
                          mm : integer; -- 4 independent variables
                          nnn : in out  integer; -- returned values
                          A : out integer_vector; -- power of x
                          B : out integer_vector; -- power of y
                          C : out Integer_Vector; -- power of z
                          D : out integer_vector  -- power of t
                          ) is
    pwrsix : Integer_Vector(1..8) := (1,2,6,16,36,71,127,211);
                                     -- Start of Each Order
    pwrs : Integer_Matrix(1..210,1..4) :=(
           (0,0,0,0),(1,0,0,0),(0,1,0,0),(0,0,1,0),(0,0,0,1),
           (2,0,0,0),(1,1,0,0),(0,2,0,0),(1,0,1,0),(0,1,1,0),
           (0,0,2,0),(1,0,0,1),(0,1,0,1),(0,0,1,1),(0,0,0,2),
           (3,0,0,0),(2,1,0,0),(1,2,0,0),(0,3,0,0),(2,0,1,0),
           (1,1,1,0),(0,2,1,0),(1,0,2,0),(0,1,2,0),(0,0,3,0),
           (2,0,0,1),(1,1,0,1),(0,2,0,1),(1,0,1,1),(0,1,1,1),
           (0,0,2,1),(1,0,0,2),(0,1,0,2),(0,0,1,2),(0,0,0,3),
           (4,0,0,0),(3,1,0,0),(2,2,0,0),(1,3,0,0),(0,4,0,0),
           (3,0,1,0),(2,1,1,0),(1,2,1,0),(0,3,1,0),(2,0,2,0),
           (1,1,2,0),(0,2,2,0),(1,0,3,0),(0,1,3,0),(0,0,4,0),
           (3,0,0,1),(2,1,0,1),(1,2,0,1),(0,3,0,1),(2,0,1,1),
           (1,1,1,1),(0,2,1,1),(1,0,2,1),(0,1,2,1),(0,0,3,1),
           (2,0,0,2),(1,1,0,2),(0,2,0,2),(1,0,1,2),(0,1,1,2),
           (0,0,2,2),(1,0,0,3),(0,1,0,3),(0,0,1,3),(0,0,0,4),
           (5,0,0,0),(4,1,0,0),(3,2,0,0),(2,3,0,0),(1,4,0,0),
           (0,5,0,0),(4,0,1,0),(3,1,1,0),(2,2,1,0),(1,3,1,0),
           (0,4,1,0),(3,0,2,0),(2,1,2,0),(1,2,2,0),(0,3,2,0),
           (2,0,3,0),(1,1,3,0),(0,2,3,0),(1,0,4,0),(0,1,4,0),
           (0,0,5,0),(4,0,0,1),(3,1,0,1),(2,2,0,1),(1,3,0,1),
           (0,4,0,1),(3,0,1,1),(2,1,1,1),(1,2,1,1),(0,3,1,1),
           (2,0,2,1),(1,1,2,1),(0,2,2,1),(1,0,3,1),(0,1,3,1),
           (0,0,4,1),(3,0,0,2),(2,1,0,2),(1,2,0,2),(0,3,0,2),
           (2,0,1,2),(1,1,1,2),(0,2,1,2),(1,0,2,2),(0,1,2,2),
           (0,0,3,2),(2,0,0,3),(1,1,0,3),(0,2,0,3),(1,0,1,3),
           (0,1,1,3),(0,0,2,3),(1,0,0,4),(0,1,0,4),(0,0,1,4),
           (0,0,0,5),(6,0,0,0),(5,1,0,0),(4,2,0,0),(3,3,0,0),
           (2,4,0,0),(1,5,0,0),(0,6,0,0),(5,0,1,0),(4,1,1,0),
           (3,2,1,0),(2,3,1,0),(1,4,1,0),(0,5,1,0),(4,0,2,0),
           (3,1,2,0),(2,2,2,0),(1,3,2,0),(0,4,2,0),(3,0,3,0),
           (2,1,3,0),(1,2,3,0),(0,3,3,0),(2,0,4,0),(1,1,4,0),
           (0,2,4,0),(1,0,5,0),(0,1,5,0),(0,0,6,0),(5,0,0,1),
           (4,1,0,1),(3,2,0,1),(2,3,0,1),(1,4,0,1),(0,5,0,1),
           (4,0,1,1),(3,1,1,1),(2,2,1,1),(1,3,1,1),(0,4,1,1),
           (3,0,2,1),(2,1,2,1),(1,2,2,1),(0,3,2,1),(2,0,3,1),
           (1,1,3,1),(0,2,3,1),(1,0,4,1),(0,1,4,1),(0,0,5,1),
           (4,0,0,2),(3,1,0,2),(2,2,0,2),(1,3,0,2),(0,4,0,2),
           (3,0,1,2),(2,1,1,2),(1,2,1,2),(0,3,1,2),(2,0,2,2),
           (1,1,2,2),(0,2,2,2),(1,0,3,2),(0,1,3,2),(0,0,4,2),
           (3,0,0,3),(2,1,0,3),(1,2,0,3),(0,3,0,3),(2,0,1,3),
           (1,1,1,3),(0,2,1,3),(1,0,2,3),(0,1,2,3),(0,0,3,3),
           (2,0,0,4),(1,1,0,4),(0,2,0,4),(1,0,1,4),(0,1,1,4),
           (0,0,2,4),(1,0,0,5),(0,1,0,5),(0,0,1,5),(0,0,0,6) );
    ii1, ii2 : Integer;
  begin
    if mm/=4 then
      Put_Line("only for 4 independent variables, mm="&
                 Integer'Image(mm));
      nnn := 0;
      return;
    end if;
    if n>6 then
      Put_Line("only available to 6th power, n="&
                 Integer'Image(n));
      nnn := 0;
      return;
    end if;
    for i in 1..n+1 loop
      ii1 := pwrsix(i);
      ii2 := pwrsix(i+1)-1;
      for ii in pwrsix(i)..pwrsix(i+1)-1 loop
        A(ii) := pwrs(ii,1);
        B(ii) := pwrs(ii,2);
        C(ii) := pwrs(ii,3);
        D(ii) := pwrs(ii,4);
        Put_Line("a^"&Integer'Image(A(ii))&
                 " b^"&Integer'Image(B(ii))&
                 " c^"&Integer'Image(C(ii))&
                 " d^"&Integer'Image(D(ii)));
      end loop; -- ii
      New_Line;
    end loop; -- i
    nnn := pwrsix(n+2)-1;
  end gen_4d_powers;

  procedure simeq(n: Integer; -- tailored version for this code
                  A : real_matrix; -- (1..,1..) array
                  Y : real_vector; -- (1..) vector
                  X : out real_vector) is
    B : real_matrix(1..n,1..n+1);  -- working matrix
    row : array(1..n) of integer;  -- row interchange indicies
    hold, i_pivot : integer;       -- pivot indicies
    pivot : real;                  -- pivot element value
    abs_pivot : real;              -- abs of pivot element
    norm1 : real := 0.0;           -- 1 norm of matrix
    matrix_data_error : exception;
  begin
    for i in 1..n loop
      for j in 1..n loop
        B(i,j) := A(i,j);
        if abs B(i,j) > norm1 then
          norm1 := abs B(i,j);
        end if;
      end loop;
      B(i,n+1) := Y(i);
    end loop;
    for k in 1..n loop
      row(k) := k;
    end loop;
    for k in 1..n loop
      pivot := B(row(k),k);
      abs_pivot := abs pivot;
      i_pivot := k;
      for i in k..n loop
        if abs B(row(i),k) > abs_pivot then
          i_pivot := i;
          pivot := B(row(i),k);
          abs_pivot := abs pivot;
        end if;
      end loop;
      if abs_pivot < real'epsilon * norm1 *0.001 then
        --Put_Line("simeq n="&Integer'Image(n)&", k="&
        --          Integer'Image(k)&", abs_pivot="&
        --          Real'Image(abs_Pivot));
        -- raise matrix_data_error;
        null;
      end if;
      hold := row(k);
      row(k) := row(i_pivot);
      row(i_pivot) := hold;
      for j in k+1..n+1 loop
        B(row(k),j) := B(row(k),j)/B(row(k),k);
      end loop;
      for i in 1..n loop
        if i /= k then
          for j in k+1..n+1 loop
            B(row(i),j) := B(row(i),j) - B(row(i),k)*B(row(k),j);
          end loop;
        end if;
      end loop;
    end loop;
    for i in 1..n loop
      X(i) := B(row(i),n+1);
    end loop;
  end simeq;

  procedure fit_4d(n : integer; A : in out real_matrix;
                                Y : in out real_vector;
                                C : in out real_vector) is
    xx, yy, zz, tt, uu : real;
    termi, termj : real;
    Av : real_vector(0..n);
    Bv : real_vector(0..n);
    Cv : real_vector(0..n);
    Dv : real_vector(0..n);
  begin
    gen_4d_powers(n,4,nnn,Apwr,Bpwr,Cpwr,Dpwr);
                   -- sets nnn and Apwr, Bpwr, Cpwr, Dpwr
    for i in 1..nnn loop
      for j in 1..nnn loop
        A(i,j) := 0.0;
      end loop;
      Y(i) := 0.0;
    end loop;
    have_data := true;
    idata := 0;
    while have_data loop
      data_set4d(uu, tt, zz, yy, xx, have_data);
      if not have_data then exit; end if;
      Av(0) := 1.0;
      Bv(0) := 1.0;
      Cv(0) := 1.0;
      Dv(0) := 1.0;
      for i in 1..n loop
        Av(i) := Av(i-1)*xx;
        Bv(i) := Bv(i-1)*yy;
        Cv(i) := Cv(i-1)*zz;
        Dv(i) := Dv(i-1)*tt;
      end loop;
      for i in 1..nnn loop
        termi := Av(Apwr(i))*Bv(Bpwr(i))*Cv(Cpwr(i))*Dv(Dpwr(i));
        for j in 1..nnn loop
          termj := Av(Apwr(j))*Bv(Bpwr(j))*Cv(Cpwr(j))*Dv(Dpwr(j));
          A(i,j) := A(i,j) + termi*termj;
        end loop;
        Y(i) := Y(i) + uu*termi;
      end loop;
    end loop; -- on while

    simeq(nnn, A, Y, C);
    for i in 1..nnn loop
      put("C(");
      put(i,3);
      put(")=");
      put(C(i),3,4);
      new_line;
    end loop;
  exception
    when others =>
      Put_Line("could not fit data");
  end fit_4d;

  procedure check_4d(n : integer;
                     C : real_vector;
                     rms_err : out real;
                     avg_err : out real;
                     max_err : out real) is
    Av : real_vector(0..n);
    Bv : real_vector(0..n);
    Cv : real_vector(0..n);
    Dv : real_vector(0..n);
    x, y, z, t, u, ua, diff : real;
    sumsq : real := 0.0;
    sum : real := 0.0;
    maxe : real := 0.0;
    xmin, xmax, ymin, ymax, zmin, zmax, tmin, tmax, umin, umax : real;
    xbad, ybad, zbad, tbad, ubad : real;
    k, imax : integer;
  begin
    k := 0;
    have_data := true;
    while have_data loop
      data_set4d(u, t, z, y, x, have_data);
      if not have_data then exit; end if;
      if k=0 then
        xmin := x;
        xmax := x;
        ymin := y;
        ymax := y;
        zmin := z;
        zmax := z;
        tmin := t;
        tmax := t;
        umin := u;
        umax := u;
        imax := 1;
        xbad := x;
        ybad := y;
        zbad := z;
        tbad := t;
        ubad := u;
      end if;
      if x>xmax then xmax := x; end if;
      if x<xmin then xmin := x; end if;
      if y>ymax then ymax := y; end if;
      if y<ymin then ymin := y; end if;
      if z>zmax then zmax := z; end if;
      if z<zmin then zmin := z; end if;
      if t>zmax then tmax := t; end if;
      if t<zmin then tmin := t; end if;
      if u>umax then umax := u; end if;
      if u<umin then umin := u; end if;
      k := k + 1;
      ua := 0.0;
      Av(0) := 1.0;
      Bv(0) := 1.0;
      Cv(0) := 1.0;
      Dv(0) := 1.0;
      for i in 1..n loop
        Av(i) := Av(i-1)*x;
        Bv(i) := Bv(i-1)*y;
        Cv(i) := Cv(i-1)*z;
        Dv(i) := Dv(i-1)*t;
      end loop;
      for i in 1..nnn loop
        ua := ua + C(I)*Av(Apwr(i))*Bv(Bpwr(i))*Cv(Cpwr(i))*Dv(Dpwr(i));
      end loop;
      diff := abs(u-ua);
      if diff>maxe then
        maxe := diff;
        imax := k;
        xbad := x;
        ybad := y;
        zbad := z;
        tbad := t;
        ubad := u;
      end if;
      sum := sum + diff;
      sumsq := sumsq + diff*diff;
    end loop; -- on while
    put("check_4d k=");
    put(k,3);
    put(", xmin=");
    put(xmin,3,3);
    put(", xmax=");
    put(xmax,3,3);
    new_line;
    put("       ymin=");
    put(ymin,3,3);
    put(", ymax=");
    put(ymax,3,3);
    new_line;
    put("       zmin=");
    put(zmin,3,3);
    put(", zmax=");
    put(zmax,3,3);
    new_line;
    put("       tmin=");
    put(tmin,3,3);
    put(", tmax=");
    put(tmax,3,3);
    new_line;
    put("       umin=");
    put(umin,3,3);
    put(", umax=");
    put(umax,3,3);
    new_line;
    max_err := maxe;
    avg_err := sum/real(k);
    rms_err := sqrt(sumsq/real(k));
    put("max=");
    put(maxe,2,4);
    put(" at ");
    put(imax,3);
    put(", xbad=");
    put(xbad,2,3);
    put(", ybad=");
    put(ybad,2,3);
    put(", zbad=");
    put(zbad,2,3);
    put(", tbad=");
    put(tbad,2,3);
    put(", ubad=");
    put(ubad,2,3);
    new_line;
  end check_4d;

begin
  put_line("least_square_fit_4d.adb");

  -- sample 4d least square fit, first power
  n := 1;
  put("fit poly to ");
  put(n,2);
  put_line(" degree polynomial");
  fit_4d(n, A, Y, C);
  check_4d(n, C, rms_err, avg_err, max_err);
  put("rms_err=");
  put(rms_Err,3,4);
  put(", avg_err=");
  put(avg_Err,3,4);
  put(", max_err=");
  put(max_Err,3,4);
  new_line;
  new_line;

  n := 2; -- highest power
  put("fit poly to ");
  put(n,2);
  put_line(" degree polynomial");
  fit_4d(n, A, Y, C);
  check_4d(n, C, rms_err, avg_err, max_err);
  put("rms_err=");
  put(rms_Err,3,4);
  put(", avg_err=");
  put(avg_Err,3,4);
  put(", max_err=");
  put(max_Err,3,4);
  new_line;
  new_line;

  n := 3; -- highest power
  put("fit poly to ");
  put(n,2);
  put_line(" degree polynomial");
  fit_4d(n, A, Y, C);
  check_4d(n, C, rms_err, avg_err, max_err);
  put("rms_err=");
  put(rms_Err,3,4);
  put(", avg_err=");
  put(avg_Err,3,4);
  put(", max_err=");
  put(max_Err,3,4);
  new_line;
  new_line;

  n := 4; -- highest power
  put("fit poly to ");
  put(n,2);
  put_line(" degree polynomial");
  fit_4d(n, A, Y, C);
  check_4d(n, C, rms_err, avg_err, max_err);
  put("rms_err=");
  put(rms_Err,3,4);
  put(", avg_err=");
  put(avg_Err,3,4);
  put(", max_err=");
  put(max_Err,3,4);
  new_line;
  new_line;

  n := 5; -- highest power
  put("fit poly to ");
  put(n,2);
  put_line(" degree polynomial");
  fit_4d(n, A, Y, C);
  check_4d(n, C, rms_err, avg_err, max_err);
  put("rms_err=");
  put(rms_Err,3,4);
  put(", avg_err=");
  put(avg_Err,3,4);
  put(", max_err=");
  put(max_Err,3,4);
  new_line;
  new_line;

end Least_Square_Fit_4d;