! least_square_fit.f90  tailorable code, provide your input and setup 

! The purpose of this package is to provide a reliable and convenient
! means for fitting existing data by a few coefficients. The companion
! package check_fit provides the means to use the coefficients for
! interpolation and limited extrapolation.
! 
! This package implements the least square fit. 
! 
! The problem is stated as follows :
!   Given measured data for values of Y based on values of X1,X2 and X3. e.g.
! 
!    Y_actual         X1      X2     X3  
!    --------       -----   -----  -----
!     32.5           1.0     2.5    3.7 
!      7.2           2.0     2.5    3.6 
!      6.9           3.0     2.7    3.5 
!     22.4           2.2     2.1    3.1 
!     10.4           1.5     2.0    2.6 
!     11.3           1.6     2.0    3.1 
! 
!  Find a, b and c such that   Y_approximate =  a * X1 + b * X2 + c * X3
!  and such that the sum of (Y_actual - Y_approximate) squared is minimized.
! 
! The method for determining the coefficients a, b and c follows directly
! form the problem definition and mathematical analysis. (See more below)
! 
! Y is called the dependent variable and X1 .. Xn the independent variables.
! The procedures below implements a few special cases and the general case.
!    The number of independent variables can vary.
!    The approximation equation may use powers of the independent variables
!    The user may create additional independent variables e.g. X2 = SIN(X1)
!    with the restriction that the independent variables are linearly
!    independent.  e.g.  Xi not equal  p Xj + q  for all i,j,p,q
! 
! 
! 
! The mathematical derivation of the least square fit is as follows :
! 
! Given data for the independent variable Y in terms of the dependent
! variables S,T,U and V  consider that there exists a function F
! such that     Y = F(S,T,U,V)
! The problem is to find coefficients a,b,c and d such that
!            Y_approximate = a * S + b * T + c * U + d * V
! and such that the sum of ( Y - Y_approximate ) squared is minimized.
! 
! Note: a, b, c, d are scalars. S, T, U, V, Y, Y_approximate are vectors.
! 
! To find the minimum of  SUM( Y - Y_approximate ) ** 2
! the derivatives must be taken with respect to a,b,c and d and
! all must equal zero simultaneously. The steps follow :
! 
!  SUM( Y - Y_approximate ) ** 2 = SUM( Y - a*S - b*T - c*U - d*V ) ** 2
! 
! d/da =  -2 * S * SUM( Y - A*S - B*T - C*U - D*V )
! d/db =  -2 * T * SUM( Y - A*S - B*T - C*U - D*V )
! d/dc =  -2 * U * SUM( Y - A*S - B*T - C*U - D*V )
! d/dd =  -2 * V * SUM( Y - A*S - B*T - C*U - D*V )
! 
! Setting each of the above equal to zero and putting constant term on left
!    the -2 is factored out,
!    the independent variable is moved inside the summation
! 
!  SUM( a*S*S + b*S*T + c*S*U + d*S*V = S*Y )
!  SUM( a*T*S + b*T*T + c*T*U + d*T*V = T*Y )
!  SUM( a*U*S + b*U*T + c*U*U + d*U*V = U*Y )
!  SUM( a*V*S + b*V*T + c*V*U + d*V*V = V*Y )
! 
! Distributing the SUM inside yields
! 
!  a * SUM(S*S) + b * SUM(S*T) + c * SUM(S*U) + d * SUM(S*V) = SUM(S*Y)
!  a * SUM(T*S) + b * SUM(T*T) + c * SUM(T*U) + d * SUM(T*V) = SUM(T*Y)
!  a * SUM(U*S) + b * SUM(U*T) + c * SUM(U*U) + d * SUM(U*V) = SUM(U*Y)
!  a * SUM(V*S) + b * SUM(V*T) + c * SUM(V*U) + d * SUM(V*V) = SUM(V*Y)
! 
! To find the coefficients a,b,c and d solve the linear system of equations
! 
!    | SUM(S*S)  SUM(S*T)  SUM(S*U)  SUM(S*V) |   | a |   | SUM(S*Y) |
!    | SUM(T*S)  SUM(T*T)  SUM(T*U)  SUM(T*V) | x | b | = | SUM(T*Y) |
!    | SUM(U*S)  SUM(U*T)  SUM(U*U)  SUM(U*V) |   | c |   | SUM(U*Y) |
!    | SUM(V*S)  SUM(V*T)  SUM(V*U)  SUM(V*V) |   | d |   | SUM(V*Y) |
! 
! Some observations :
!     S,T,U and V must be linearly independent.
!     There must be more data sets (Y, S, T, U, V) than variables.
!     The analysis did not depend on the number of independent variables
!     A polynomial fit results from the substitutions S=1, T=X, U=X**2, V=X**3
!     The general case for any order polynomial follows, fit_pn.
!     Any substitution such as three variables to various powers may be used.
!

program least_square_fit
  implicit none
  double precision, dimension(0:50,0:50) :: A
  double precision, dimension(0:50) :: C
  double precision, dimension(0:50) :: Y
  double precision :: rms_err, avg_err, max_err
  integer :: n  ! number of terms in fit equation
  interface
    subroutine fit_pn(n, A, Y, C)
      implicit none
      integer, intent(in) :: n
      double precision, dimension(0:50,0:50), intent(out) :: A
      double precision, dimension(0:50), intent(out) :: Y
      double precision, dimension(0:50), intent(out) :: C
    end subroutine fit_pn
  end interface
  interface
    subroutine check_pn(n, C, rms_err, avg_err, max_err)
      implicit none
      integer, intent(in) :: n
      double precision, intent(in), dimension(0:50) :: C
      double precision, intent(out) :: rms_err, avg_err, max_err
    end subroutine check_pn
  end interface
  
  print *, "least_square_fit.f90"
  
  ! sample polynomial least square fit, 3th power 
  n=3 ! need constant term and three powers 1,2,3 
  print *, "fit exp(x)*sin(x) 100 points 0.0 to Pi, ",n, &
                     " degree polynomial"
  call fit_pn(n, A, Y, C)
  call check_pn(n, C, rms_err, avg_err, max_err)
  print *, "rms_err=",rms_err,", avg_err=",avg_err, &
                         ", max_err=",max_err
  print *, " "
 
  n=4 ! need constant term and four powers 
  print *, "fit exp(x)*sin(x) 100 points 0.0 to Pi, ",n, &
                     " degree polynomial"
  call fit_pn(n, A, Y, C)
  call check_pn(n, C, rms_err, avg_err, max_err)
  print *, "rms_err=",rms_err,", avg_err=",avg_err, &
                         ", max_err=",max_err
  print *, " "
  n=5 ! need constant term and five powers 
  print *, "fit exp(x)*sin(x) 100 points 0.0 to Pi, ",n, &
                     " degree polynomial"
  call fit_pn(n, A, Y, C)
  call check_pn(n, C, rms_err, avg_err, max_err)
  print *, "rms_err=",rms_err,", avg_err=",avg_err, &
                         ", max_err=",max_err
  print *, " "
  n=6
  print *, "fit exp(x)*sin(x) 100 points 0.0 to Pi, ",n, &
                     " degree polynomial"
  call fit_pn(n, A, Y, C)
  call check_pn(n, C, rms_err, avg_err, max_err)
  print *, "rms_err=",rms_err,", avg_err=",avg_err, &
                         ", max_err=",max_err
  print *, " "
  n=7
  print *, "fit exp(x)*sin(x) 100 points 0.0 to Pi, ",n, &
                     " degree polynomial"
  call fit_pn(n, A, Y, C)
  call check_pn(n, C, rms_err, avg_err, max_err)
  print *, "rms_err=",rms_err,", avg_err=",avg_err, &
                         ", max_err=",max_err
  print *, " "
end program least_square_fit

function data_set(yx) result(status)! returns 1 for data, 0 for end  
  implicit none                     ! sets value of y for value of x
  double precision, dimension(0:2), intent(out) :: yx
  integer :: status 
  integer, save :: idata = 0 ! reset after each use of the data set
  integer :: k = 100 ! 100 
  double precision :: x1 = 0.0
  double precision :: dx1 = 0.0314 ! approx Pi 
  double precision :: xx
  double precision :: yy

  idata = idata + 1  
  if(idata.gt.k) then
    idata = 0
    status = 0
    return ! ready for reread
  end if
  xx = x1 + idata * dx1
  yy = exp(xx)*sin(xx)
  yx(1) = xx
  yx(0) = yy
  status = 1
end function data_set 

subroutine fit_pn(n, A, Y, C) ! n is number of coefficients, power-1
  implicit none
  integer, intent(in) :: n
  double precision, dimension(0:50,0:50), intent(out) :: A
  double precision, dimension(0:50), intent(out) :: Y
  double precision, dimension(0:50), intent(out) :: C
  integer :: i, j
  double precision :: x, yd
  double precision, dimension(0:2) :: yx
  double precision, dimension(0:30) :: pwr ! at least n 
  interface
    function data_set(yx) result(status)! returns 1 for data, 0 for end  
      implicit none                     ! sets value of y for value of x
      double precision, dimension(0:2), intent(out) :: yx
      integer :: status
    end function data_set
  end interface
  interface
    subroutine simeq(n, A, Y, X)
      implicit none
      integer, intent(in) :: n
      double precision, dimension(0:50,0:50), intent(in) :: A
      double precision, dimension(0:50), intent(in) :: Y
      double precision, dimension(0:50), intent(out) :: X
    end subroutine simeq
  end interface
  
  do i=0,n
    do j=0,n
      A(i,j) = 0.0
    end do
    Y(i) = 0.0
  end do
  do while(data_set(yx).eq.1)
    yd = yx(0)
    x = yx(1)
    pwr(0) = 1.0
    do i=1,n
      pwr(i) = pwr(i-1)*x
    end do
    do i=0,n
      do j=0,n
        A(i,j) = A(i,j) + pwr(i)*pwr(j)
      end do
      Y(i) = Y(i) + yd*pwr(i)
    end do
  end do
  print *, "solving simultaneous equations"
  call simeq(n, A, Y, C)
  do i=0,n
    print *, "C(",i,")=",C(i)
  end do
end subroutine fit_pn 

subroutine check_pn(n, C, rms_err, avg_err, max_err)
  implicit none
  integer, intent(in) :: n
  double precision, intent(in), dimension(0:50) :: C
  double precision, intent(out) :: rms_err, avg_err, max_err
  double precision :: x, y, ya, diff
  double precision, dimension(0:2) :: yx
  double precision :: sumsq, sum, maxe
  double precision :: xmin = 0.0
  double precision :: xmax = 0.0
  double precision :: ymin = 0.0
  double precision :: ymax = 0.0
  double precision :: xbad, ybad
  integer :: i, k, imax = 0
  
  interface
    function data_set(yx) result(status)! returns 1 for data, 0 for end  
      implicit none                     ! sets value of y for value of x
      double precision, dimension(0:2), intent(out) :: yx
      integer :: status
    end function data_set
  end interface
  
  maxe = 0.0
  sum = 0.0
  sumsq = 0.0
  k = 0
  do while(data_set(yx).eq.1)
    y = yx(0)
    x = yx(1)
    if(k.eq.0) then
      xmin=x
      xmax=x
      ymin=y
      ymax=y
      imax=0
      xbad=x
      ybad=y
    end if
    if(x>xmax) xmax=x
    if(x<xmin) xmin=x
    if(y>ymax) ymax=y
    if(y<ymin) ymin=y
    k = k + 1
    ya = 0.0
    do i=n,1,-1
      ya = (C(i)+ya)*x
    end do
    ya = ya + C(0)
    diff = abs(y-ya)
    ! print *, "k=",k,", x=",x,", y=",y,", ya=",ya,", diff=",diff,", maxe=",maxe
    if(diff>maxe) then
      maxe=diff
      imax=k
      xbad=x
      ybad=y
    end if
    sum = sum + diff
    sumsq = sumsq + diff*diff
  end do
  print *, "check_pn k=",k,", xmin=",xmin,", xmax=",xmax
  print *, "   ymin=",ymin,", ymax=",ymax
  max_err = maxe
  avg_err = sum/k
  rms_err = sqrt(sumsq/k)
  print *, "max=",max_err," at i=",imax
  print *, "xbad=",xbad,", ybad=",ybad
end subroutine check_pn 

subroutine simeq(n, A, Y, X)
  implicit none
  integer, intent(in) :: n
  double precision, dimension(0:50,0:50), intent(in) :: A
  double precision, dimension(0:50), intent(in) :: Y
  double precision, dimension(0:50), intent(out) :: X

!      PURPOSE : SOLVE THE LINEAR SYSTEM OF EQUATIONS WITH REAL     
!                COEFFICIENTS   (A) * |X| = |Y|                     
!                                                                   
!      INPUT  : THE NUMBER OF EQUATIONS  n                          
!               THE REAL MATRIX  A   A(i,j) 
!               THE REAL VECTOR  Y                                  
!      OUTPUT : THE REAL VECTOR  X                                  
!                                                                   
!      METHOD : GAUSS-JORDAN ELIMINATION USING MAXIMUM ELEMENT      
!               FOR PIVOT.                                          
!                                                                   
!      USAGE  :     call simeq(n,A,X,Y)                                 
!                                                                   
!                                                                   
!    WRITTEN BY : JON SQUIRE , 28 MAY 1983                          
!    ORIGINAL DEC 1959 for IBM 650, TRANSLATED TO OTHER LANGUAGES   
!    e.g. FORTRAN converted to Ada converted to C to f90                   
!    this version zero based

    double precision, dimension(0:n,0:n+1) :: B  ! (n,n+1)  WORKING MATRIX 
    integer, dimension(0:n) :: ROW               ! ROW INTERCHANGE INDICES 
    integer :: HOLD , I_PIVOT                    ! PIVOT INDICES 
    double precision :: PIVOT                    ! PIVOT ELEMENT VALUE 
    double precision :: ABS_PIVOT
    integer :: i, j, k, m

    m = n+1

    ! BUILD WORKING DATA STRUCTURE 
    do i=0,n
      do j=0,n
        B(i,j) = A(i,j)
      end do ! j
      B(i,m) = Y(i)
    end do ! i
    ! SET UP ROW  INTERCHANGE VECTORS 
    do k=0,n
      ROW(k) = k
    end do ! k

    ! BEGIN MAIN REDUCTION LOOP 
    do k=0,n

      ! FIND LARGEST ELEMENT FOR PIVOT 
      PIVOT = B(ROW(k),k)
      ABS_PIVOT = abs(PIVOT)
      I_PIVOT = k
      do i=k,n
        if( abs(B(ROW(i),k)) > ABS_PIVOT) then
          I_PIVOT = i
          PIVOT = B(ROW(i),k)
          ABS_PIVOT = abs ( PIVOT )
        end if
      end do ! i

      ! HAVE PIVOT, INTERCHANGE ROW POINTERS 
      HOLD = ROW(k)
      ROW(k) = ROW(I_PIVOT)
      ROW(I_PIVOT) = HOLD

      ! CHECK FOR NEAR SINGULAR 
      if( ABS_PIVOT < 1.0E-10 ) then
        do j=k+1,n+1
          B(ROW(k),j) = 0.0
        end do ! j
        print *, 'redundant row (singular) ', ROW(k)
      else

        ! REDUCE ABOUT PIVOT 
        do j=k+1,m
          B(ROW(k),j) = B(ROW(k),j) / B(ROW(k),k)
        end do ! j

        ! INNER REDUCTION LOOP 
        do i=0,n
          if( i .ne. k) then
            do j=k+1,m
              B(ROW(i),j) = B(ROW(i),j) - B(ROW(i),k) * B(ROW(k),j)
            end do ! j
          end if
        end do ! i
      end if 
      ! FINISHED INNER REDUCTION 
    end do ! k

    ! END OF MAIN REDUCTION LOOP 
    ! BUILD  X  FOR RETURN, UNSCRAMBLING ROWS 
    do i=0,n
      X(i) = B(ROW(i),n+1)
    end do ! i
end subroutine simeq