// least_square_fit_4d.java tailorable code, provide your input and setup // The purpose of this package is to provide a reliable and convenient // means for fitting existing data by a few coefficients. The companion // package check_fit provides the means to use the coefficients for // interpolation and limited extrapolation. // // This package implements the least square fit. // // The problem is stated as follows : // Given measured data for values of Y based on values of X1,X2 and X3. e.g. // // Y_actual X1 X2 X3 X4 // -------- ----- ----- ----- ----- // 32.5 1.0 2.5 3.7 4.2 // 7.2 2.0 2.5 3.6 2.5 // 6.9 3.0 2.7 3.5 1.9 // 22.4 2.2 2.1 3.1 4.7 // 10.4 1.5 2.0 2.6 0.9 // 11.3 1.6 2.0 3.1 5.1 // // Find a, b, ... such that Y_approximate = a + b * X1 + c * X2 + d * X3 + // e * X4 + f * X1*X2 + g *X1*X3 ... k * X1*X2*X3*X4 ... z * X4*X4*X4*X4 // Find the 69 coefficients, including the constant // such that the sum of (Y_actual - Y_approximate) squared is minimized. // This is minimizing the root mean squire error. // // The method for determining the coefficients a, b and c follows directly // form the problem definition and mathematical analysis. (See more below) // // Y is called the dependent variable and X1 .. Xn the independent variables. // The procedures below implements a few special cases and the general case. // The number of independent variables can vary. // The approximation equation may use powers of the independent variables // The user may create additional independent variables e.g. X2 = SIN(X1) // with the restriction that the independent variables are linearly // independent. e.g. Xi not equal p Xj + q for all i,j,p,q // // // // The mathematical derivation of the least square fit is as follows : // // Given data for the independent variable Y in terms of the dependent // variables S,T,U and V consider that there exists a function F // such that Y = F(S,T,U,V) // The problem is to find coefficients a,b,c and d such that // Y_approximate = a * S + b * T + c * U + d * V // and such that the sum of ( Y - Y_approximate ) squared is minimized. // // Note: a, b, c, d are scalars. S, T, U, V, Y, Y_approximate are vectors. // // To find the minimum of SUM( Y - Y_approximate ) ** 2 // the derivatives must be taken with respect to a,b,c and d and // all must equal zero simultaneously. The steps follow : // // SUM( Y - Y_approximate ) ** 2 = SUM( Y - a*S - b*T - c*U - d*V ) ** 2 // // d/da = -2 * S * SUM( Y - A*S - B*T - C*U - D*V ) // d/db = -2 * T * SUM( Y - A*S - B*T - C*U - D*V ) // d/dc = -2 * U * SUM( Y - A*S - B*T - C*U - D*V ) // d/dd = -2 * V * SUM( Y - A*S - B*T - C*U - D*V ) // // Setting each of the above equal to zero and putting constant term on left // the -2 is factored out, // the independent variable is moved inside the summation // // SUM( a*S*S + b*S*T + c*S*U + d*S*V = S*Y ) // SUM( a*T*S + b*T*T + c*T*U + d*T*V = T*Y ) // SUM( a*U*S + b*U*T + c*U*U + d*U*V = U*Y ) // SUM( a*V*S + b*V*T + c*V*U + d*V*V = V*Y ) // // Distributing the SUM inside yields // // a * SUM(S*S) + b * SUM(S*T) + c * SUM(S*U) + d * SUM(S*V) = SUM(S*Y) // a * SUM(T*S) + b * SUM(T*T) + c * SUM(T*U) + d * SUM(T*V) = SUM(T*Y) // a * SUM(U*S) + b * SUM(U*T) + c * SUM(U*U) + d * SUM(U*V) = SUM(U*Y) // a * SUM(V*S) + b * SUM(V*T) + c * SUM(V*U) + d * SUM(V*V) = SUM(V*Y) // // To find the coefficients a,b,c and d solve the linear system of equations // // | SUM(S*S) SUM(S*T) SUM(S*U) SUM(S*V) | | a | | SUM(S*Y) | // | SUM(T*S) SUM(T*T) SUM(T*U) SUM(T*V) | x | b | = | SUM(T*Y) | // | SUM(U*S) SUM(U*T) SUM(U*U) SUM(U*V) | | c | | SUM(U*Y) | // | SUM(V*S) SUM(V*T) SUM(V*U) SUM(V*V) | | d | | SUM(V*Y) | // // Some observations : // S,T,U and V must be linearly independent. // There must be more data sets (Y, S, T, U, V) than variables. // The analysis did not depend on the number of independent variables // A polynomial fit results from the substitutions S=1, T=X, U=X**2, V=X**3 // The general case for any order polynomial follows, fit_pn. // Any substitution such as three variables to various powers may be used. public class least_square_fit_4d { double rms_err, avg_err, max_err; int idata = 0; // reset after each use of the data set int Apwr[] = new int[200]; // powers of each variable in a term int Bpwr[] = new int[200]; int Cpwr[] = new int[200]; int Dpwr[] = new int[200]; int debug = 0; least_square_fit_4d() { int n; int mm=4; // 4 dimensions int nnn[] = new int[1]; // number of terms System.out.println("least_square_fit_4d.java"); // sample polynomial least square fit, 3th power { n=3; // need constant term and powers 1,2,3 System.out.println("fit u "+(n)+" degree polynomial in 4 dimensions"); gen_4d_powers(n, mm, nnn, Apwr, Bpwr, Cpwr, Dpwr); double A[][] = new double[nnn[0]][nnn[0]]; double C[] = new double[nnn[0]]; double Y[] = new double[nnn[0]]; fit_4d(n, mm, nnn, A, Y, C); check_4d(n, mm, nnn, C); System.out.println("rms_err="+rms_err+", avg_err="+avg_err+ ", max_err="+max_err); System.out.println(" "); } { n=4; // need constant term and powers 1,2,3,4 System.out.println("fit u "+(n)+" degree polynomial in 4 dimensions"); gen_4d_powers(n, mm, nnn, Apwr, Bpwr, Cpwr, Dpwr); double A[][] = new double[nnn[0]][nnn[0]]; double C[] = new double[nnn[0]]; double Y[] = new double[nnn[0]]; fit_4d(n, mm, nnn, A, Y, C); check_4d(n, mm, nnn, C); System.out.println("rms_err="+rms_err+", avg_err="+avg_err+ ", max_err="+max_err); System.out.println(" "); } } // end least_square_fit_4d int data_set4d(double abcdu[], int ijkm[]) { int i=ijkm[0]; int j=ijkm[1]; int k=ijkm[2]; int m=ijkm[3]; // would have used l, ell, yet too close to i and 1 int n = 4; double u, a, b, c, d; i++; if(i>n) {i=0; j++;} if(j>n) {j=0; k++;} if(k>n) {k=0; m++;} if(m>n) {i=0; j=0; k=0; m=0; return 0;} a = (double)i; b = (double)j; c = (double)k; d = (double)m; u = 1.0 + 2.0*a + 3.0*b + 4.0*c + 5.0*d + 6.0*a*a + 7.0*a*b + 8.0*a*c + 9.0*a*d + 10.0*b*b + 11.0*b*c + 12.0*b*d + 13.0*c*c + 14.0*c*d + 15.0*d*d + 16.0*a*a*a + 17.0*a*a*b + 18.0*a*a*c + 19.0*a*a*d + 20.0*a*b*b + 21.0*a*b*c + 22.0*a*b*d + 23.0*a*c*c + 24.0*a*c*d + 25.0*a*d*d + 26.0*b*b*b + 27.0*b*b*c + 28.0*b*b*d + 29.0*b*c*c + 30.0*b*c*d + 31.0*c*c*c + 32.0*c*c*d + 33.0*c*d*d + 34.0*d*d*d + 35.0*a*a*a*a + 36.0*a*a*a*b + 37.0*a*a*a*c + 38.0*a*a*a*d + 39.0*a*a*b*b + 40.0*a*a*b*c + 41.0*a*a*b*d + 42.0*a*a*c*c + 43.0*a*a*c*d + 44.0*a*a*d*d + 45.0*a*b*b*b + 46.0*a*b*b*c + 47.0*a*b*b*d + 48.0*a*b*c*c + 49.0*a*b*c*d + 50.0*a*a*d*d + 51.0*a*c*c*c + 52.0*a*c*c*d + 53.0*a*c*d*d + 54.0*a*d*d*d + 55.0*b*b*b*b + 56.0*b*b*b*c + 57.0*b*b*b*d + 58.0*b*b*c*c + 59.0*b*b*c*d + 60.0*b*b*d*d + 61.0*b*c*c*c + 62.0*b*c*c*d + 63.0*b*c*d*d + 64.0*b*d*d*d + 65.0*c*c*c*c + 66.0*c*c*c*d + 67.0*c*c*d*d + 68.0*c*d*d*d + 69.0*d*d*d*d; abcdu[0]=a; abcdu[1]=b; abcdu[2]=c; abcdu[3]=d; abcdu[4]=u; ijkm[0]=i; ijkm[1]=j; ijkm[2]=k; ijkm[3]=m; return 1; } // end data_set4d void fit_4d(int n, int mm, int nnn[], double A[][], double Y[], double C[]) { int i, j, k, nn; double Av[] = new double[10]; // at least n double Bv[] = new double[10]; // powers of variables double Cv[] = new double[10]; double Dv[] = new double[10]; double abcdu[] = new double[5]; int ijkm[] = {0,0,0,0}; double term_i, term_j; nn = nnn[0]; for(i=0; i abs_pivot) { I_pivot = i; pivot = B[row[i]][k]; abs_pivot = Math.abs(pivot); } } // have pivot, interchange row indicies hold = row[k]; row[k] = row[I_pivot]; row[I_pivot] = hold; // check for near singular if(abs_pivot < 1.0E-10) { for(int j=k+1; j