/* least_square_fit_4d.c four variables up to fourth order */ /* tailorable code, provide your input and setup */ #include #include #include #undef abs #define abs(x) (((x)<0.0)?(-(x)):(x)) #undef max #define max(x,y) (((x)>(y))?((x)):(y)) #undef min #define min(x,y) (((x)<(y))?((x)):(y)) static double A[40000]; static double C[200]; static double Y[200]; static int Apwr[200]; /* powers of each variable in a term */ static int Bpwr[200]; static int Cpwr[200]; static int Dpwr[200]; static int debug = 0; static void simeq(int n, double A[], double Y[], double X[]); /* * The purpose of this package is to provide a reliable and convenient * means for fitting existing data by a few coefficients. The companion * package check_fit provides the means to use the coefficients for * interpolation and limited extrapolation. * * This package implements the least square fit. * * The problem is stated as follows : * Given measured data for values of Y based on values of X1,X2 and X3. e.g. * * Y_actual X1 X2 X3 X4 * -------- ----- ----- ----- ----- * 32.5 1.0 2.5 3.7 4.2 * 7.2 2.0 2.5 3.6 0.9 * 6.9 3.0 2.7 3.5 5.1 * 22.4 2.2 2.1 3.1 1.2 * 10.4 1.5 2.0 2.6 3.1 * 11.3 1.6 2.0 3.1 2.4 * * Find a, b, c ... such that Y_approximate = a + b * X1 + c * X2 + d * X3 * d * X4 + e * X1*X2 + f * X1*X3 + ... + k * X1*X2*X3*X4 + ... + * z * X4*X4*X4*X4 66 coefficients * such that the sum of (Y_actual - Y_approximate) squared is minimized. * This minimizes the root mean squire error. * * The method for determining the coefficients a, b and c follows directly * form the problem definition and mathematical analysis. (See more below) * * Y is called the dependent variable and X1 .. Xn the independent variables. * The procedures below implements a few special cases and the general case. * The number of independent variables can vary. * The approximation equation may use powers of the independent variables * The user may create additional independent variables e.g. X2 = SIN(X1) * with the restriction that the independent variables are linearly * independent. e.g. Xi not equal p Xj + q for all i,j,p,q * * * * The mathematical derivation of the least square fit is as follows : * * Given data for the independent variable Y in terms of the dependent * variables S,T,U and V consider that there exists a function F * such that Y = F(S,T,U,V) * The problem is to find coefficients a,b,c and d such that * Y_approximate = a * S + b * T + c * U + d * V * and such that the sum of ( Y - Y_approximate ) squared is minimized. * * Note: a, b, c, d are scalars. S, T, U, V, Y, Y_approximate are vectors. * * To find the minimum of SUM( Y - Y_approximate ) ** 2 * the derivatives must be taken with respect to a,b,c and d and * all must equal zero simultaneously. The steps follow : * * SUM( Y - Y_approximate ) ** 2 = SUM( Y - a*S - b*T - c*U - d*V ) ** 2 * * d/da = -2 * S * SUM( Y - A*S - B*T - C*U - D*V ) * d/db = -2 * T * SUM( Y - A*S - B*T - C*U - D*V ) * d/dc = -2 * U * SUM( Y - A*S - B*T - C*U - D*V ) * d/dd = -2 * V * SUM( Y - A*S - B*T - C*U - D*V ) * * Setting each of the above equal to zero and putting constant term on left * the -2 is factored out, * the independent variable is moved inside the summation * * SUM( a*S*S + b*S*T + c*S*U + d*S*V = S*Y ) * SUM( a*T*S + b*T*T + c*T*U + d*T*V = T*Y ) * SUM( a*U*S + b*U*T + c*U*U + d*U*V = U*Y ) * SUM( a*V*S + b*V*T + c*V*U + d*V*V = V*Y ) * * Distributing the SUM inside yields * * a * SUM(S*S) + b * SUM(S*T) + c * SUM(S*U) + d * SUM(S*V) = SUM(S*Y) * a * SUM(T*S) + b * SUM(T*T) + c * SUM(T*U) + d * SUM(T*V) = SUM(T*Y) * a * SUM(U*S) + b * SUM(U*T) + c * SUM(U*U) + d * SUM(U*V) = SUM(U*Y) * a * SUM(V*S) + b * SUM(V*T) + c * SUM(V*U) + d * SUM(V*V) = SUM(V*Y) * * To find the coefficients a,b,c and d solve the linear system of equations * * | SUM(S*S) SUM(S*T) SUM(S*U) SUM(S*V) | | a | | SUM(S*Y) | * | SUM(T*S) SUM(T*T) SUM(T*U) SUM(T*V) | x | b | = | SUM(T*Y) | * | SUM(U*S) SUM(U*T) SUM(U*U) SUM(U*V) | | c | | SUM(U*Y) | * | SUM(V*S) SUM(V*T) SUM(V*U) SUM(V*V) | | d | | SUM(V*Y) | * * Some observations : * S,T,U and V must be linearly independent. * There must be more data sets (Y, S, T, U, V) than variables. * The analysis did not depend on the number of independent variables * A polynomial fit results from the substitutions S=1, T=X, U=X**2, V=X**3 * The general case for any order polynomial follows, fit_pn. * Any substitution such as three variables to various powers may be used. */ static int data_set4d(double *u1, double *a1, double *b1, double *c1, double *d1) { static int i=0; static int j=0; static int k=0; static int m=0; /* would have used l, ell, yet too close to i and 1 */ int n = 4; double u, a, b, c, d; i++; if(i>n) {i=0; j++;} if(j>n) {j=0; k++;} if(k>n) {k=0; m++;} if(m>n) {i=0; j=0; k=0; m=0; return 0;} a = (double)i; b = (double)j; c = (double)k; d = (double)m; u = 1.0 + 2.0*a + 3.0*b + 4.0*c + 5.0*d + 6.0*a*a + 7.0*a*b + 8.0*a*c + 9.0*a*d + 10.0*b*b + 11.0*b*c + 12.0*b*d + 13.0*c*c + 14.0*c*d + 15.0*d*d + 16.0*a*a*a + 17.0*a*a*b + 18.0*a*a*c + 19.0*a*a*d + 20.0*a*b*b + 21.0*a*b*c + 22.0*a*b*d + 23.0*a*c*c + 24.0*a*c*d + 25.0*a*d*d + 26.0*b*b*b + 27.0*b*b*c + 28.0*b*b*d + 29.0*b*c*c + 30.0*b*c*d + 31.0*c*c*c + 32.0*c*c*d + 33.0*c*d*d + 34.0*d*d*d + 35.0*a*a*a*a + 36.0*a*a*a*b + 37.0*a*a*a*c + 38.0*a*a*a*d + 39.0*a*a*b*b + 40.0*a*a*b*c + 41.0*a*a*b*d + 42.0*a*a*c*c + 43.0*a*a*c*d + 44.0*a*a*d*d + 45.0*a*b*b*b + 46.0*a*b*b*c + 47.0*a*b*b*d + 48.0*a*c*c*c + 49.0*a*c*c*d + 50.0*a*c*d*d + 51.0*a*d*d*d + 52.0*b*b*b*b + 53.0*b*b*b*c + 54.0*b*b*b*d + 55.0*b*b*c*c + 56.0*b*b*c*d + 57.0*b*b*d*d + 58.0*b*c*c*c + 59.0*b*c*c*d + 60.0*b*c*d*d + 61.0*b*d*d*d + 62.0*c*c*c*c + 63.0*c*c*c*d + 64.0*c*c*d*d + 65.0*c*d*d*d + 66.0*d*d*d*d; if(debug>1) printf("data_set u=%g, a=%g, b=%g, c=%g, d=%g\n", u, a, b, c, d); *u1=u; *a1=a; *b1=b; *c1=c; *d1=d; return 1; } /* n=highest sum of powers, maximum 4 here. * mm=number of independent variables, must be 4 here. * nnn= number of coefficients, returned */ static int gen_4d_powers(int n, int mm, int *nnn, int a[], int b[], int c[], int d[]) { int i,j,k,m; /* need more, or generalize, for more variables */ int ii = 0; /* pointer to next available a[ii], b[ii], c[ii], d[ii] */ int nn=1; /* power being generated */ int pwrsix[6]={0,1,5,15,34,66}; /* start of each order */ int pwrs[67][4]={{0,0,0,0},{1,0,0,0},{0,1,0,0},{0,0,1,0},{0,0,0,1}, {2,0,0,0},{1,1,0,0},{1,0,1,0},{1,0,0,1},{0,2,0,0}, {0,1,1,0},{0,1,0,1},{0,0,2,0},{0,0,1,1},{0,0,0,2}, {3,0,0,0},{2,1,0,0},{2,0,1,0},{2,0,0,1},{1,2,0,0}, {1,1,1,0},{1,1,0,1},{1,0,2,0},{1,0,1,1},{1,0,0,2}, {0,3,0,0},{0,2,1,0},{0,2,0,1},{0,1,2,0},{0,1,1,1}, {0,0,3,0},{0,0,2,1},{0,0,1,2},{0,0,0,3},{4,0,0,0}, {3,1,0,0},{3,0,1,0},{3,0,0,1},{2,2,0,0},{2,1,1,0}, {2,1,0,1},{2,0,2,0},{2,0,1,1},{2,0,0,2},{1,3,0,0}, {1,2,1,0},{1,2,0,1},{1,0,3,0},{1,0,2,1},{1,0,1,2}, {1,0,0,3},{0,4,0,0},{0,3,1,0},{0,3,0,1},{0,2,2,0}, {0,2,1,1},{0,2,0,2},{0,1,3,0},{0,1,2,1},{0,1,1,2}, {0,1,0,3},{0,0,4,0},{0,0,3,1},{0,0,2,2},{0,0,1,3}, {0,0,0,4}}; if(mm != 4) printf("ERROR, this only good for mm=4 independent variables\n"); printf("terms used to find fit \n"); for(i=0; i<=n; i++) /* power requested */ { for(ii=pwrsix[i]; ii4) printf("Av[%d]=%g, Bv[%d]=%g, Cv[%d]=%g, Dv[%d]=%g \n", i, Av[i], i, Bv[i], i, Cv[i], i, Dv[i]); } for(i=0; i2) { for(i=0; iamax) amax=a1; if(a1bmax) bmax=b1; if(b1cmax) cmax=c1; if(c1dmax) dmax=d1; if(d1umax) umax=u1; if(u1maxe) maxe=diff; sum = sum + diff; sumsq = sumsq + diff*diff; k++; } printf("check_4d k=%d, amin=%g, amax=%g, bmin=%g, bmax=%g \n", k, amin, amax, bmin, bmax); printf(" cmin=%g, cmax=%g, dmin=%g, dmax=%g, umin=%g, umax=%g \n", cmin, cmax, dmin, dmax, umin, umax); *max_err = maxe; *avg_err = sum/(double)k; *rms_err = sqrt(sumsq/(double)k); } /* end check_4d */ static void simeq(int n, double A[], double Y[], double X[]) { /* PURPOSE : SOLVE THE LINEAR SYSTEM OF EQUATIONS WITH REAL */ /* COEFFICIENTS [A] * |X| = |Y| */ /* */ /* INPUT : THE NUMBER OF EQUATIONS n */ /* THE REAL MATRIX A should be A[i][j] but A[i*n+j] */ /* THE REAL VECTOR Y */ /* OUTPUT : THE REAL VECTOR X */ /* */ /* METHOD : GAUSS-JORDAN ELIMINATION USING MAXIMUM ELEMENT */ /* FOR PIVOT. */ /* */ /* USAGE : simeq(n,A,Y,X); */ /* */ /* */ /* WRITTEN BY : JON SQUIRE , 28 MAY 1983 */ /* ORIGONAL DEC 1959 for IBM 650, TRANSLATED TO OTHER LANGUAGES */ /* e.g. FORTRAN converted to Ada converted to C */ double *B; /* [n][n+1] WORKING MATRIX */ int *ROW; /* ROW INTERCHANGE INDICIES */ int HOLD , I_PIVOT; /* PIVOT INDICIES */ double PIVOT; /* PIVOT ELEMENT VALUE */ double ABS_PIVOT; int i,j,k,m; B = (double *)calloc((n+1)*(n+1), sizeof(double)); ROW = (int *)calloc(n, sizeof(int)); m = n+1; /* BUILD WORKING DATA STRUCTURE */ for(i=0; i ABS_PIVOT){ I_PIVOT = i; PIVOT = B[ROW[i]*m+k]; ABS_PIVOT = abs ( PIVOT ); } } /* HAVE PIVOT, INTERCHANGE ROW POINTERS */ HOLD = ROW[k]; ROW[k] = ROW[I_PIVOT]; ROW[I_PIVOT] = HOLD; /* CHECK FOR NEAR SINGULAR */ if( ABS_PIVOT < 1.0E-8 ){ for(j=k+1; j