/* time_mp4.c time parallelism of four processor multiprocessor */ /* gcc -o time_mp4 -O3 time_mp4.c -lm -lpthread */ /* time_mp4 > time_mp4.out */ #define NTHREADS 4 #include #include #include #include #include #include /* FTYPE may be double or float */ #define FTYPE double void fft(FTYPE *b, FTYPE *a2, FTYPE *a1, int n, int sgn); #define PI 3.141592653589793238462643 /* this is about 20 seconds computation per thread */ void *compute(void *arg) { int myid=*(int *)arg; printf("compute %d: running \n",myid); fflush(stdout); /* a good idea inside a thread */ /* time_mp4 canned data, timing test */ int i, j, k, m, n; FTYPE *a1, *a2, *b; int sgn = 1; /* default sign is 1 for FFT */ FTYPE w; n = 65536; /* number of complex points */ m = 32; /* number of FFT and IFFT */ w = 2.0*PI/(FTYPE)n; printf("compute %d: allocating space for 4*%d elements\n", myid, n); b = (FTYPE *) malloc( 2*n*sizeof(FTYPE) ); /* complex */ a1 = (FTYPE *) malloc( n*sizeof(FTYPE) ); /* real */ a2 = (FTYPE *) malloc( n*sizeof(FTYPE) ); /* real */ printf(" running %d FFT and IFFT of length %d\n", m, n); fflush(stdout); for (i=0; i>=1) if(a & q) b|=p; return b; } /* end bitrev */ /* ilog2(n) -- return an integer log, base 2 */ int ilog2(int n) { int i; for (i=8*sizeof(int)-1; i>=0 && ((1<