kube-gustavson-fft.cpp

Go to the documentation of this file.
00001 
00002 /*!*************************************************************
00003  * kube-gustavson-fft.c
00004  *
00005  * Forward and inverse discrete 2D Fourier transforms.
00006  *
00007  * Originally a FORTRAN implementation published in:
00008  * Programming for Digital Signal Processing, IEEE Press 1979,
00009  * Section 1, by G. D. Bergland and M. T. Dolan.
00010  * Ported long ago from Fortran to old-fashioned Standard C by
00011  * someone who failed to put his or her name in the source.
00012  * Ported from Standard C to ANSI C by Stefan Gustavson
00013  * (stegu@itn.liu.se) 2003-10-20.
00014  *
00015  * This is a pretty fast FFT algorithm. Not super-optimized
00016  * lightning-fast, but very good considering its age and
00017  * relative simplicity. There are several places in the code
00018  * where clock cycles could be saved, for example by getting rid
00019  * of some copying of data back and forth, but the savings
00020  * would not be that great. If you want optimally fast FFTs,
00021  * consider the excellent FFTW package. (http://www.fftw.org)
00022  *
00023  * This software is in the public domain.
00024  *
00025  **************************************************************
00026  *
00027  * int forward_fft2f(COMPLEX *array, int rows, int cols)
00028  * int inverse_fft2f(COMPLEX *array, int rows, int cols)
00029  * int forward_fft2d(DCOMPLEX *array, int rows, int cols)
00030  * int inverse_fft2d(DCOMPLEX *array, int rows, int cols)
00031  *
00032  * These functions compute the forward and inverse DFT's, respectively, 
00033  * of a single-precision COMPLEX or DCOMPLEX array by means of an
00034  * FFT algorithm.
00035  *
00036  * The result is a COMPLEX/DCOMPLEX array of the same size, returned
00037  * in the same space as the input array.  That is, the original array
00038  * is overwritten and destroyed.
00039  *
00040  * Rows and columns must each be an integral power of 2.
00041  *
00042  * These routines return integer value ERROR if an error was detected,
00043  * NO_ERROR otherwise.
00044  *
00045  * This particular implementation of the DFT uses the transform pair
00046  * defined as follows:
00047  * Let there be two complex arrays each with n rows and m columns.
00048  * Index them as 
00049  * \f[f(x,y):    0 <= x <= m - 1,  0 <= y <= n - 1 \f]
00050  * \f[F(u,v):    -m/2 <= u <= m/2 - 1,  -n/2 <= v <= n/2 - 1 \f]
00051  *
00052  * Then the forward and inverse transforms are related as follows.
00053  * Forward:
00054  * \f[F(u,v) = \sum_{x=0}^{m-1} \sum_{y=0}^{n-1} 
00055  *                      f(x,y) \exp{-2\pi i (ux/m + vy/n)} \f]
00056  *
00057  * Inverse:
00058  * \f[f(x,y) = 1/(mn) \sum_{u=-m/2}^{m/2-1} \sum_{v=-n/2}^{n/2-1} 
00059  *                      F(u,v) \exp{2\pi i (ux/m + vy/n)} \f]
00060  *  
00061  * Therefore, the transforms have these properties:
00062  * 1.  \f$\sum_x \sum_y  f(x,y) = F(0,0)\f$
00063  * 2.  \f$m n \sum_x \sum_y |f(x,y)|^2 = \sum_u \sum_v |F(u,v)|^2\f$
00064  *
00065  */
00066 
00067 #include <stdio.h>
00068 #include <math.h>
00069 #include <stdlib.h>
00070 
00071 #include "kube-gustavson-fft.hpp"
00072 
00078 #define LoadRow(buffer,row,cols) if (1){\
00079     register int j,k;\
00080     for (j = row*cols, k = 0 ; k < cols ; j++, k++){\
00081         stageBuff[k].re = buffer[j].re;\
00082           stageBuff[k].im = buffer[j].im;\
00083         }\
00084     } else {}
00085 
00086 #define StoreRow(buffer,row,cols) if (1){\
00087     register int j,k;\
00088     for (j = row*cols, k = 0 ; k < cols ; j++, k++){\
00089         buffer[j].re = stageBuff[k].re;\
00090         buffer[j].im = stageBuff[k].im;\
00091         }\
00092     } else {}
00093 
00094 #define LoadCol(buffer,col,rows,cols) if (1){\
00095     register int j,k;\
00096     for (j = i,k = 0 ; k < rows ; j+=cols, k++){\
00097         stageBuff[k].re = buffer[j].re;\
00098         stageBuff[k].im = buffer[j].im;\
00099         }\
00100     } else {}
00101 
00102 #define StoreCol(buffer,col,rows,cols) if (1){\
00103     register int j,k;\
00104     for (j = i,k = 0 ; k < rows ; j+=cols, k++){\
00105         buffer[j].re = stageBuff[k].re;\
00106         buffer[j].im = stageBuff[k].im;\
00107         }\
00108     } else {}
00109 
00110 
00111 /* Do something useful with an error message */
00112 #define handle_error(msg) fprintf(stderr,msg)
00113 
00114 DCOMPLEX *stageBuff;  /* buffer to hold a row or column at a time */
00115 COMPLEX         *bigBuff;    /* a pointer to a float input array */
00116 DCOMPLEX *bigBuffd;   /* a pointer to a double input array */
00117 
00118 
00120 int allocateBuffer(int size)
00121 {
00122   stageBuff = (DCOMPLEX*) malloc(size*sizeof(DCOMPLEX));
00123   if(stageBuff==(DCOMPLEX*)NULL) 
00124     {
00125       handle_error("Insufficient storage for fft buffers");
00126       return(ERROR);
00127     }
00128   else return(NO_ERROR);
00129 }
00130 
00131 
00133 void freeBuffer(void)
00134 {
00135   free((char*)stageBuff);
00136 }
00137 
00138 
00140 int power_of_2(int n)
00141 {
00142   int bits=0;
00143   while(n) {
00144     bits += n & 1;
00145     n >>= 1;
00146   }
00147   return(bits==1);
00148 }
00149 
00150 
00152 int fastlog2(int n)
00153 {
00154   int log = -1;
00155   while(n) {
00156     log++;
00157     n >>= 1;
00158   }
00159   return(log);
00160 }
00161 
00162 
00164 void R2TX(int nthpo, DCOMPLEX *c0, DCOMPLEX *c1)
00165 {
00166   int k,kk;
00167   double *cr0, *ci0, *cr1, *ci1, r1, fi1;
00168 
00169   cr0 = &(c0[0].re);
00170   ci0 = &(c0[0].im);
00171   cr1 = &(c1[0].re);
00172   ci1 = &(c1[0].im);
00173   
00174   for(k=0; k<nthpo; k+=2) 
00175     {
00176       kk = k*2;
00177 
00178       r1 = cr0[kk] + cr1[kk];
00179       cr1[kk] = cr0[kk] - cr1[kk];
00180       cr0[kk] = r1;
00181       fi1 = ci0[kk] + ci1[kk];
00182       ci1[kk] = ci0[kk] - ci1[kk];
00183       ci0[kk] = fi1;
00184     }
00185 }
00186 
00187 
00189 void R4TX(int nthpo, DCOMPLEX *c0, DCOMPLEX *c1,
00190           DCOMPLEX *c2, DCOMPLEX *c3)
00191 {
00192   int k,kk;
00193   double *cr0, *ci0, *cr1, *ci1, *cr2, *ci2, *cr3, *ci3;
00194   double r1, r2, r3, r4, i1, i2, i3, i4;
00195 
00196   cr0 = &(c0[0].re);
00197   cr1 = &(c1[0].re);
00198   cr2 = &(c2[0].re);
00199   cr3 = &(c3[0].re);
00200   ci0 = &(c0[0].im);
00201   ci1 = &(c1[0].im);
00202   ci2 = &(c2[0].im);
00203   ci3 = &(c3[0].im);
00204   
00205   for(k=1;k<=nthpo;k+=4) 
00206     {
00207       kk = (k-1)*2;  /* real and imag parts alternate */
00208       
00209       r1 = cr0[kk] + cr2[kk];
00210       r2 = cr0[kk] - cr2[kk];
00211       r3 = cr1[kk] + cr3[kk];
00212       r4 = cr1[kk] - cr3[kk];
00213       i1 = ci0[kk] + ci2[kk];
00214       i2 = ci0[kk] - ci2[kk];
00215       i3 = ci1[kk] + ci3[kk];
00216       i4 = ci1[kk] - ci3[kk];
00217       cr0[kk] = r1 + r3;
00218       ci0[kk] = i1 + i3;
00219       cr1[kk] = r1 - r3;
00220       ci1[kk] = i1 - i3;
00221       cr2[kk] = r2 - i4;
00222       ci2[kk] = i2 + r4;
00223       cr3[kk] = r2 + i4;
00224       ci3[kk] = i2 - r4;
00225     }
00226 }
00227 
00228         
00230 void R8TX(int nxtlt, int nthpo, int length,
00231           DCOMPLEX *cc0, DCOMPLEX *cc1, DCOMPLEX *cc2, DCOMPLEX *cc3,
00232           DCOMPLEX *cc4, DCOMPLEX *cc5, DCOMPLEX *cc6, DCOMPLEX *cc7)
00233 {
00234   int j,k,kk;
00235   double scale, arg, tr, ti;
00236   double c1, c2, c3, c4, c5, c6, c7;
00237   double s1, s2, s3, s4, s5, s6, s7;
00238   double ar0, ar1, ar2, ar3, ar4, ar5, ar6, ar7;
00239   double ai0, ai1, ai2, ai3, ai4, ai5, ai6, ai7;
00240   double br0, br1, br2, br3, br4, br5, br6, br7;
00241   double bi0, bi1, bi2, bi3, bi4, bi5, bi6, bi7;
00242   
00243   double *cr0, *cr1, *cr2, *cr3, *cr4, *cr5, *cr6, *cr7;
00244   double *ci0, *ci1, *ci2, *ci3, *ci4, *ci5, *ci6, *ci7;
00245 
00246   cr0 = &(cc0[0].re);
00247   cr1 = &(cc1[0].re);
00248   cr2 = &(cc2[0].re);
00249   cr3 = &(cc3[0].re);
00250   cr4 = &(cc4[0].re);
00251   cr5 = &(cc5[0].re);
00252   cr6 = &(cc6[0].re);
00253   cr7 = &(cc7[0].re);
00254 
00255   ci0 = &(cc0[0].im);
00256   ci1 = &(cc1[0].im);
00257   ci2 = &(cc2[0].im);
00258   ci3 = &(cc3[0].im);
00259   ci4 = &(cc4[0].im);
00260   ci5 = &(cc5[0].im);
00261   ci6 = &(cc6[0].im);
00262   ci7 = &(cc7[0].im);
00263 
00264   scale = TWOPI/length;
00265 
00266   for(j=1; j<=nxtlt; j++) 
00267     {
00268       arg = (j-1)*scale;
00269       c1 = cos(arg);
00270       s1 = sin(arg);
00271       c2 = c1*c1 - s1*s1;
00272       s2 = c1*s1 + c1*s1;
00273       c3 = c1*c2 - s1*s2;
00274       s3 = c2*s1 + s2*c1;
00275       c4 = c2*c2 - s2*s2;
00276       s4 = c2*s2 + c2*s2;
00277       c5 = c2*c3 - s2*s3;
00278       s5 = c3*s2 + s3*c2;
00279       c6 = c3*c3 - s3*s3;
00280       s6 = c3*s3 + c3*s3;
00281       c7 = c3*c4 - s3*s4;
00282       s7 = c4*s3 + s4*c3;
00283 
00284       for(k=j;k<=nthpo;k+=length) 
00285         {
00286           kk = (k-1)*2; /* index by twos; re & im alternate */
00287           
00288           ar0 = cr0[kk] + cr4[kk];
00289           ar1 = cr1[kk] + cr5[kk];
00290           ar2 = cr2[kk] + cr6[kk];
00291           ar3 = cr3[kk] + cr7[kk];
00292           ar4 = cr0[kk] - cr4[kk];
00293           ar5 = cr1[kk] - cr5[kk];
00294           ar6 = cr2[kk] - cr6[kk];
00295           ar7 = cr3[kk] - cr7[kk];
00296           ai0 = ci0[kk] + ci4[kk];
00297           ai1 = ci1[kk] + ci5[kk];
00298           ai2 = ci2[kk] + ci6[kk];
00299           ai3 = ci3[kk] + ci7[kk];
00300           ai4 = ci0[kk] - ci4[kk];
00301           ai5 = ci1[kk] - ci5[kk];
00302           ai6 = ci2[kk] - ci6[kk];
00303           ai7 = ci3[kk] - ci7[kk];
00304           br0 = ar0 + ar2;
00305           br1 = ar1 + ar3;
00306           br2 = ar0 - ar2;
00307           br3 = ar1 - ar3;
00308           br4 = ar4 - ai6;
00309           br5 = ar5 - ai7;
00310           br6 = ar4 + ai6;
00311           br7 = ar5 + ai7;
00312           bi0 = ai0 + ai2;
00313           bi1 = ai1 + ai3;
00314           bi2 = ai0 - ai2;
00315           bi3 = ai1 - ai3;
00316           bi4 = ai4 + ar6;
00317           bi5 = ai5 + ar7;
00318           bi6 = ai4 - ar6;
00319           bi7 = ai5 - ar7;
00320           cr0[kk] = br0 + br1;
00321           ci0[kk] = bi0 + bi1;
00322           if(j>1) 
00323             {
00324               cr1[kk] = c4*(br0-br1) - s4*(bi0-bi1);
00325               cr2[kk] = c2*(br2-bi3) - s2*(bi2+br3);
00326               cr3[kk] = c6*(br2+bi3) - s6*(bi2-br3);
00327               ci1[kk] = c4*(bi0-bi1) + s4*(br0-br1);
00328               ci2[kk] = c2*(bi2+br3) + s2*(br2-bi3);
00329               ci3[kk] = c6*(bi2-br3) + s6*(br2+bi3);
00330               tr = IRT2*(br5-bi5);
00331               ti = IRT2*(br5+bi5);
00332               cr4[kk] = c1*(br4+tr) - s1*(bi4+ti);
00333               ci4[kk] = c1*(bi4+ti) + s1*(br4+tr);
00334               cr5[kk] = c5*(br4-tr) - s5*(bi4-ti);
00335               ci5[kk] = c5*(bi4-ti) + s5*(br4-tr);
00336               tr = -IRT2*(br7+bi7);
00337               ti = IRT2*(br7-bi7);
00338               cr6[kk] = c3*(br6+tr) - s3*(bi6+ti);
00339               ci6[kk] = c3*(bi6+ti) + s3*(br6+tr);
00340               cr7[kk] = c7*(br6-tr) - s7*(bi6-ti);
00341               ci7[kk] = c7*(bi6-ti) + s7*(br6-tr);
00342             }
00343           else 
00344             {
00345               cr1[kk] = br0 - br1;
00346               cr2[kk] = br2 - bi3;
00347               cr3[kk] = br2 + bi3;
00348               ci1[kk] = bi0 - bi1;
00349               ci2[kk] = bi2 + br3;
00350               ci3[kk] = bi2 - br3;
00351               tr = IRT2*(br5-bi5);
00352               ti = IRT2*(br5+bi5);
00353               cr4[kk] = br4 + tr;
00354               ci4[kk] = bi4 + ti;
00355               cr5[kk] = br4 - tr;
00356               ci5[kk] = bi4 - ti;
00357               tr = -IRT2*(br7+bi7);
00358               ti = IRT2*(br7-bi7);
00359               cr6[kk] = br6 + tr;
00360               ci6[kk] = bi6 + ti;
00361               cr7[kk] = br6 - tr;
00362               ci7[kk] = bi6 - ti;
00363             }
00364         }
00365     }
00366 }
00367 
00368 
00380 void FFT842(int direction, int n, DCOMPLEX *b)
00381 /* direction: FFT_FORWARD or FFT_INVERSE
00382  * n:  length of vector
00383  * *b: input vector */
00384 {
00385   double fn, r, fi;
00386 
00387   int L[16],L1,L2,L3,L4,L5,L6,L7,L8,L9,L10,L11,L12,L13,L14,L15;
00388 /*  int j0,j1,j2,j3,j4,j5,j6,j7,j8,j9,j10,j11,j12,j13,j14;*/
00389   int j1,j2,j3,j4,j5,j6,j7,j8,j9,j10,j11,j12,j13,j14;
00390   int i, j, ij, ji, ij1, ji1;
00391 /*  int nn, n2pow, n8pow, nthpo, ipass, nxtlt, length;*/
00392   int n2pow, n8pow, nthpo, ipass, nxtlt, length;
00393 
00394   n2pow = fastlog2(n);
00395   nthpo = n;
00396   fn = 1.0 / (double)nthpo; /* Scaling factor for inverse transform */
00397     
00398   if(direction==FFT_FORWARD) 
00399     /* Conjugate the input */
00400     for(i=0;i<n;i++) {
00401       b[i].im = -b[i].im;
00402     }
00403   
00404   if(direction==FFT_INVERSE)
00405     /* Scramble the inputs */
00406     for(i=0,j=n/2;j<n;i++,j++) 
00407       {
00408         r = b[j].re; fi = b[j].im;
00409         b[j].re = b[i].re; b[j].im = b[i].im;
00410         b[i].re = r; b[i].im = fi;
00411       }
00412   
00413   n8pow = n2pow/3;
00414   
00415   if(n8pow)
00416     {
00417       /* Radix 8 iterations */
00418       for(ipass=1;ipass<=n8pow;ipass++) 
00419         {
00420           nxtlt = 0x1 << (n2pow - 3*ipass);
00421           length = 8*nxtlt;
00422           R8TX(nxtlt, nthpo, length,
00423                b, b+nxtlt, b+2*nxtlt, b+3*nxtlt,
00424                b+4*nxtlt, b+5*nxtlt, b+6*nxtlt, b+7*nxtlt);
00425         }
00426     }
00427   
00428   if(n2pow%3 == 1) 
00429     {
00430       /* A final radix 2 iteration is needed */
00431         R2TX(nthpo, b, b+1); 
00432     }
00433   
00434   if(n2pow%3 == 2)  
00435     {
00436       /* A final radix 4 iteration is needed */
00437       R4TX(nthpo, b, b+1, b+2, b+3); 
00438     }
00439   
00440   for(j=1;j<=15;j++) 
00441     {
00442       L[j] = 1;
00443       if(j-n2pow <= 0) L[j] = 0x1 << (n2pow + 1 - j);
00444     }
00445 
00446   L15=L[1];L14=L[2];L13=L[3];L12=L[4];L11=L[5];L10=L[6];L9=L[7];
00447   L8=L[8];L7=L[9];L6=L[10];L5=L[11];L4=L[12];L3=L[13];L2=L[14];L1=L[15];
00448 
00449   ij = 1;
00450     
00451   for(j1=1;j1<=L1;j1++)
00452     for(j2=j1;j2<=L2;j2+=L1)
00453       for(j3=j2;j3<=L3;j3+=L2)
00454         for(j4=j3;j4<=L4;j4+=L3)
00455           for(j5=j4;j5<=L5;j5+=L4)
00456             for(j6=j5;j6<=L6;j6+=L5)
00457               for(j7=j6;j7<=L7;j7+=L6)
00458                 for(j8=j7;j8<=L8;j8+=L7)
00459                   for(j9=j8;j9<=L9;j9+=L8)
00460                     for(j10=j9;j10<=L10;j10+=L9)
00461                       for(j11=j10;j11<=L11;j11+=L10)
00462                         for(j12=j11;j12<=L12;j12+=L11)
00463                           for(j13=j12;j13<=L13;j13+=L12)
00464                             for(j14=j13;j14<=L14;j14+=L13)
00465                               for(ji=j14;ji<=L15;ji+=L14) 
00466                                 {
00467                                   ij1 = ij-1;
00468                                   ji1 = ji-1;     
00469                                   if(ij-ji<0)
00470                                         {
00471                                           r = b[ij1].re;
00472                                           b[ij1].re = b[ji1].re;
00473                                           b[ji1].re = r;
00474                                           fi = b[ij1].im;
00475                                           b[ij1].im = b[ji1].im;
00476                                           b[ji1].im = fi;
00477                                         }
00478                                   ij++;
00479                                 }
00480 
00481   if(direction==FFT_FORWARD)  /* Take conjugates & unscramble outputs */
00482     for(i=0,j=n/2; j<n; i++,j++)
00483         {
00484           r = b[j].re; fi = b[j].im;
00485           b[j].re = b[i].re; b[j].im = -b[i].im;
00486           b[i].re = r; b[i].im = -fi;
00487         }
00488 
00489   if(direction==FFT_INVERSE) /* Scale outputs */
00490     for(i=0; i<nthpo; i++) 
00491       {
00492         b[i].re *= fn;
00493         b[i].im *= fn;
00494       }
00495 }
00496 
00497 
00498 /*
00499  * Compute 2D DFT on float data:
00500  * forward if direction==FFT_FORWARD,
00501  * inverse if direction==FFT_INVERSE.
00502  */
00503 int fft2f(COMPLEX *array, int rows, int cols, int direction)
00504 {
00505   int i, maxsize, errflag;
00506   
00507   if(!power_of_2(rows) || !power_of_2(cols)) {
00508     handle_error("fft: input array must have dimensions a power of 2");
00509     return(ERROR);
00510   }
00511   
00512   /* Allocate 1D buffer */
00513   bigBuff = array;
00514   maxsize = rows>cols ? rows : cols;
00515   errflag = allocateBuffer(maxsize);
00516   if(errflag != NO_ERROR) return(errflag);
00517 
00518   /* Compute transform row by row */
00519   if(cols>1)
00520     for(i=0;i<rows;i++) 
00521       {
00522         LoadRow(bigBuff,i,cols);
00523         FFT842(direction,cols,stageBuff);
00524         StoreRow(bigBuff,i,cols);
00525       }
00526   
00527   /* Compute transform column by column */
00528   if(rows>1)
00529     for(i=0;i<cols;i++) 
00530       {
00531         LoadCol(bigBuff,i,rows,cols);
00532         FFT842(direction,rows,stageBuff);
00533         StoreCol(bigBuff,i,rows,cols);
00534       }
00535 
00536   freeBuffer();
00537   return(NO_ERROR);
00538 }
00539 
00540 /* 
00541  * Compute 2D DFT on double data:
00542  * forward if direction==FFT_FORWARD,
00543  * inverse if direction==FFT_INVERSE.
00544  */
00545 int fft2d(DCOMPLEX *array, int rows, int cols, int direction)
00546 {
00547   int i, maxsize, errflag;
00548   
00549   if(!power_of_2(rows) || !power_of_2(cols)) {
00550     handle_error("fft: input array must have dimensions a power of 2");
00551     return(ERROR);
00552   }
00553   
00554   /* Allocate 1D buffer */
00555   bigBuffd = array;
00556   maxsize = rows>cols ? rows : cols;
00557   errflag = allocateBuffer(maxsize);
00558   if(errflag != NO_ERROR) return(errflag);
00559 
00560   /* Compute transform row by row */
00561   if(cols>1)
00562     for(i=0;i<rows;i++) 
00563       {
00564         LoadRow(bigBuffd,i,cols);
00565         FFT842(direction,cols,stageBuff);
00566         StoreRow(bigBuffd,i,cols);
00567       }
00568   
00569   /* Compute transform column by column */
00570   if(rows>1)
00571     for(i=0;i<cols;i++) 
00572       {
00573         LoadCol(bigBuffd,i,rows,cols);
00574         FFT842(direction,rows,stageBuff);
00575         StoreCol(bigBuffd,i,rows,cols);
00576       }
00577 
00578   freeBuffer();
00579   return(NO_ERROR);
00580 }
00581 
00582 /* Finally, the entry points we announce in kube_fft.h */
00583 
00585 int forward_fft2f(COMPLEX *array, int rows, int cols)
00586 {
00587         return(fft2f(array, rows, cols, FFT_FORWARD));
00588 }
00589 
00591 int inverse_fft2f(COMPLEX *array, int rows, int cols)
00592 {
00593         return(fft2f(array, rows, cols, FFT_INVERSE));
00594 }
00595 
00597 int forward_fft2d(DCOMPLEX *array, int rows, int cols)
00598 {
00599         return(fft2d(array, rows, cols, FFT_FORWARD));
00600 }
00601 
00603 int inverse_fft2d(DCOMPLEX *array, int rows, int cols)
00604 {
00605         return(fft2d(array, rows, cols, FFT_INVERSE));
00606 }

Generated on Fri Jul 24 12:49:17 2009 for Xgrayimg Library by  doxygen 1.5.5