00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067 #include <stdio.h>
00068 #include <math.h>
00069 #include <stdlib.h>
00070
00071 #include "kube-gustavson-fft.hpp"
00072
00078 #define LoadRow(buffer,row,cols) if (1){\
00079 register int j,k;\
00080 for (j = row*cols, k = 0 ; k < cols ; j++, k++){\
00081 stageBuff[k].re = buffer[j].re;\
00082 stageBuff[k].im = buffer[j].im;\
00083 }\
00084 } else {}
00085
00086 #define StoreRow(buffer,row,cols) if (1){\
00087 register int j,k;\
00088 for (j = row*cols, k = 0 ; k < cols ; j++, k++){\
00089 buffer[j].re = stageBuff[k].re;\
00090 buffer[j].im = stageBuff[k].im;\
00091 }\
00092 } else {}
00093
00094 #define LoadCol(buffer,col,rows,cols) if (1){\
00095 register int j,k;\
00096 for (j = i,k = 0 ; k < rows ; j+=cols, k++){\
00097 stageBuff[k].re = buffer[j].re;\
00098 stageBuff[k].im = buffer[j].im;\
00099 }\
00100 } else {}
00101
00102 #define StoreCol(buffer,col,rows,cols) if (1){\
00103 register int j,k;\
00104 for (j = i,k = 0 ; k < rows ; j+=cols, k++){\
00105 buffer[j].re = stageBuff[k].re;\
00106 buffer[j].im = stageBuff[k].im;\
00107 }\
00108 } else {}
00109
00110
00111
00112 #define handle_error(msg) fprintf(stderr,msg)
00113
00114 DCOMPLEX *stageBuff;
00115 COMPLEX *bigBuff;
00116 DCOMPLEX *bigBuffd;
00117
00118
00120 int allocateBuffer(int size)
00121 {
00122 stageBuff = (DCOMPLEX*) malloc(size*sizeof(DCOMPLEX));
00123 if(stageBuff==(DCOMPLEX*)NULL)
00124 {
00125 handle_error("Insufficient storage for fft buffers");
00126 return(ERROR);
00127 }
00128 else return(NO_ERROR);
00129 }
00130
00131
00133 void freeBuffer(void)
00134 {
00135 free((char*)stageBuff);
00136 }
00137
00138
00140 int power_of_2(int n)
00141 {
00142 int bits=0;
00143 while(n) {
00144 bits += n & 1;
00145 n >>= 1;
00146 }
00147 return(bits==1);
00148 }
00149
00150
00152 int fastlog2(int n)
00153 {
00154 int log = -1;
00155 while(n) {
00156 log++;
00157 n >>= 1;
00158 }
00159 return(log);
00160 }
00161
00162
00164 void R2TX(int nthpo, DCOMPLEX *c0, DCOMPLEX *c1)
00165 {
00166 int k,kk;
00167 double *cr0, *ci0, *cr1, *ci1, r1, fi1;
00168
00169 cr0 = &(c0[0].re);
00170 ci0 = &(c0[0].im);
00171 cr1 = &(c1[0].re);
00172 ci1 = &(c1[0].im);
00173
00174 for(k=0; k<nthpo; k+=2)
00175 {
00176 kk = k*2;
00177
00178 r1 = cr0[kk] + cr1[kk];
00179 cr1[kk] = cr0[kk] - cr1[kk];
00180 cr0[kk] = r1;
00181 fi1 = ci0[kk] + ci1[kk];
00182 ci1[kk] = ci0[kk] - ci1[kk];
00183 ci0[kk] = fi1;
00184 }
00185 }
00186
00187
00189 void R4TX(int nthpo, DCOMPLEX *c0, DCOMPLEX *c1,
00190 DCOMPLEX *c2, DCOMPLEX *c3)
00191 {
00192 int k,kk;
00193 double *cr0, *ci0, *cr1, *ci1, *cr2, *ci2, *cr3, *ci3;
00194 double r1, r2, r3, r4, i1, i2, i3, i4;
00195
00196 cr0 = &(c0[0].re);
00197 cr1 = &(c1[0].re);
00198 cr2 = &(c2[0].re);
00199 cr3 = &(c3[0].re);
00200 ci0 = &(c0[0].im);
00201 ci1 = &(c1[0].im);
00202 ci2 = &(c2[0].im);
00203 ci3 = &(c3[0].im);
00204
00205 for(k=1;k<=nthpo;k+=4)
00206 {
00207 kk = (k-1)*2;
00208
00209 r1 = cr0[kk] + cr2[kk];
00210 r2 = cr0[kk] - cr2[kk];
00211 r3 = cr1[kk] + cr3[kk];
00212 r4 = cr1[kk] - cr3[kk];
00213 i1 = ci0[kk] + ci2[kk];
00214 i2 = ci0[kk] - ci2[kk];
00215 i3 = ci1[kk] + ci3[kk];
00216 i4 = ci1[kk] - ci3[kk];
00217 cr0[kk] = r1 + r3;
00218 ci0[kk] = i1 + i3;
00219 cr1[kk] = r1 - r3;
00220 ci1[kk] = i1 - i3;
00221 cr2[kk] = r2 - i4;
00222 ci2[kk] = i2 + r4;
00223 cr3[kk] = r2 + i4;
00224 ci3[kk] = i2 - r4;
00225 }
00226 }
00227
00228
00230 void R8TX(int nxtlt, int nthpo, int length,
00231 DCOMPLEX *cc0, DCOMPLEX *cc1, DCOMPLEX *cc2, DCOMPLEX *cc3,
00232 DCOMPLEX *cc4, DCOMPLEX *cc5, DCOMPLEX *cc6, DCOMPLEX *cc7)
00233 {
00234 int j,k,kk;
00235 double scale, arg, tr, ti;
00236 double c1, c2, c3, c4, c5, c6, c7;
00237 double s1, s2, s3, s4, s5, s6, s7;
00238 double ar0, ar1, ar2, ar3, ar4, ar5, ar6, ar7;
00239 double ai0, ai1, ai2, ai3, ai4, ai5, ai6, ai7;
00240 double br0, br1, br2, br3, br4, br5, br6, br7;
00241 double bi0, bi1, bi2, bi3, bi4, bi5, bi6, bi7;
00242
00243 double *cr0, *cr1, *cr2, *cr3, *cr4, *cr5, *cr6, *cr7;
00244 double *ci0, *ci1, *ci2, *ci3, *ci4, *ci5, *ci6, *ci7;
00245
00246 cr0 = &(cc0[0].re);
00247 cr1 = &(cc1[0].re);
00248 cr2 = &(cc2[0].re);
00249 cr3 = &(cc3[0].re);
00250 cr4 = &(cc4[0].re);
00251 cr5 = &(cc5[0].re);
00252 cr6 = &(cc6[0].re);
00253 cr7 = &(cc7[0].re);
00254
00255 ci0 = &(cc0[0].im);
00256 ci1 = &(cc1[0].im);
00257 ci2 = &(cc2[0].im);
00258 ci3 = &(cc3[0].im);
00259 ci4 = &(cc4[0].im);
00260 ci5 = &(cc5[0].im);
00261 ci6 = &(cc6[0].im);
00262 ci7 = &(cc7[0].im);
00263
00264 scale = TWOPI/length;
00265
00266 for(j=1; j<=nxtlt; j++)
00267 {
00268 arg = (j-1)*scale;
00269 c1 = cos(arg);
00270 s1 = sin(arg);
00271 c2 = c1*c1 - s1*s1;
00272 s2 = c1*s1 + c1*s1;
00273 c3 = c1*c2 - s1*s2;
00274 s3 = c2*s1 + s2*c1;
00275 c4 = c2*c2 - s2*s2;
00276 s4 = c2*s2 + c2*s2;
00277 c5 = c2*c3 - s2*s3;
00278 s5 = c3*s2 + s3*c2;
00279 c6 = c3*c3 - s3*s3;
00280 s6 = c3*s3 + c3*s3;
00281 c7 = c3*c4 - s3*s4;
00282 s7 = c4*s3 + s4*c3;
00283
00284 for(k=j;k<=nthpo;k+=length)
00285 {
00286 kk = (k-1)*2;
00287
00288 ar0 = cr0[kk] + cr4[kk];
00289 ar1 = cr1[kk] + cr5[kk];
00290 ar2 = cr2[kk] + cr6[kk];
00291 ar3 = cr3[kk] + cr7[kk];
00292 ar4 = cr0[kk] - cr4[kk];
00293 ar5 = cr1[kk] - cr5[kk];
00294 ar6 = cr2[kk] - cr6[kk];
00295 ar7 = cr3[kk] - cr7[kk];
00296 ai0 = ci0[kk] + ci4[kk];
00297 ai1 = ci1[kk] + ci5[kk];
00298 ai2 = ci2[kk] + ci6[kk];
00299 ai3 = ci3[kk] + ci7[kk];
00300 ai4 = ci0[kk] - ci4[kk];
00301 ai5 = ci1[kk] - ci5[kk];
00302 ai6 = ci2[kk] - ci6[kk];
00303 ai7 = ci3[kk] - ci7[kk];
00304 br0 = ar0 + ar2;
00305 br1 = ar1 + ar3;
00306 br2 = ar0 - ar2;
00307 br3 = ar1 - ar3;
00308 br4 = ar4 - ai6;
00309 br5 = ar5 - ai7;
00310 br6 = ar4 + ai6;
00311 br7 = ar5 + ai7;
00312 bi0 = ai0 + ai2;
00313 bi1 = ai1 + ai3;
00314 bi2 = ai0 - ai2;
00315 bi3 = ai1 - ai3;
00316 bi4 = ai4 + ar6;
00317 bi5 = ai5 + ar7;
00318 bi6 = ai4 - ar6;
00319 bi7 = ai5 - ar7;
00320 cr0[kk] = br0 + br1;
00321 ci0[kk] = bi0 + bi1;
00322 if(j>1)
00323 {
00324 cr1[kk] = c4*(br0-br1) - s4*(bi0-bi1);
00325 cr2[kk] = c2*(br2-bi3) - s2*(bi2+br3);
00326 cr3[kk] = c6*(br2+bi3) - s6*(bi2-br3);
00327 ci1[kk] = c4*(bi0-bi1) + s4*(br0-br1);
00328 ci2[kk] = c2*(bi2+br3) + s2*(br2-bi3);
00329 ci3[kk] = c6*(bi2-br3) + s6*(br2+bi3);
00330 tr = IRT2*(br5-bi5);
00331 ti = IRT2*(br5+bi5);
00332 cr4[kk] = c1*(br4+tr) - s1*(bi4+ti);
00333 ci4[kk] = c1*(bi4+ti) + s1*(br4+tr);
00334 cr5[kk] = c5*(br4-tr) - s5*(bi4-ti);
00335 ci5[kk] = c5*(bi4-ti) + s5*(br4-tr);
00336 tr = -IRT2*(br7+bi7);
00337 ti = IRT2*(br7-bi7);
00338 cr6[kk] = c3*(br6+tr) - s3*(bi6+ti);
00339 ci6[kk] = c3*(bi6+ti) + s3*(br6+tr);
00340 cr7[kk] = c7*(br6-tr) - s7*(bi6-ti);
00341 ci7[kk] = c7*(bi6-ti) + s7*(br6-tr);
00342 }
00343 else
00344 {
00345 cr1[kk] = br0 - br1;
00346 cr2[kk] = br2 - bi3;
00347 cr3[kk] = br2 + bi3;
00348 ci1[kk] = bi0 - bi1;
00349 ci2[kk] = bi2 + br3;
00350 ci3[kk] = bi2 - br3;
00351 tr = IRT2*(br5-bi5);
00352 ti = IRT2*(br5+bi5);
00353 cr4[kk] = br4 + tr;
00354 ci4[kk] = bi4 + ti;
00355 cr5[kk] = br4 - tr;
00356 ci5[kk] = bi4 - ti;
00357 tr = -IRT2*(br7+bi7);
00358 ti = IRT2*(br7-bi7);
00359 cr6[kk] = br6 + tr;
00360 ci6[kk] = bi6 + ti;
00361 cr7[kk] = br6 - tr;
00362 ci7[kk] = bi6 - ti;
00363 }
00364 }
00365 }
00366 }
00367
00368
00380 void FFT842(int direction, int n, DCOMPLEX *b)
00381
00382
00383
00384 {
00385 double fn, r, fi;
00386
00387 int L[16],L1,L2,L3,L4,L5,L6,L7,L8,L9,L10,L11,L12,L13,L14,L15;
00388
00389 int j1,j2,j3,j4,j5,j6,j7,j8,j9,j10,j11,j12,j13,j14;
00390 int i, j, ij, ji, ij1, ji1;
00391
00392 int n2pow, n8pow, nthpo, ipass, nxtlt, length;
00393
00394 n2pow = fastlog2(n);
00395 nthpo = n;
00396 fn = 1.0 / (double)nthpo;
00397
00398 if(direction==FFT_FORWARD)
00399
00400 for(i=0;i<n;i++) {
00401 b[i].im = -b[i].im;
00402 }
00403
00404 if(direction==FFT_INVERSE)
00405
00406 for(i=0,j=n/2;j<n;i++,j++)
00407 {
00408 r = b[j].re; fi = b[j].im;
00409 b[j].re = b[i].re; b[j].im = b[i].im;
00410 b[i].re = r; b[i].im = fi;
00411 }
00412
00413 n8pow = n2pow/3;
00414
00415 if(n8pow)
00416 {
00417
00418 for(ipass=1;ipass<=n8pow;ipass++)
00419 {
00420 nxtlt = 0x1 << (n2pow - 3*ipass);
00421 length = 8*nxtlt;
00422 R8TX(nxtlt, nthpo, length,
00423 b, b+nxtlt, b+2*nxtlt, b+3*nxtlt,
00424 b+4*nxtlt, b+5*nxtlt, b+6*nxtlt, b+7*nxtlt);
00425 }
00426 }
00427
00428 if(n2pow%3 == 1)
00429 {
00430
00431 R2TX(nthpo, b, b+1);
00432 }
00433
00434 if(n2pow%3 == 2)
00435 {
00436
00437 R4TX(nthpo, b, b+1, b+2, b+3);
00438 }
00439
00440 for(j=1;j<=15;j++)
00441 {
00442 L[j] = 1;
00443 if(j-n2pow <= 0) L[j] = 0x1 << (n2pow + 1 - j);
00444 }
00445
00446 L15=L[1];L14=L[2];L13=L[3];L12=L[4];L11=L[5];L10=L[6];L9=L[7];
00447 L8=L[8];L7=L[9];L6=L[10];L5=L[11];L4=L[12];L3=L[13];L2=L[14];L1=L[15];
00448
00449 ij = 1;
00450
00451 for(j1=1;j1<=L1;j1++)
00452 for(j2=j1;j2<=L2;j2+=L1)
00453 for(j3=j2;j3<=L3;j3+=L2)
00454 for(j4=j3;j4<=L4;j4+=L3)
00455 for(j5=j4;j5<=L5;j5+=L4)
00456 for(j6=j5;j6<=L6;j6+=L5)
00457 for(j7=j6;j7<=L7;j7+=L6)
00458 for(j8=j7;j8<=L8;j8+=L7)
00459 for(j9=j8;j9<=L9;j9+=L8)
00460 for(j10=j9;j10<=L10;j10+=L9)
00461 for(j11=j10;j11<=L11;j11+=L10)
00462 for(j12=j11;j12<=L12;j12+=L11)
00463 for(j13=j12;j13<=L13;j13+=L12)
00464 for(j14=j13;j14<=L14;j14+=L13)
00465 for(ji=j14;ji<=L15;ji+=L14)
00466 {
00467 ij1 = ij-1;
00468 ji1 = ji-1;
00469 if(ij-ji<0)
00470 {
00471 r = b[ij1].re;
00472 b[ij1].re = b[ji1].re;
00473 b[ji1].re = r;
00474 fi = b[ij1].im;
00475 b[ij1].im = b[ji1].im;
00476 b[ji1].im = fi;
00477 }
00478 ij++;
00479 }
00480
00481 if(direction==FFT_FORWARD)
00482 for(i=0,j=n/2; j<n; i++,j++)
00483 {
00484 r = b[j].re; fi = b[j].im;
00485 b[j].re = b[i].re; b[j].im = -b[i].im;
00486 b[i].re = r; b[i].im = -fi;
00487 }
00488
00489 if(direction==FFT_INVERSE)
00490 for(i=0; i<nthpo; i++)
00491 {
00492 b[i].re *= fn;
00493 b[i].im *= fn;
00494 }
00495 }
00496
00497
00498
00499
00500
00501
00502
00503 int fft2f(COMPLEX *array, int rows, int cols, int direction)
00504 {
00505 int i, maxsize, errflag;
00506
00507 if(!power_of_2(rows) || !power_of_2(cols)) {
00508 handle_error("fft: input array must have dimensions a power of 2");
00509 return(ERROR);
00510 }
00511
00512
00513 bigBuff = array;
00514 maxsize = rows>cols ? rows : cols;
00515 errflag = allocateBuffer(maxsize);
00516 if(errflag != NO_ERROR) return(errflag);
00517
00518
00519 if(cols>1)
00520 for(i=0;i<rows;i++)
00521 {
00522 LoadRow(bigBuff,i,cols);
00523 FFT842(direction,cols,stageBuff);
00524 StoreRow(bigBuff,i,cols);
00525 }
00526
00527
00528 if(rows>1)
00529 for(i=0;i<cols;i++)
00530 {
00531 LoadCol(bigBuff,i,rows,cols);
00532 FFT842(direction,rows,stageBuff);
00533 StoreCol(bigBuff,i,rows,cols);
00534 }
00535
00536 freeBuffer();
00537 return(NO_ERROR);
00538 }
00539
00540
00541
00542
00543
00544
00545 int fft2d(DCOMPLEX *array, int rows, int cols, int direction)
00546 {
00547 int i, maxsize, errflag;
00548
00549 if(!power_of_2(rows) || !power_of_2(cols)) {
00550 handle_error("fft: input array must have dimensions a power of 2");
00551 return(ERROR);
00552 }
00553
00554
00555 bigBuffd = array;
00556 maxsize = rows>cols ? rows : cols;
00557 errflag = allocateBuffer(maxsize);
00558 if(errflag != NO_ERROR) return(errflag);
00559
00560
00561 if(cols>1)
00562 for(i=0;i<rows;i++)
00563 {
00564 LoadRow(bigBuffd,i,cols);
00565 FFT842(direction,cols,stageBuff);
00566 StoreRow(bigBuffd,i,cols);
00567 }
00568
00569
00570 if(rows>1)
00571 for(i=0;i<cols;i++)
00572 {
00573 LoadCol(bigBuffd,i,rows,cols);
00574 FFT842(direction,rows,stageBuff);
00575 StoreCol(bigBuffd,i,rows,cols);
00576 }
00577
00578 freeBuffer();
00579 return(NO_ERROR);
00580 }
00581
00582
00583
00585 int forward_fft2f(COMPLEX *array, int rows, int cols)
00586 {
00587 return(fft2f(array, rows, cols, FFT_FORWARD));
00588 }
00589
00591 int inverse_fft2f(COMPLEX *array, int rows, int cols)
00592 {
00593 return(fft2f(array, rows, cols, FFT_INVERSE));
00594 }
00595
00597 int forward_fft2d(DCOMPLEX *array, int rows, int cols)
00598 {
00599 return(fft2d(array, rows, cols, FFT_FORWARD));
00600 }
00601
00603 int inverse_fft2d(DCOMPLEX *array, int rows, int cols)
00604 {
00605 return(fft2d(array, rows, cols, FFT_INVERSE));
00606 }