home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Amiga ACS 1997 #2
/
amigaacscoverdisc
/
utilities
/
shareware
/
music
/
gfft-2.03
/
source
/
gfft-2.03-source.lha
/
cfft.c
< prev
next >
Wrap
C/C++ Source or Header
|
1996-01-02
|
13KB
|
388 lines
/***************************************************************************
* Copyright (C) 1994 Charles P. Peterson *
* 4007 Enchanted Sun, San Antonio, Texas 78244-1254 *
* Email: Charles_P_Peterson@fcircus.sat.tx.us *
* *
* This is free software with NO WARRANTY. *
* See gfft.c, or run program itself, for details. *
* Support is available for a fee. *
***************************************************************************
*
* Program: gfft--General FFT analysis
* File: cfft.c
* Purpose: fast Fourier analysis function with complex i/o array
* Author: Charles Peterson (CPP)
* History: 4-June-1993 CPP; Created.
* 9-March-1994 CPP; added SAVE_TRIG_OPT sections.
* 6-Feb-95 CPP (1.31); Progress Requester
* Comment:
* Inspired by the algorithm expressed in four1.c, page 411-412 in
* Numerical Recipes in C (William H. Press, Brian P. Flannery, Saul A.
* Teukolsky, William T. Vetterling. Cambridge University Press, 1988.)
* In turn, inspiration for their work came from N. Brenner of Lincoln
* Laboratories, Danielson, Lanczos, J. W. Cooley, J. W. Tukey, and many
* others, including Fourier, for whom the underlying analysis is named.
* This, however, is an original implementation without any copied code.
* I hope it is much easier to read and understand than its predecessors.
* If nothing else, it is very verbose, and doesn't require an intuitive
* understanding of all trigonometric identities. Things got a little
* worse when I added an option to save the previously used trig values,
* but most people would consider that (in principle) worthwhile.
*
* Single precision floating point, except for trigonometric values
* which are computed in double precision.
*/
#define PROGRESS_INDICATOR
#define SAVE_TRIG_OPT
/*
* Comment the above lines out for easiest use outside GFFT.
*
* You will lose the optional saving of trig values, but (see note below)
* that really doesn't make a big difference in performance.
*
* Otherwise, when using this code outside GFFT you will have to duplicate
* the GFFT memory allocation function, or modify this file accordingly
* (e.g. with an error return if the memory demanded cannot be allocated).
*
* Note: When SAVE_TRIG_OPT is defined, as in GFFT, there is a BOOLEAN
* global variable SaveMemory which disables the saving of trig values
* when set.
*/
#include <math.h>
#ifdef _AMIGA
#ifdef _M68881
#include <m68881.h>
#endif
#endif
#include "complex.h"
#ifdef SAVE_TRIG_OPT
#include "gfft.h" /* gmalloc safe allocation (longjmps on error) */
#include "settings.h" /* SaveMemory option */
#endif
#ifdef PROGRESS_INDICATOR
#include "wbench.h"
#define PROGRESS_INCREMENT 1024
long Inner_Loop_Count = 0;
extern double Percent_Per_Loop;
extern double Initial_Progress;
static long next_progress_count = 0;
#endif
void cfft (Complex_float datac[], long n, int isign)
{
/* ARGUMENTS:
*
* datac input/output Complex_float data array
* transformed according to isign (see below)
*
* n number of complex data elements
* *** n MUST BE A POWER OF 2 ***
* *** THIS IS NOT CHECKED FOR ***
*
* isign if 1, datac is replaced with its discrete Fourier transform
* if -1, datac is replaced with n times its INVERSE
* discrete Fourier transform
*/
#ifdef SAVE_TRIG_OPT
static Complex_float *wkvector = NULL;
static last_n = 0;
#endif
#ifdef PROGRESS_INDICATOR
next_progress_count = 0;
#endif
/*
* This function is divided into 2 or 3 main blocks to minimize the scope
* of variables involved and hopefully to encourage better register use.
*/
/* In the first block we sort the input array of complex numbers into
* bit reversal order (e.g. the value at binary index ...001 is swapped
* with the value at binary index 100...)
*
* While i counts forwards from 1, j counts "inversely" from binary 100...
*
* Skip i==0 and i==n-1 (They never get swapped anyway)
*/
{
long const halfn = n / 2;
long const nminus1 = n - 1;
long register j = halfn;
long register i = 1;
for (; i < nminus1; i++)
{
if (j > i) /* If move required, and not done before */
{
CF_SWAP (datac[i], datac[j]);
}
/*
* The following sub-block is for inverse counting.
* To count inversely, we clear highest set bits until reaching
* the first clear bit, then set it.
*/
{
long register scanbit = halfn;
while (j >= scanbit && scanbit > 0)
{
j -= scanbit;
scanbit >>= 1;
}
j += scanbit;
} /* End inverse counting sub-block */
} /* End for (i = 1, 2, ..., n-1) */
} /* End sorting block */
/*
* The following block(s) are the Danielson-Lanczos section of the routine.
* Here we combine the transforms, initially of one value each, by two
* at a time giving us transforms twice as large, until everything has
* been combined into one big transform.
*
* Thanks to clever sorting, each transform of 'even' elements is
* followed by a transform of 'odd' elements during each combination.
*
* mathematical summary (in which = denotes equality or definition):
* (A transform of length one simply copies input to output)
* F[k] = F[k][even] + (w ** k) * F[k][odd]
* (w ** k) = - (w ** (k + N/2))
* w = e ** (2 * PI * i / N)
* e ** (i * theta) = cos (theta) + i * sin (theta)
* i = sqr (-1)
* e and PI are the named transcendental numbers
* cos and sin are the named trigonometric functions
* N is the number of complex samples; k is a number 0..N-1
*/
#ifdef SAVE_TRIG_OPT
if (SaveMemory)
#endif
/*
* This is the unaccelerated version
* The trig constants are computed as they are needed
* each time cfft is called.
*/
{
long osize; /* Size of old transforms being combined */
long nsize; /* Size of new transforms being produced */
Complex_double w;
Complex_double wtemp;
Complex_double wk; /* (w ** k) */
Complex_float wk_times_f_k_odd;
Complex_float wkfloat; /* Floating copy for innermost loop */
double theta;
long ieven, iodd, k;
for (osize = 1,nsize = 2; osize < n; osize = nsize,nsize *= 2)
{
theta = 2.0 * PI / (isign * nsize); /* PI is from math.h */
w.real = cos (theta);
w.imaginary = sin (theta);
wk.real = 1.0;
wk.imaginary = 0.0;
wkfloat.real = 1.0;
wkfloat.imaginary = 0.0;
for (k = 0; k < osize; k++)
{
for (ieven = k; ieven < n; ieven += nsize)
{
iodd = ieven + osize;
/*
* Note that the the upper and lower parts of each new
* transform being produced use the same components--
* taken through a 'second cycle' since they are periodic
* in their original N (nsize/2). Only W**k differs,
* and, fortuitously, W**k = -W**(k+nsize/2). Also
* fortuitously, the input slots for F_k_even and F_k_odd
* are separated by nsize/2, as are the output slots for
* F_k and F_(k+nsize/2), so we can use and write over the
* same slots during each pass in the inner loop, using the
* same value of W**k (we subtract instead of negating).
*/
C_MULT (wkfloat, datac[iodd], wk_times_f_k_odd);
C_SUB (datac[ieven], wk_times_f_k_odd, datac[iodd]);
C_ADD (datac[ieven], wk_times_f_k_odd, datac[ieven]);
} /* end for (sums using same wk factor) */
/* C_MULT (wk, w, wk) but must use temp or i/o coincide */
C_MULT (wk, w, wtemp);
wkfloat.real = (float) (wk.real = wtemp.real);
wkfloat.imaginary = (float) (wk.imaginary =
wtemp.imaginary);
} /* end for (k = 1, 2, ..., osize) */
} /* end for (osize, nsize...) */
} /* end of Danielson-Lanczos section (SaveMemory slow version) */
#ifdef SAVE_TRIG_OPT
else /* if (!SaveMemory) */
{
/* To speed things up under most cases (where the same N is used for
* more than one transform, we are going to first create the vector of
* trigonometrically derived wk constants. If N is unchanged, and the
* the vector has already been created, we simply re-use the previous
* vector. If it weren't for this section (which uses GFFT allocation
* routines) there would be no memory allocated in cfft, and no need for
* the gfft.h header file). Note that this also increases the dynamic
* memory requirements of gfft significantly because the vector necessary
* must be large enough for 2*n complex floating point numbers, making it
* one of the biggest single dynamic memory uses--particularly for large N
* which some people might be interested in. So, I have decided
* to make this feature optional, if included at all. As currently
* implemented, it also requires the 'safe' gmalloc, which longjmps
* on error (so as not to confuse the interface here and elsewhere).
*
* 11-March-94 CPP; I'm somewhat disappointed with the 5-7% overall
* performance improvement attributable to this modification, in spite of
* the fact that lstat shows cfft as using 67% (58% accelerated) of the
* total time for a 30 sec batch spectrum analysis (run on a vanilla 68000
* with no FFP even). But, I should have anticipated this minor change;
* the trigs become a relatively small part of the overall calculations.
* Don't let anyone tell you any different. (Meanwhile, the dreaded
* ok_spectrum and ok_read, which look so horribly inefficient, were only
* taking around 2% of the total time each.) The number one line is the
* complex subtraction following the main fft complex multiplication; it
* probably waits there for the complex multiplication to finish. That,
* not surprisingly, it the chief bottleneck. As it should be.
*
*/
if (n > 1 && (!wkvector || n != last_n))
{
long osize; /* Size of old transforms being combined */
long nsize; /* Size of new transforms being produced */
Complex_double w;
Complex_double wk; /* (w ** k) */
Complex_double wtemp;
double theta;
long k;
long vsize; /* vector size */
long ntemp;
long wki = 0;
/*
* The vector size is N + log2(N) - 1 where log2 is 'log base 2'
* we calculate this using integer arithmetic
*/
vsize = -1;
for (ntemp = n; ntemp > 1; ntemp /= 2) vsize++; /* log base 2 */
vsize += n;
if (wkvector)
{
gfree (wkvector);
}
wkvector = gmalloc (vsize * sizeof(Complex_float),
NOTHING_SPECIAL);
for (osize = 1,nsize = 2; osize < n; osize = nsize,nsize *= 2)
{
theta = 2.0 * PI / (isign * nsize); /* PI is from math.h */
w.real = cos (theta);
w.imaginary = sin (theta);
wk.real = 1.0;
wk.imaginary = 0.0;
wkvector[wki].real = 1.0;
wkvector[wki++].imaginary = 0.0;
for (k = 0; k < osize; k++)
{
/* C_MULT (wk, w, wk) but must use temp or i/o coincide */
C_MULT (wk, w, wtemp);
wkvector[wki].real = (float) (wk.real = wtemp.real);
wkvector[wki++].imaginary = (float) (wk.imaginary =
wtemp.imaginary);
} /* end for (k = 1, 2, ..., osize) */
} /* end for (osize, nsize...) */
last_n = n;
} /* end of trig constants computation block */
/*
* Now, we compute this fft
*/
{
long osize; /* Size of old transforms being combined */
long nsize; /* Size of new transforms being produced */
Complex_float wk_times_f_k_odd;
long ieven, iodd, k;
register Complex_float *wkp = wkvector;
for (osize = 1,nsize = 2; osize < n; osize = nsize,nsize *= 2)
{
for (k = 0; k < osize; k++)
{
for (ieven = k; ieven < n; ieven += nsize)
{
iodd = ieven + osize;
/*
* Note that the the upper and lower parts of each new
* transform being produced use the same components--
* taken through a 'second cycle' since they are periodic
* in their original N (nsize/2). Only W**k differs,
* and, fortuitously, W**k = -W**(k+nsize/2). Also
* fortuitously, the input slots for F_k_even and F_k_odd
* are separated by nsize/2, as are the output slots for
* F_k and F_(k+nsize/2), so we can use and write over the
* same slots during each pass in the inner loop, using the
* same value of W**k (we subtract instead of negating).
*/
C_MULT (*wkp, datac[iodd], wk_times_f_k_odd);
C_SUB (datac[ieven],wk_times_f_k_odd, datac[iodd]);
C_ADD (datac[ieven],wk_times_f_k_odd, datac[ieven]);
#ifdef PROGRESS_INDICATOR
if (++Inner_Loop_Count >= next_progress_count)
{
next_progress_count += PROGRESS_INCREMENT;
progress_requester__update
((int) (Initial_Progress + (Inner_Loop_Count *
Percent_Per_Loop)));
}
#endif
} /* end for (sums using same wk factor) */
wkp++;
} /* end for (k = 1, 2, ..., osize) */
wkp++;
} /* end for (osize, nsize...) */
} /* end of transform computation */
} /* end of !SaveMemory (fast memory hog) version */
#endif /* whether SAVE_TRIG_OPT defined or not */
} /* end of cfft */