You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
436 lines
12 KiB
436 lines
12 KiB
/* GSL-GENFFT - Power2 FFT C Code Generator
|
|
* Copyright (C) 2001 Tim Janik
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program; if not, write to the Free Software
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
*/
|
|
#include <gsl/gslcommon.h>
|
|
#include <gsl/gslmath.h>
|
|
#include <gsl/gslfft.h>
|
|
#include <sys/time.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
|
|
#define MAX_FFT_SIZE (65536 * 2)
|
|
#define EPSILON (4.8e-6)
|
|
|
|
|
|
/* --- prototypes --- */
|
|
static void reference_power2_fftc (unsigned int n_values,
|
|
const double *rivalues_in,
|
|
double *rivalues_out,
|
|
int esign);
|
|
static void fill_rand (guint n,
|
|
double *a);
|
|
static double diff (guint m,
|
|
guint p,
|
|
double *a1,
|
|
double *a2,
|
|
const gchar *str);
|
|
|
|
|
|
/* --- variables --- */
|
|
static double ref_fft_in[MAX_FFT_SIZE] = { 0, };
|
|
static double ref_fft_aout[MAX_FFT_SIZE] = { 0, };
|
|
static double ref_fft_sout[MAX_FFT_SIZE] = { 0, };
|
|
static double work_fft_in[MAX_FFT_SIZE] = { 0, };
|
|
static double work_fft_aout[MAX_FFT_SIZE] = { 0, };
|
|
static double work_fft_sout[MAX_FFT_SIZE] = { 0, };
|
|
|
|
|
|
/* --- functions --- */
|
|
int
|
|
main (int argc,
|
|
char *argv[])
|
|
{
|
|
struct timeval tv;
|
|
guint i;
|
|
|
|
/* initialize GSL */
|
|
if (!g_thread_supported ())
|
|
g_thread_init (NULL);
|
|
gsl_init (NULL, NULL);
|
|
|
|
/* initialize random numbers */
|
|
gettimeofday (&tv, NULL);
|
|
srand (tv.tv_sec ^ tv.tv_usec);
|
|
|
|
/* run tests */
|
|
for (i = 2; i <= MAX_FFT_SIZE >> 1; i <<= 1)
|
|
{
|
|
double d;
|
|
|
|
g_print ("Testing fft code for size %u\n", i);
|
|
|
|
/* setup reference and work fft records */
|
|
fill_rand (i << 1, ref_fft_in);
|
|
memset (ref_fft_aout, 0, MAX_FFT_SIZE * sizeof (ref_fft_aout[0]));
|
|
memset (ref_fft_sout, 0, MAX_FFT_SIZE * sizeof (ref_fft_sout[0]));
|
|
memcpy (work_fft_in, ref_fft_in, MAX_FFT_SIZE * sizeof (work_fft_in[0]));
|
|
memset (work_fft_aout, 0, MAX_FFT_SIZE * sizeof (work_fft_aout[0]));
|
|
memset (work_fft_sout, 0, MAX_FFT_SIZE * sizeof (work_fft_sout[0]));
|
|
reference_power2_fftc (i, ref_fft_in, ref_fft_aout, +1);
|
|
reference_power2_fftc (i, ref_fft_in, ref_fft_sout, -1);
|
|
|
|
/* perform fft test */
|
|
gsl_power2_fftac (i, work_fft_in, work_fft_aout);
|
|
gsl_power2_fftsc (i, work_fft_in, work_fft_sout);
|
|
|
|
/* check differences */
|
|
d = diff (MAX_FFT_SIZE, 0, ref_fft_in, work_fft_in, "Checking input record");
|
|
if (d)
|
|
g_error ("Reference record was modified");
|
|
d = diff (MAX_FFT_SIZE, 0, ref_fft_aout, work_fft_aout, "Reference analysis against GSL analysis");
|
|
if (fabs (d) > EPSILON)
|
|
g_error ("Error sum in analysis FFT exceeds epsilon: %g > %g", d, EPSILON);
|
|
d = diff (MAX_FFT_SIZE, 0, ref_fft_sout, work_fft_sout, "Reference synthesis against GSL synthesis");
|
|
if (fabs (d) > EPSILON)
|
|
g_error ("Error sum in analysis FFT exceeds epsilon: %g > %g", d, EPSILON);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static void
|
|
fill_rand (guint n,
|
|
double *a)
|
|
{
|
|
while (n--)
|
|
a[n] = -1. + 2. * rand() / (RAND_MAX + 1.0);
|
|
}
|
|
|
|
static double
|
|
diff (guint m,
|
|
guint p,
|
|
double *a1,
|
|
double *a2,
|
|
const gchar *str)
|
|
{
|
|
double d = 0, max = 0, min = 1e+32;
|
|
guint n;
|
|
|
|
g_print ("%s\n", str);
|
|
for (n = 0; n < m; n++)
|
|
{
|
|
double a = ABS (a1[n] - a2[n]);
|
|
if (n < p)
|
|
g_print ("%3u:%.3f) % 19.9f - % 19.9f = % 19.9f (% 19.9f)\n",
|
|
n, ((float) n) / (float) m,
|
|
a1[n], a2[n],
|
|
a1[n] - a2[n],
|
|
a1[n] / a2[n]);
|
|
d += a;
|
|
max = MAX (max, a);
|
|
min = MIN (min, a);
|
|
}
|
|
g_print ("Diff sum: %.9f, ", d);
|
|
g_print ("min/av/max: %.9f %.9f %.9f, ", min, d / (double) m, max);
|
|
g_print ("noise: %u %u %u\n",
|
|
g_bit_storage (1. / min),
|
|
g_bit_storage (m / d),
|
|
g_bit_storage (1. / max));
|
|
return d;
|
|
}
|
|
|
|
|
|
/* --- fft implementation --- */
|
|
#define BUTTERFLY_XY(X1re,X1im,X2re,X2im,Y1re,Y1im,Y2re,Y2im,Wre,Wim) { \
|
|
register double T1re, T1im, T2re, T2im; \
|
|
T1re = X2re * Wre; \
|
|
T1im = X2im * Wre; \
|
|
T2re = X2im * Wim; \
|
|
T2im = X2re * Wim; \
|
|
T1re -= T2re; \
|
|
T1im += T2im; \
|
|
T2re = X1re - T1re; \
|
|
T2im = X1im - T1im; \
|
|
Y1re = X1re + T1re; \
|
|
Y1im = X1im + T1im; \
|
|
Y2re = T2re; \
|
|
Y2im = T2im; \
|
|
}
|
|
#define BUTTERFLY_10(X1re,X1im,X2re,X2im,Y1re,Y1im,Y2re,Y2im,_1,_2) { \
|
|
register double T2re, T2im; \
|
|
T2re = X1re - X2re; \
|
|
T2im = X1im - X2im; \
|
|
Y1re = X1re + X2re; \
|
|
Y1im = X1im + X2im; \
|
|
Y2re = T2re; \
|
|
Y2im = T2im; \
|
|
}
|
|
#define BUTTERFLY_01(X1re,X1im,X2re,X2im,Y1re,Y1im,Y2re,Y2im,_1,_2) { \
|
|
register double T2re, T2im; \
|
|
T2re = X1re + X2im; \
|
|
T2im = X1im - X2re; \
|
|
Y1re = X1re - X2im; \
|
|
Y1im = X1im + X2re; \
|
|
Y2re = T2re; \
|
|
Y2im = T2im; \
|
|
}
|
|
#define BUTTERFLY_0m(X1re,X1im,X2re,X2im,Y1re,Y1im,Y2re,Y2im,_1,_2) { \
|
|
register double T2re, T2im; \
|
|
T2re = X1re - X2im; \
|
|
T2im = X1im + X2re; \
|
|
Y1re = X1re + X2im; \
|
|
Y1im = X1im - X2re; \
|
|
Y2re = T2re; \
|
|
Y2im = T2im; \
|
|
}
|
|
#define BUTTERFLY_10scale(X1re,X1im,X2re,X2im,Y1re,Y1im,Y2re,Y2im,S) { \
|
|
register double T2re, T2im; \
|
|
T2re = X1re - X2re; \
|
|
T2im = X1im - X2im; \
|
|
Y1re = X1re + X2re; \
|
|
Y1im = X1im + X2im; \
|
|
Y2re = T2re * S; \
|
|
Y2im = T2im * S; \
|
|
Y1re *= S; \
|
|
Y1im *= S; \
|
|
}
|
|
#define WMULTIPLY(Wre,Wim,Dre,Dim) { \
|
|
register double T1re, T1im, T2re, T2im; \
|
|
T1re = Wre * Dre; \
|
|
T1im = Wim * Dre; \
|
|
T2re = Wim * Dim; \
|
|
T2im = Wre * Dim; \
|
|
T1re -= T2re; \
|
|
T1im += T2im; \
|
|
Wre += T1re; \
|
|
Wim += T1im; \
|
|
}
|
|
|
|
static inline void
|
|
reference_bitreverse_fft2analysis (const unsigned int n,
|
|
const double *X,
|
|
double *Y)
|
|
{
|
|
const unsigned int n2 = n >> 1, n1 = n + n2, max = n >> 2;
|
|
unsigned int i, r;
|
|
|
|
BUTTERFLY_10 (X[0], X[1],
|
|
X[n], X[n + 1],
|
|
Y[0], Y[1],
|
|
Y[2], Y[3],
|
|
__1, __0);
|
|
if (n < 4)
|
|
return;
|
|
BUTTERFLY_10 (X[n2], X[n2 + 1],
|
|
X[n1], X[n1 + 1],
|
|
Y[4], Y[5],
|
|
Y[6], Y[7],
|
|
__1, __0);
|
|
if (n < 8)
|
|
return;
|
|
for (i = 1, r = 0; i < max; i++)
|
|
{
|
|
unsigned int k, j = n >> 1;
|
|
|
|
while (r >= j)
|
|
{
|
|
r -= j;
|
|
j >>= 1;
|
|
}
|
|
r |= j;
|
|
|
|
k = r >> 1;
|
|
j = i << 3;
|
|
BUTTERFLY_10 (X[k], X[k + 1],
|
|
X[k + n], X[k + n + 1],
|
|
Y[j], Y[j + 1],
|
|
Y[j + 2], Y[j + 3],
|
|
__1, __0);
|
|
k += n2;
|
|
j += 4;
|
|
BUTTERFLY_10 (X[k], X[k + 1],
|
|
X[k + n], X[k + n + 1],
|
|
Y[j], Y[j + 1],
|
|
Y[j + 2], Y[j + 3],
|
|
__1, __0);
|
|
}
|
|
}
|
|
|
|
static inline void
|
|
reference_bitreverse_fft2synthesis (const unsigned int n,
|
|
const double *X,
|
|
double *Y)
|
|
{
|
|
const unsigned int n2 = n >> 1, n1 = n + n2, max = n >> 2;
|
|
unsigned int i, r;
|
|
double scale = n;
|
|
|
|
scale = 1.0 / scale;
|
|
BUTTERFLY_10scale (X[0], X[1],
|
|
X[n], X[n + 1],
|
|
Y[0], Y[1],
|
|
Y[2], Y[3],
|
|
scale);
|
|
if (n < 4)
|
|
return;
|
|
BUTTERFLY_10scale (X[n2], X[n2 + 1],
|
|
X[n1], X[n1 + 1],
|
|
Y[4], Y[5],
|
|
Y[6], Y[7],
|
|
scale);
|
|
if (n < 8)
|
|
return;
|
|
for (i = 1, r = 0; i < max; i++)
|
|
{
|
|
unsigned int k, j = n >> 1;
|
|
|
|
while (r >= j)
|
|
{
|
|
r -= j;
|
|
j >>= 1;
|
|
}
|
|
r |= j;
|
|
|
|
k = r >> 1;
|
|
j = i << 3;
|
|
BUTTERFLY_10scale (X[k], X[k + 1],
|
|
X[k + n], X[k + n + 1],
|
|
Y[j], Y[j + 1],
|
|
Y[j + 2], Y[j + 3],
|
|
scale);
|
|
k += n2;
|
|
j += 4;
|
|
BUTTERFLY_10scale (X[k], X[k + 1],
|
|
X[k + n], X[k + n + 1],
|
|
Y[j], Y[j + 1],
|
|
Y[j + 2], Y[j + 3],
|
|
scale);
|
|
}
|
|
}
|
|
|
|
static void
|
|
reference_power2_fftc (unsigned int n_values,
|
|
const double *rivalues_in,
|
|
double *rivalues,
|
|
int esign)
|
|
{
|
|
const unsigned int n_values2 = n_values << 1;
|
|
double theta = esign < 0 ? -3.1415926535897932384626433832795029 : 3.1415926535897932384626433832795029;
|
|
unsigned int block_size = 2 << 1;
|
|
double last_sin;
|
|
|
|
if (esign > 0)
|
|
reference_bitreverse_fft2analysis (n_values, rivalues_in, rivalues);
|
|
else
|
|
reference_bitreverse_fft2synthesis (n_values, rivalues_in, rivalues);
|
|
theta *= (double) 1.0 / 2.;
|
|
last_sin = sin (theta);
|
|
|
|
if (n_values < 4)
|
|
return;
|
|
|
|
do
|
|
{
|
|
double Dre, Dim, Wre, Wim;
|
|
unsigned int k, i, half_block = block_size >> 1;
|
|
unsigned int block_size2 = block_size << 1;
|
|
|
|
theta *= 0.5;
|
|
Dim = last_sin;
|
|
last_sin = sin (theta);
|
|
Dre = last_sin * last_sin * -2.;
|
|
|
|
/* loop over first coefficient in each block ==> w == {1,0} */
|
|
for (i = 0; i < n_values2; i += block_size2)
|
|
{
|
|
unsigned int v1 = i, v2 = i + block_size;
|
|
|
|
BUTTERFLY_10 (rivalues[v1], rivalues[v1 + 1],
|
|
rivalues[v2], rivalues[v2 + 1],
|
|
rivalues[v1], rivalues[v1 + 1],
|
|
rivalues[v2], rivalues[v2 + 1],
|
|
__1, __0);
|
|
}
|
|
Wre = Dre + 1.0; /* update Wk */
|
|
Wim = Dim; /* update Wk */
|
|
/* loop for every Wk in the first half of each subblock */
|
|
for (k = 2; k < half_block; k += 2)
|
|
{
|
|
/* loop over kth coefficient in each block */
|
|
for (i = k; i < n_values2; i += block_size2)
|
|
{
|
|
unsigned int v1 = i, v2 = i + block_size;
|
|
|
|
BUTTERFLY_XY (rivalues[v1], rivalues[v1 + 1],
|
|
rivalues[v2], rivalues[v2 + 1],
|
|
rivalues[v1], rivalues[v1 + 1],
|
|
rivalues[v2], rivalues[v2 + 1],
|
|
Wre, Wim);
|
|
}
|
|
WMULTIPLY (Wre, Wim, Dre, Dim); /* update Wk */
|
|
}
|
|
/* handle middle coefficient ==> w == {0,+-1} */
|
|
if (k < block_size)
|
|
{
|
|
/* loop over kth coefficient in each block */
|
|
if (esign > 0)
|
|
for (i = k; i < n_values2; i += block_size2)
|
|
{
|
|
unsigned int v1 = i, v2 = i + block_size;
|
|
|
|
BUTTERFLY_01 (rivalues[v1], rivalues[v1 + 1],
|
|
rivalues[v2], rivalues[v2 + 1],
|
|
rivalues[v1], rivalues[v1 + 1],
|
|
rivalues[v2], rivalues[v2 + 1],
|
|
__0, __1);
|
|
}
|
|
else
|
|
for (i = k; i < n_values2; i += block_size2)
|
|
{
|
|
unsigned int v1 = i, v2 = i + block_size;
|
|
|
|
BUTTERFLY_0m (rivalues[v1], rivalues[v1 + 1],
|
|
rivalues[v2], rivalues[v2 + 1],
|
|
rivalues[v1], rivalues[v1 + 1],
|
|
rivalues[v2], rivalues[v2 + 1],
|
|
__0, __1);
|
|
}
|
|
/* update Wk */
|
|
if (esign > 0)
|
|
{
|
|
Wre = -Dim;
|
|
Wim = Dre + 1.0;
|
|
}
|
|
else
|
|
{
|
|
Wre = Dim;
|
|
Wim = -Dre - 1.0;
|
|
}
|
|
k += 2;
|
|
}
|
|
/* loop for every Wk in the second half of each subblock */
|
|
for (; k < block_size; k += 2)
|
|
{
|
|
/* loop over kth coefficient in each block */
|
|
for (i = k; i < n_values2; i += block_size2)
|
|
{
|
|
unsigned int v1 = i, v2 = i + block_size;
|
|
|
|
BUTTERFLY_XY (rivalues[v1], rivalues[v1 + 1],
|
|
rivalues[v2], rivalues[v2 + 1],
|
|
rivalues[v1], rivalues[v1 + 1],
|
|
rivalues[v2], rivalues[v2 + 1],
|
|
Wre, Wim);
|
|
}
|
|
WMULTIPLY (Wre, Wim, Dre, Dim); /* update Wk */
|
|
}
|
|
block_size = block_size2;
|
|
}
|
|
while (block_size <= n_values);
|
|
}
|