summaryrefslogtreecommitdiffstats
path: root/FFT_Test/fft_hardware.hcc
diff options
context:
space:
mode:
Diffstat (limited to 'FFT_Test/fft_hardware.hcc')
-rw-r--r--FFT_Test/fft_hardware.hcc449
1 files changed, 449 insertions, 0 deletions
diff --git a/FFT_Test/fft_hardware.hcc b/FFT_Test/fft_hardware.hcc
new file mode 100644
index 0000000..8977a83
--- /dev/null
+++ b/FFT_Test/fft_hardware.hcc
@@ -0,0 +1,449 @@
+#include <pal_master.hch>
+#include <stdlib.hch>
+#include "weights256.hch"
+#include "config.hch"
+#include "debug.hch"
+#include "xilinxmult.hch"
+
+#define PERFORM_FFT_CALCULATION 1
+#define USE_UNSIGNED_AUDIO 0
+#define PRINT_DEBUG 0
+/* Define two multi-port RAMs for FFT calculation; one for real and one for imaginary values
+ * Extra block RAM settings are defined to make sure read and write actions can be performed
+ * within one clock-cycle.
+ * Left out extra settings on new board the clock changes TODO !!!!
+ */
+#if HARDWARE_MULTIPLY
+mpram
+{
+ ram signed 18 rwrite[256];
+ rom signed 18 read[256];
+} real with {block = "BlockRAM"/*, westart=2.5, welength=1, rclkpos={1.5}, wclkpos={3}, clkpulselen=0.5*/};
+
+mpram
+{
+ ram signed 18 rwrite[256];
+ rom signed 18 read[256];
+} imaginary with {block = "BlockRAM"/*, westart=2.5, welength=1, rclkpos={1.5}, wclkpos={3}, clkpulselen=0.5*/};
+#else
+mpram
+{
+ ram signed 24 rwrite[256];
+ rom signed 24 read[256];
+} real with {block = "BlockRAM"/*, westart=2.5, welength=1, rclkpos={1.5}, wclkpos={3}, clkpulselen=0.5*/};
+
+mpram
+{
+ ram signed 24 rwrite[256];
+ rom signed 24 read[256];
+} imaginary with {block = "BlockRAM"/*, westart=2.5, welength=1, rclkpos={1.5}, wclkpos={3}, clkpulselen=0.5*/};
+#endif
+// multiplication factors for equalizer function
+ram signed 7 eq_settings[16] = {0,2,4,7,10,13,16,19,22,26,30,35,41,48,55,63};
+
+
+/****************************************************************
+* Function: multiply *
+* *
+* Arguments *
+* x,y signed variables *
+* *
+* Description *
+* Just a multiplier. But by doing this in a function the *
+* FPGA space needed is reduced. *
+* *
+* Return Values *
+* The result after multiplication *
+****************************************************************/
+macro proc multiply(result, op_a, op_b)
+{
+#if HARDWARE_MULTIPLY
+ xilinxmult(result, op_a, adjs(op_b,18));
+#else
+ result = (adjs(op_a,38))*(adjs(op_a,38));
+#endif
+}
+
+
+
+
+/*******************************************************************
+* Function: calculate_fft *
+* *
+* Arguments *
+* select_inverse Boolean that indicates FFT or iFFT calculation *
+* *
+* Description *
+* This routine performs the Fast Fourier Transform for *
+* calculation of the frequency spectrum *
+* *
+*******************************************************************/
+void calculate_fft(unsigned 1 select_inverse)
+{
+ unsigned 4 level;
+ unsigned 8 point1,point2,j,f,k;
+ unsigned 9 e,i;
+ signed 16 weight1,weight2;
+#if HARDWARE_MULTIPLY
+ signed 18 p,q,r,t;
+#else
+ signed 24 p,q,r,t;
+#endif
+ signed a,b;
+
+#if HARDWARE_MULTIPLY
+ // Macro to provide rescaling of 36-bit result of fixed point multiply
+ // down to an 18-bit result. The range of bits selected depends on the
+ // number that represents the value of "1" in the trig function lookup
+ // tables. (Eg. for 16384 == 1, the lowest bit selected should be [14]).
+ macro expr rescale (x) = (x[35] @ x[30:14]);
+#else
+ //Macro to rescale the multiply result down to a 24-bit value.
+ macro expr rescale (x) = ((x>>FRACBITS)<-24);
+#endif
+
+ for(level=1;level<=NUMBER_OF_COLUMNS;level++) // count all the columns
+ {
+ e=1<<(NUMBER_OF_COLUMNS-level+1); // number of points in each block in this column
+ f=(e>>1)<-8; // number of butterflies in each block in this column
+
+ for(j=1;j<=f;j++) // count all the butterflies in each block
+ {
+ par
+ {
+ // Weight factors for real (the same for FFT and iFFT)
+ weight1 = weight_re[((j-1)<<(level-1))<-7];
+
+
+ // Weight factors for imaginary (opposite for FFT and iFFT)
+ weight2 = (!select_inverse) ? (weight_im[((j-1)<<(level-1))<-7]) : -(weight_im[((j-1)<<(level-1))<-7]);
+
+ /* ORIGINAL CODE BELOW, MODIFIED BECAUSE OF MISMATCHING OUTPUT WITH BORLAND TESTAPP
+ weight2 = (!select_inverse) ? -(weight_im[((j-1)<<(level-1))<-7]) : weight_im[((j-1)<<(level-1))<-7];
+ */
+
+
+
+ for(i=0@j;i<=NUMBER_OF_POINTS;i+=e) // count all the blocks in this column
+ { // Butterfly calculation
+ par
+ {
+ point1 = ((i<-8)-1);
+ point2 = (((i<-8)+f)-1);
+ }
+
+ par
+ {
+ p = (real.read[point1] >> 1) + (real.rwrite[point2] >> 1);
+ q = (imaginary.read[point1] >> 1) + (imaginary.rwrite[point2] >> 1);
+ }
+
+ par
+ {
+ r = (real.read[point1] >> 1) - (real.rwrite[point2] >> 1);
+ t = (imaginary.read[point1] >> 1) - (imaginary.rwrite[point2] >> 1);
+ }
+
+ multiply(a,r,weight1);
+ multiply(b,t,weight2);
+
+ par
+ {
+ real.rwrite[point2] = (rescale(a-b));
+ imaginary.rwrite[point1] = q;
+ }
+
+ multiply(a,t,weight1);
+ multiply(b,r,weight2);
+
+ par
+ {
+ real.rwrite[point1] = p;
+ imaginary.rwrite[point2] = (rescale(a+b));
+ }
+
+ }
+ }
+ }
+ }
+
+ j=1;
+ for(i=1;i<NUMBER_OF_POINTS;i++)
+ {
+ if(i<(0@j))
+ {
+ par
+ {
+ point1=j-1;
+ point2=(i-1)<-8;
+ }
+ /*
+ COPYING ARRAY VALUES FROM ONE PLACE TO ANOTHER IN THE ARRAT MUST BE DONE IN
+ 2 STEPS. FIRSTLY THE VALUES ARE COPIED TO SEPARATE VARIABLES AFTER THAT THEY
+ ARE COPIED BACK TO THEIR NEW POSITION IN THE ARRAY. THIS MUST BE DONE TO
+ PREVENT TIMING ISSUES FROM OCCURING.
+ */
+ par
+ {
+ p = real.read[point1];
+ q = imaginary.read[point1];
+ }
+ par
+ {
+ r = real.read[point2];
+ t = imaginary.read[point2];
+ }
+ par
+ {
+ real.rwrite[point1] = r;
+ imaginary.rwrite[point1] = t;
+ }
+ par
+ {
+ real.rwrite[point2] = p;
+ imaginary.rwrite[point2] = q;
+ }
+ }
+
+ k = NUMBER_OF_POINTS>>1;
+
+
+ while(k<j)
+ {
+ j = j-k;
+ k = k>>1;
+ }
+
+ j+=k;
+ }
+
+}
+
+/********************************************************************/
+#if HARDWARE_MULTIPLY
+void perform_fft(signed 18 *pcm_audio)
+#else
+void perform_fft(signed 16 *pcm_audio)
+#endif
+{
+ unsigned 8 k;
+
+
+#if HARDWARE_MULTIPLY
+ signed 18 sample;
+ k=0;
+ sample = adjs(pcm_audio[k],18);
+#else
+ signed 24 sample;
+ k=0;
+ sample = adjs(pcm_audio[k],24);
+#endif
+
+ //initialize variables for the copying pipeline
+
+
+ // copy audio data to real-array before starting FFT calculation
+ // and set imaginary values to zero
+ do
+ {
+ //Copying the array values has been pipelined to prevent parallel access to the
+ //pcm_audio array. This copying procedure must be finished before another
+ //sample is read from the audio input. The time available for this loop is
+ //determined by the sampling rate of 44,1 Khz
+ par
+ {
+ //COPYING NEEDS TO BE DONE IN 2 STEPS, BECAUSE THE VALUE THAT NEEDS TO WRITTEN
+ //TO THE REAL-RAM NEEDS TO BE AVAILABLE ON THE START OFF THE CLOCKCYCLE.
+#if HARDWARE_MULTIPLY
+ sample = adjs(pcm_audio[k+1],18);
+#else
+ sample = adjs(pcm_audio[k+1],24);
+#endif
+ real.rwrite[k] = sample;
+ imaginary.rwrite[k] = 0;
+ k++;
+ }
+ } while (k);
+
+
+
+#if PERFORM_FFT_CALCULATION
+ calculate_fft(0);
+#endif
+
+
+}
+
+/********************************************************************/
+#if HARDWARE_MULTIPLY
+void perform_ifft(signed 18 *modified_audio/*, unsigned 6 *ifft_info*/)
+#else
+void perform_ifft(signed 16 *modified_audio/*, unsigned 6 *ifft_info*/)
+#endif
+{
+ unsigned 6 k;
+#if HARDWARE_MULTIPLY
+ signed 18 p;
+#else
+ signed 24 p;
+#endif
+#if PERFORM_FFT_CALCULATION
+ calculate_fft(1);
+#endif
+
+ k=0;
+//initialize variables for the copying pipeline
+#if PERFORM_FFT_CALCULATION
+ #if HARDWARE_MULTIPLY
+ p = (real.read[(0@k)+95] << NUMBER_OF_COLUMNS);
+ #else
+ p = (real.read[(0@k)+95] >> NUMBER_OF_COLUMNS);
+ #endif
+#else
+ p = (real.read[(0@k)+95]);
+#endif
+
+ do
+ {
+ //Copying the array values has been pipelined to prevent parallel access to the
+ //pcm_audio array. This copying procedure must be finished before another
+ //sample is read from the audio input. The time available for this loop is
+ //determined by the sampling rate of 44,1 Khz
+ par
+ {
+#if PERFORM_FFT_CALCULATION
+ #if HARDWARE_MULTIPLY
+ p = (real.read[(0@k)+95] << NUMBER_OF_COLUMNS);
+ #else
+ p = (real.read[(0@k)+95] >> NUMBER_OF_COLUMNS);
+ #endif
+#else
+ p = (real.read[(0@k)+95]);
+#endif
+#if HARDWARE_MULTIPLY
+ modified_audio[k] = p ;
+#else
+ modified_audio[k] = (p<-16);
+#endif
+ //ifft_info[k] = (unsigned)(p[15:10]);
+ k++;
+ }
+ } while(k);
+}
+
+/********************************************************************/
+void equalize_audio(unsigned 4 *eq_level, unsigned 7 *fft_info)
+{
+#if 1
+#if HARDWARE_MULTIPLY
+ signed 18 p,q;
+#else
+ signed 24 p,q;
+#endif
+ signed 16 a;
+ unsigned 8 i, mirror_i, bit, m, n;
+ unsigned 7 old_value;
+ unsigned 9 tmp;
+
+ //macro expr equalize_bar = multiply(q,a)[29:6];
+
+ macro proc equalize_bar(retval)
+ {
+ signed result;
+ multiply(result, q,a);
+#if HARDWARE_MULTIPLY
+ retval = result[23:6]; //drop last 6 bit to compensate the maximum multiplication with 64 from the eq_settings array
+#else
+ retval = result[29:6]; //drop last 6 bit to compensate the maximum multiplication with 64 from the eq_settings array
+#endif
+ }
+#if HARDWARE_MULTIPLY
+ p = real.read[0] - DC_COMPONENT_HARDWARE;
+#else
+ p = real.read[0] - DC_COMPONENT; // remove DC component for calculations
+#endif
+ real.rwrite[0] = p;
+// imaginary.rwrite[0] = 0; // remove DC component
+
+
+ for(i=0;i<=NUMBER_OF_FREQUENCIES;i++)
+ {
+ // set multiplication factor (0..64) for current frequency bar
+ a = 63;// adjs(eq_settings[eq_level[i<-7]],16);
+
+ // multiply frequency with this factor and divide by 64 (drop 6 LSB's)
+ q = real.read[i];
+ equalize_bar(p);
+ real.rwrite[i] = p;
+
+ q = imaginary.read[i];
+ equalize_bar(p);
+ imaginary.rwrite[i] = p;
+
+ // the upper part(128..255) of the spectrum is mirrored to the lower part;
+ // these values need to be adjusted too
+ if ((i<-7)!=0) // if not in DC component bar
+ {
+ mirror_i = (NUMBER_OF_POINTS-1)-i+1;
+ q = real.read[mirror_i];
+ equalize_bar(p);
+ real.rwrite[mirror_i] = p;
+
+ q = imaginary.read[mirror_i];
+ equalize_bar(p);
+ imaginary.rwrite[mirror_i] = p;
+ }
+ }
+
+ //write data to fft_info for display purposes
+ for(i=0;i<NUMBER_OF_FREQUENCIES;i++)
+ {
+ p = real.read[i];
+ q = imaginary.read[i];
+#if HARDWARE_MULTIPLY
+ if (p[17] == 1) p = -p; else delay;
+ if (q[17] == 1) q = -q; else delay;
+#else
+ if (p[23] == 1) p = -p; else delay;
+ if (q[23] == 1) q = -q; else delay;
+#endif
+ p = (p<q) ? q : p; // This is done to get the best visual frequency result
+
+ if (display_log)
+ {
+
+ bit=126;
+#if HARDWARE_MULTIPLY
+ while ((p[15] == 0) && (bit != 0))
+#else
+ while ((p[21] == 0) && (bit != 0))
+#endif
+ par
+ {
+ p = p<<1;
+ bit = bit - 18;
+ }
+ old_value = fft_info[i<-7];
+ tmp = ((0@old_value) + (0@bit))>>1;
+ fft_info[i<-7] = (old_value <= (tmp<-7)) ? (tmp<-7) : old_value-1;
+ }
+ else
+ {
+ old_value = fft_info[i<-7];
+#if HARDWARE_MULTIPLY
+ fft_info[i<-7] = (old_value<=(unsigned)(p[15:9])) ? (unsigned)(p[15:9]) : old_value-1;
+#else
+ fft_info[i<-7] = (old_value<=(unsigned)(p[21:15])) ? (unsigned)(p[21:15]) : old_value-1;
+#endif
+ }
+ }
+
+ // add DC component again before inverse FFT calculation is performed
+#if HARDWARE_MULTIPLY
+ p = real.read[0] + DC_COMPONENT_HARDWARE;
+#else
+ p = real.read[0] + DC_COMPONENT;
+#endif
+ real.rwrite[0] = p;
+#endif
+}