From fba8175e99f7b145a4e668b9f1f484ff0aa93eb6 Mon Sep 17 00:00:00 2001
From: Scott Lawson <scottlawsonbc@gmail.com>
Date: Sun, 29 Jan 2017 00:41:13 -0800
Subject: [PATCH] Add zero padded FFT calculation, could help #25

Zero pads the audio time samples until the length is equal to the next largest power of two. This improves the algorithmic complexity of the FFT calculations.

Initialized the Hamming window when the module is loaded instead of every loop.

Replaced a call to numpy.roll() with direct array index manipulation.
---
 python/visualization.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/python/visualization.py b/python/visualization.py
index db16725..88097bb 100644
--- a/python/visualization.py
+++ b/python/visualization.py
@@ -167,7 +167,7 @@ mel_smoothing = dsp.ExpFilter(np.tile(1e-1, config.N_FFT_BINS),
                          alpha_decay=0.5, alpha_rise=0.99)
 volume = dsp.ExpFilter(config.MIN_VOLUME_THRESHOLD,
                        alpha_decay=0.02, alpha_rise=0.02)
-
+fft_window = np.hamming(int(config.MIC_RATE / config.FPS) * config.N_ROLLING_HISTORY)
 # Keeps track of the number of buffer overflows
 # Lots of buffer overflows could mean that FPS is set too high
 buffer_overflows = 1
@@ -186,7 +186,7 @@ def microphone_update(stream):
     # Normalize samples between 0 and 1
     y = y / 2.0**15
     # Construct a rolling window of audio samples
-    y_roll = np.roll(y_roll, -1, axis=0)
+    y_roll[:-1] = y_roll[1:]
     y_roll[-1, :] = np.copy(y)
     y_data = np.concatenate(y_roll, axis=0)
     volume.update(np.nanmean(y_data ** 2))
@@ -197,10 +197,12 @@ def microphone_update(stream):
         led.update()
     else:
         # Transform audio input into the frequency domain
-        XS, YS = dsp.fft(y_data, window=np.hamming)
-        # Remove half of the FFT data because of symmetry
-        YS = YS[:len(YS) // 2]
-        XS = XS[:len(XS) // 2]
+        N = len(y_data)
+        N_zeros = 2**int(np.ceil(np.log2(N))) - N
+        # Pad with zeros until the next power of two
+        y_data *= fft_window
+        y_padded = np.pad(y_data, (0, N_zeros), mode='constant')
+        YS = np.abs(np.fft.rfft(y_padded)[:N // 2])
         # Construct a Mel filterbank from the FFT data
         mel = np.atleast_2d(np.abs(YS)).T * dsp.mel_y.T
         # Scale data to values more suitable for visualization