diff options
-rw-r--r-- | config.ld | 2 | ||||
-rw-r--r-- | fft.lua | 185 |
2 files changed, 146 insertions, 41 deletions
@@ -9,7 +9,7 @@ examples = "examples/" file = { "applause.lua", "sndfile-stream.lua", -- "sndfile.lua", - "filters.lua", "dssi.lua", "midi.lua", "evdev.lua" + "filters.lua", "fft.lua", "dssi.lua", "midi.lua", "evdev.lua" } no_space_before_args = true @@ -1,11 +1,10 @@ --- --- @module applause --- --- TODO: --- Documentation local bit = require "bit" local ffi = require "ffi" +-- FIXME: We return these numbers, so should this be public? local complex = ffi.typeof('double complex') -- NOTE: LuaJIT 2.1 does not yet support complex number arithmetics @@ -22,13 +21,9 @@ local function complex_conj(x) return complex(x.re, -x.im) end --- The FFT() function returns the complex frequency spectrum, but may modify the --- input array in-place. --- This is important in order to avoid reallocations when performing FFT at real-time. - +--[[ -- Direct application of the DFT formula. -- This has complexity O(n^2) and is here only for reference. ---[[ function FFT(samples, invert) local ret = table.new(#samples, 0) @@ -53,6 +48,7 @@ function FFT(samples, invert) end ]] +--[[ -- Naive implementation of a radix-2 decimation-in-time (DIT) FFT -- via the Cooley-Tukey algorithm. -- This has complexity O(n*log2(n)), but requires lots of allocations. @@ -60,11 +56,6 @@ end -- Samples can be real-values (audio samples), but also complex values -- (the frequency spectrum). -- #samples must be a power of 2. --- A frequency spectrum result should always be mirrored. --- --- Each bin i in the resulting frequency spectrum represents frequency --- (i-1)*samplerate/#samples. ---[[ function FFT(samples, inverse) if #samples == 1 then return {complex(samples[1])} end @@ -94,16 +85,29 @@ function FFT(samples, inverse) end ]] --- Implementation of a radix-2 decimation-in-time (DIT) FFT --- via the Cooley-Tukey algorithm. --- This is still an out-of-place algorithm, but requires allocations +--- Perform radix-2 decimation-in-time (DIT) FFT for internal usage. +-- This is an implementation of the Cooley-Tukey algorithm. +-- It is still an out-of-place algorithm, but requires allocations -- of auxilliary memory (2*N) only once. -- On the other hand, this version does not require a potentially costly -- bit reversal operation. -- This has complexity O(N*log2(N)). +-- This is used for both directions (FFT and IFFT). -- For real inputs, the complex output array will conjugate symmetrical. -- But the second half is actually cheap to calculate. --- See https://literateprograms.org/cooley-tukey_fft_algorithm__c_.html +-- @tparam {sample,...} x Array of real or complex samples +-- @int x_start First index in `x` to analyze. +-- @int N Number of samples in `x` to transform. +-- @int skip Number of entries to skip. +-- The function will convert `x[x_start]`, `x[x_start+skip]`, up tp +-- `x[x_start+skip*(N-1)]`. +-- @tab X Output table. It should be preallocated to a size of `X_start+N-1`. +-- @int X_start Index of first complex ouput sample in `X`. +-- @tab E Temporary storage table. It should be preallocated to a size of `E_start+N-1`. +-- @int E_start Index of first element in `E` to use. +-- @tparam {number,...} twiddles +-- Twiddle factors, precalculated via @{get_twiddles}. +-- @see https://literateprograms.org/cooley-tukey_fft_algorithm__c_.html local function FFT_internal(x, x_start, N, skip, X, X_start, E, E_start, twiddles) if N == 1 then -- NOTE: This is responsible for the mirrorring, important for FFT_backward() @@ -125,9 +129,16 @@ local function FFT_internal(x, x_start, N, skip, X, X_start, E, E_start, twiddle end end --- For real-valued inputs, complex outputs (audio samples to frequency spectrums): --- The frequency spectrum is always conjugate symmetric, so `out` can be #samples/2+1 +--- Perform forward-FFT. +-- For real-valued inputs (audio samples), the outputs will be complex (frequency spectrums). +-- The frequency spectrum is always conjugate symmetric, so `out` can be half the size of `samples`, -- which significantly simplifies manipulations in the frequency domain. +-- @tparam {number,...} samples Audio samples to transform. +-- @tab out The output array. Should be preallocated to `#samples/2+1`. +-- @tab E Temporary storage table. It should be preallocated to a size of `#samples`. +-- @tparam {number,...} twiddles +-- Twiddle factors, precalculated via `get_twiddles(#samples)`. +-- @return The output array `out`. local function FFT_forward(samples, out, E, twiddles) local D, D_start = E, 1+#samples/2 @@ -146,9 +157,15 @@ local function FFT_forward(samples, out, E, twiddles) return out end --- For complex-valued inputs and real-valued outputs (frequency spectrums to audio samples): +--- Perform backward-FFT. +-- For complex-valued inputs (frequency spectrum), the outputs will be real-valued (audio samples): -- The frequency spectrum is made to be mirrored, so `out` will be twice the length of `samples` minus 2. --- NOTE: You will still need to generate twiddles with get_twiddles(true). +-- @tparam {complex,...} samples Spectrum samples (`double complex` C type) to transform. +-- @tab out The output array. Should be preallocated to `(#samples-1)*2`. +-- @tab E Temporary storage table. It should be preallocated to a size of `(#samples-1)*2`. +-- @tparam {number,...} twiddles +-- Twiddle factors, precalculated via `get_twiddles(#samples, true)`. +-- @return The output array `out`. local function FFT_backward(samples, out, E, twiddles) local size = (#samples-1)*2 -- number of output samples local D, D_start = E, 1+size/2 @@ -166,6 +183,10 @@ local function FFT_backward(samples, out, E, twiddles) return out end +--- Precalculate twiddle factors. +-- @int N Number of output samples to calculcate twiddle factors for. +-- @bool[opt=false] inverse If true, calculcate factors for inverse transformations. +-- @treturn {number,...} `N/2` twiddle factors. local function get_twiddles(N, inverse) local ret = table.new(N/2, 0) for k = 1, N/2 do @@ -177,6 +198,22 @@ local function get_twiddles(N, inverse) return ret end +--- Transform fixed number of audio samples to frequency spectrum (FFT). +-- The spectrum is represented by complex numbers (C type: `double complex`). +-- The resulting spectrum is conjuage symmetric and the redundant half is +-- therefore not returned. +-- Each bin `i` in the resulting frequency spectrum represents frequency +-- `(i-1)*samplerate/#samples`. +-- The number of input samples therefore also restricts the frequency resolution. +-- For the human ear, a buffer size of 2048 would be more than enough. +-- @tparam {number,...}|Stream samples +-- The audio samples to transform. +-- This can be a Stream, but only if it is not infinite (it will be automatically +-- converted to a table). +-- The number of samples must be a power of 2. +-- @treturn {complex,...} +-- The complex spectrum of size `#samples/2+1`. +-- @usage FFT(Stream.SinOsc(samplerate*20/1024):sub(1, 1024)) function FFT(samples) assert(type(samples) == "table") if samples.is_a_stream then samples = samples:totable() end @@ -190,8 +227,13 @@ function FFT(samples) return FFT_forward(samples, out, scratch, twiddles) end --- Convert complex frequency spectrum to magnitude/amplitude per frequency. --- FIXME: Should this be a Stream method? +--- Convert complex frequency spectrum to magnitude/amplitude per frequency. +-- This conversion is **inplace**. +-- @tparam {complex,...} spectrum Frequency spectrum. +-- @treturn {number,...} The magnitudes per frequency (same table as `spectrum`). +-- @see FFT +-- @usage tostream(magnitude(FFT(Stream.SinOsc(samplerate*20/1024):sub(1, 1024)))):gnuplot() +-- @fixme Should this be a Stream method? -- Would also resolve having to choose between inplace and out-of-place calculation. function magnitude(spectrum) for i = 1, #spectrum do @@ -200,9 +242,17 @@ function magnitude(spectrum) end return spectrum end --- Convert frequency spectrum to phase in value between [0,1]. --- Why the threshold is necessary, see --- https://www.gaussianwaves.com/2015/11/interpreting-fft-results-obtaining-magnitude-and-phase-information/ + +--- Convert frequency spectrum to phase in value between [0,1]. +-- This conversion is **inplace**. +-- @tparam {complex,...} spectrum Frequency spectrum. +-- @number[opt=0.1] threshold +-- The magnitude threshold. Why this is necessary, see +-- [this blog post](https://www.gaussianwaves.com/2015/11/interpreting-fft-results-obtaining-magnitude-and-phase-information/). +-- @treturn {number,...} The phase value between [0,1] per frequency (same table as `spectrum`). +-- @see FFT +-- @see magnitude +-- @usage tostream(phase(FFT(Stream.SinOsc(samplerate*20/1024, 0.7):sub(1, 1024)))):gnuplot() function phase(spectrum, threshold) threshold = threshold or 0.1 for i = 1, #spectrum do @@ -212,9 +262,20 @@ function phase(spectrum, threshold) return spectrum end --- Inverse FFT. --- Converts complex samples in the frequency domain to real-values samples in the --- time domain (ie. sound). +--- Transform fixed number of frequency bins to audio samples (inverse FFT). +-- The spectrum is represented by complex numbers (C type: `double complex`). +-- The spectrum is assumed to be conjuage symmetric and the redundant half is +-- not required to be provided. +-- @tparam {complex,...}|Stream spectrum +-- The frequency bins to transform. +-- This can be a Stream, but only if it is not infinite (it will be automatically +-- converted to a table). +-- This spectrum length must be such that the resulting number of audio samples +-- is a power of 2. +-- @treturn {number,...} +-- The audio samples of size `(#spectrum-1)*2`. +-- @see FFT +-- @usage IFFT(FFT(Stream.SinOsc(samplerate*20/1024):sub(1, 1024))):gnuplot() function IFFT(spectrum) assert(type(spectrum) == "table") if spectrum.is_a_stream then spectrum = spectrum:totable() end @@ -229,13 +290,27 @@ function IFFT(spectrum) return tostream(FFT_backward(spectrum, out, scratch, twiddles)) end --- Analyze a potentially realtime stream --- This gives a stream of frequency spectrums --- (of half the length). --- FIXME: It's unelegant to pass in the window function as an argument. +--- Analyze frequencies of a potentially infinite realtime stream via FFT. +-- This partitions the source stream into buffers of a given `size`, +-- optionally maps a window function and eventually applies the forward FFT, +-- resulting in a stream of frequency spectrums (STFT). +-- @within Class Stream +-- @int size The size of the FFT buffer. This must be a power of 2. +-- @func[opt] window_fnc +-- An optional window function to apply to the audio data before FFT conversion. +-- @treturn Stream +-- Stream of frequency spectrums (arrays of complex numbers, ie. +-- C type `double complex`). +-- These arrays will have size `size/2+1`. +-- @see Hamming +-- @see Hanning +-- @see FFT +-- @usage Stream.SinOsc(440):FFT(1024):IFFT(1024):play() +-- +-- @fixme It's unelegant to pass in the window function as an argument. -- This could only be avoided by moving the partitioning out of this method --- and repeating the buffer size once again. E.g.: --- foo:partition(1024):map(Hamming):FFT(1024)... +-- and repeating the buffer size once again. +-- E.g.: `foo:partition(1024):map(Hamming):FFT(1024)...`. -- Of course, if we had another FFT implementation like Danielson-Lanczos, -- that does not require preallocation of a few arrays, this problem also wouldn't -- exist. @@ -257,9 +332,21 @@ function Stream:FFT(size, window_fnc) end) end --- NOTE: Here `size` refers to the size of the time domain chunks, --- ie. the same value you pass into Stream:FFT(). --- A different FFT implementation like Danielson-Lanczos that works entirely +--- Synthesize audio samples from a stream of frequency spectrums via inverse FFT. +-- This is the inverse of @{Stream:FFT}. +-- Note that when performing FFT and IFFT on a real-time stream, as is usually done, +-- this will introduce a latency of `size/samplerate` seconds. +-- @within Class Stream +-- @int size +-- The size of the time domain chunks (ie. the same value passed into @{Stream:FFT}). +-- This will be twice the size of the spectrum arrays minus 2. +-- @treturn Stream +-- A stream of real-valued audio samples synthesized from the frequency spectrums. +-- @see Stream:FFT +-- @see IFFT +-- @usage Stream.SinOsc(440):FFT(1024):IFFT(1024):play() +-- +-- @fixme A different FFT implementation like Danielson-Lanczos that works entirely -- inplace, might not need repitition of these sizes. function Stream:IFFT(size) -- NOTE: The size could be inferred from the first spectrum array, @@ -284,12 +371,21 @@ end -- See also the commented-out code in filters.lua. -- +--- Apply Hamming window to a fixed number of samples. +-- @tparam {number,...}|Stream samples +-- The audio samples to window. +-- This can be a Stream, but only if it is not infinite (it will be automatically +-- converted to a table). +-- @treturn {number,...} +-- The windowed samples. +-- The conversion is **inplace**, so this may be the same table as `samples`. +-- @see Hanning +-- @see Stream:FFT +-- @usage tostream(magnitude(FFT(Hamming(Stream.SinOsc(samplerate*20.5/1024):sub(1, 1024))))):gnuplot() function Hamming(samples) assert(type(samples) == "table") if samples.is_a_stream then samples = samples:totable() end - --assert(bit.band(#samples, #samples-1) == 0, "Array length needs to be a power of 2") - local cos = math.cos local pi2 = 2*math.pi @@ -301,7 +397,16 @@ function Hamming(samples) return samples end --- Also called Hann +--- Apply Hann window to a fixed number of samples. +-- @tparam {number,...}|Stream samples +-- The audio samples to window. +-- This can be a Stream, but only if it is not infinite (it will be automatically +-- converted to a table). +-- @treturn {number,...} +-- The windowed samples. +-- The conversion is **inplace**, so this may be the same table as `samples`. +-- @see Hamming +-- @see Stream:FFT function Hanning(samples) assert(type(samples) == "table") if samples.is_a_stream then samples = samples:totable() end |