fft.lua: documented module

author: Robin Haberkorn <robin.haberkorn@googlemail.com> 2023-12-19 19:30:18 +0300
committer: Robin Haberkorn <robin.haberkorn@googlemail.com> 2023-12-19 19:38:32 +0300
commit: 2bbaaaf6c67f2f284e74cd46c804f7267f5439a6 (patch)
tree: 8402b3fca30c2dc3b111e577c31ee86392d3f6a4
parent: 664cd1590ecb716b60dc75411eb873b130b2f38b (diff)
download: applause2-2bbaaaf6c67f2f284e74cd46c804f7267f5439a6.tar.gz
2 files changed, 146 insertions, 41 deletions
diff --git a/config.ld b/config.ld
index b7b4ad5..cbee1b4 100644
--- a/config.ld
+++ b/config.ld
@@ -9,7 +9,7 @@ examples = "examples/"
 
 file = {
 	"applause.lua", "sndfile-stream.lua", -- "sndfile.lua",
-	"filters.lua", "dssi.lua", "midi.lua", "evdev.lua"
+	"filters.lua", "fft.lua", "dssi.lua", "midi.lua", "evdev.lua"
 }
 
 no_space_before_args = true
diff --git a/fft.lua b/fft.lua
index 4f63c0e..f2f3bd9 100644
--- a/fft.lua
+++ b/fft.lua
@@ -1,11 +1,10 @@
 ---
 --- @module applause
 ---
--- TODO:
--- Documentation
 local bit = require "bit"
 local ffi = require "ffi"
 
+-- FIXME: We return these numbers, so should this be public?
 local complex = ffi.typeof('double complex')
 
 -- NOTE: LuaJIT 2.1 does not yet support complex number arithmetics
@@ -22,13 +21,9 @@ local function complex_conj(x)
 	return complex(x.re, -x.im)
 end
 
--- The FFT() function returns the complex frequency spectrum, but may modify the
--- input array in-place.
--- This is important in order to avoid reallocations when performing FFT at real-time.
-
+--[[
 -- Direct application of the DFT formula.
 -- This has complexity O(n^2) and is here only for reference.
---[[
 function FFT(samples, invert)
 	local ret = table.new(#samples, 0)
 
@@ -53,6 +48,7 @@ function FFT(samples, invert)
 end
 ]]
 
+--[[
 -- Naive implementation of a radix-2 decimation-in-time (DIT) FFT
 -- via the Cooley-Tukey algorithm.
 -- This has complexity O(n*log2(n)), but requires lots of allocations.
@@ -60,11 +56,6 @@ end
 -- Samples can be real-values (audio samples), but also complex values
 -- (the frequency spectrum).
 -- #samples must be a power of 2.
--- A frequency spectrum result should always be mirrored.
---
--- Each bin i in the resulting frequency spectrum represents frequency
--- (i-1)*samplerate/#samples.
---[[
 function FFT(samples, inverse)
 	if #samples == 1 then return {complex(samples[1])} end
 
@@ -94,16 +85,29 @@ function FFT(samples, inverse)
 end
 ]]
 
--- Implementation of a radix-2 decimation-in-time (DIT) FFT
--- via the Cooley-Tukey algorithm.
--- This is still an out-of-place algorithm, but requires allocations
+--- Perform radix-2 decimation-in-time (DIT) FFT for internal usage.
+-- This is an implementation of the Cooley-Tukey algorithm.
+-- It is still an out-of-place algorithm, but requires allocations
 -- of auxilliary memory (2*N) only once.
 -- On the other hand, this version does not require a potentially costly
 -- bit reversal operation.
 -- This has complexity O(N*log2(N)).
+-- This is used for both directions (FFT and IFFT).
 -- For real inputs, the complex output array will conjugate symmetrical.
 -- But the second half is actually cheap to calculate.
--- See https://literateprograms.org/cooley-tukey_fft_algorithm__c_.html
+-- @tparam {sample,...} x Array of real or complex samples
+-- @int x_start First index in `x` to analyze.
+-- @int N Number of samples in `x` to transform.
+-- @int skip Number of entries to skip.
+--   The function will convert `x[x_start]`, `x[x_start+skip]`, up tp
+--   `x[x_start+skip*(N-1)]`.
+-- @tab X Output table. It should be preallocated to a size of `X_start+N-1`.
+-- @int X_start Index of first complex ouput sample in `X`.
+-- @tab E Temporary storage table. It should be preallocated to a size of `E_start+N-1`.
+-- @int E_start Index of first element in `E` to use.
+-- @tparam {number,...} twiddles
+--   Twiddle factors, precalculated via @{get_twiddles}.
+-- @see https://literateprograms.org/cooley-tukey_fft_algorithm__c_.html
 local function FFT_internal(x, x_start, N, skip, X, X_start, E, E_start, twiddles)
 	if N == 1 then
 		-- NOTE: This is responsible for the mirrorring, important for FFT_backward()
@@ -125,9 +129,16 @@ local function FFT_internal(x, x_start, N, skip, X, X_start, E, E_start, twiddle
 	end
 end
 
--- For real-valued inputs, complex outputs (audio samples to frequency spectrums):
--- The frequency spectrum is always conjugate symmetric, so `out` can be #samples/2+1
+--- Perform forward-FFT.
+-- For real-valued inputs (audio samples), the outputs will be complex (frequency spectrums).
+-- The frequency spectrum is always conjugate symmetric, so `out` can be half the size of `samples`,
 -- which significantly simplifies manipulations in the frequency domain.
+-- @tparam {number,...} samples Audio samples to transform.
+-- @tab out The output array. Should be preallocated to `#samples/2+1`.
+-- @tab E Temporary storage table. It should be preallocated to a size of `#samples`.
+-- @tparam {number,...} twiddles
+--   Twiddle factors, precalculated via `get_twiddles(#samples)`.
+-- @return The output array `out`.
 local function FFT_forward(samples, out, E, twiddles)
 	local D, D_start = E, 1+#samples/2
 
@@ -146,9 +157,15 @@ local function FFT_forward(samples, out, E, twiddles)
 	return out
 end
 
--- For complex-valued inputs and real-valued outputs (frequency spectrums to audio samples):
+--- Perform backward-FFT.
+-- For complex-valued inputs (frequency spectrum), the outputs will be real-valued (audio samples):
 -- The frequency spectrum is made to be mirrored, so `out` will be twice the length of `samples` minus 2.
--- NOTE: You will still need to generate twiddles with get_twiddles(true).
+-- @tparam {complex,...} samples Spectrum samples (`double complex` C type) to transform.
+-- @tab out The output array. Should be preallocated to `(#samples-1)*2`.
+-- @tab E Temporary storage table. It should be preallocated to a size of `(#samples-1)*2`.
+-- @tparam {number,...} twiddles
+--   Twiddle factors, precalculated via `get_twiddles(#samples, true)`.
+-- @return The output array `out`.
 local function FFT_backward(samples, out, E, twiddles)
 	local size = (#samples-1)*2 -- number of output samples
 	local D, D_start = E, 1+size/2
@@ -166,6 +183,10 @@ local function FFT_backward(samples, out, E, twiddles)
 	return out
 end
 
+--- Precalculate twiddle factors.
+-- @int N Number of output samples to calculcate twiddle factors for.
+-- @bool[opt=false] inverse If true, calculcate factors for inverse transformations.
+-- @treturn {number,...} `N/2` twiddle factors.
 local function get_twiddles(N, inverse)
 	local ret = table.new(N/2, 0)
 	for k = 1, N/2 do
@@ -177,6 +198,22 @@ local function get_twiddles(N, inverse)
 	return ret
 end
 
+--- Transform fixed number of audio samples to frequency spectrum (FFT).
+-- The spectrum is represented by complex numbers (C type: `double complex`).
+-- The resulting spectrum is conjuage symmetric and the redundant half is
+-- therefore not returned.
+-- Each bin `i` in the resulting frequency spectrum represents frequency
+-- `(i-1)*samplerate/#samples`.
+-- The number of input samples therefore also restricts the frequency resolution.
+-- For the human ear, a buffer size of 2048 would be more than enough.
+-- @tparam {number,...}|Stream samples
+--   The audio samples to transform.
+--   This can be a Stream, but only if it is not infinite (it will be automatically
+--   converted to a table).
+--   The number of samples must be a power of 2.
+-- @treturn {complex,...}
+--   The complex spectrum of size `#samples/2+1`.
+-- @usage FFT(Stream.SinOsc(samplerate*20/1024):sub(1, 1024))
 function FFT(samples)
 	assert(type(samples) == "table")
 	if samples.is_a_stream then samples = samples:totable() end
@@ -190,8 +227,13 @@ function FFT(samples)
 	return FFT_forward(samples, out, scratch, twiddles)
 end
 
--- Convert complex frequency spectrum to magnitude/amplitude per frequency.
--- FIXME: Should this be a Stream method?
+--- Convert complex frequency spectrum to magnitude/amplitude per frequency.
+-- This conversion is **inplace**.
+-- @tparam {complex,...} spectrum Frequency spectrum.
+-- @treturn {number,...} The magnitudes per frequency (same table as `spectrum`).
+-- @see FFT
+-- @usage tostream(magnitude(FFT(Stream.SinOsc(samplerate*20/1024):sub(1, 1024)))):gnuplot()
+-- @fixme Should this be a Stream method?
 -- Would also resolve having to choose between inplace and out-of-place calculation.
 function magnitude(spectrum)
 	for i = 1, #spectrum do
@@ -200,9 +242,17 @@ function magnitude(spectrum)
 	end
 	return spectrum
 end
--- Convert frequency spectrum to phase in value between [0,1].
--- Why the threshold is necessary, see
--- https://www.gaussianwaves.com/2015/11/interpreting-fft-results-obtaining-magnitude-and-phase-information/
+
+--- Convert frequency spectrum to phase in value between [0,1].
+-- This conversion is **inplace**.
+-- @tparam {complex,...} spectrum Frequency spectrum.
+-- @number[opt=0.1] threshold
+--   The magnitude threshold. Why this is necessary, see
+--   [this blog post](https://www.gaussianwaves.com/2015/11/interpreting-fft-results-obtaining-magnitude-and-phase-information/).
+-- @treturn {number,...} The phase value between [0,1] per frequency (same table as `spectrum`).
+-- @see FFT
+-- @see magnitude
+-- @usage tostream(phase(FFT(Stream.SinOsc(samplerate*20/1024, 0.7):sub(1, 1024)))):gnuplot()
 function phase(spectrum, threshold)
 	threshold = threshold or 0.1
 	for i = 1, #spectrum do
@@ -212,9 +262,20 @@ function phase(spectrum, threshold)
 	return spectrum
 end
 
--- Inverse FFT.
--- Converts complex samples in the frequency domain to real-values samples in the
--- time domain (ie. sound).
+--- Transform fixed number of frequency bins to audio samples (inverse FFT).
+-- The spectrum is represented by complex numbers (C type: `double complex`).
+-- The spectrum is assumed to be conjuage symmetric and the redundant half is
+-- not required to be provided.
+-- @tparam {complex,...}|Stream spectrum
+--   The frequency bins to transform.
+--   This can be a Stream, but only if it is not infinite (it will be automatically
+--   converted to a table).
+--   This spectrum length must be such that the resulting number of audio samples
+--   is a power of 2.
+-- @treturn {number,...}
+--   The audio samples of size `(#spectrum-1)*2`.
+-- @see FFT
+-- @usage IFFT(FFT(Stream.SinOsc(samplerate*20/1024):sub(1, 1024))):gnuplot()
 function IFFT(spectrum)
 	assert(type(spectrum) == "table")
 	if spectrum.is_a_stream then spectrum = spectrum:totable() end
@@ -229,13 +290,27 @@ function IFFT(spectrum)
 	return tostream(FFT_backward(spectrum, out, scratch, twiddles))
 end
 
--- Analyze a potentially realtime stream
--- This gives a stream of frequency spectrums
--- (of half the length).
--- FIXME: It's unelegant to pass in the window function as an argument.
+--- Analyze frequencies of a potentially infinite realtime stream via FFT.
+-- This partitions the source stream into buffers of a given `size`,
+-- optionally maps a window function and eventually applies the forward FFT,
+-- resulting in a stream of frequency spectrums (STFT).
+-- @within Class Stream
+-- @int size The size of the FFT buffer. This must be a power of 2.
+-- @func[opt] window_fnc
+--   An optional window function to apply to the audio data before FFT conversion.
+-- @treturn Stream
+--   Stream of frequency spectrums (arrays of complex numbers, ie.
+--   C type `double complex`).
+--   These arrays will have size `size/2+1`.
+-- @see Hamming
+-- @see Hanning
+-- @see FFT
+-- @usage Stream.SinOsc(440):FFT(1024):IFFT(1024):play()
+--
+-- @fixme It's unelegant to pass in the window function as an argument.
 -- This could only be avoided by moving the partitioning out of this method
--- and repeating the buffer size once again. E.g.:
--- foo:partition(1024):map(Hamming):FFT(1024)...
+-- and repeating the buffer size once again.
+-- E.g.: `foo:partition(1024):map(Hamming):FFT(1024)...`.
 -- Of course, if we had another FFT implementation like Danielson-Lanczos,
 -- that does not require preallocation of a few arrays, this problem also wouldn't
 -- exist.
@@ -257,9 +332,21 @@ function Stream:FFT(size, window_fnc)
 	end)
 end
 
--- NOTE: Here `size` refers to the size of the time domain chunks,
--- ie. the same value you pass into Stream:FFT().
--- A different FFT implementation like Danielson-Lanczos that works entirely
+--- Synthesize audio samples from a stream of frequency spectrums via inverse FFT.
+-- This is the inverse of @{Stream:FFT}.
+-- Note that when performing FFT and IFFT on a real-time stream, as is usually done,
+-- this will introduce a latency of `size/samplerate` seconds.
+-- @within Class Stream
+-- @int size
+--   The size of the time domain chunks (ie. the same value passed into @{Stream:FFT}).
+--   This will be twice the size of the spectrum arrays minus 2.
+-- @treturn Stream
+--   A stream of real-valued audio samples synthesized from the frequency spectrums.
+-- @see Stream:FFT
+-- @see IFFT
+-- @usage Stream.SinOsc(440):FFT(1024):IFFT(1024):play()
+--
+-- @fixme A different FFT implementation like Danielson-Lanczos that works entirely
 -- inplace, might not need repitition of these sizes.
 function Stream:IFFT(size)
 	-- NOTE: The size could be inferred from the first spectrum array,
@@ -284,12 +371,21 @@ end
 -- See also the commented-out code in filters.lua.
 --
 
+--- Apply Hamming window to a fixed number of samples.
+-- @tparam {number,...}|Stream samples
+--   The audio samples to window.
+--   This can be a Stream, but only if it is not infinite (it will be automatically
+--   converted to a table).
+-- @treturn {number,...}
+--   The windowed samples.
+--   The conversion is **inplace**, so this may be the same table as `samples`.
+-- @see Hanning
+-- @see Stream:FFT
+-- @usage tostream(magnitude(FFT(Hamming(Stream.SinOsc(samplerate*20.5/1024):sub(1, 1024))))):gnuplot()
 function Hamming(samples)
 	assert(type(samples) == "table")
 	if samples.is_a_stream then samples = samples:totable() end
 
-	--assert(bit.band(#samples, #samples-1) == 0, "Array length needs to be a power of 2")
-
 	local cos = math.cos
 	local pi2 = 2*math.pi
 
@@ -301,7 +397,16 @@ function Hamming(samples)
 	return samples
 end
 
--- Also called Hann
+--- Apply Hann window to a fixed number of samples.
+-- @tparam {number,...}|Stream samples
+--   The audio samples to window.
+--   This can be a Stream, but only if it is not infinite (it will be automatically
+--   converted to a table).
+-- @treturn {number,...}
+--   The windowed samples.
+--   The conversion is **inplace**, so this may be the same table as `samples`.
+-- @see Hamming
+-- @see Stream:FFT
 function Hanning(samples)
 	assert(type(samples) == "table")
 	if samples.is_a_stream then samples = samples:totable() end
author	Robin Haberkorn <robin.haberkorn@googlemail.com>	2023-12-19 19:30:18 +0300
committer	Robin Haberkorn <robin.haberkorn@googlemail.com>	2023-12-19 19:38:32 +0300
commit	2bbaaaf6c67f2f284e74cd46c804f7267f5439a6 (patch)
tree	8402b3fca30c2dc3b111e577c31ee86392d3f6a4
parent	664cd1590ecb716b60dc75411eb873b130b2f38b (diff)
download	applause2-2bbaaaf6c67f2f284e74cd46c804f7267f5439a6.tar.gz