Just unpacked here 📦

Polochon-street 2021-05-14 16:35:08 +02:00
commit 3b8b160a07
44 changed files with 5327 additions and 0 deletions

34
.github/workflows/rust.yml vendored Normal file

@@ -0,0 +1,34 @@
name: Rust
on:
push:
branches: [ master ]
pull_request:
branches: [ master ]
env:
CARGO_TERM_COLOR: always
jobs:
build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions-rs/toolchain@v1
with:
toolchain: nightly-2021-04-01
override: false
- name: Packages
run: sudo apt-get install build-essential yasm libavutil-dev libavcodec-dev libavformat-dev libavfilter-dev libavdevice-dev libswresample-dev libfftw3-dev ffmpeg
- name: Build
run: cargo build --verbose
- name: Run tests
run: cargo test --verbose
- name: Run example tests
run: cargo test --verbose --examples
- name: Build benches
run: cargo +nightly-2021-04-01 bench --verbose --features=bench --no-run
- name: Build examples
run: cargo build --examples --verbose

1860
Cargo.lock generated Normal file

File diff suppressed because it is too large.

44
Cargo.toml Normal file

@@ -0,0 +1,44 @@
[package]
name = "bliss-rs"
version = "0.1.0"
authors = ["Polochon-street <polochonstreet@gmx.fr>"]
edition = "2018"
[features]
build-ffmpeg = ["ffmpeg-next/build"]
bench = []
[dependencies]
ripemd160 = "0.9.0"
ndarray-npy = "0.8.0"
ndarray = { version = "0.15.0", features = ["rayon"] }
num_cpus = "1.13.0"
ndarray-stats = "0.5.0"
rustfft = "5.0.1"
lazy_static = "1.4.0"
rayon = "1.5.0"
crossbeam = "0.8.0"
noisy_float = "0.2.0"
# Until either https://github.com/zmwangx/rust-ffmpeg/pull/60
# or https://github.com/zmwangx/rust-ffmpeg/pull/62 is in
ffmpeg-next = {git = "https://github.com/Polochon-street/rust-ffmpeg", branch = "stop-failing-unimplemented-arms"}
log = "0.4.14"
env_logger = "0.8.3"
thiserror = "1.0.24"
[dev-dependencies]
mpd = "0.0.12"
rusqlite = "0.25.0"
dirs = "3.0.1"
tempdir = "0.3.7"
clap = "2.33.3"
anyhow = "1.0.40"
[dependencies.aubio-rs]
git = "https://github.com/Polochon-street/aubio-rs"
# Like this until https://github.com/aubio/aubio/issues/336 is solved one way
# or the other
[dependencies.aubio-lib]
git = "https://github.com/Polochon-street/aubio-rs"
branch = "aubio-tweaks"

32
README.md Normal file

@@ -0,0 +1,32 @@
![build](https://github.com/Polochon-street/bliss-rs/workflows/Rust/badge.svg)
# Bliss music analyser - Rust version
Bliss-rs is the Rust improvement of [Bliss](https://github.com/Polochon-street/bliss). The data it
outputs is not compatible with the data used by Bliss, since it uses
different, more accurate features, this time based on actual literature.
Like Bliss, it eases the creation of « intelligent » playlists and/or continuous
play, à la Spotify/Grooveshark Radio.
## Usage
Currently not usable.
It's missing a binary / example to actually use the computed features.
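For reference, the two examples added in this commit show the intended calls. Here is a minimal sketch of the same usage (the paths are placeholders; see `examples/analyse.rs` and `examples/distance.rs` for the real versions):

```rust
use bliss_rs::Song;

fn main() -> Result<(), String> {
    // Analyse a song and print its raw feature vector.
    let song = Song::new("/path/to/song.flac").map_err(|e| e.to_string())?;
    println!("{}: {:?}", song.path, song.analysis);

    // Analyse a second song and print the distance between the two;
    // the smaller the distance, the more similar the songs.
    let other = Song::new("/path/to/other.flac").map_err(|e| e.to_string())?;
    println!("d = {}", song.distance(&other));
    Ok(())
}
```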
## Acknowledgements
* This library relies heavily on [aubio](https://aubio.org/)'s
[Rust bindings](https://crates.io/crates/aubio-rs) for the spectral /
timbral analysis, so a big thanks to both the creators and contributors
of aubio, and to @katyo for making aubio bindings for Rust.
* The first part of the chroma extraction is basically a rewrite of
[librosa](https://librosa.org/doc/latest/index.html)'s
[chroma feature extraction](https://librosa.org/doc/latest/generated/librosa.feature.chroma_stft.html?highlight=chroma#librosa.feature.chroma_stft)
from Python to Rust, with just as few features as needed. Thanks
to both its creators and contributors as well.
* Finally, a big thanks to
[Christof Weiss](https://www.audiolabs-erlangen.de/fau/assistant/weiss)
for pointing me in the right direction for the chroma feature summarization,
which is basically also a rewrite from Python to Rust of some of the
awesome notebooks by AudioLabs Erlangen, which you can find
[here](https://www.audiolabs-erlangen.de/resources/MIR/FMP/C0/C0.html).

BIN
data/capacity_fix.ogg Normal file

Binary file not shown.

BIN
data/chroma-filter.npy Normal file

Binary file not shown.

BIN
data/chroma-interval.npy Normal file

Binary file not shown.


BIN
data/chroma.npy Normal file

Binary file not shown.

BIN
data/convolve.npy Normal file

Binary file not shown.

BIN
data/convolve_odd.npy Normal file

Binary file not shown.

BIN
data/downsampled.npy Normal file

Binary file not shown.


BIN
data/f_analysis.npy Normal file

Binary file not shown.


BIN
data/filtered_features.npy Normal file

Binary file not shown.


BIN
data/librosa-decoded.npy Normal file

Binary file not shown.

BIN
data/librosa-stft.npy Normal file

Binary file not shown.

BIN
data/no_channel.wav Normal file

Binary file not shown.

BIN
data/piano.flac Normal file

Binary file not shown.

BIN
data/picture.jpg Normal file

Binary file not shown.


BIN
data/picture.png Normal file

Binary file not shown.


BIN
data/pitch-tuning.npy Normal file

Binary file not shown.

BIN
data/s16_mono_22_5kHz.flac Normal file

Binary file not shown.


BIN
data/sorted_features.npy Normal file

Binary file not shown.


BIN
data/spectrum-chroma.npy Normal file

Binary file not shown.

BIN
data/tone_11080Hz.flac Normal file

Binary file not shown.

BIN
data/white_noise.flac Normal file

Binary file not shown.

17
examples/analyse.rs Normal file

@@ -0,0 +1,17 @@
use bliss_rs::Song;
use std::env;
/**
* Simple utility to print the result of an Analysis.
*
* Takes a list of files to analyse and prints the result of the corresponding Analysis.
*/
fn main() {
let args: Vec<String> = env::args().skip(1).collect();
for path in &args {
match Song::new(&path) {
Ok(song) => println!("{}: {:?}", path, song.analysis,),
Err(e) => println!("{}: {}", path, e),
}
}
}

26
examples/distance.rs Normal file

@@ -0,0 +1,26 @@
use bliss_rs::Song;
use std::env;
/**
* Simple utility to print distance between two songs according to bliss.
*
* Takes two file paths, analyses the corresponding songs, and prints
* the distance between the two files according to bliss.
*/
fn main() -> Result<(), String> {
let mut paths = env::args().skip(1).take(2);
let first_path = paths.next().ok_or("Help: ./distance <song1> <song2>")?;
let second_path = paths.next().ok_or("Help: ./distance <song1> <song2>")?;
let song1 = Song::new(&first_path).map_err(|x| x.to_string())?;
let song2 = Song::new(&second_path).map_err(|x| x.to_string())?;
println!(
"d({}, {}) = {}",
song1.path,
song2.path,
song1.distance(&song2)
);
Ok(())
}

638
src/chroma.rs Normal file

@@ -0,0 +1,638 @@
//! Chroma feature extraction module.
//!
//! Contains functions to compute the chromagram of a song, and
//! then from this chromagram extract the song's tone and mode
//! (minor / major).
extern crate aubio_lib;
extern crate noisy_float;
use crate::utils::stft;
use crate::utils::{hz_to_octs_inplace, Normalize};
use crate::BlissError;
use ndarray::{arr1, arr2, concatenate, s, Array, Array1, Array2, Axis, Zip};
use ndarray_stats::interpolate::Midpoint;
use ndarray_stats::QuantileExt;
use noisy_float::prelude::*;
/**
* General object holding the chroma descriptor.
*
* Current chroma descriptors are interval features (see
* https://speech.di.uoa.gr/ICMC-SMC-2014/images/VOL_2/1461.pdf).
*
* Contrary to the other descriptors that can be used with streaming
* without consequences, this one performs better if the full song is used at
* once.
*/
pub(crate) struct ChromaDesc {
sample_rate: u32,
n_chroma: u32,
values_chroma: Array2<f64>,
}
impl Normalize for ChromaDesc {
const MAX_VALUE: f32 = 0.12;
const MIN_VALUE: f32 = 0.;
}
impl ChromaDesc {
pub const WINDOW_SIZE: usize = 8192;
pub fn new(sample_rate: u32, n_chroma: u32) -> ChromaDesc {
ChromaDesc {
sample_rate,
n_chroma,
values_chroma: Array2::zeros((n_chroma as usize, 0)),
}
}
/**
* Compute and store the chroma of a signal.
*
* Passing a full song here once instead of streaming smaller parts of the
* song will greatly improve accuracy.
*/
pub fn do_(&mut self, signal: &[f32]) -> Result<(), BlissError> {
let mut stft = stft(signal, ChromaDesc::WINDOW_SIZE, 2205);
let tuning = estimate_tuning(
self.sample_rate as u32,
&stft,
ChromaDesc::WINDOW_SIZE,
0.01,
12,
)?;
let chroma = chroma_stft(
self.sample_rate,
&mut stft,
ChromaDesc::WINDOW_SIZE,
self.n_chroma,
tuning,
)?;
self.values_chroma = concatenate![Axis(1), self.values_chroma, chroma];
Ok(())
}
/**
* Get the song's interval features.
*
* Return the 6 pitch class set categories, as well as the major, minor,
* diminished and augmented triads.
*
* See this paper https://speech.di.uoa.gr/ICMC-SMC-2014/images/VOL_2/1461.pdf
* for more information ("Timbre-invariant Audio Features for Style Analysis of Classical
* Music").
*/
pub fn get_values(&mut self) -> Vec<f32> {
chroma_interval_features(&self.values_chroma)
.mapv(|x| self.normalize(x as f32))
.to_vec()
}
}
// Functions below are Rust versions of python notebooks by AudioLabs Erlangen
// (https://www.audiolabs-erlangen.de/resources/MIR/FMP/C0/C0.html)
fn chroma_interval_features(chroma: &Array2<f64>) -> Array1<f64> {
let chroma = normalize_feature_sequence(&chroma.mapv(|x| (x * 15.).exp()));
let templates = arr2(&[
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
[1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 1, 0, 0, 0, 0, 1, 1, 0],
[0, 0, 0, 1, 0, 0, 1, 0, 0, 1],
[0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 0, 0, 1, 0],
[0, 0, 0, 0, 0, 0, 1, 1, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
]);
let interval_feature_matrix = extract_interval_features(&chroma, &templates);
interval_feature_matrix.mean_axis(Axis(1)).unwrap()
}
fn extract_interval_features(chroma: &Array2<f64>, templates: &Array2<i32>) -> Array2<f64> {
let mut f_intervals: Array2<f64> = Array::zeros((chroma.shape()[1], templates.shape()[1]));
for (template, mut f_interval) in templates
.axis_iter(Axis(1))
.zip(f_intervals.axis_iter_mut(Axis(1)))
{
for shift in 0..12 {
let mut vec: Vec<i32> = template.to_vec();
vec.rotate_right(shift);
let rolled = arr1(&vec);
let power = Zip::from(chroma.t())
.and_broadcast(&rolled)
.map_collect(|&f, &s| f.powi(s))
.map_axis_mut(Axis(1), |x| x.product());
f_interval += &power;
}
}
f_intervals.t().to_owned()
}
fn normalize_feature_sequence(feature: &Array2<f64>) -> Array2<f64> {
let mut normalized_sequence = feature.to_owned();
for mut column in normalized_sequence.columns_mut() {
let mut sum = column.mapv(|x| x.abs()).sum();
if sum < 0.0001 {
sum = 1.;
}
column /= sum;
}
normalized_sequence
}
// All the functions below are more than heavily inspired from
// librosa's code: https://github.com/librosa/librosa/blob/main/librosa/feature/spectral.py#L1165
// chroma(22050, n_fft=5, n_chroma=12)
//
// Could be precomputed, but it takes very little time to compute it
// on the fly compared to the rest of the functions, and we'd lose the
// possibility to tweak parameters.
fn chroma_filter(
sample_rate: u32,
n_fft: usize,
n_chroma: u32,
tuning: f64,
) -> Result<Array2<f64>, BlissError> {
let ctroct = 5.0;
let octwidth = 2.;
let n_chroma_float = f64::from(n_chroma);
let n_chroma2 = (n_chroma_float / 2.0).round() as u32;
let n_chroma2_float = f64::from(n_chroma2);
let frequencies = Array::linspace(0., f64::from(sample_rate), (n_fft + 1) as usize);
let mut freq_bins = frequencies;
hz_to_octs_inplace(&mut freq_bins, tuning, n_chroma);
freq_bins.mapv_inplace(|x| x * n_chroma_float);
freq_bins[0] = freq_bins[1] - 1.5 * n_chroma_float;
let mut binwidth_bins = Array::ones(freq_bins.raw_dim());
binwidth_bins.slice_mut(s![0..freq_bins.len() - 1]).assign(
&(&freq_bins.slice(s![1..]) - &freq_bins.slice(s![..-1])).mapv(|x| {
if x <= 1. {
1.
} else {
x
}
}),
);
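// d[i, j] is the distance, in chroma bins, between FFT bin j and chroma bin i,
// wrapped into [-n_chroma / 2, n_chroma / 2), scaled by each bin's width, and
// finally mapped through a Gaussian to obtain the filter weights.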
let mut d: Array2<f64> = Array::zeros((n_chroma as usize, (&freq_bins).len()));
for (idx, mut row) in d.rows_mut().into_iter().enumerate() {
row.fill(idx as f64);
}
d = -d + &freq_bins;
d.mapv_inplace(|x| {
(x + n_chroma2_float + 10. * n_chroma_float) % n_chroma_float - n_chroma2_float
});
d = d / binwidth_bins;
d.mapv_inplace(|x| (-0.5 * (2. * x) * (2. * x)).exp());
let mut wts = d;
// Normalize by computing the l2-norm over the columns
for mut col in wts.columns_mut() {
let mut sum = col.mapv(|x| x * x).sum().sqrt();
if sum < f64::MIN_POSITIVE {
sum = 1.;
}
col /= sum;
}
freq_bins.mapv_inplace(|x| (-0.5 * ((x / n_chroma_float - ctroct) / octwidth).powi(2)).exp());
wts *= &freq_bins;
// np.roll(), np bro
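// Equivalent of np.roll(wts, -3, axis=0): allocate an uninitialized buffer of
// the same shape, then fill it with the rows of `wts` shifted up by three
// (the first three rows wrap around to the bottom).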
let mut uninit: Vec<f64> = Vec::with_capacity((&wts).len());
unsafe {
uninit.set_len(wts.len());
}
let mut b = Array::from(uninit)
.into_shape(wts.dim())
.map_err(|e| BlissError::AnalysisError(format!("in chroma: {}", e.to_string())))?;
b.slice_mut(s![-3.., ..]).assign(&wts.slice(s![..3, ..]));
b.slice_mut(s![..-3, ..]).assign(&wts.slice(s![3.., ..]));
wts = b;
let non_aliased = (1 + n_fft / 2) as usize;
Ok(wts.slice_move(s![.., ..non_aliased]))
}
fn pip_track(
sample_rate: u32,
spectrum: &Array2<f64>,
n_fft: usize,
) -> Result<(Vec<f64>, Vec<f64>), BlissError> {
let sample_rate_float = f64::from(sample_rate);
let fmin = 150.0_f64;
let fmax = 4000.0_f64.min(sample_rate_float / 2.0);
let threshold = 0.1;
let fft_freqs = Array::linspace(0., sample_rate_float / 2., 1 + n_fft / 2);
let length = spectrum.len_of(Axis(0));
// TODO>1.0 Make this a bitvec when that won't mean depending on a crate
let freq_mask = fft_freqs
.iter()
.map(|&f| (fmin <= f) && (f < fmax))
.collect::<Vec<bool>>();
let ref_value = spectrum.map_axis(Axis(0), |x| {
let first: f64 = *x.first().expect("empty spectrum axis");
let max = x.fold(first, |acc, &elem| if acc > elem { acc } else { elem });
threshold * max
});
// There will be at most taken_columns * length elements in pitches / mags
let taken_columns = freq_mask
.iter()
.fold(0, |acc, &x| if x { acc + 1 } else { acc });
let mut pitches = Vec::with_capacity(taken_columns * length);
let mut mags = Vec::with_capacity(taken_columns * length);
let beginning = freq_mask
.iter()
.position(|&b| b)
.ok_or(BlissError::AnalysisError("in chroma".to_string()))?;
let end = freq_mask
.iter()
.rposition(|&b| b)
.ok_or(BlissError::AnalysisError("in chroma".to_string()))?;
let zipped = Zip::indexed(spectrum.slice(s![beginning..end - 3, ..]))
.and(spectrum.slice(s![beginning + 1..end - 2, ..]))
.and(spectrum.slice(s![beginning + 2..end - 1, ..]));
// No need to handle the last column, since freq_mask[length - 1] is
// always going to be `false` for 22.5kHz
zipped.for_each(|(i, j), &before_elem, &elem, &after_elem| {
if elem > ref_value[j] && after_elem <= elem && before_elem < elem {
let avg = 0.5 * (after_elem - before_elem);
let mut shift = 2. * elem - after_elem - before_elem;
if shift.abs() < f64::MIN_POSITIVE {
shift += 1.;
}
shift = avg / shift;
pitches.push(((i + beginning + 1) as f64 + shift) * sample_rate_float / n_fft as f64);
mags.push(elem + 0.5 * avg * shift);
}
});
Ok((pitches, mags))
}
// Only use this with strictly positive `frequencies`.
fn pitch_tuning(
frequencies: &mut Array1<f64>,
resolution: f64,
bins_per_octave: u32,
) -> Result<f64, BlissError> {
if frequencies.is_empty() {
return Ok(0.0);
}
hz_to_octs_inplace(frequencies, 0.0, 12);
frequencies.mapv_inplace(|x| f64::from(bins_per_octave) * x % 1.0);
// Put everything between -0.5 and 0.5.
frequencies.mapv_inplace(|x| if x >= 0.5 { x - 1. } else { x });
let indexes = ((frequencies.to_owned() - -0.5) / resolution).mapv(|x| x as usize);
let mut counts: Array1<usize> = Array::zeros(((0.5 - -0.5) / resolution) as usize);
for &idx in indexes.iter() {
counts[idx] += 1;
}
let max_index = counts
.argmax()
.map_err(|e| BlissError::AnalysisError(format!("in chroma: {}", e.to_string())))?;
// Return the bin with the most recurring frequency.
Ok((-50. + (100. * resolution * max_index as f64)) / 100.)
}
fn estimate_tuning(
sample_rate: u32,
spectrum: &Array2<f64>,
n_fft: usize,
resolution: f64,
bins_per_octave: u32,
) -> Result<f64, BlissError> {
let (pitch, mag) = pip_track(sample_rate, &spectrum, n_fft)?;
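// Keep only the strictly positive pitches, then keep only those whose
// magnitude is at least the median of the remaining magnitudes.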
let (filtered_pitch, filtered_mag): (Vec<N64>, Vec<N64>) = pitch
.iter()
.zip(&mag)
.filter(|(&p, _)| p > 0.)
.map(|(x, y)| (n64(*x), n64(*y)))
.unzip();
let threshold: N64 = Array::from(filtered_mag.to_vec())
.quantile_axis_mut(Axis(0), n64(0.5), &Midpoint)
.map_err(|e| BlissError::AnalysisError(format!("in chroma: {}", e.to_string())))?
.into_scalar();
let mut pitch = filtered_pitch
.iter()
.zip(&filtered_mag)
.filter_map(|(&p, &m)| if m >= threshold { Some(p.into()) } else { None })
.collect::<Array1<f64>>();
pitch_tuning(&mut pitch, resolution, bins_per_octave)
}
fn chroma_stft(
sample_rate: u32,
spectrum: &mut Array2<f64>,
n_fft: usize,
n_chroma: u32,
tuning: f64,
) -> Result<Array2<f64>, BlissError> {
spectrum.par_mapv_inplace(|x| x * x);
let mut raw_chroma = chroma_filter(sample_rate, n_fft, n_chroma, tuning)?;
raw_chroma = raw_chroma.dot(spectrum);
for mut row in raw_chroma.columns_mut() {
let mut sum = row.mapv(|x| x.abs()).sum();
if sum < f64::MIN_POSITIVE {
sum = 1.;
}
row /= sum;
}
Ok(raw_chroma)
}
#[cfg(test)]
mod test {
use super::*;
use crate::utils::stft;
use crate::{Song, SAMPLE_RATE};
use ndarray::{arr1, arr2, Array2};
use ndarray_npy::ReadNpyExt;
use std::fs::File;
#[test]
fn test_chroma_interval_features() {
let file = File::open("data/chroma.npy").unwrap();
let chroma = Array2::<f64>::read_npy(file).unwrap();
let features = chroma_interval_features(&chroma);
let expected_features = arr1(&[
0.03860284, 0.02185281, 0.04224379, 0.06385278, 0.07311148, 0.02512566, 0.00319899,
0.00311308, 0.00107433, 0.00241861,
]);
for (expected, actual) in expected_features.iter().zip(&features) {
assert!(0.00000001 > (expected - actual).abs());
}
}
#[test]
fn test_extract_interval_features() {
let file = File::open("data/chroma-interval.npy").unwrap();
let chroma = Array2::<f64>::read_npy(file).unwrap();
let templates = arr2(&[
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
[1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 1, 0, 0, 0, 0, 1, 1, 0],
[0, 0, 0, 1, 0, 0, 1, 0, 0, 1],
[0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 0, 0, 1, 0],
[0, 0, 0, 0, 0, 0, 1, 1, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
]);
let file = File::open("data/interval-feature-matrix.npy").unwrap();
let expected_interval_features = Array2::<f64>::read_npy(file).unwrap();
let interval_features = extract_interval_features(&chroma, &templates);
for (expected, actual) in expected_interval_features
.iter()
.zip(interval_features.iter())
{
assert!(0.0000001 > (expected - actual).abs());
}
}
#[test]
fn test_normalize_feature_sequence() {
let array = arr2(&[[0.1, 0.3, 0.4], [1.1, 0.53, 1.01]]);
let expected_array = arr2(&[
[0.08333333, 0.36144578, 0.28368794],
[0.91666667, 0.63855422, 0.71631206],
]);
let normalized_array = normalize_feature_sequence(&array);
assert!(!array.is_empty() && !expected_array.is_empty());
for (expected, actual) in normalized_array.iter().zip(expected_array.iter()) {
assert!(0.0000001 > (expected - actual).abs());
}
}
#[test]
fn test_chroma_desc() {
let song = Song::decode("data/s16_mono_22_5kHz.flac").unwrap();
let mut chroma_desc = ChromaDesc::new(SAMPLE_RATE, 12);
chroma_desc.do_(&song.sample_array).unwrap();
let expected_values = vec![
-0.35661936,
-0.63578653,
-0.29593682,
0.06421304,
0.21852458,
-0.581239,
-0.9466835,
-0.9481153,
-0.9820945,
-0.95968974,
];
for (expected, actual) in expected_values.iter().zip(chroma_desc.get_values().iter()) {
assert!(0.0000001 > (expected - actual).abs());
}
}
#[test]
fn test_chroma_stft_decode() {
let signal = Song::decode("data/s16_mono_22_5kHz.flac")
.unwrap()
.sample_array;
let mut stft = stft(&signal, 8192, 2205);
let file = File::open("data/chroma.npy").unwrap();
let expected_chroma = Array2::<f64>::read_npy(file).unwrap();
let chroma = chroma_stft(22050, &mut stft, 8192, 12, -0.04999999999999999).unwrap();
assert!(!chroma.is_empty() && !expected_chroma.is_empty());
for (expected, actual) in expected_chroma.iter().zip(chroma.iter()) {
assert!(0.0000001 > (expected - actual).abs());
}
}
#[test]
fn test_estimate_tuning() {
let file = File::open("data/spectrum-chroma.npy").unwrap();
let arr = Array2::<f64>::read_npy(file).unwrap();
let tuning = estimate_tuning(22050, &arr, 2048, 0.01, 12).unwrap();
assert!(0.000001 > (-0.09999999999999998 - tuning).abs());
}
#[test]
fn test_estimate_tuning_decode() {
let signal = Song::decode("data/s16_mono_22_5kHz.flac")
.unwrap()
.sample_array;
let stft = stft(&signal, 8192, 2205);
let tuning = estimate_tuning(22050, &stft, 8192, 0.01, 12).unwrap();
assert!(0.000001 > (-0.04999999999999999 - tuning).abs());
}
#[test]
fn test_pitch_tuning() {
let file = File::open("data/pitch-tuning.npy").unwrap();
let mut pitch = Array1::<f64>::read_npy(file).unwrap();
assert_eq!(-0.1, pitch_tuning(&mut pitch, 0.05, 12).unwrap());
}
#[test]
fn test_pitch_tuning_no_frequencies() {
let mut frequencies = arr1(&[]);
assert_eq!(0.0, pitch_tuning(&mut frequencies, 0.05, 12).unwrap());
}
#[test]
fn test_pip_track() {
let file = File::open("data/spectrum-chroma.npy").unwrap();
let spectrum = Array2::<f64>::read_npy(file).unwrap();
let mags_file = File::open("data/spectrum-chroma-mags.npy").unwrap();
let expected_mags = Array1::<f64>::read_npy(mags_file).unwrap();
let pitches_file = File::open("data/spectrum-chroma-pitches.npy").unwrap();
let expected_pitches = Array1::<f64>::read_npy(pitches_file).unwrap();
let (mut pitches, mut mags) = pip_track(22050, &spectrum, 2048).unwrap();
pitches.sort_by(|a, b| a.partial_cmp(b).unwrap());
mags.sort_by(|a, b| a.partial_cmp(b).unwrap());
for (expected_pitches, actual_pitches) in expected_pitches.iter().zip(pitches.iter()) {
assert!(0.00000001 > (expected_pitches - actual_pitches).abs());
}
for (expected_mags, actual_mags) in expected_mags.iter().zip(mags.iter()) {
assert!(0.00000001 > (expected_mags - actual_mags).abs());
}
}
#[test]
fn test_chroma_filter() {
let file = File::open("data/chroma-filter.npy").unwrap();
let expected_filter = Array2::<f64>::read_npy(file).unwrap();
let filter = chroma_filter(22050, 2048, 12, -0.1).unwrap();
for (expected, actual) in expected_filter.iter().zip(filter.iter()) {
assert!(0.000000001 > (expected - actual).abs());
}
}
}
#[cfg(all(feature = "bench", test))]
mod bench {
extern crate test;
use super::*;
use crate::utils::stft;
use crate::{Song, SAMPLE_RATE};
use ndarray::{arr2, Array1, Array2};
use ndarray_npy::ReadNpyExt;
use std::fs::File;
use test::Bencher;
#[bench]
fn bench_estimate_tuning(b: &mut Bencher) {
let file = File::open("data/spectrum-chroma.npy").unwrap();
let arr = Array2::<f64>::read_npy(file).unwrap();
b.iter(|| {
estimate_tuning(22050, &arr, 2048, 0.01, 12).unwrap();
});
}
#[bench]
fn bench_pitch_tuning(b: &mut Bencher) {
let file = File::open("data/pitch-tuning.npy").unwrap();
let pitch = Array1::<f64>::read_npy(file).unwrap();
b.iter(|| {
pitch_tuning(&mut pitch.to_owned(), 0.05, 12).unwrap();
});
}
#[bench]
fn bench_pip_track(b: &mut Bencher) {
let file = File::open("data/spectrum-chroma.npy").unwrap();
let spectrum = Array2::<f64>::read_npy(file).unwrap();
b.iter(|| {
pip_track(22050, &spectrum, 2048).unwrap();
});
}
#[bench]
fn bench_chroma_filter(b: &mut Bencher) {
b.iter(|| {
chroma_filter(22050, 2048, 12, -0.1).unwrap();
});
}
#[bench]
fn bench_normalize_feature_sequence(b: &mut Bencher) {
let array = arr2(&[[0.1, 0.3, 0.4], [1.1, 0.53, 1.01]]);
b.iter(|| {
normalize_feature_sequence(&array);
});
}
#[bench]
fn bench_chroma_desc(b: &mut Bencher) {
let song = Song::decode("data/s16_mono_22_5kHz.flac").unwrap();
let mut chroma_desc = ChromaDesc::new(SAMPLE_RATE, 12);
let signal = song.sample_array;
b.iter(|| {
chroma_desc.do_(&signal).unwrap();
chroma_desc.get_values();
});
}
#[bench]
fn bench_chroma_stft(b: &mut Bencher) {
let song = Song::decode("data/s16_mono_22_5kHz.flac").unwrap();
let mut chroma_desc = ChromaDesc::new(SAMPLE_RATE, 12);
let signal = song.sample_array;
b.iter(|| {
chroma_desc.do_(&signal).unwrap();
chroma_desc.get_values();
});
}
#[bench]
fn bench_chroma_stft_decode(b: &mut Bencher) {
let signal = Song::decode("data/s16_mono_22_5kHz.flac")
.unwrap()
.sample_array;
let mut stft = stft(&signal, 8192, 2205);
b.iter(|| {
chroma_stft(22050, &mut stft, 8192, 12, -0.04999999999999999).unwrap();
});
}
}

131
src/lib.rs Normal file

@@ -0,0 +1,131 @@
#![cfg_attr(feature = "bench", feature(test))]
mod chroma;
pub mod library;
mod misc;
mod song;
mod temporal;
mod timbral;
mod utils;
extern crate crossbeam;
extern crate num_cpus;
use thiserror::Error;
const CHANNELS: u16 = 1;
const SAMPLE_RATE: u32 = 22050;
#[derive(Default, Debug, PartialEq, Clone)]
/// Simple object used to represent a Song, with its path, analysis, and
/// other metadata (artist, genre...)
pub struct Song {
pub path: String,
pub artist: String,
pub title: String,
pub album: String,
pub track_number: String,
pub genre: String,
/// Vec containing analysis, in order: tempo, zero-crossing rate,
/// mean spectral centroid, std deviation spectral centroid,
/// mean spectral rolloff, std deviation spectral rolloff,
/// mean spectral flatness, std deviation spectral flatness,
/// mean loudness, std deviation loudness,
/// chroma interval features 1 to 10.
pub analysis: Vec<f32>,
}
#[derive(Error, Debug, PartialEq)]
pub enum BlissError {
#[error("Error happened while decoding file {0}")]
DecodingError(String),
#[error("Error happened while analyzing file {0}")]
AnalysisError(String),
#[error("Error happened with the music library provider - {0}")]
ProviderError(String),
}
/// Simple function to bulk analyze a set of songs represented by their
/// absolute paths.
///
/// When making an extension for an audio player, prefer
/// implementing the `Library` trait.
#[doc(hidden)]
pub fn bulk_analyse(paths: Vec<String>) -> Vec<Result<Song, BlissError>> {
let mut songs = Vec::with_capacity(paths.len());
let num_cpus = num_cpus::get();
crossbeam::scope(|s| {
let mut handles = Vec::with_capacity(paths.len() / num_cpus);
let mut chunk_number = paths.len() / num_cpus;
if chunk_number == 0 {
chunk_number = paths.len();
}
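// Spawn one scoped thread per chunk of paths; each thread analyses its own
// chunk, and the results are gathered through the join handles below.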
for chunk in paths.chunks(chunk_number) {
handles.push(s.spawn(move |_| {
let mut result = Vec::with_capacity(chunk.len());
for path in chunk {
let song = Song::new(&path);
result.push(song);
}
result
}));
}
for handle in handles {
songs.extend(handle.join().unwrap());
}
})
.unwrap();
songs
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_bulk_analyse() {
let results = bulk_analyse(vec![
String::from("data/s16_mono_22_5kHz.flac"),
String::from("data/s16_mono_22_5kHz.flac"),
String::from("nonexistent"),
String::from("data/s16_stereo_22_5kHz.flac"),
String::from("nonexistent"),
String::from("nonexistent"),
String::from("nonexistent"),
String::from("nonexistent"),
String::from("nonexistent"),
String::from("nonexistent"),
String::from("nonexistent"),
]);
let mut errored_songs: Vec<String> = results
.iter()
.filter_map(|x| x.as_ref().err().map(|x| x.to_string()))
.collect();
errored_songs.sort_by(|a, b| a.cmp(b));
let mut analysed_songs: Vec<String> = results
.iter()
.filter_map(|x| x.as_ref().ok().map(|x| x.path.to_string()))
.collect();
analysed_songs.sort_by(|a, b| a.cmp(b));
assert_eq!(
vec![
String::from(
"Error happened while decoding file while opening format: ffmpeg::Error(2: No such file or directory)."
);
8
],
errored_songs
);
assert_eq!(
vec![
String::from("data/s16_mono_22_5kHz.flac"),
String::from("data/s16_mono_22_5kHz.flac"),
String::from("data/s16_stereo_22_5kHz.flac"),
],
analysed_songs,
);
}
}

389
src/library.rs Normal file

@@ -0,0 +1,389 @@
//! Module containing the Library trait, useful to get started implementing
//! a plug-in for an audio player.
use crate::{BlissError, Song};
use log::{error, info, warn};
use ndarray::{arr1, Array};
use noisy_float::prelude::*;
use std::sync::mpsc;
use std::sync::mpsc::{Receiver, Sender};
use std::thread;
/// Library trait to make creating plug-ins for existing audio players easier.
pub trait Library {
/// Return the absolute path of all the songs in an
/// audio player's music library.
fn get_songs_paths(&self) -> Result<Vec<String>, BlissError>;
/// Store an analyzed Song object in some (cold) storage, e.g.
/// a database, a file...
fn store_song(&mut self, song: &Song) -> Result<(), BlissError>;
/// Log and / or store that an error happened while trying to decode and
/// analyze a song.
fn store_error_song(&mut self, song_path: String, error: BlissError) -> Result<(), BlissError>;
/// Retrieve a list of all the stored Songs.
///
/// This should work only after having run `analyze_library` at least
/// once.
fn get_stored_songs(&self) -> Result<Vec<Song>, BlissError>;
/// Return a list of songs that are similar to ``first_song``.
///
/// # Arguments
///
/// * `first_song` - The song the playlist will be built from.
/// * `playlist_length` - The playlist length. If there are not enough
/// songs in the library, it will be truncated to the size of the library.
///
/// # Returns
///
/// A vector of `playlist_length` Songs, including `first_song`, that you
/// most likely want to plug in your audio player by using something like
/// `ret.map(|song| song.path.to_owned()).collect::<Vec<String>>()`.
fn playlist_from_song(
&self,
first_song: Song,
playlist_length: usize,
) -> Result<Vec<Song>, BlissError> {
let analysis_current_song = arr1(&first_song.analysis.to_vec());
let mut songs = self.get_stored_songs()?;
let m = Array::eye(first_song.analysis.len());
songs.sort_by_cached_key(|song| {
n32((arr1(&song.analysis) - &analysis_current_song)
.dot(&m)
.dot(&(arr1(&song.analysis) - &analysis_current_song)))
});
Ok(songs
.into_iter()
.take(playlist_length)
.collect::<Vec<Song>>())
}
/// Analyze and store songs in `paths`, using `store_song` and
/// `store_error_song` implementations.
///
/// Note: this is mostly useful for updating a song library. For the first
/// run, you probably want to use `analyze_library`.
fn analyze_paths(&mut self, paths: Vec<String>) -> Result<(), BlissError> {
if paths.is_empty() {
return Ok(());
}
let num_cpus = num_cpus::get();
let (tx, rx): (
Sender<(String, Result<Song, BlissError>)>,
Receiver<(String, Result<Song, BlissError>)>,
) = mpsc::channel();
let mut handles = Vec::new();
let mut chunk_length = paths.len() / num_cpus;
if chunk_length == 0 {
chunk_length = paths.len();
}
for chunk in paths.chunks(chunk_length) {
let tx_thread = tx.clone();
let owned_chunk = chunk.to_owned();
let child = thread::spawn(move || {
for path in owned_chunk {
info!("Analyzing file '{}'", path);
let song = Song::new(&path);
tx_thread.send((path.to_string(), song)).unwrap();
}
drop(tx_thread);
});
handles.push(child);
}
drop(tx);
for (path, song) in rx.iter() {
// A storage fail should just warn the user, but not abort the whole process
match song {
Ok(song) => {
self.store_song(&song)
.unwrap_or_else(|_| error!("Error while storing song '{}'", (&song).path));
info!("Analyzed and stored song '{}' successfully.", (&song).path)
}
Err(e) => {
self.store_error_song(path.to_string(), e)
.unwrap_or_else(|e| {
error!("Error while storing errored song '{}': {}", path, e)
});
warn!("Analysis of song '{}' failed. Storing error.", path)
}
}
}
for child in handles {
child
.join()
.map_err(|_| BlissError::AnalysisError(format!("in analysis")))?;
}
Ok(())
}
/// Analyzes a song library, using `get_songs_paths`, `store_song` and
/// `store_error_song` implementations.
fn analyze_library(&mut self) -> Result<(), BlissError> {
let paths = self
.get_songs_paths()
.map_err(|e| BlissError::ProviderError(e.to_string()))?;
self.analyze_paths(paths)?;
Ok(())
}
}
#[cfg(test)]
mod test {
use super::*;
#[derive(Default)]
struct TestLibrary {
internal_storage: Vec<Song>,
failed_files: Vec<(String, String)>,
}
impl Library for TestLibrary {
fn get_songs_paths(&self) -> Result<Vec<String>, BlissError> {
Ok(vec![
String::from("./data/white_noise.flac"),
String::from("./data/s16_mono_22_5kHz.flac"),
String::from("not-existing.foo"),
String::from("definitely-not-existing.foo"),
])
}
fn store_song(&mut self, song: &Song) -> Result<(), BlissError> {
self.internal_storage.push(song.to_owned());
Ok(())
}
fn store_error_song(
&mut self,
song_path: String,
error: BlissError,
) -> Result<(), BlissError> {
self.failed_files.push((song_path, error.to_string()));
Ok(())
}
fn get_stored_songs(&self) -> Result<Vec<Song>, BlissError> {
Ok(self.internal_storage.to_owned())
}
}
#[derive(Default)]
struct FailingLibrary;
impl Library for FailingLibrary {
fn get_songs_paths(&self) -> Result<Vec<String>, BlissError> {
Err(BlissError::ProviderError(String::from(
"Could not get songs path",
)))
}
fn store_song(&mut self, _: &Song) -> Result<(), BlissError> {
Ok(())
}
fn get_stored_songs(&self) -> Result<Vec<Song>, BlissError> {
Err(BlissError::ProviderError(String::from(
"Could not get stored songs",
)))
}
fn store_error_song(&mut self, _: String, _: BlissError) -> Result<(), BlissError> {
Ok(())
}
}
#[derive(Default)]
struct FailingStorage;
impl Library for FailingStorage {
fn get_songs_paths(&self) -> Result<Vec<String>, BlissError> {
Ok(vec![
String::from("./data/white_noise.flac"),
String::from("./data/s16_mono_22_5kHz.flac"),
String::from("not-existing.foo"),
String::from("definitely-not-existing.foo"),
])
}
fn store_song(&mut self, song: &Song) -> Result<(), BlissError> {
Err(BlissError::ProviderError(format!(
"Could not store song {}",
song.path
)))
}
fn get_stored_songs(&self) -> Result<Vec<Song>, BlissError> {
Ok(vec![])
}
fn store_error_song(
&mut self,
song_path: String,
error: BlissError,
) -> Result<(), BlissError> {
Err(BlissError::ProviderError(format!(
"Could not store errored song: {}, with error: {}",
song_path, error
)))
}
}
#[test]
fn test_analyze_library_fail() {
let mut test_library = FailingLibrary {};
assert_eq!(
test_library.analyze_library(),
Err(BlissError::ProviderError(String::from(
"Error happened with the music library provider - Could not get songs path"
))),
);
}
#[test]
fn test_playlist_from_song_fail() {
let test_library = FailingLibrary {};
let song = Song {
path: String::from("path-to-first"),
analysis: vec![0., 0., 0.],
..Default::default()
};
assert_eq!(
test_library.playlist_from_song(song, 10),
Err(BlissError::ProviderError(String::from(
"Could not get stored songs"
))),
);
}
#[test]
fn test_analyze_library_fail_storage() {
let mut test_library = FailingStorage {};
// A storage fail should just warn the user, but not abort the whole process
assert!(test_library.analyze_library().is_ok())
}
#[test]
fn test_analyze_library() {
let mut test_library = TestLibrary {
internal_storage: vec![],
failed_files: vec![],
};
test_library.analyze_library().unwrap();
let mut failed_files = test_library
.failed_files
.iter()
.map(|x| x.0.to_owned())
.collect::<Vec<String>>();
failed_files.sort();
assert_eq!(
failed_files,
vec![
String::from("definitely-not-existing.foo"),
String::from("not-existing.foo"),
],
);
let mut songs = test_library
.internal_storage
.iter()
.map(|x| x.path.to_owned())
.collect::<Vec<String>>();
songs.sort();
assert_eq!(
songs,
vec![
String::from("./data/s16_mono_22_5kHz.flac"),
String::from("./data/white_noise.flac"),
],
);
test_library
.internal_storage
.iter()
.for_each(|x| assert!(x.analysis.len() > 0));
}
#[test]
fn test_playlist_from_song() {
let mut test_library = TestLibrary::default();
let first_song = Song {
path: String::from("path-to-first"),
analysis: vec![0., 0., 0.],
..Default::default()
};
let second_song = Song {
path: String::from("path-to-second"),
analysis: vec![0.1, 0., 0.],
..Default::default()
};
let third_song = Song {
path: String::from("path-to-third"),
analysis: vec![10., 11., 10.],
..Default::default()
};
let fourth_song = Song {
path: String::from("path-to-fourth"),
analysis: vec![20., 21., 20.],
..Default::default()
};
test_library.internal_storage = vec![
first_song.to_owned(),
fourth_song.to_owned(),
third_song.to_owned(),
second_song.to_owned(),
];
assert_eq!(
vec![first_song.to_owned(), second_song, third_song],
test_library.playlist_from_song(first_song, 3).unwrap()
);
}
#[test]
fn test_playlist_from_song_too_little_songs() {
let mut test_library = TestLibrary::default();
let first_song = Song {
path: String::from("path-to-first"),
analysis: vec![0., 0., 0.],
..Default::default()
};
let second_song = Song {
path: String::from("path-to-second"),
analysis: vec![0.1, 0., 0.],
..Default::default()
};
let third_song = Song {
path: String::from("path-to-third"),
analysis: vec![10., 11., 10.],
..Default::default()
};
test_library.internal_storage = vec![
first_song.to_owned(),
second_song.to_owned(),
third_song.to_owned(),
];
assert_eq!(
vec![first_song.to_owned(), second_song, third_song],
test_library.playlist_from_song(first_song, 200).unwrap()
);
}
#[test]
fn test_analyze_empty_path() {
let mut test_library = TestLibrary::default();
assert!(test_library.analyze_paths(vec![]).is_ok());
}
}

108
src/misc.rs Normal file

@@ -0,0 +1,108 @@
//! Miscellaneous feature extraction module.
//!
//! Contains various descriptors that don't fit in one of the
//! existing categories.
extern crate aubio_lib;
use aubio_rs::level_lin;
use ndarray::{arr1, Axis};
use super::utils::{mean, Normalize};
/**
* Loudness (in dB) detection object.
*
* It indicates how "loud" a recording of a song is. For a given audio signal,
* this value increases if the amplitude of the signal, and nothing else, is
* increased.
*
* Of course, this makes the result dependent on the recording, meaning
* the same song can yield a different loudness on different recordings, which
* is exactly what we want, given that this is not a music theory project, but
* one that aims at giving the best real-life results.
*
* Ranges between -90 dB (~silence) and 0 dB.
*
* (This is technically the sound pressure level of the track, but loudness is
* a more intuitive name.)
*/
#[derive(Default)]
pub(crate) struct LoudnessDesc {
pub values: Vec<f32>,
}
impl LoudnessDesc {
pub const WINDOW_SIZE: usize = 1024;
pub fn do_(&mut self, chunk: &[f32]) {
let level = level_lin(chunk);
self.values.push(level);
}
pub fn get_value(&mut self) -> Vec<f32> {
let mut std_value = arr1(&self.values).std_axis(Axis(0), 0.).into_scalar();
let mut mean_value = mean(&self.values);
// Make sure the dB don't go less than -90dB
if mean_value < 1e-9 {
mean_value = 1e-9
};
if std_value < 1e-9 {
std_value = 1e-9
}
vec![
self.normalize(10.0 * mean_value.log10()),
self.normalize(10.0 * std_value.log10()),
]
}
}
impl Normalize for LoudnessDesc {
const MAX_VALUE: f32 = 0.;
const MIN_VALUE: f32 = -90.;
}
#[cfg(test)]
mod tests {
use super::*;
use crate::Song;
#[test]
fn test_loudness() {
let song = Song::decode("data/s16_mono_22_5kHz.flac").unwrap();
let mut loudness_desc = LoudnessDesc::default();
for chunk in song.sample_array.chunks_exact(LoudnessDesc::WINDOW_SIZE) {
loudness_desc.do_(&chunk);
}
let expected_values = vec![0.271263, 0.2577181];
for (expected, actual) in expected_values.iter().zip(loudness_desc.get_value().iter()) {
assert!(0.01 > (expected - actual).abs());
}
}
#[test]
fn test_loudness_boundaries() {
let mut loudness_desc = LoudnessDesc::default();
let silence_chunk = vec![0.; 1024];
loudness_desc.do_(&silence_chunk);
let expected_values = vec![-1., -1.];
for (expected, actual) in expected_values.iter().zip(loudness_desc.get_value().iter()) {
assert!(0.0000001 > (expected - actual).abs());
}
let mut loudness_desc = LoudnessDesc::default();
let silence_chunk = vec![1.; 1024];
loudness_desc.do_(&silence_chunk);
let expected_values = vec![1., -1.];
for (expected, actual) in expected_values.iter().zip(loudness_desc.get_value().iter()) {
assert!(0.0000001 > (expected - actual).abs());
}
let mut loudness_desc = LoudnessDesc::default();
let silence_chunk = vec![-1.; 1024];
loudness_desc.do_(&silence_chunk);
let expected_values = vec![1., -1.];
for (expected, actual) in expected_values.iter().zip(loudness_desc.get_value().iter()) {
assert!(0.0000001 > (expected - actual).abs());
}
}
}

606
src/song.rs Normal file

@@ -0,0 +1,606 @@
//! Song decoding / analysis module.
//!
//! Use decoding, and features-extraction functions from other modules
//! e.g. tempo features, spectral features, etc to build a Song and its
//! corresponding Analysis.
extern crate aubio_lib;
extern crate crossbeam;
extern crate ffmpeg_next as ffmpeg;
extern crate ndarray;
extern crate ndarray_npy;
use super::CHANNELS;
use crate::chroma::ChromaDesc;
use crate::misc::LoudnessDesc;
use crate::temporal::BPMDesc;
use crate::timbral::{SpectralDesc, ZeroCrossingRateDesc};
use crate::{BlissError, Song, SAMPLE_RATE};
use ::log::warn;
use crossbeam::thread;
use ffmpeg_next::codec::threading::{Config, Type as ThreadingType};
use ffmpeg_next::software::resampling::context::Context;
use ffmpeg_next::util;
use ffmpeg_next::util::channel_layout::ChannelLayout;
use ffmpeg_next::util::error::Error;
use ffmpeg_next::util::error::EINVAL;
use ffmpeg_next::util::format::sample::{Sample, Type};
use ffmpeg_next::util::frame::audio::Audio;
use ffmpeg_next::util::log;
use ffmpeg_next::util::log::level::Level;
use ndarray::{arr1, Array};
use std::sync::mpsc;
use std::sync::mpsc::Receiver;
use std::thread as std_thread;
fn push_to_sample_array(frame: &ffmpeg::frame::Audio, sample_array: &mut Vec<f32>) {
if frame.samples() == 0 {
return;
}
// Account for the padding
let actual_size = util::format::sample::Buffer::size(
Sample::F32(Type::Packed),
CHANNELS,
frame.samples(),
false,
);
let f32_frame: Vec<f32> = frame.data(0)[..actual_size]
.chunks_exact(4)
.map(|x| {
let mut a: [u8; 4] = [0; 4];
a.copy_from_slice(x);
f32::from_le_bytes(a)
})
.collect();
sample_array.extend_from_slice(&f32_frame);
}
#[derive(Default, Debug)]
pub(crate) struct InternalSong {
pub path: String,
pub artist: String,
pub title: String,
pub album: String,
pub track_number: String,
pub genre: String,
pub sample_array: Vec<f32>,
}
fn resample_frame(
rx: Receiver<Audio>,
mut resample_context: Context,
mut sample_array: Vec<f32>,
) -> Result<Vec<f32>, BlissError> {
let mut resampled = ffmpeg::frame::Audio::empty();
for decoded in rx.iter() {
resample_context
.run(&decoded, &mut resampled)
.map_err(|e| {
BlissError::DecodingError(format!("while trying to resample song: {:?}", e))
})?;
push_to_sample_array(&resampled, &mut sample_array);
}
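// The resampler can hold samples back internally; drain it until it reports
// that nothing is left.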
loop {
match resample_context.flush(&mut resampled).map_err(|e| {
BlissError::DecodingError(format!("while trying to resample song: {:?}", e))
})? {
Some(_) => {
push_to_sample_array(&resampled, &mut sample_array);
}
None => {
if resampled.samples() == 0 {
break;
}
push_to_sample_array(&resampled, &mut sample_array);
}
};
}
Ok(sample_array)
}
impl Song {
#[allow(dead_code)]
pub fn distance(&self, other: &Self) -> f32 {
let a1 = arr1(&self.analysis.to_vec());
let a2 = arr1(&other.analysis.to_vec());
// Could be any square symmetric positive semi-definite matrix;
// just no metric learning has been done yet.
// See https://lelele.io/thesis.pdf chapter 4.
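// With m set to the identity, the computation below reduces to the plain
// squared euclidean distance between the two feature vectors:
// d(a, b) = (a - b)^T * m * (a - b).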
let m = Array::eye(self.analysis.len());
(arr1(&self.analysis) - &a2).dot(&m).dot(&(&a1 - &a2))
}
pub fn new(path: &str) -> Result<Self, BlissError> {
let raw_song = Song::decode(&path)?;
Ok(Song {
path: raw_song.path,
artist: raw_song.artist,
title: raw_song.title,
album: raw_song.album,
track_number: raw_song.track_number,
genre: raw_song.genre,
analysis: Song::analyse(raw_song.sample_array)?,
})
}
/**
* Analyse a song decoded in `sample_array`, with one channel @ 22050 Hz.
*
* The current implementation doesn't make use of it, but the song
* could also be streamed through each descriptor (with the exception
* of the chroma descriptor, which yields worse results when streamed).
*
* This is useful in the rare cases where the full song is not
* completely available.
**/
fn analyse(sample_array: Vec<f32>) -> Result<Vec<f32>, BlissError> {
let largest_window = vec![
BPMDesc::WINDOW_SIZE,
ChromaDesc::WINDOW_SIZE,
SpectralDesc::WINDOW_SIZE,
LoudnessDesc::WINDOW_SIZE,
]
.into_iter()
.max()
.unwrap();
if sample_array.len() < largest_window {
return Err(BlissError::AnalysisError(String::from(
"empty or too short song.",
)));
}
thread::scope(|s| {
let child_tempo: thread::ScopedJoinHandle<'_, Result<f32, BlissError>> =
s.spawn(|_| {
let mut tempo_desc = BPMDesc::new(SAMPLE_RATE)?;
let windows = sample_array
.windows(BPMDesc::WINDOW_SIZE)
.step_by(BPMDesc::HOP_SIZE);
for window in windows {
tempo_desc.do_(&window)?;
}
Ok(tempo_desc.get_value())
});
let child_chroma: thread::ScopedJoinHandle<'_, Result<Vec<f32>, BlissError>> =
s.spawn(|_| {
let mut chroma_desc = ChromaDesc::new(SAMPLE_RATE, 12);
chroma_desc.do_(&sample_array)?;
Ok(chroma_desc.get_values())
});
let child_timbral: thread::ScopedJoinHandle<
'_,
Result<(Vec<f32>, Vec<f32>, Vec<f32>), BlissError>,
> = s.spawn(|_| {
let mut spectral_desc = SpectralDesc::new(SAMPLE_RATE)?;
let windows = sample_array
.windows(SpectralDesc::WINDOW_SIZE)
.step_by(SpectralDesc::HOP_SIZE);
for window in windows {
spectral_desc.do_(&window)?;
}
let centroid = spectral_desc.get_centroid();
let rolloff = spectral_desc.get_rolloff();
let flatness = spectral_desc.get_flatness();
Ok((centroid, rolloff, flatness))
});
let child_zcr: thread::ScopedJoinHandle<'_, Result<f32, BlissError>> = s.spawn(|_| {
let mut zcr_desc = ZeroCrossingRateDesc::default();
zcr_desc.do_(&sample_array);
Ok(zcr_desc.get_value())
});
let child_loudness: thread::ScopedJoinHandle<'_, Result<Vec<f32>, BlissError>> = s
.spawn(|_| {
let mut loudness_desc = LoudnessDesc::default();
let windows = sample_array.chunks(LoudnessDesc::WINDOW_SIZE);
for window in windows {
loudness_desc.do_(&window);
}
Ok(loudness_desc.get_value())
});
// Non-streaming approach for that one
let tempo = child_tempo.join().unwrap()?;
let chroma = child_chroma.join().unwrap()?;
let (centroid, rolloff, flatness) = child_timbral.join().unwrap()?;
let loudness = child_loudness.join().unwrap()?;
let zcr = child_zcr.join().unwrap()?;
let mut result = vec![tempo, zcr];
result.extend_from_slice(&centroid);
result.extend_from_slice(&rolloff);
result.extend_from_slice(&flatness);
result.extend_from_slice(&loudness);
result.extend_from_slice(&chroma);
Ok(result)
})
.unwrap()
}
pub(crate) fn decode(path: &str) -> Result<InternalSong, BlissError> {
ffmpeg::init()
.map_err(|e| BlissError::DecodingError(format!("ffmpeg init error: {:?}.", e)))?;
log::set_level(Level::Quiet);
let mut song = InternalSong {
path: path.to_string(),
..Default::default()
};
let mut format = ffmpeg::format::input(&path)
.map_err(|e| BlissError::DecodingError(format!("while opening format: {:?}.", e)))?;
let (mut codec, stream, expected_sample_number) = {
let stream = format
.streams()
.find(|s| s.codec().medium() == ffmpeg::media::Type::Audio)
.ok_or(BlissError::DecodingError(String::from(
"No audio stream found.",
)))?;
stream.codec().set_threading(Config {
kind: ThreadingType::Frame,
count: 0,
safe: true,
});
let codec =
stream.codec().decoder().audio().map_err(|e| {
BlissError::DecodingError(format!("when finding codec: {:?}.", e))
})?;
// Add SAMPLE_RATE to have one second margin to avoid reallocating if
// the duration is slightly more than estimated
// TODO>1.0 another way to get the exact number of samples is to decode
// everything once, compute the real number of samples from that,
// allocate the array with that number, and decode again. Check
// what's faster between reallocating, and just have one second
// leeway.
let expected_sample_number = (SAMPLE_RATE as f32 * stream.duration() as f32
/ stream.time_base().denominator() as f32)
.ceil()
+ SAMPLE_RATE as f32;
(codec, stream.index(), expected_sample_number)
};
let sample_array: Vec<f32> = Vec::with_capacity(expected_sample_number as usize);
if let Some(title) = format.metadata().get("title") {
song.title = title.to_string();
};
if let Some(artist) = format.metadata().get("artist") {
song.artist = artist.to_string();
};
if let Some(album) = format.metadata().get("album") {
song.album = album.to_string();
};
if let Some(genre) = format.metadata().get("genre") {
song.genre = genre.to_string();
};
if let Some(track_number) = format.metadata().get("track") {
song.track_number = track_number.to_string();
};
let in_channel_layout = {
if codec.channel_layout() == ChannelLayout::empty() {
ChannelLayout::default(codec.channels().into())
} else {
codec.channel_layout()
}
};
codec.set_channel_layout(in_channel_layout);
let resample_context = ffmpeg::software::resampling::context::Context::get(
codec.format(),
in_channel_layout,
codec.rate(),
Sample::F32(Type::Packed),
ffmpeg::util::channel_layout::ChannelLayout::MONO,
SAMPLE_RATE,
)
.map_err(|e| {
BlissError::DecodingError(format!(
"while trying to allocate resampling context: {:?}",
e
))
})?;
let (tx, rx) = mpsc::channel();
let child = std_thread::spawn(move || resample_frame(rx, resample_context, sample_array));
for (s, packet) in format.packets() {
if s.index() != stream {
continue;
}
match codec.send_packet(&packet) {
Ok(_) => (),
Err(Error::Other { errno: EINVAL }) => {
return Err(BlissError::DecodingError(String::from(
"wrong codec opened.",
)))
}
Err(Error::Eof) => {
warn!("Premature EOF reached while decoding.");
drop(tx);
song.sample_array = child.join().unwrap()?;
return Ok(song);
}
Err(e) => warn!("decoding error: {}", e),
};
loop {
let mut decoded = ffmpeg::frame::Audio::empty();
match codec.receive_frame(&mut decoded) {
Ok(_) => {
tx.send(decoded).map_err(|e| {
BlissError::DecodingError(format!(
"while sending decoded frame to the resampling thread: {:?}",
e
))
})?;
}
Err(_) => break,
}
}
}
// Flush the stream
let packet = ffmpeg::codec::packet::Packet::empty();
match codec.send_packet(&packet) {
Ok(_) => (),
Err(Error::Other { errno: EINVAL }) => {
return Err(BlissError::DecodingError(String::from(
"wrong codec opened.",
)))
}
Err(Error::Eof) => {
warn!("Premature EOF reached while decoding.");
drop(tx);
song.sample_array = child.join().unwrap()?;
return Ok(song);
}
Err(e) => warn!("decoding error: {}", e),
};
loop {
let mut decoded = ffmpeg::frame::Audio::empty();
match codec.receive_frame(&mut decoded) {
Ok(_) => {
tx.send(decoded).map_err(|e| {
BlissError::DecodingError(format!(
"while sending decoded frame to the resampling thread: {:?}",
e
))
})?;
}
Err(_) => break,
}
}
drop(tx);
song.sample_array = child.join().unwrap()?;
Ok(song)
}
}
#[cfg(test)]
mod tests {
use super::*;
use ripemd160::{Digest, Ripemd160};
#[test]
fn test_analysis_too_small() {
let error = Song::analyse(vec![0.]).unwrap_err();
assert_eq!(
error,
BlissError::AnalysisError(String::from("empty or too short song."))
);
let error = Song::analyse(vec![]).unwrap_err();
assert_eq!(
error,
BlissError::AnalysisError(String::from("empty or too short song."))
);
}
#[test]
fn test_analyse() {
let song = Song::new("data/s16_mono_22_5kHz.flac").unwrap();
let expected_analysis = vec![
0.3846389,
-0.849141,
-0.75481045,
-0.8790748,
-0.63258266,
-0.7258959,
-0.775738,
-0.8146726,
0.2716726,
0.25779057,
-0.35661936,
-0.63578653,
-0.29593682,
0.06421304,
0.21852458,
-0.581239,
-0.9466835,
-0.9481153,
-0.9820945,
-0.95968974,
];
for (x, y) in song.analysis.iter().zip(expected_analysis) {
assert!(0.01 > (x - y).abs());
}
}
fn _test_decode(path: &str, expected_hash: &[u8]) {
let song = Song::decode(path).unwrap();
let mut hasher = Ripemd160::new();
for sample in song.sample_array.iter() {
hasher.update(sample.to_le_bytes().to_vec());
}
assert_eq!(expected_hash, hasher.finalize().as_slice());
}
#[test]
fn test_tags() {
let song = Song::decode("data/s16_mono_22_5kHz.flac").unwrap();
assert_eq!(song.artist, "David TMX");
assert_eq!(song.title, "Renaissance");
assert_eq!(song.album, "Renaissance");
assert_eq!(song.track_number, "02");
assert_eq!(song.genre, "Pop");
}
#[test]
fn test_resample_multi() {
let path = String::from("data/s32_stereo_44_1_kHz.flac");
let expected_hash = [
0xc5, 0xf8, 0x23, 0xce, 0x63, 0x2c, 0xf4, 0xa0, 0x72, 0x66, 0xbb, 0x49, 0xad, 0x84,
0xb6, 0xea, 0x48, 0x48, 0x9c, 0x50,
];
_test_decode(&path, &expected_hash);
}
#[test]
fn test_resample_stereo() {
let path = String::from("data/s16_stereo_22_5kHz.flac");
let expected_hash = [
0x24, 0xed, 0x45, 0x58, 0x06, 0xbf, 0xfb, 0x05, 0x57, 0x5f, 0xdc, 0x4d, 0xb4, 0x9b,
0xa5, 0x2b, 0x05, 0x56, 0x10, 0x4f,
];
_test_decode(&path, &expected_hash);
}
#[test]
fn test_decode_mono() {
let path = String::from("data/s16_mono_22_5kHz.flac");
// Obtained through
// ffmpeg -i data/s16_mono_22_5kHz.flac -ar 22050 -ac 1 -c:a pcm_f32le
// -f hash -hash ripemd160 -
let expected_hash = [
0x9d, 0x95, 0xa5, 0xf2, 0xd2, 0x9c, 0x68, 0xe8, 0x8a, 0x70, 0xcd, 0xf3, 0x54, 0x2c,
0x5b, 0x45, 0x98, 0xb4, 0xf3, 0xb4,
];
_test_decode(&path, &expected_hash);
}
#[test]
fn test_decode_mp3() {
let path = String::from("data/s32_stereo_44_1_kHz.mp3");
// Obtained through
// ffmpeg -i data/s32_stereo_44_1_kHz.mp3 -ar 22050 -ac 1 -c:a pcm_f32le
// -f hash -hash ripemd160 -
let expected_hash = [
0x28, 0x25, 0x6b, 0x7b, 0x6e, 0x37, 0x1c, 0xcf, 0xc7, 0x06, 0xdf, 0x62, 0x8c, 0x0e,
0x91, 0xf7, 0xd6, 0x1f, 0xac, 0x5b,
];
_test_decode(&path, &expected_hash);
}
#[test]
fn test_dont_panic_no_channel_layout() {
let path = String::from("data/no_channel.wav");
Song::decode(&path).unwrap();
}
#[test]
fn test_decode_right_capacity_vec() {
let path = String::from("data/s16_mono_22_5kHz.flac");
let song = Song::decode(&path).unwrap();
let sample_array = song.sample_array;
assert_eq!(
sample_array.len() + SAMPLE_RATE as usize,
sample_array.capacity()
);
let path = String::from("data/s32_stereo_44_1_kHz.flac");
let song = Song::decode(&path).unwrap();
let sample_array = song.sample_array;
assert_eq!(
sample_array.len() + SAMPLE_RATE as usize,
sample_array.capacity()
);
let path = String::from("data/capacity_fix.ogg");
let song = Song::decode(&path).unwrap();
let sample_array = song.sample_array;
assert!(sample_array.len() as f32 / sample_array.capacity() as f32 > 0.90);
assert!(sample_array.len() as f32 / (sample_array.capacity() as f32) < 1.);
}
#[test]
fn test_analysis_distance() {
let mut a = Song::default();
a.analysis = vec![
0.37860596,
-0.75483,
-0.85036564,
-0.6326486,
-0.77610075,
0.27126348,
-1.,
0.,
1.,
];
let mut b = Song::default();
b.analysis = vec![
0.31255,
0.15483,
-0.15036564,
-0.0326486,
-0.87610075,
-0.27126348,
1.,
0.,
1.,
];
assert_eq!(a.distance(&b), 5.986180)
}
#[test]
fn test_analysis_distance_indiscernible() {
let mut a = Song::default();
a.analysis = vec![
0.37860596,
-0.75483,
-0.85036564,
-0.6326486,
-0.77610075,
0.27126348,
-1.,
0.,
1.,
];
assert_eq!(a.distance(&a), 0.)
}
#[test]
fn test_decode_errors() {
assert_eq!(
Song::decode("nonexistent").unwrap_err(),
BlissError::DecodingError(String::from(
"while opening format: ffmpeg::Error(2: No such file or directory)."
)),
);
assert_eq!(
Song::decode("data/picture.png").unwrap_err(),
BlissError::DecodingError(String::from("No audio stream found.")),
);
}
}
#[cfg(all(feature = "bench", test))]
mod bench {
extern crate test;
use crate::Song;
use test::Bencher;
#[bench]
fn bench_resample_multi(b: &mut Bencher) {
let path = String::from("./data/s32_stereo_44_1_kHz.flac");
b.iter(|| {
Song::decode(&path).unwrap();
});
}
}

158
src/temporal.rs Normal file

@@ -0,0 +1,158 @@
//! Temporal feature extraction module.
//!
//! Contains functions to extract & summarize the temporal aspects
//! of a given Song.
extern crate aubio_lib;
use crate::utils::Normalize;
use crate::BlissError;
use aubio_rs::{OnsetMode, Tempo};
use log::warn;
use ndarray::arr1;
use ndarray_stats::interpolate::Midpoint;
use ndarray_stats::Quantile1dExt;
use noisy_float::prelude::*;
/**
* Beats per minutes ([BPM](https://en.wikipedia.org/wiki/Tempo#Measurement))
* detection object.
*
* It indicates the (subjective) "speed" of a music piece. The higher the BPM,
* the "quicker" the song will feel.
*
* It uses `SpecFlux`, a spectral-flux-based onset detection function, to
* perform onset detection; it proved to be the best for finding out the BPM of
* a panel of songs I had, but it could very well be replaced by something
* better in the future.
*
* Ranges from 0 (theoretically...) to 206 BPM. (Even though aubio apparently
* has trouble identifying tempos > 190 BPM; I did not investigate too much.)
*
*/
pub(crate) struct BPMDesc {
aubio_obj: Tempo,
bpms: Vec<f32>,
}
// TODO>1.0 use the confidence value to discard this descriptor if confidence
// is too low.
impl BPMDesc {
pub const WINDOW_SIZE: usize = 512;
pub const HOP_SIZE: usize = BPMDesc::WINDOW_SIZE / 2;
pub fn new(sample_rate: u32) -> Result<Self, BlissError> {
Ok(BPMDesc {
aubio_obj: Tempo::new(
OnsetMode::SpecFlux,
BPMDesc::WINDOW_SIZE,
BPMDesc::HOP_SIZE,
sample_rate,
)
.map_err(|e| {
BlissError::AnalysisError(format!(
"error while loading aubio tempo object: {}",
e.to_string()
))
})?,
bpms: Vec::new(),
})
}
pub fn do_(&mut self, chunk: &[f32]) -> Result<(), BlissError> {
let result = self.aubio_obj.do_result(chunk).map_err(|e| {
BlissError::AnalysisError(format!(
"aubio error while computing tempo {}",
e.to_string()
))
})?;
if result > 0. {
self.bpms.push(self.aubio_obj.get_bpm());
}
Ok(())
}
/**
 * Compute the score related to tempo.
 * Right now, it basically returns the song's median BPM, normalized
 * between -1 and 1.
 */
pub fn get_value(&mut self) -> f32 {
if self.bpms.is_empty() {
warn!("Set tempo value to zero BPM (-1 normalized) because no beats were found.");
return -1.;
}
let median = arr1(&self.bpms)
.mapv(n32)
.quantile_mut(n64(0.5), &Midpoint)
.unwrap();
self.normalize(median.into())
}
}
impl Normalize for BPMDesc {
// See aubio/src/tempo/beattracking.c:387
// Should really be 413, needs testing
const MAX_VALUE: f32 = 206.;
const MIN_VALUE: f32 = 0.;
}
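// A minimal usage sketch (the `tempo_sketch` name, the 22050 Hz sample rate and
// the `sample_array` parameter are illustrative assumptions, not part of the API):
#[allow(dead_code)]
fn tempo_sketch(sample_array: &[f32]) -> Result<f32, BlissError> {
    let mut tempo_desc = BPMDesc::new(22050)?;
    for chunk in sample_array.chunks_exact(BPMDesc::HOP_SIZE) {
        tempo_desc.do_(chunk)?;
    }
    // Median of the BPM candidates seen so far, mapped from [0, 206] to [-1, 1].
    Ok(tempo_desc.get_value())
}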
#[cfg(test)]
mod tests {
use super::*;
use crate::{Song, SAMPLE_RATE};
#[test]
fn test_tempo_real() {
let song = Song::decode("data/s16_mono_22_5kHz.flac").unwrap();
let mut tempo_desc = BPMDesc::new(SAMPLE_RATE).unwrap();
for chunk in song.sample_array.chunks_exact(BPMDesc::HOP_SIZE) {
tempo_desc.do_(&chunk).unwrap();
}
assert!(0.01 > (0.378605 - tempo_desc.get_value()).abs());
}
#[test]
fn test_tempo_artificial() {
let mut tempo_desc = BPMDesc::new(22050).unwrap();
// This gives one beat every second, so 60 BPM
let mut one_chunk = vec![0.; 22000];
one_chunk.append(&mut vec![1.; 100]);
let chunks = std::iter::repeat(one_chunk.iter())
.take(100)
.flatten()
.cloned()
.collect::<Vec<f32>>();
for chunk in chunks.chunks_exact(BPMDesc::HOP_SIZE) {
tempo_desc.do_(&chunk).unwrap();
}
// -0.41 is 60 BPM normalized
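        // (2. * 60. / 206. - 1. ≈ -0.417, following the Normalize impl above.)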
assert!(0.01 > (-0.416853 - tempo_desc.get_value()).abs());
}
#[test]
fn test_tempo_boundaries() {
let mut tempo_desc = BPMDesc::new(10).unwrap();
let silence_chunk = vec![0.; 1024];
tempo_desc.do_(&silence_chunk).unwrap();
assert_eq!(-1., tempo_desc.get_value());
let mut tempo_desc = BPMDesc::new(22050).unwrap();
        // The highest value I could obtain was with these params: even though
        // the upper bound is apparently 206 BPM, I only found ~189 BPM here.
let mut one_chunk = vec![0.; 6989];
one_chunk.append(&mut vec![1.; 20]);
let chunks = std::iter::repeat(one_chunk.iter())
.take(500)
.flatten()
.cloned()
.collect::<Vec<f32>>();
for chunk in chunks.chunks_exact(BPMDesc::HOP_SIZE) {
tempo_desc.do_(&chunk).unwrap();
}
        // 0.86 is 192 BPM normalized (2. * 192. / 206. - 1. ≈ 0.864)
assert!(0.01 > (0.86 - tempo_desc.get_value()).abs());
}
}

463
src/timbral.rs Normal file
View file

@ -0,0 +1,463 @@
//! Timbral feature extraction module.
//!
//! Contains functions to extract & summarize the zero-crossing rate,
//! spectral centroid, spectral flatness and spectral roll-off of
//! a given Song.
extern crate aubio_lib;
use aubio_rs::vec::CVec;
use aubio_rs::{bin_to_freq, PVoc, SpecDesc, SpecShape};
use ndarray::{arr1, Axis};
use super::utils::{geometric_mean, mean, number_crossings, Normalize};
use crate::{BlissError, SAMPLE_RATE};
/**
 * General object holding all the spectral descriptors.
*
* Holds 3 spectral descriptors together. It would be better conceptually
* to have 3 different spectral descriptor objects, but this avoids re-computing
* the same FFT three times.
*
 * Current spectral descriptors are the spectral centroid, spectral roll-off and
 * spectral flatness (see the `get_centroid`, `get_rolloff` and `get_flatness`
 * methods for a further description of each).
 *
 * All descriptors are currently summarized by their mean and standard deviation.
*/
pub(crate) struct SpectralDesc {
phase_vocoder: PVoc,
sample_rate: u32,
centroid_aubio_desc: SpecDesc,
rolloff_aubio_desc: SpecDesc,
values_centroid: Vec<f32>,
values_rolloff: Vec<f32>,
values_flatness: Vec<f32>,
}
impl SpectralDesc {
pub const WINDOW_SIZE: usize = 512;
pub const HOP_SIZE: usize = SpectralDesc::WINDOW_SIZE / 4;
/**
* Compute score related to the
* [spectral centroid](https://en.wikipedia.org/wiki/Spectral_centroid) values,
* obtained after repeatedly calling `do_` on all of the song's chunks.
*
* Spectral centroid is used to determine the "brightness" of a sound, i.e.
* how much high frequency there is in an audio signal.
*
 * It of course depends on the instrument used: a piano-only track that makes
* use of high frequencies will still score less than a song using a lot of
* percussive sound, because the piano frequency range is lower.
*
* The value range is between 0 and `sample_rate / 2`.
*/
pub fn get_centroid(&mut self) -> Vec<f32> {
vec![
self.normalize(mean(&self.values_centroid)),
self.normalize(
arr1(&self.values_centroid)
.std_axis(Axis(0), 0.)
.into_scalar(),
),
]
}
/**
* Compute score related to the spectral roll-off values, obtained
* after repeatedly calling `do_` on all of the song's chunks.
*
 * Spectral roll-off is the frequency below which a certain percentage
 * (here, 95%) of the spectral energy is found.
*
* It can be used to distinguish voiced speech (low roll-off) and unvoiced
* speech (high roll-off). It is also a good indication of the energy
* repartition of a song.
*
* The value range is between 0 and `sample_rate / 2`
*/
pub fn get_rolloff(&mut self) -> Vec<f32> {
vec![
self.normalize(mean(&self.values_rolloff)),
self.normalize(
arr1(&self.values_rolloff)
.std_axis(Axis(0), 0.)
.into_scalar(),
),
]
}
/**
* Compute score related to the
* [spectral flatness](https://en.wikipedia.org/wiki/Spectral_flatness) values,
* obtained after repeatedly calling `do_` on all of the song's chunks.
*
* Spectral flatness is the ratio between the geometric mean of the spectrum
* and its arithmetic mean.
*
* It is used to distinguish between tone-like and noise-like signals.
 * Tone-like audio is, for example, a piano key, something that has one or more
* specific frequencies, while (white) noise has an equal distribution
* of intensity among all frequencies.
*
 * The value range is between 0 and 1, since the geometric mean is never
 * greater than the arithmetic mean.
*/
pub fn get_flatness(&mut self) -> Vec<f32> {
let max_value = 1.;
let min_value = 0.;
// Range is different from the other spectral algorithms, so normalizing
// manually here.
vec![
2. * (mean(&self.values_flatness) - min_value) / (max_value - min_value) - 1.,
2. * (arr1(&self.values_flatness)
.std_axis(Axis(0), 0.)
.into_scalar()
- min_value)
/ (max_value - min_value)
- 1.,
]
}
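    // Toy illustration of the ratio summarized above (magnitude spectra made up
    // for the example): a tone-like spectrum such as [10., 0.001, 0.001, ...]
    // has a geometric mean close to 0, hence a flatness close to 0, while a
    // flat, noise-like spectrum such as [1., 1., 1., ...] has equal geometric
    // and arithmetic means, hence a flatness of 1.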
pub fn new(sample_rate: u32) -> Result<Self, BlissError> {
Ok(SpectralDesc {
centroid_aubio_desc: SpecDesc::new(SpecShape::Centroid, SpectralDesc::WINDOW_SIZE)
.map_err(|e| {
BlissError::AnalysisError(format!(
"error while loading aubio centroid object: {}",
e.to_string()
))
})?,
rolloff_aubio_desc: SpecDesc::new(SpecShape::Rolloff, SpectralDesc::WINDOW_SIZE)
.map_err(|e| {
BlissError::AnalysisError(format!(
"error while loading aubio rolloff object: {}",
e.to_string()
))
})?,
phase_vocoder: PVoc::new(SpectralDesc::WINDOW_SIZE, SpectralDesc::HOP_SIZE).map_err(
|e| {
BlissError::AnalysisError(format!(
"error while loading aubio pvoc object: {}",
e.to_string()
))
},
)?,
values_centroid: Vec::new(),
values_rolloff: Vec::new(),
values_flatness: Vec::new(),
sample_rate,
})
}
/**
* Compute all the descriptors' value for the given chunk.
*
* After using this on all the song's chunks, you can call
* `get_centroid`, `get_flatness` and `get_rolloff` to get the respective
* descriptors' values.
*/
pub fn do_(&mut self, chunk: &[f32]) -> Result<(), BlissError> {
let mut fftgrain: Vec<f32> = vec![0.0; SpectralDesc::WINDOW_SIZE];
self.phase_vocoder
.do_(chunk, fftgrain.as_mut_slice())
.map_err(|e| {
BlissError::AnalysisError(format!(
"error while processing aubio pv object: {}",
e.to_string()
))
})?;
let bin = self
.centroid_aubio_desc
.do_result(fftgrain.as_slice())
.map_err(|e| {
BlissError::AnalysisError(format!(
"error while processing aubio centroid object: {}",
e.to_string()
))
})?;
let freq = bin_to_freq(
bin,
self.sample_rate as f32,
SpectralDesc::WINDOW_SIZE as f32,
);
self.values_centroid.push(freq);
let mut bin = self
.rolloff_aubio_desc
.do_result(fftgrain.as_slice())
.map_err(|e| {
BlissError::AnalysisError(format!("error while processing aubio rolloff object: {}", e))
})?;
// Until https://github.com/aubio/aubio/pull/318 is in
if bin > SpectralDesc::WINDOW_SIZE as f32 / 2. {
bin = SpectralDesc::WINDOW_SIZE as f32 / 2.;
}
let freq = bin_to_freq(
bin,
self.sample_rate as f32,
SpectralDesc::WINDOW_SIZE as f32,
);
self.values_rolloff.push(freq);
let cvec: CVec = fftgrain.as_slice().into();
let geo_mean = geometric_mean(&cvec.norm());
if geo_mean == 0.0 {
self.values_flatness.push(0.0);
return Ok(());
}
let flatness = geo_mean / mean(&cvec.norm());
self.values_flatness.push(flatness);
Ok(())
}
}
impl Normalize for SpectralDesc {
const MAX_VALUE: f32 = SAMPLE_RATE as f32 / 2.;
const MIN_VALUE: f32 = 0.;
}
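// A minimal usage sketch driving all three descriptors at once (the
// `spectral_features_sketch` name, the 22050 Hz sample rate and the
// `sample_array` parameter are illustrative assumptions, not part of the API):
#[allow(dead_code)]
fn spectral_features_sketch(sample_array: &[f32]) -> Result<Vec<f32>, BlissError> {
    let mut spectral_desc = SpectralDesc::new(22050)?;
    for chunk in sample_array.chunks_exact(SpectralDesc::HOP_SIZE) {
        spectral_desc.do_(chunk)?;
    }
    // Each getter returns a [normalized mean, normalized standard deviation] pair.
    let mut features = spectral_desc.get_centroid();
    features.extend(spectral_desc.get_rolloff());
    features.extend(spectral_desc.get_flatness());
    Ok(features)
}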
/**
* [Zero-crossing rate](https://en.wikipedia.org/wiki/Zero-crossing_rate)
* detection object.
*
* Zero-crossing rate is mostly used to detect percussive sounds in an audio
* signal, as well as whether an audio signal contains speech or not.
*
 * It is a good metric to differentiate between songs with people speaking
 * clearly (e.g. slam poetry) and instrumental songs.
*
* The value range is between 0 and 1.
*/
#[derive(Default)]
pub(crate) struct ZeroCrossingRateDesc {
values: Vec<u32>,
number_samples: usize,
}
impl ZeroCrossingRateDesc {
#[allow(dead_code)]
pub fn new(_sample_rate: u32) -> Self {
ZeroCrossingRateDesc::default()
}
/// Count the number of zero-crossings for the current `chunk`.
pub fn do_(&mut self, chunk: &[f32]) {
self.values.push(number_crossings(chunk));
self.number_samples += chunk.len();
}
/// Sum the number of zero-crossings witnessed and divide by
/// the total number of samples.
pub fn get_value(&mut self) -> f32 {
self.normalize((self.values.iter().sum::<u32>()) as f32 / self.number_samples as f32)
}
}
impl Normalize for ZeroCrossingRateDesc {
const MAX_VALUE: f32 = 1.;
const MIN_VALUE: f32 = 0.;
}
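// A quick illustrative sketch (the `zcr_sketch` name is an assumption, not part
// of the API), showing the rate and its normalization on a tiny alternating
// signal:
#[allow(dead_code)]
fn zcr_sketch() {
    let mut zcr_desc = ZeroCrossingRateDesc::default();
    zcr_desc.do_(&[1., -1., 1., -1.]);
    // 3 sign changes over 4 samples: a rate of 0.75, normalized to 2 * 0.75 - 1 = 0.5.
    assert!((zcr_desc.get_value() - 0.5).abs() < 0.000001);
}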
#[cfg(test)]
mod tests {
use super::*;
use crate::Song;
#[test]
fn test_zcr_boundaries() {
let mut zcr_desc = ZeroCrossingRateDesc::default();
let chunk = vec![0.; 1024];
zcr_desc.do_(&chunk);
assert_eq!(-1., zcr_desc.get_value());
let one_chunk = vec![-1., 1.];
let chunks = std::iter::repeat(one_chunk.iter())
.take(512)
.flatten()
.cloned()
.collect::<Vec<f32>>();
let mut zcr_desc = ZeroCrossingRateDesc::default();
zcr_desc.do_(&chunks);
assert!(0.001 > (0.9980469 - zcr_desc.get_value()).abs());
}
#[test]
fn test_zcr() {
let song = Song::decode("data/s16_mono_22_5kHz.flac").unwrap();
let mut zcr_desc = ZeroCrossingRateDesc::default();
for chunk in song.sample_array.chunks_exact(SpectralDesc::HOP_SIZE) {
zcr_desc.do_(&chunk);
}
assert!(0.001 > (-0.85036 - zcr_desc.get_value()).abs());
}
#[test]
fn test_spectral_flatness_boundaries() {
let mut spectral_desc = SpectralDesc::new(10).unwrap();
let chunk = vec![0.; 1024];
let expected_values = vec![-1., -1.];
spectral_desc.do_(&chunk).unwrap();
for (expected, actual) in expected_values
.iter()
.zip(spectral_desc.get_flatness().iter())
{
assert!(0.0000001 > (expected - actual).abs());
}
let song = Song::decode("data/white_noise.flac").unwrap();
let mut spectral_desc = SpectralDesc::new(22050).unwrap();
for chunk in song.sample_array.chunks_exact(SpectralDesc::HOP_SIZE) {
spectral_desc.do_(&chunk).unwrap();
}
// White noise - as close to 1 as possible
let expected_values = vec![0.6706717, -0.9685736];
for (expected, actual) in expected_values
.iter()
.zip(spectral_desc.get_flatness().iter())
{
assert!(0.001 > (expected - actual).abs());
}
}
#[test]
fn test_spectral_flatness() {
let song = Song::decode("data/s16_mono_22_5kHz.flac").unwrap();
let mut spectral_desc = SpectralDesc::new(SAMPLE_RATE).unwrap();
for chunk in song.sample_array.chunks_exact(SpectralDesc::HOP_SIZE) {
spectral_desc.do_(&chunk).unwrap();
}
// Spectral flatness mean value computed here with phase vocoder before normalization: 0.111949615
// Essentia value with spectrum / hann window: 0.11197535695207445
let expected_values = vec![-0.77610075, -0.8148179];
for (expected, actual) in expected_values
.iter()
.zip(spectral_desc.get_flatness().iter())
{
assert!(0.01 > (expected - actual).abs());
}
}
#[test]
fn test_spectral_roll_off_boundaries() {
let mut spectral_desc = SpectralDesc::new(10).unwrap();
let chunk = vec![0.; 512];
let expected_values = vec![-1., -1.];
spectral_desc.do_(&chunk).unwrap();
for (expected, actual) in expected_values
.iter()
.zip(spectral_desc.get_rolloff().iter())
{
assert!(0.0000001 > (expected - actual).abs());
}
let song = Song::decode("data/tone_11080Hz.flac").unwrap();
let mut spectral_desc = SpectralDesc::new(SAMPLE_RATE).unwrap();
for chunk in song.sample_array.chunks_exact(SpectralDesc::HOP_SIZE) {
spectral_desc.do_(&chunk).unwrap();
}
let expected_values = vec![0.9967681, -0.99615175];
for (expected, actual) in expected_values
.iter()
.zip(spectral_desc.get_rolloff().iter())
{
assert!(0.0001 > (expected - actual).abs());
}
}
#[test]
fn test_spectral_roll_off() {
let song = Song::decode("data/s16_mono_22_5kHz.flac").unwrap();
let mut spectral_desc = SpectralDesc::new(SAMPLE_RATE).unwrap();
for chunk in song.sample_array.chunks_exact(SpectralDesc::HOP_SIZE) {
spectral_desc.do_(&chunk).unwrap();
}
let expected_values = vec![-0.6326486, -0.7260933];
// Roll-off mean value computed here with phase vocoder before normalization: 2026.7644
// Essentia value with spectrum / hann window: 1979.632683520047
for (expected, actual) in expected_values
.iter()
.zip(spectral_desc.get_rolloff().iter())
{
assert!(0.01 > (expected - actual).abs());
}
}
#[test]
fn test_spectral_centroid() {
let song = Song::decode("data/s16_mono_22_5kHz.flac").unwrap();
let mut spectral_desc = SpectralDesc::new(SAMPLE_RATE).unwrap();
for chunk in song.sample_array.chunks_exact(SpectralDesc::HOP_SIZE) {
spectral_desc.do_(&chunk).unwrap();
}
// Spectral centroid mean value computed here with phase vocoder before normalization: 1354.2273
// Essentia value with spectrum / hann window: 1351
let expected_values = vec![-0.75483, -0.87916887];
for (expected, actual) in expected_values
.iter()
.zip(spectral_desc.get_centroid().iter())
{
assert!(0.0001 > (expected - actual).abs());
}
}
#[test]
fn test_spectral_centroid_boundaries() {
let mut spectral_desc = SpectralDesc::new(10).unwrap();
let chunk = vec![0.; 512];
spectral_desc.do_(&chunk).unwrap();
let expected_values = vec![-1., -1.];
for (expected, actual) in expected_values
.iter()
.zip(spectral_desc.get_centroid().iter())
{
assert!(0.0000001 > (expected - actual).abs());
}
let song = Song::decode("data/tone_11080Hz.flac").unwrap();
let mut spectral_desc = SpectralDesc::new(SAMPLE_RATE).unwrap();
for chunk in song.sample_array.chunks_exact(SpectralDesc::HOP_SIZE) {
spectral_desc.do_(&chunk).unwrap();
}
let expected_values = vec![0.97266, -0.9609926];
for (expected, actual) in expected_values
.iter()
.zip(spectral_desc.get_centroid().iter())
{
assert!(0.00001 > (expected - actual).abs());
}
}
}
#[cfg(all(feature = "bench", test))]
mod bench {
extern crate test;
use crate::timbral::{SpectralDesc, ZeroCrossingRateDesc};
use test::Bencher;
#[bench]
fn bench_spectral_desc(b: &mut Bencher) {
let mut spectral_desc = SpectralDesc::new(10).unwrap();
let chunk = vec![0.; 512];
b.iter(|| {
spectral_desc.do_(&chunk).unwrap();
});
}
#[bench]
fn bench_zcr_desc(b: &mut Bencher) {
let mut zcr_desc = ZeroCrossingRateDesc::new(10);
let chunk = vec![0.; 512];
b.iter(|| {
zcr_desc.do_(&chunk);
});
}
}

821
src/utils.rs Normal file
View file

@ -0,0 +1,821 @@
extern crate rustfft;
use ndarray::{arr1, s, Array, Array1, Array2};
use rustfft::num_complex::Complex;
use rustfft::num_traits::Zero;
use rustfft::FftPlanner;
extern crate ffmpeg_next as ffmpeg;
use log::warn;
use std::f32::consts::PI;
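// Mirror-pad `array` with `pad` samples on each side, reflecting around the
// first and last samples, e.g. reflect_pad(&[0., 1., 2., 3.], 2) gives
// [2., 1., 0., 1., 2., 3., 2., 1.].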
fn reflect_pad(array: &[f32], pad: usize) -> Vec<f32> {
let prefix = array[1..=pad].iter().rev().copied().collect::<Vec<f32>>();
let suffix = array[(array.len() - 2) - pad + 1..array.len() - 1]
.iter()
.rev()
.copied()
.collect::<Vec<f32>>();
let mut output = Vec::with_capacity(prefix.len() + array.len() + suffix.len());
output.extend(prefix);
output.extend(array);
output.extend(suffix);
output
}
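// Compute the magnitude spectrogram of `signal`: the signal is reflect-padded
// by `window_length / 2` on each side, each frame is windowed with a periodic
// Hann window, and the FFT magnitudes of each frame are stored column-wise.
// The returned array has shape (window_length / 2 + 1, ceil(signal.len() / hop_length)).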
pub(crate) fn stft(signal: &[f32], window_length: usize, hop_length: usize) -> Array2<f64> {
// Take advantage of row-major order to have a contiguous window for the
// `assign`, reversing the axes only at the end to get the expected shape.
let mut stft = Array2::zeros((
(signal.len() as f32 / hop_length as f32).ceil() as usize,
window_length / 2 + 1,
));
let signal = reflect_pad(&signal, window_length / 2);
// Periodic Hann window: allocate window_length + 1 samples, fill the first
// window_length of them, and drop the extra one.
let mut hann_window = Array::zeros(window_length + 1);
for n in 0..window_length {
hann_window[[n]] = 0.5 - 0.5 * f32::cos(2. * n as f32 * PI / (window_length as f32));
}
hann_window = hann_window.slice_move(s![0..window_length]);
let mut planner = FftPlanner::new();
let fft = planner.plan_fft_forward(window_length);
for (window, mut stft_col) in signal
.windows(window_length)
.step_by(hop_length)
.zip(stft.rows_mut())
{
let mut signal = (arr1(&window) * &hann_window).mapv(|x| Complex::new(x, 0.));
match signal.as_slice_mut() {
Some(s) => fft.process(s),
None => {
warn!("non-contiguous slice found for stft; expect slow performances.");
fft.process(&mut signal.to_vec());
}
};
stft_col.assign(
&signal
.slice(s![..window_length / 2 + 1])
.mapv(|x| (x.re * x.re + x.im * x.im).sqrt() as f64),
);
}
stft.permuted_axes((1, 0))
}
pub(crate) fn mean<T: Clone + Into<f32>>(input: &[T]) -> f32 {
input.iter().map(|x| x.clone().into() as f32).sum::<f32>() / input.len() as f32
}
pub(crate) trait Normalize {
const MAX_VALUE: f32;
const MIN_VALUE: f32;
fn normalize(&self, value: f32) -> f32 {
2. * (value - Self::MIN_VALUE) / (Self::MAX_VALUE - Self::MIN_VALUE) - 1.
}
}
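// Worked example: for the BPM descriptor (MIN_VALUE = 0, MAX_VALUE = 206), a
// median tempo of 60 BPM is normalized to 2 * 60 / 206 - 1 ≈ -0.417, and
// 206 BPM to 1.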
// Essentia algorithm
// https://github.com/MTG/essentia/blob/master/src/algorithms/temporal/zerocrossingrate.cpp
pub(crate) fn number_crossings(input: &[f32]) -> u32 {
let mut crossings = 0;
let mut was_positive = input[0] > 0.;
for &sample in input {
let is_positive = sample > 0.;
if was_positive != is_positive {
crossings += 1;
was_positive = is_positive;
}
}
crossings
}
// Only works for inputs of size 256 (or at least of a size that is a
// multiple of 8), with values belonging to [0; 2^65].
// This finely optimized geometric mean courtesy of
// Jacques-Henri Jourdan (https://jhjourdan.mketjh.fr/)
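// In short: the geometric mean is exp2(mean(log2(x_i))). Rather than taking a
// logarithm per sample, the samples are multiplied 8 by 8, the binary exponent
// of each partial product is accumulated as an integer while its mantissa goes
// into a separate running product, and a 2^500 factor keeps the partial
// products away from underflow; the (1023. + 500.) / 8. term at the end removes
// the exponent bias and that factor.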
pub(crate) fn geometric_mean(input: &[f32]) -> f32 {
let mut exponents: i32 = 0;
let mut mantissas: f64 = 1.;
for ch in input.chunks_exact(8) {
let mut m;
m = (ch[0] as f64 * ch[1] as f64) * (ch[2] as f64 * ch[3] as f64);
m *= 3.27339060789614187e150; // 2^500 : avoid underflows and denormals
m *= (ch[4] as f64 * ch[5] as f64) * (ch[6] as f64 * ch[7] as f64);
if m == 0. {
return 0.;
}
exponents += (m.to_bits() >> 52) as i32;
mantissas *= f64::from_bits((m.to_bits() & 0xFFFFFFFFFFFFF) | 0x3FF0000000000000);
}
let n = input.len() as u32;
((((mantissas as f32).log2() + exponents as f32) as f32) / n as f32 - (1023. + 500.) / 8.)
.exp2()
}
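// Convert frequencies (in Hz) to octave numbers, in place: each frequency
// becomes log2(freq / (a440 / 16)), where a440 = 440 * 2^(tuning / bins_per_octave)
// accounts for the tuning deviation. With tuning = 0, A0 (27.5 Hz) maps to 0
// and A4 (440 Hz) to 4.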
pub(crate) fn hz_to_octs_inplace(
frequencies: &mut Array1<f64>,
tuning: f64,
bins_per_octave: u32,
) -> &mut Array1<f64> {
let a440 = 440.0 * (2_f64.powf(tuning / f64::from(bins_per_octave)) as f64);
*frequencies /= a440 / 16.;
frequencies.mapv_inplace(f64::log2);
frequencies
}
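// Linear convolution of `input` with `kernel`, computed through the FFT:
// both are zero-padded to a common length, multiplied point-wise in the
// frequency domain, transformed back (and divided by the FFT length, since
// the inverse transform is unnormalized), then cropped to a "same"-sized
// output centered on `input`.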
#[allow(dead_code)]
pub(crate) fn convolve(input: &Array1<f64>, kernel: &Array1<f64>) -> Array1<f64> {
let mut common_length = input.len() + kernel.len();
if (common_length % 2) != 0 {
common_length -= 1;
}
let mut padded_input = Array::from_elem(common_length, Complex::zero());
padded_input
.slice_mut(s![..input.len()])
.assign(&input.mapv(|x| Complex::new(x, 0.)));
let mut padded_kernel = Array::from_elem(common_length, Complex::zero());
padded_kernel
.slice_mut(s![..kernel.len()])
.assign(&kernel.mapv(|x| Complex::new(x, 0.)));
let mut planner = FftPlanner::new();
let forward = planner.plan_fft_forward(common_length);
forward.process(padded_input.as_slice_mut().unwrap());
forward.process(padded_kernel.as_slice_mut().unwrap());
let mut multiplication = padded_input * padded_kernel;
let mut planner = FftPlanner::new();
let back = planner.plan_fft_inverse(common_length);
back.process(multiplication.as_slice_mut().unwrap());
let multiplication_length = multiplication.len() as f64;
let multiplication = multiplication
.slice_move(s![
(kernel.len() - 1) / 2..(kernel.len() - 1) / 2 + input.len()
])
.mapv(|x| x.re);
multiplication / multiplication_length
}
#[cfg(test)]
mod tests {
use super::*;
use crate::Song;
use ndarray::Array2;
use ndarray::{arr1, Array};
use ndarray_npy::ReadNpyExt;
use std::fs::File;
#[test]
fn test_convolve() {
let file = File::open("data/convolve.npy").unwrap();
let expected_convolve = Array1::<f64>::read_npy(file).unwrap();
let input: Array1<f64> = Array::range(0., 1000., 0.5);
let kernel: Array1<f64> = Array::ones(100);
let output = convolve(&input, &kernel);
for (expected, actual) in expected_convolve.iter().zip(output.iter()) {
assert!(0.0000001 > (expected - actual).abs());
}
let input: Array1<f64> = Array::range(0., 1000., 0.5);
let file = File::open("data/convolve_odd.npy").unwrap();
let expected_convolve = Array1::<f64>::read_npy(file).unwrap();
let kernel: Array1<f64> = Array::ones(99);
let output = convolve(&input, &kernel);
for (expected, actual) in expected_convolve.iter().zip(output.iter()) {
assert!(0.0000001 > (expected - actual).abs());
}
}
#[test]
fn test_mean() {
let numbers = vec![0.0, 1.0, 2.0, 3.0, 4.0];
assert_eq!(2.0, mean(&numbers));
}
#[test]
fn test_geometric_mean() {
let numbers = vec![0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0];
assert_eq!(0.0, geometric_mean(&numbers));
let numbers = vec![4.0, 2.0, 1.0, 4.0, 2.0, 1.0, 2.0, 2.0];
assert!(0.0001 > (2.0 - geometric_mean(&numbers)).abs());
// never going to happen, but just in case
let numbers = vec![256., 4.0, 2.0, 1.0, 4.0, 2.0, 1.0, 2.0];
assert!(0.0001 > (3.668016172818685 - geometric_mean(&numbers)).abs());
let subnormal = vec![4.0, 2.0, 1.0, 4.0, 2.0, 1.0, 2.0, 1.0e-40_f32];
assert!(0.0001 > (1.8340080864093417e-05 - geometric_mean(&subnormal)).abs());
let maximum = vec![2_f32.powi(65); 256];
assert!(0.0001 > (2_f32.powi(65) - geometric_mean(&maximum).abs()));
let input = [
0.024454033,
0.08809689,
0.44554362,
0.82753503,
0.15822093,
1.4442245,
3.6971385,
3.6789556,
1.5981572,
1.0172718,
1.4436096,
3.1457102,
2.7641108,
0.8395235,
0.24896829,
0.07063173,
0.3554194,
0.3520014,
0.7973651,
0.6619708,
0.784104,
0.8767957,
0.28738266,
0.04884128,
0.3227065,
0.33490747,
0.18588875,
0.13544942,
0.14017746,
0.11181582,
0.15263161,
0.22199312,
0.056798387,
0.08389257,
0.07000965,
0.20290329,
0.37071738,
0.23154318,
0.02334859,
0.013220183,
0.035887096,
0.02950549,
0.09033857,
0.17679504,
0.08142187,
0.0033268086,
0.012269007,
0.016257336,
0.027027424,
0.017253408,
0.017230038,
0.021678915,
0.018645158,
0.005417136,
0.0066501745,
0.020159671,
0.026623515,
0.0051667937,
0.016880387,
0.0099352235,
0.011079361,
0.013200151,
0.0053205723,
0.0050702896,
0.008130498,
0.009006041,
0.0036024998,
0.0064403876,
0.004656151,
0.0025131858,
0.0030845597,
0.008722531,
0.017871628,
0.022656294,
0.017539924,
0.0094395885,
0.00308572,
0.0013586166,
0.0027467872,
0.0054130103,
0.004140312,
0.00014358714,
0.0013718408,
0.004472961,
0.003769122,
0.0032591296,
0.00363724,
0.0024453322,
0.00059036893,
0.00064789865,
0.001745297,
0.0008671655,
0.0021562362,
0.0010756068,
0.0020091995,
0.0015373885,
0.0009846204,
0.00029200249,
0.0009211624,
0.0005351118,
0.0014912765,
0.0020651375,
0.00066112226,
0.00085005426,
0.0019005901,
0.0006395845,
0.002262803,
0.0030940182,
0.0020891617,
0.001215059,
0.0013114084,
0.000470959,
0.0006654807,
0.00143032,
0.0017918893,
0.00086320075,
0.0005604455,
0.00082841754,
0.0006694539,
0.000822765,
0.0006165758,
0.001189319,
0.0007300245,
0.0006237481,
0.0012076444,
0.0014746742,
0.002033916,
0.0015001699,
0.00052051,
0.00044564332,
0.00055846275,
0.00089778664,
0.00080524705,
0.00072653644,
0.0006730526,
0.0009940645,
0.0011093937,
0.0012950997,
0.0009826822,
0.0008766518,
0.0016549287,
0.00092906435,
0.00029130623,
0.00025049047,
0.00022848802,
0.00026967315,
0.00023737509,
0.0009694061,
0.0010638118,
0.00079342886,
0.00059083506,
0.0004763899,
0.0009516641,
0.00069223146,
0.0005571137,
0.0008517697,
0.0010710277,
0.0006102439,
0.00074687623,
0.00084989844,
0.0004958062,
0.000526994,
0.00021524922,
0.000096684314,
0.0006545544,
0.0012206973,
0.0012103583,
0.00092045433,
0.0009248435,
0.0008121284,
0.00023953256,
0.0009318224,
0.0010439663,
0.00048373415,
0.00029895222,
0.0004844254,
0.0006668295,
0.0009983985,
0.0008604897,
0.00018315323,
0.0003091808,
0.0005426462,
0.0010403915,
0.0007554566,
0.0002846017,
0.0006009793,
0.0007650569,
0.00056281046,
0.00034661655,
0.00023622432,
0.0005987106,
0.00029568427,
0.00038697806,
0.000584258,
0.0005670976,
0.0006136444,
0.0005645493,
0.00023538452,
0.0002855746,
0.00038535293,
0.00043193565,
0.0007312465,
0.0006030728,
0.0010331308,
0.0011952162,
0.0008245007,
0.00042218363,
0.00082176016,
0.001132246,
0.00089140673,
0.0006351588,
0.00037268156,
0.00023035,
0.0006286493,
0.0008061599,
0.00066162215,
0.00022713901,
0.00021469496,
0.0006654577,
0.000513901,
0.00039176678,
0.0010790947,
0.0007353637,
0.00017166573,
0.00043964887,
0.0002951453,
0.00017704708,
0.00018295897,
0.00092653604,
0.0008324083,
0.0008041684,
0.0011318093,
0.0011871496,
0.0008069488,
0.00062862475,
0.0005913861,
0.0004721823,
0.00016365231,
0.00017787657,
0.00042536375,
0.0005736993,
0.00043467924,
0.00009028294,
0.00017257355,
0.0005019574,
0.0006147168,
0.0002167805,
0.0001489743,
0.000055081473,
0.00029626413,
0.00037805567,
0.00014736196,
0.00026251364,
0.00016211842,
0.0001853477,
0.0001387354,
];
assert!(0.00000001 > (0.0025750597 - geometric_mean(&input)).abs());
}
#[test]
fn test_hz_to_octs_inplace() {
let mut frequencies = arr1(&[32., 64., 128., 256.]);
let expected = arr1(&[0.16864029, 1.16864029, 2.16864029, 3.16864029]);
hz_to_octs_inplace(&mut frequencies, 0.5, 10)
.iter()
.zip(expected.iter())
.for_each(|(x, y)| assert!(0.0001 > (x - y).abs()));
}
#[test]
fn test_compute_stft() {
let file = File::open("data/librosa-stft.npy").unwrap();
let expected_stft = Array2::<f32>::read_npy(file).unwrap().mapv(|x| x as f64);
let song = Song::decode("data/piano.flac").unwrap();
let stft = stft(&song.sample_array, 2048, 512);
assert!(!stft.is_empty() && !expected_stft.is_empty());
for (expected, actual) in expected_stft.iter().zip(stft.iter()) {
assert!(0.0001 > (expected - actual).abs());
}
}
#[test]
fn test_reflect_pad() {
let array = Array::range(0., 100000., 1.);
let output = reflect_pad(array.as_slice().unwrap(), 3);
assert_eq!(&output[..4], &[3.0, 2.0, 1.0, 0.]);
assert_eq!(&output[3..100003], array.to_vec());
assert_eq!(&output[100003..100006], &[99998.0, 99997.0, 99996.0]);
}
}
#[cfg(all(feature = "bench", test))]
mod bench {
extern crate test;
use super::*;
use crate::Song;
use ndarray::Array;
use test::Bencher;
#[bench]
fn bench_convolve(b: &mut Bencher) {
let input: Array1<f64> = Array::range(0., 1000., 0.5);
let kernel: Array1<f64> = Array::ones(100);
b.iter(|| {
convolve(&input, &kernel);
});
}
#[bench]
fn bench_compute_stft(b: &mut Bencher) {
let signal = Song::decode("data/piano.flac").unwrap().sample_array;
b.iter(|| {
stft(&signal, 2048, 512);
});
}
#[bench]
fn bench_reflect_pad(b: &mut Bencher) {
let array = Array::range(0., 1000000., 1.);
b.iter(|| {
reflect_pad(array.as_slice().unwrap(), 3);
});
}
#[bench]
fn bench_geometric_mean(b: &mut Bencher) {
let input = [
0.024454033,
0.08809689,
0.44554362,
0.82753503,
0.15822093,
1.4442245,
3.6971385,
3.6789556,
1.5981572,
1.0172718,
1.4436096,
3.1457102,
2.7641108,
0.8395235,
0.24896829,
0.07063173,
0.3554194,
0.3520014,
0.7973651,
0.6619708,
0.784104,
0.8767957,
0.28738266,
0.04884128,
0.3227065,
0.33490747,
0.18588875,
0.13544942,
0.14017746,
0.11181582,
0.15263161,
0.22199312,
0.056798387,
0.08389257,
0.07000965,
0.20290329,
0.37071738,
0.23154318,
0.02334859,
0.013220183,
0.035887096,
0.02950549,
0.09033857,
0.17679504,
0.08142187,
0.0033268086,
0.012269007,
0.016257336,
0.027027424,
0.017253408,
0.017230038,
0.021678915,
0.018645158,
0.005417136,
0.0066501745,
0.020159671,
0.026623515,
0.0051667937,
0.016880387,
0.0099352235,
0.011079361,
0.013200151,
0.0053205723,
0.0050702896,
0.008130498,
0.009006041,
0.0036024998,
0.0064403876,
0.004656151,
0.0025131858,
0.0030845597,
0.008722531,
0.017871628,
0.022656294,
0.017539924,
0.0094395885,
0.00308572,
0.0013586166,
0.0027467872,
0.0054130103,
0.004140312,
0.00014358714,
0.0013718408,
0.004472961,
0.003769122,
0.0032591296,
0.00363724,
0.0024453322,
0.00059036893,
0.00064789865,
0.001745297,
0.0008671655,
0.0021562362,
0.0010756068,
0.0020091995,
0.0015373885,
0.0009846204,
0.00029200249,
0.0009211624,
0.0005351118,
0.0014912765,
0.0020651375,
0.00066112226,
0.00085005426,
0.0019005901,
0.0006395845,
0.002262803,
0.0030940182,
0.0020891617,
0.001215059,
0.0013114084,
0.000470959,
0.0006654807,
0.00143032,
0.0017918893,
0.00086320075,
0.0005604455,
0.00082841754,
0.0006694539,
0.000822765,
0.0006165758,
0.001189319,
0.0007300245,
0.0006237481,
0.0012076444,
0.0014746742,
0.002033916,
0.0015001699,
0.00052051,
0.00044564332,
0.00055846275,
0.00089778664,
0.00080524705,
0.00072653644,
0.0006730526,
0.0009940645,
0.0011093937,
0.0012950997,
0.0009826822,
0.0008766518,
0.0016549287,
0.00092906435,
0.00029130623,
0.00025049047,
0.00022848802,
0.00026967315,
0.00023737509,
0.0009694061,
0.0010638118,
0.00079342886,
0.00059083506,
0.0004763899,
0.0009516641,
0.00069223146,
0.0005571137,
0.0008517697,
0.0010710277,
0.0006102439,
0.00074687623,
0.00084989844,
0.0004958062,
0.000526994,
0.00021524922,
0.000096684314,
0.0006545544,
0.0012206973,
0.0012103583,
0.00092045433,
0.0009248435,
0.0008121284,
0.00023953256,
0.0009318224,
0.0010439663,
0.00048373415,
0.00029895222,
0.0004844254,
0.0006668295,
0.0009983985,
0.0008604897,
0.00018315323,
0.0003091808,
0.0005426462,
0.0010403915,
0.0007554566,
0.0002846017,
0.0006009793,
0.0007650569,
0.00056281046,
0.00034661655,
0.00023622432,
0.0005987106,
0.00029568427,
0.00038697806,
0.000584258,
0.0005670976,
0.0006136444,
0.0005645493,
0.00023538452,
0.0002855746,
0.00038535293,
0.00043193565,
0.0007312465,
0.0006030728,
0.0010331308,
0.0011952162,
0.0008245007,
0.00042218363,
0.00082176016,
0.001132246,
0.00089140673,
0.0006351588,
0.00037268156,
0.00023035,
0.0006286493,
0.0008061599,
0.00066162215,
0.00022713901,
0.00021469496,
0.0006654577,
0.000513901,
0.00039176678,
0.0010790947,
0.0007353637,
0.00017166573,
0.00043964887,
0.0002951453,
0.00017704708,
0.00018295897,
0.00092653604,
0.0008324083,
0.0008041684,
0.0011318093,
0.0011871496,
0.0008069488,
0.00062862475,
0.0005913861,
0.0004721823,
0.00016365231,
0.00017787657,
0.00042536375,
0.0005736993,
0.00043467924,
0.00009028294,
0.00017257355,
0.0005019574,
0.0006147168,
0.0002167805,
0.0001489743,
0.000055081473,
0.00029626413,
0.00037805567,
0.00014736196,
0.00026251364,
0.00016211842,
0.0001853477,
0.0001387354,
];
b.iter(|| {
geometric_mean(&input);
});
}
}