From dd997510d3d12dacd09f835462b6ff3f6dbfabbd Mon Sep 17 00:00:00 2001 From: Polochon-street Date: Mon, 23 Aug 2021 17:33:17 +0200 Subject: [PATCH] Fix speed of "song to song" sorting method --- .github/workflows/rust.yml | 2 +- CHANGELOG.md | 5 +++++ Cargo.lock | 4 ++-- src/distance.rs | 43 +++++++++++++++++++++++++------------- src/library.rs | 2 +- src/song.rs | 10 ++++----- 6 files changed, 43 insertions(+), 23 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 7d10dce..a68e2f0 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -23,7 +23,7 @@ jobs: toolchain: nightly-2021-04-01 override: false - name: Packages - run: sudo apt-get install build-essential yasm libavutil-dev libavcodec-dev libavformat-dev libavfilter-dev libavfilter-dev libavdevice-dev libswresample-dev libfftw3-dev ffmpeg + run: sudo apt-get update && sudo apt-get install build-essential yasm libavutil-dev libavcodec-dev libavformat-dev libavfilter-dev libavfilter-dev libavdevice-dev libswresample-dev libfftw3-dev ffmpeg - name: Build run: cargo build --verbose - name: Run tests diff --git a/CHANGELOG.md b/CHANGELOG.md index a9a38d2..0140ed7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ # Changelog +## bliss 0.4.0 +* Make the song-to-song custom sorting method faster. +* Rename `to_vec` and `to_arr1` to `as_vec` and `as_arr1` . +* Add a playlist_dedup function. + ## bliss 0.3.5 * Add custom sorting methods for playlist-making. diff --git a/Cargo.lock b/Cargo.lock index 827d37d..15f7e57 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -882,9 +882,9 @@ dependencies = [ [[package]] name = "rand" -version = "0.8.3" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ef9e7e66b4468674bfcb0c81af8b7fa0bb154fa9f28eb840da5c447baeb8d7e" +checksum = "2e7573632e6454cf6b99d7aac4ccca54be06da05aca2ef7423d22d27d4d4bcd8" dependencies = [ "libc", "rand_chacha", diff --git a/src/distance.rs b/src/distance.rs index 0db1cd3..5e5d1db 100644 --- a/src/distance.rs +++ b/src/distance.rs @@ -12,6 +12,7 @@ use crate::Library; use crate::Song; use crate::NUMBER_FEATURES; use ndarray::{Array, Array1}; +use ndarray_stats::QuantileExt; use noisy_float::prelude::*; /// Convenience trait for user-defined distance metrics. @@ -39,34 +40,35 @@ pub fn cosine_distance(a: &Array1, b: &Array1) -> f32 { } /// Sort `songs` in place by putting songs close to `first_song` first -/// using the `distance` metric. Deduplicate identical songs. +/// using the `distance` metric. pub fn closest_to_first_song( first_song: &Song, songs: &mut Vec, distance: impl DistanceMetric, ) { songs.sort_by_cached_key(|song| n32(first_song.custom_distance(song, &distance))); - songs.dedup_by_key(|song| n32(first_song.custom_distance(song, &distance))); } /// Sort `songs` in place using the `distance` metric and ordering by -/// the smallest distance between each song. Deduplicate identical songs. +/// the smallest distance between each song. /// /// If the generated playlist is `[song1, song2, song3, song4]`, it means /// song2 is closest to song1, song3 is closest to song2, and song4 is closest /// to song3. +/// +/// Note that this has a tendency to go from one style to the other very fast, +/// and it can be slow on big libraries. pub fn song_to_song(first_song: &Song, songs: &mut Vec, distance: impl DistanceMetric) { - let mut new_songs = vec![first_song.to_owned()]; + let mut new_songs = Vec::with_capacity(songs.len()); let mut song = first_song.to_owned(); - loop { - if songs.is_empty() { - break; - } - songs - .retain(|s| n32(song.custom_distance(s, &distance)) != 0.); - songs.sort_by_key(|s| n32(song.custom_distance(s, &distance))); - song = songs.remove(0); + + while !songs.is_empty() { + let distances: Array1 = + Array::from_shape_fn(songs.len(), |i| song.custom_distance(&songs[i], &distance)); + let idx = distances.argmin().unwrap(); + song = songs[idx].to_owned(); new_songs.push(song.to_owned()); + songs.retain(|s| s != &song); } *songs = new_songs; } @@ -126,7 +128,13 @@ mod test { song_to_song(&first_song, &mut songs, euclidean_distance); assert_eq!( songs, - vec![first_song, second_song, third_song, fourth_song], + vec![ + first_song, + first_song_dupe.to_owned(), + second_song, + third_song, + fourth_song + ], ); } @@ -187,7 +195,14 @@ mod test { closest_to_first_song(&first_song, &mut songs, euclidean_distance); assert_eq!( songs, - vec![first_song, second_song, fourth_song, third_song], + vec![ + first_song, + first_song_dupe, + second_song, + fourth_song, + fifth_song, + third_song + ], ); } diff --git a/src/library.rs b/src/library.rs index c26bc42..9ad5a25 100644 --- a/src/library.rs +++ b/src/library.rs @@ -5,7 +5,7 @@ //! MPD](https://github.com/Polochon-street/blissify-rs) could also be useful. #[cfg(doc)] use crate::distance; -use crate::distance::{closest_to_first_song, DistanceMetric, euclidean_distance}; +use crate::distance::{closest_to_first_song, euclidean_distance, DistanceMetric}; use crate::{BlissError, BlissResult, Song}; use log::{debug, error, info}; use std::sync::mpsc; diff --git a/src/song.rs b/src/song.rs index abc4128..cd98517 100644 --- a/src/song.rs +++ b/src/song.rs @@ -142,7 +142,7 @@ impl fmt::Debug for Analysis { debug_struct.field(&format!("{:?}", feature), &self[feature]); } debug_struct.finish()?; - f.write_str(&format!(" /* {:?} */", &self.to_vec())) + f.write_str(&format!(" /* {:?} */", &self.as_vec())) } } @@ -161,7 +161,7 @@ impl Analysis { /// Return an ndarray `Array1` representing the analysis' features. /// /// Particularly useful if you want to make a custom distance metric. - pub fn to_arr1(&self) -> Array1 { + pub fn as_arr1(&self) -> Array1 { arr1(&self.internal_analysis) } @@ -169,7 +169,7 @@ impl Analysis { /// /// Particularly useful if you want iterate through the values to store /// them somewhere. - pub fn to_vec(&self) -> Vec { + pub fn as_vec(&self) -> Vec { self.internal_analysis.to_vec() } @@ -187,7 +187,7 @@ impl Analysis { /// Note that almost all distance metrics you will find obey these /// properties, so don't sweat it too much. pub fn custom_distance(&self, other: &Self, distance: impl DistanceMetric) -> f32 { - distance(&self.to_arr1(), &other.to_arr1()) + distance(&self.as_arr1(), &other.as_arr1()) } } @@ -654,7 +654,7 @@ mod tests { -0.9820945, -0.95968974, ]; - for (x, y) in song.analysis.to_vec().iter().zip(expected_analysis) { + for (x, y) in song.analysis.as_vec().iter().zip(expected_analysis) { assert!(0.01 > (x - y).abs()); } }