diff --git a/CHANGELOG.md b/CHANGELOG.md index 6de40c8..7d5c14b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,9 @@ #Changelog ## bliss 0.5.0 +* Remove the unused Library trait, and extract a few useful functions from + there (`analyze_paths`, `closest_album_to_group`). +* Rename `distance` module to `playlist`. * Remove all traces of the "analyse" word vs "analyze" to make the codebase more coherent. * Rename `Song::new` to `Song::from_path`. diff --git a/examples/playlist.rs b/examples/playlist.rs index 6e0b5eb..214152c 100644 --- a/examples/playlist.rs +++ b/examples/playlist.rs @@ -1,9 +1,9 @@ #[cfg(feature = "serde")] use anyhow::Result; #[cfg(feature = "serde")] -use bliss_audio::distance::{closest_to_first_song, dedup_playlist, euclidean_distance}; +use bliss_audio::playlist::{closest_to_first_song, dedup_playlist, euclidean_distance}; #[cfg(feature = "serde")] -use bliss_audio::{library::analyze_paths_streaming, Song}; +use bliss_audio::{analyze_paths, Song}; #[cfg(feature = "serde")] use clap::{App, Arg}; #[cfg(feature = "serde")] @@ -66,16 +66,16 @@ fn main() -> Result<()> { .map(|x| x.to_string_lossy().to_string()) .collect::>(); - let rx = analyze_paths_streaming( + let song_iterator = analyze_paths( paths .iter() .filter(|p| !analyzed_paths.contains(&PathBuf::from(p))) .map(|p| p.to_owned()) .collect(), - )?; + ); let first_song = Song::from_path(file)?; let mut analyzed_songs = vec![first_song.to_owned()]; - for (path, result) in rx.iter() { + for (path, result) in song_iterator { match result { Ok(song) => analyzed_songs.push(song), Err(e) => println!("error analyzing {}: {}", path, e), diff --git a/src/lib.rs b/src/lib.rs index ed436f0..cf0951d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -7,19 +7,14 @@ //! other metadata fields (album, genre...). //! Analyzing a song is as simple as running `Song::from_path("/path/to/song")`. //! -//! The [analysis](Song::analysis) field of each song is an array of f32, which makes the -//! 
comparison between songs easy, by just using euclidean distance (see -//! [distance](Song::distance) for instance). +//! The [analysis](Song::analysis) field of each song is an array of f32, which +//! makes the comparison between songs easy, by just using e.g. euclidean +//! distance (see [distance](Song::distance) for instance). //! //! Once several songs have been analyzed, making a playlist from one Song //! is as easy as computing distances between that song and the rest, and ordering //! the songs by distance, ascending. //! -//! It is also convenient to make plug-ins for existing audio players. -//! It should be as easy as implementing the necessary traits for [Library]. -//! A reference implementation for the MPD player is available -//! [here](https://github.com/Polochon-street/blissify-rs) -//! //! # Examples //! //! ## Analyze & compute the distance between two songs @@ -65,9 +60,8 @@ #![warn(missing_docs)] #![warn(rustdoc::missing_doc_code_examples)] mod chroma; -pub mod distance; -pub mod library; mod misc; +pub mod playlist; mod song; mod temporal; mod timbral; @@ -78,9 +72,11 @@ extern crate num_cpus; #[cfg(feature = "serde")] #[macro_use] extern crate serde; +use log::info; +use std::sync::mpsc; +use std::thread; use thiserror::Error; -pub use library::Library; pub use song::{Analysis, AnalysisIndex, Song, NUMBER_FEATURES}; const CHANNELS: u16 = 1; @@ -94,54 +90,73 @@ pub const FEATURES_VERSION: u16 = 1; /// Umbrella type for bliss error types pub enum BlissError { #[error("error happened while decoding file – {0}")] - /// An error happened while decoding an (audio) file + /// An error happened while decoding an (audio) file. DecodingError(String), #[error("error happened while analyzing file – {0}")] - /// An error happened during the analysis of the samples by bliss + /// An error happened during the analysis of the song's samples by bliss. 
AnalysisError(String), #[error("error happened with the music library provider - {0}")] /// An error happened with the music library provider. - /// Useful to report errors when you implement the [Library] trait. + /// Useful to report errors when you implement bliss for an audio player. ProviderError(String), } /// bliss error type pub type BlissResult = Result; -/// Simple function to bulk analyze a set of songs represented by their -/// absolute paths. +/// Analyze songs in `paths`, and return the analyzed [Song] objects through an +/// [mpsc::IntoIter] /// -/// When making an extension for an audio player, prefer -/// implementing the `Library` trait. -#[doc(hidden)] -pub fn bulk_analyze(paths: Vec) -> Vec> { - let mut songs = Vec::with_capacity(paths.len()); +/// Returns an iterator, whose items are a tuple made of +/// the song path (to display to the user in case the analysis failed), +/// and a Result. +/// +/// * Example: +/// ```no_run +/// use bliss_audio::{analyze_paths, BlissResult}; +/// +/// fn main() -> BlissResult<()> { +/// let paths = vec![String::from("/path/to/song1"), String::from("/path/to/song2")]; +/// for (path, result) in analyze_paths(paths) { +/// match result { +/// Ok(song) => println!("Do something with analyzed song {} with title {:?}", song.path.display(), song.title), +/// Err(e) => println!("Song at {} could not be analyzed. 
Failed with: {}", path, e), +/// } +/// } +/// Ok(()) +/// } +/// ``` +pub fn analyze_paths(paths: Vec) -> mpsc::IntoIter<(String, BlissResult)> { let num_cpus = num_cpus::get(); - crossbeam::scope(|s| { - let mut handles = Vec::with_capacity(paths.len() / num_cpus); - let mut chunk_number = paths.len() / num_cpus; - if chunk_number == 0 { - chunk_number = paths.len(); - } - for chunk in paths.chunks(chunk_number) { - handles.push(s.spawn(move |_| { - let mut result = Vec::with_capacity(chunk.len()); - for path in chunk { - let song = Song::from_path(&path); - result.push(song); - } - result - })); - } + #[allow(clippy::type_complexity)] + let (tx, rx): ( + mpsc::Sender<(String, BlissResult)>, + mpsc::Receiver<(String, BlissResult)>, + ) = mpsc::channel(); + if paths.is_empty() { + return rx.into_iter(); + } + let mut handles = Vec::new(); + let mut chunk_length = paths.len() / num_cpus; + if chunk_length == 0 { + chunk_length = paths.len(); + } - for handle in handles { - songs.extend(handle.join().unwrap()); - } - }) - .unwrap(); + for chunk in paths.chunks(chunk_length) { + let tx_thread = tx.clone(); + let owned_chunk = chunk.to_owned(); + let child = thread::spawn(move || { + for path in owned_chunk { + info!("Analyzing file '{}'", path); + let song = Song::from_path(&path); + tx_thread.send((path.to_string(), song)).unwrap(); + } + }); + handles.push(child); + } - songs + rx.into_iter() } #[cfg(test)] @@ -161,52 +176,28 @@ mod tests { } #[test] - fn test_bulk_analyze() { - let results = bulk_analyze(vec![ - String::from("data/s16_mono_22_5kHz.flac"), - String::from("data/s16_mono_22_5kHz.flac"), - String::from("nonexistent"), - String::from("data/s16_stereo_22_5kHz.flac"), - String::from("nonexistent"), - String::from("nonexistent"), - String::from("nonexistent"), - String::from("nonexistent"), - String::from("nonexistent"), - String::from("nonexistent"), - String::from("nonexistent"), - ]); - let mut errored_songs: Vec = results - .iter() - .filter_map(|x| 
x.as_ref().err().map(|x| x.to_string())) - .collect(); - errored_songs.sort_by(|a, b| a.cmp(b)); - - let mut analyzed_songs: Vec = results - .iter() - .filter_map(|x| { - x.as_ref() - .ok() - .map(|x| x.path.to_str().unwrap().to_string()) + fn test_analyze_paths() { + let paths = vec![ + String::from("./data/s16_mono_22_5kHz.flac"), + String::from("./data/white_noise.flac"), + String::from("definitely-not-existing.foo"), + String::from("not-existing.foo"), + ]; + let mut results = analyze_paths(paths) + .map(|x| match &x.1 { + Ok(s) => (true, s.path.to_string_lossy().to_string()), + Err(_) => (false, x.0.to_owned()), }) - .collect(); - analyzed_songs.sort_by(|a, b| a.cmp(b)); - + .collect::>(); + results.sort(); assert_eq!( + results, vec![ - String::from( - "error happened while decoding file – while opening format: ffmpeg::Error(2: No such file or directory)." - ); - 8 + (false, String::from("definitely-not-existing.foo")), + (false, String::from("not-existing.foo")), + (true, String::from("./data/s16_mono_22_5kHz.flac")), + (true, String::from("./data/white_noise.flac")), ], - errored_songs - ); - assert_eq!( - vec![ - String::from("data/s16_mono_22_5kHz.flac"), - String::from("data/s16_mono_22_5kHz.flac"), - String::from("data/s16_stereo_22_5kHz.flac"), - ], - analyzed_songs, ); } } diff --git a/src/library.rs b/src/library.rs deleted file mode 100644 index 1591e8a..0000000 --- a/src/library.rs +++ /dev/null @@ -1,829 +0,0 @@ -//! Module containing the Library trait, useful to get started to implement -//! a plug-in for an audio player. -//! -//! Looking at the [reference implementation for -//! MPD](https://github.com/Polochon-street/blissify-rs) could also be useful. 
-#[cfg(doc)] -use crate::distance; -use crate::distance::{closest_to_first_song, euclidean_distance, DistanceMetric}; -use crate::{BlissError, BlissResult, Song}; -use log::{debug, error, info}; -use ndarray::{Array, Array2, Axis}; -use noisy_float::prelude::n32; -use std::collections::HashMap; -use std::sync::mpsc; -use std::sync::mpsc::{Receiver, Sender}; -use std::thread; - -/// Library trait to make creating plug-ins for existing audio players easier. -pub trait Library { - /// Return the absolute path of all the songs in an - /// audio player's music library. - fn get_songs_paths(&self) -> BlissResult>; - /// Store an analyzed Song object in some (cold) storage, e.g. - /// a database, a file... - fn store_song(&mut self, song: &Song) -> BlissResult<()>; - /// Log and / or store that an error happened while trying to decode and - /// analyze a song. - fn store_error_song(&mut self, song_path: String, error: BlissError) -> BlissResult<()>; - /// Retrieve a list of all the stored Songs. - /// - /// This should work only after having run `analyze_library` at least - /// once. - fn get_stored_songs(&self) -> BlissResult>; - - /// Return a list of `number_albums` albums that are similar - /// to `album`, discarding songs that don't belong to an album. - /// - /// # Arguments - /// - /// * `album` - The album the playlist will be built from. - /// * `number_albums` - The number of albums to queue. - /// - /// # Returns - /// - /// A vector of songs, including `first_song`, that you - /// most likely want to plug in your audio player by using something like - /// `ret.map(|song| song.path.to_owned()).collect::>()`. 
- fn playlist_from_songs_album( - &self, - first_album: &str, - playlist_length: usize, - ) -> BlissResult> { - let songs = self.get_stored_songs()?; - let mut albums_analysis: HashMap<&str, Array2> = HashMap::new(); - let mut albums = Vec::new(); - - for song in &songs { - if let Some(album) = &song.album { - if let Some(analysis) = albums_analysis.get_mut(album as &str) { - analysis - .push_row(song.analysis.as_arr1().view()) - .map_err(|e| { - BlissError::ProviderError(format!("while computing distances: {}", e)) - })?; - } else { - let mut array = Array::zeros((1, song.analysis.as_arr1().len())); - array.assign(&song.analysis.as_arr1()); - albums_analysis.insert(album, array); - } - } - } - let mut first_analysis = None; - for (album, analysis) in albums_analysis.iter() { - let mean_analysis = analysis - .mean_axis(Axis(0)) - .ok_or_else(|| BlissError::ProviderError(String::from("Mean of empty slice")))?; - let album = album.to_owned(); - albums.push((album, mean_analysis.to_owned())); - if album == first_album { - first_analysis = Some(mean_analysis); - } - } - - if first_analysis.is_none() { - return Err(BlissError::ProviderError(format!( - "Could not find album \"{}\".", - first_album - ))); - } - albums.sort_by_key(|(_, analysis)| { - n32(euclidean_distance( - first_analysis.as_ref().unwrap(), - analysis, - )) - }); - let albums = albums.get(..playlist_length).unwrap_or(&albums); - let mut playlist = Vec::new(); - for (album, _) in albums { - let mut al = songs - .iter() - .filter(|s| s.album.is_some() && s.album.as_ref().unwrap() == &album.to_string()) - .map(|s| s.to_owned()) - .collect::>(); - al.sort_by(|s1, s2| { - let track_number1 = s1 - .track_number - .to_owned() - .unwrap_or_else(|| String::from("")); - let track_number2 = s2 - .track_number - .to_owned() - .unwrap_or_else(|| String::from("")); - if let Ok(x) = track_number1.parse::() { - if let Ok(y) = track_number2.parse::() { - return x.cmp(&y); - } - } - s1.track_number.cmp(&s2.track_number) - 
}); - playlist.extend_from_slice(&al); - } - Ok(playlist) - } - - /// Return a list of `playlist_length` songs that are similar - /// to ``first_song``, deduplicating identical songs. - /// - /// # Arguments - /// - /// * `first_song` - The song the playlist will be built from. - /// * `playlist_length` - The playlist length. If there are not enough - /// songs in the library, it will be truncated to the size of the library. - /// - /// # Returns - /// - /// A vector of `playlist_length` songs, including `first_song`, that you - /// most likely want to plug in your audio player by using something like - /// `ret.map(|song| song.path.to_owned()).collect::>()`. - // TODO return an iterator and not a Vec - fn playlist_from_song( - &self, - first_song: Song, - playlist_length: usize, - ) -> BlissResult> { - let playlist = self.playlist_from_song_custom( - first_song, - playlist_length, - euclidean_distance, - closest_to_first_song, - )?; - - debug!( - "Playlist created: {}", - playlist - .iter() - .map(|s| format!("{:?}", &s)) - .collect::>() - .join("\n"), - ); - Ok(playlist) - } - - /// Return a list of songs that are similar to ``first_song``, using a - /// custom distance metric and deduplicating indentical songs. - /// - /// # Arguments - /// - /// * `first_song` - The song the playlist will be built from. - /// * `playlist_length` - The playlist length. If there are not enough - /// songs in the library, it will be truncated to the size of the library. - /// * `distance` - a user-supplied valid distance metric, either taken - /// from the [distance](distance) module, or made from scratch. - /// - /// # Returns - /// - /// A vector of `playlist_length` Songs, including `first_song`, that you - /// most likely want to plug in your audio player by using something like - /// `ret.map(|song| song.path.to_owned()).collect::>()`. 
- /// - /// # Custom distance example - /// - /// ``` - /// use ndarray::Array1; - /// - /// fn manhattan_distance(a: &Array1, b: &Array1) -> f32 { - /// (a - b).mapv(|x| x.abs()).sum() - /// } - /// ``` - fn playlist_from_song_custom_distance( - &self, - first_song: Song, - playlist_length: usize, - distance: impl DistanceMetric, - ) -> BlissResult> { - let playlist = self.playlist_from_song_custom( - first_song, - playlist_length, - distance, - closest_to_first_song, - )?; - - debug!( - "Playlist created: {}", - playlist - .iter() - .map(|s| format!("{:?}", &s)) - .collect::>() - .join("\n"), - ); - Ok(playlist) - } - - /// Return a playlist of songs, starting with `first_song`, sorted using - /// the custom `sort` function, and the custom `distance` metric. - /// - /// # Arguments - /// - /// * `first_song` - The song the playlist will be built from. - /// * `playlist_length` - The playlist length. If there are not enough - /// songs in the library, it will be truncated to the size of the library. - /// * `distance` - a user-supplied valid distance metric, either taken - /// from the [distance](distance) module, or made from scratch. - /// * `sort` - a user-supplied sorting function that uses the `distance` - /// metric, either taken from the [distance module](distance), or made - /// from scratch. - /// - /// # Returns - /// - /// A vector of `playlist_length` Songs, including `first_song`, that you - /// most likely want to plug in your audio player by using something like - /// `ret.map(|song| song.path.to_owned()).collect::>()`. 
- fn playlist_from_song_custom( - &self, - first_song: Song, - playlist_length: usize, - distance: G, - mut sort: F, - ) -> BlissResult> - where - F: FnMut(&Song, &mut Vec, G), - G: DistanceMetric, - { - let mut songs = self.get_stored_songs()?; - sort(&first_song, &mut songs, distance); - Ok(songs - .into_iter() - .take(playlist_length) - .collect::>()) - } - - /// Analyze and store songs in `paths`, using `store_song` and - /// `store_error_song` implementations. - /// - /// note: this is mostly useful for updating a song library. for the first - /// run, you probably want to use `analyze_library`. - fn analyze_paths(&mut self, paths: Vec) -> BlissResult<()> { - if paths.is_empty() { - return Ok(()); - } - let num_cpus = num_cpus::get(); - - #[allow(clippy::type_complexity)] - let (tx, rx): ( - Sender<(String, BlissResult)>, - Receiver<(String, BlissResult)>, - ) = mpsc::channel(); - let mut handles = Vec::new(); - let mut chunk_length = paths.len() / num_cpus; - if chunk_length == 0 { - chunk_length = paths.len(); - } - - for chunk in paths.chunks(chunk_length) { - let tx_thread = tx.clone(); - let owned_chunk = chunk.to_owned(); - let child = thread::spawn(move || { - for path in owned_chunk { - info!("Analyzing file '{}'", path); - let song = Song::from_path(&path); - tx_thread.send((path.to_string(), song)).unwrap(); - } - drop(tx_thread); - }); - handles.push(child); - } - drop(tx); - - for (path, song) in rx.iter() { - // A storage fail should just warn the user, but not abort the whole process - match song { - Ok(song) => { - self.store_song(&song).unwrap_or_else(|e| { - error!("Error while storing song '{}': {}", song.path.display(), e) - }); - info!( - "Analyzed and stored song '{}' successfully.", - song.path.display() - ) - } - Err(e) => { - self.store_error_song(path.to_string(), e.to_owned()) - .unwrap_or_else(|e| { - error!("Error while storing errored song '{}': {}", path, e) - }); - error!( - "Analysis of song '{}': {} failed. 
Error has been stored.", - path, e - ) - } - } - } - - for child in handles { - child - .join() - .map_err(|_| BlissError::AnalysisError("in analysis".to_string()))?; - } - Ok(()) - } - - /// Analyzes a song library, using `get_songs_paths`, `store_song` and - /// `store_error_song` implementations. - fn analyze_library(&mut self) -> BlissResult<()> { - let paths = self - .get_songs_paths() - .map_err(|e| BlissError::ProviderError(e.to_string()))?; - self.analyze_paths(paths)?; - Ok(()) - } - - /// Analyze an entire library using `get_songs_paths`, but instead of - /// storing songs using [store_song](Library::store_song) - /// and [store_error_song](Library::store_error_song). - /// - /// Returns an iterable [Receiver], whose items are a tuple made of - /// the song path (to display to the user in case the analysis failed), - /// and a Result. - fn analyze_library_streaming(&mut self) -> BlissResult)>> { - let paths = self - .get_songs_paths() - .map_err(|e| BlissError::ProviderError(e.to_string()))?; - analyze_paths_streaming(paths) - } -} - -/// Analyze songs in `paths`, and return the analyzed [Song] objects through a -/// [Receiver]. -/// -/// Returns an iterable [Receiver], whose items are a tuple made of -/// the song path (to display to the user in case the analysis failed), -/// and a Result. -/// -/// Note: this is mostly useful for updating a song library, while displaying -/// status to the user (since you have access to each song object). For the -/// first run, you probably want to use `analyze_library`. 
-/// -/// * Example: -/// ```no_run -/// use bliss_audio::{library::analyze_paths_streaming, BlissResult}; -/// -/// fn main() -> BlissResult<()> { -/// let paths = vec![String::from("/path/to/song1"), String::from("/path/to/song2")]; -/// let rx = analyze_paths_streaming(paths)?; -/// for (path, result) in rx.iter() { -/// match result { -/// Ok(song) => println!("Do something with analyzed song {} with title {:?}", song.path.display(), song.title), -/// Err(e) => println!("Song at {} could not be analyzed. Failed with: {}", path, e), -/// } -/// } -/// Ok(()) -/// } -/// ``` -pub fn analyze_paths_streaming( - paths: Vec, -) -> BlissResult)>> { - let num_cpus = num_cpus::get(); - - #[allow(clippy::type_complexity)] - let (tx, rx): ( - Sender<(String, BlissResult)>, - Receiver<(String, BlissResult)>, - ) = mpsc::channel(); - if paths.is_empty() { - return Ok(rx); - } - let mut handles = Vec::new(); - let mut chunk_length = paths.len() / num_cpus; - if chunk_length == 0 { - chunk_length = paths.len(); - } - - for chunk in paths.chunks(chunk_length) { - let tx_thread = tx.clone(); - let owned_chunk = chunk.to_owned(); - let child = thread::spawn(move || { - for path in owned_chunk { - info!("Analyzing file '{}'", path); - let song = Song::from_path(&path); - tx_thread.send((path.to_string(), song)).unwrap(); - } - }); - handles.push(child); - } - - Ok(rx) -} - -#[cfg(test)] -mod test { - use super::*; - use crate::song::Analysis; - use ndarray::Array1; - use std::path::Path; - - #[derive(Default)] - struct TestLibrary { - internal_storage: Vec, - failed_files: Vec<(String, String)>, - } - - impl Library for TestLibrary { - fn get_songs_paths(&self) -> BlissResult> { - Ok(vec![ - String::from("./data/white_noise.flac"), - String::from("./data/s16_mono_22_5kHz.flac"), - String::from("not-existing.foo"), - String::from("definitely-not-existing.foo"), - ]) - } - - fn store_song(&mut self, song: &Song) -> BlissResult<()> { - self.internal_storage.push(song.to_owned()); - 
Ok(()) - } - - fn store_error_song(&mut self, song_path: String, error: BlissError) -> BlissResult<()> { - self.failed_files.push((song_path, error.to_string())); - Ok(()) - } - - fn get_stored_songs(&self) -> BlissResult> { - Ok(self.internal_storage.to_owned()) - } - } - - #[derive(Default)] - struct FailingLibrary; - - impl Library for FailingLibrary { - fn get_songs_paths(&self) -> BlissResult> { - Err(BlissError::ProviderError(String::from( - "Could not get songs path", - ))) - } - - fn store_song(&mut self, _: &Song) -> BlissResult<()> { - Ok(()) - } - - fn get_stored_songs(&self) -> BlissResult> { - Err(BlissError::ProviderError(String::from( - "Could not get stored songs", - ))) - } - - fn store_error_song(&mut self, _: String, _: BlissError) -> BlissResult<()> { - Ok(()) - } - } - - #[derive(Default)] - struct FailingStorage; - - impl Library for FailingStorage { - fn get_songs_paths(&self) -> BlissResult> { - Ok(vec![ - String::from("./data/white_noise.flac"), - String::from("./data/s16_mono_22_5kHz.flac"), - String::from("not-existing.foo"), - String::from("definitely-not-existing.foo"), - ]) - } - - fn store_song(&mut self, song: &Song) -> BlissResult<()> { - Err(BlissError::ProviderError(format!( - "Could not store song {}", - song.path.display() - ))) - } - - fn get_stored_songs(&self) -> BlissResult> { - Ok(vec![]) - } - - fn store_error_song(&mut self, song_path: String, error: BlissError) -> BlissResult<()> { - Err(BlissError::ProviderError(format!( - "Could not store errored song: {}, with error: {}", - song_path, error - ))) - } - } - - #[test] - fn test_analyze_library_fail() { - let mut test_library = FailingLibrary {}; - assert_eq!( - test_library.analyze_library(), - Err(BlissError::ProviderError(String::from( - "error happened with the music library provider - Could not get songs path" - ))), - ); - } - - #[test] - fn test_playlist_from_song_fail() { - let test_library = FailingLibrary {}; - let song = Song { - path: 
Path::new("path-to-first").to_path_buf(), - analysis: Analysis::new([0.; 20]), - ..Default::default() - }; - - assert_eq!( - test_library.playlist_from_song(song, 10), - Err(BlissError::ProviderError(String::from( - "Could not get stored songs" - ))), - ); - } - - #[test] - fn test_analyze_library_fail_storage() { - let mut test_library = FailingStorage {}; - - // A storage fail should just warn the user, but not abort the whole process - assert!(test_library.analyze_library().is_ok()) - } - - #[test] - fn test_analyze_library_streaming() { - let mut test_library = TestLibrary { - internal_storage: vec![], - failed_files: vec![], - }; - let rx = test_library.analyze_library_streaming().unwrap(); - - let mut result = rx.iter().collect::)>>(); - result.sort_by_key(|k| k.0.to_owned()); - let expected = result - .iter() - .map(|x| match &x.1 { - Ok(s) => (true, s.path.to_string_lossy().to_string()), - Err(_) => (false, x.0.to_owned()), - }) - .collect::>(); - assert_eq!( - vec![ - (true, String::from("./data/s16_mono_22_5kHz.flac")), - (true, String::from("./data/white_noise.flac")), - (false, String::from("definitely-not-existing.foo")), - (false, String::from("not-existing.foo")), - ], - expected, - ); - } - - #[test] - fn test_analyze_library() { - let mut test_library = TestLibrary { - internal_storage: vec![], - failed_files: vec![], - }; - test_library.analyze_library().unwrap(); - - let mut failed_files = test_library - .failed_files - .iter() - .map(|x| x.0.to_owned()) - .collect::>(); - failed_files.sort(); - - assert_eq!( - failed_files, - vec![ - String::from("definitely-not-existing.foo"), - String::from("not-existing.foo"), - ], - ); - - let mut songs = test_library - .internal_storage - .iter() - .map(|x| x.path.to_str().unwrap().to_string()) - .collect::>(); - songs.sort(); - - assert_eq!( - songs, - vec![ - String::from("./data/s16_mono_22_5kHz.flac"), - String::from("./data/white_noise.flac"), - ], - ); - } - - #[test] - fn test_playlist_from_album() { 
- let mut test_library = TestLibrary::default(); - let first_song = Song { - path: Path::new("path-to-first").to_path_buf(), - analysis: Analysis::new([0.; 20]), - album: Some(String::from("Album")), - track_number: Some(String::from("01")), - ..Default::default() - }; - - let second_song = Song { - path: Path::new("path-to-second").to_path_buf(), - analysis: Analysis::new([0.1; 20]), - album: Some(String::from("Another Album")), - track_number: Some(String::from("10")), - ..Default::default() - }; - - let third_song = Song { - path: Path::new("path-to-third").to_path_buf(), - analysis: Analysis::new([10.; 20]), - album: Some(String::from("Album")), - track_number: Some(String::from("02")), - ..Default::default() - }; - - let fourth_song = Song { - path: Path::new("path-to-fourth").to_path_buf(), - analysis: Analysis::new([20.; 20]), - album: Some(String::from("Another Album")), - track_number: Some(String::from("01")), - ..Default::default() - }; - let fifth_song = Song { - path: Path::new("path-to-fifth").to_path_buf(), - analysis: Analysis::new([20.; 20]), - album: None, - ..Default::default() - }; - - test_library.internal_storage = vec![ - first_song.to_owned(), - fourth_song.to_owned(), - third_song.to_owned(), - second_song.to_owned(), - fifth_song.to_owned(), - ]; - assert_eq!( - vec![first_song, third_song, fourth_song, second_song], - test_library.playlist_from_songs_album("Album", 3).unwrap() - ); - } - - #[test] - fn test_playlist_from_song() { - let mut test_library = TestLibrary::default(); - let first_song = Song { - path: Path::new("path-to-first").to_path_buf(), - analysis: Analysis::new([0.; 20]), - ..Default::default() - }; - - let second_song = Song { - path: Path::new("path-to-second").to_path_buf(), - analysis: Analysis::new([0.1; 20]), - ..Default::default() - }; - - let third_song = Song { - path: Path::new("path-to-third").to_path_buf(), - analysis: Analysis::new([10.; 20]), - ..Default::default() - }; - - let fourth_song = Song { - path: 
Path::new("path-to-fourth").to_path_buf(), - analysis: Analysis::new([20.; 20]), - ..Default::default() - }; - - test_library.internal_storage = vec![ - first_song.to_owned(), - fourth_song.to_owned(), - third_song.to_owned(), - second_song.to_owned(), - ]; - assert_eq!( - vec![first_song.to_owned(), second_song, third_song], - test_library.playlist_from_song(first_song, 3).unwrap() - ); - } - - #[test] - fn test_playlist_from_song_too_little_songs() { - let mut test_library = TestLibrary::default(); - let first_song = Song { - path: Path::new("path-to-first").to_path_buf(), - analysis: Analysis::new([0.; 20]), - ..Default::default() - }; - - let second_song = Song { - path: Path::new("path-to-second").to_path_buf(), - analysis: Analysis::new([0.1; 20]), - ..Default::default() - }; - - let third_song = Song { - path: Path::new("path-to-third").to_path_buf(), - analysis: Analysis::new([10.; 20]), - ..Default::default() - }; - - test_library.internal_storage = vec![ - first_song.to_owned(), - second_song.to_owned(), - third_song.to_owned(), - ]; - assert_eq!( - vec![first_song.to_owned(), second_song, third_song], - test_library.playlist_from_song(first_song, 200).unwrap() - ); - } - - #[test] - fn test_analyze_empty_path() { - let mut test_library = TestLibrary::default(); - assert!(test_library.analyze_paths(vec![]).is_ok()); - } - - fn custom_distance(a: &Array1, b: &Array1) -> f32 { - if a == b { - return 0.; - } - 1. 
/ (a.first().unwrap() - b.first().unwrap()).abs() - } - - #[test] - fn test_playlist_from_song_custom_distance() { - let mut test_library = TestLibrary::default(); - let first_song = Song { - path: Path::new("path-to-first").to_path_buf(), - analysis: Analysis::new([0.; 20]), - ..Default::default() - }; - - let second_song = Song { - path: Path::new("path-to-second").to_path_buf(), - analysis: Analysis::new([0.1; 20]), - ..Default::default() - }; - - let third_song = Song { - path: Path::new("path-to-third").to_path_buf(), - analysis: Analysis::new([10.; 20]), - ..Default::default() - }; - - let fourth_song = Song { - path: Path::new("path-to-fourth").to_path_buf(), - analysis: Analysis::new([20.; 20]), - ..Default::default() - }; - - test_library.internal_storage = vec![ - first_song.to_owned(), - fourth_song.to_owned(), - third_song.to_owned(), - second_song.to_owned(), - ]; - assert_eq!( - vec![first_song.to_owned(), fourth_song, third_song], - test_library - .playlist_from_song_custom_distance(first_song, 3, custom_distance) - .unwrap() - ); - } - - fn custom_sort(_: &Song, songs: &mut Vec, _: impl DistanceMetric) { - songs.sort_by_key(|song| song.path.to_owned()); - } - - #[test] - fn test_playlist_from_song_custom() { - let mut test_library = TestLibrary::default(); - let first_song = Song { - path: Path::new("path-to-first").to_path_buf(), - analysis: Analysis::new([0.; 20]), - ..Default::default() - }; - - let second_song = Song { - path: Path::new("path-to-second").to_path_buf(), - analysis: Analysis::new([0.1; 20]), - ..Default::default() - }; - - let third_song = Song { - path: Path::new("path-to-third").to_path_buf(), - analysis: Analysis::new([10.; 20]), - ..Default::default() - }; - - let fourth_song = Song { - path: Path::new("path-to-fourth").to_path_buf(), - analysis: Analysis::new([20.; 20]), - ..Default::default() - }; - - test_library.internal_storage = vec![ - first_song.to_owned(), - fourth_song.to_owned(), - third_song.to_owned(), - 
second_song.to_owned(), - ]; - assert_eq!( - vec![first_song.to_owned(), fourth_song, second_song], - test_library - .playlist_from_song_custom(first_song, 3, custom_distance, custom_sort) - .unwrap() - ); - } -} diff --git a/src/distance.rs b/src/playlist.rs similarity index 69% rename from src/distance.rs rename to src/playlist.rs index 6e42cc3..3b943fc 100644 --- a/src/distance.rs +++ b/src/playlist.rs @@ -1,19 +1,17 @@ -//! Module containing various distance metric functions. +//! Module containing various functions to build playlists, as well as various +//! distance metrics. //! -//! All of these functions are intended to be used with the -//! [custom_distance](Song::custom_distance) method, or with -//! [playlist_from_songs_custom_distance](Library::playlist_from_song_custom_distance). +//! All of the distance functions are intended to be used with the +//! [custom_distance](Song::custom_distance) method. //! //! They will yield different styles of playlists, so don't hesitate to //! experiment with them if the default (euclidean distance for now) doesn't //! suit you. -#[cfg(doc)] -use crate::Library; -use crate::Song; -use crate::NUMBER_FEATURES; -use ndarray::{Array, Array1}; +use crate::{BlissError, BlissResult, Song, NUMBER_FEATURES}; +use ndarray::{Array, Array1, Array2, Axis}; use ndarray_stats::QuantileExt; use noisy_float::prelude::*; +use std::collections::HashMap; /// Convenience trait for user-defined distance metrics. pub trait DistanceMetric: Fn(&Array1, &Array1) -> f32 {} @@ -117,6 +115,92 @@ pub fn dedup_playlist_custom_distance( }); } +/// Return a list of albums in a `pool` of songs that are similar to +/// songs in `group`, discarding songs that don't belong to an album. +/// It basically makes an "album" playlist from the `pool` of songs. +/// +/// Songs from `group` would usually just be songs from an album, but not +/// necessarily - they are discarded from `pool` no matter what. +/// +/// # Arguments +/// +/// * `group` - A small group of songs, e.g. an album. 
+/// * `pool` - A pool of songs to find similar songs in, e.g. a user's song +/// library. +/// +/// # Returns +/// +/// A vector of songs, including `group` at the beginning, that you +/// most likely want to plug in your audio player by using something like +/// `ret.map(|song| song.path.to_owned()).collect::>()`. +pub fn closest_album_to_group(group: Vec, pool: Vec) -> BlissResult> { + let mut albums_analysis: HashMap<&str, Array2> = HashMap::new(); + let mut albums = Vec::new(); + + // Remove songs from the group from the pool. + let pool = pool + .into_iter() + .filter(|s| !group.contains(s)) + .collect::>(); + for song in &pool { + if let Some(album) = &song.album { + if let Some(analysis) = albums_analysis.get_mut(album as &str) { + analysis + .push_row(song.analysis.as_arr1().view()) + .map_err(|e| { + BlissError::ProviderError(format!("while computing distances: {}", e)) + })?; + } else { + let mut array = Array::zeros((1, song.analysis.as_arr1().len())); + array.assign(&song.analysis.as_arr1()); + albums_analysis.insert(album, array); + } + } + } + let mut group_analysis = Array::zeros((group.len(), NUMBER_FEATURES)); + for (song, mut column) in group.iter().zip(group_analysis.axis_iter_mut(Axis(0))) { + column.assign(&song.analysis.as_arr1()); + } + let first_analysis = group_analysis + .mean_axis(Axis(0)) + .ok_or_else(|| BlissError::ProviderError(String::from("Mean of empty slice")))?; + for (album, analysis) in albums_analysis.iter() { + let mean_analysis = analysis + .mean_axis(Axis(0)) + .ok_or_else(|| BlissError::ProviderError(String::from("Mean of empty slice")))?; + let album = album.to_owned(); + albums.push((album, mean_analysis.to_owned())); + } + + albums.sort_by_key(|(_, analysis)| n32(euclidean_distance(&first_analysis, analysis))); + let mut playlist = group; + for (album, _) in albums { + let mut al = pool + .iter() + .filter(|s| s.album.is_some() && s.album.as_ref().unwrap() == &album.to_string()) + .map(|s| s.to_owned()) + 
.collect::>(); + al.sort_by(|s1, s2| { + let track_number1 = s1 + .track_number + .to_owned() + .unwrap_or_else(|| String::from("")); + let track_number2 = s2 + .track_number + .to_owned() + .unwrap_or_else(|| String::from("")); + if let Ok(x) = track_number1.parse::() { + if let Ok(y) = track_number2.parse::() { + return x.cmp(&y); + } + } + s1.track_number.cmp(&s2.track_number) + }); + playlist.extend_from_slice(&al); + } + Ok(playlist) +} + #[cfg(test)] mod test { use super::*; @@ -227,7 +311,7 @@ mod test { vec![ first_song.to_owned(), second_song.to_owned(), - fourth_song.to_owned() + fourth_song.to_owned(), ] ); } @@ -389,4 +473,68 @@ mod test { assert_eq!(cosine_distance(&a, &b), 0.); assert_eq!(cosine_distance(&a, &b), 0.); } + + #[test] + fn test_closest_to_group() { + let first_song = Song { + path: Path::new("path-to-first").to_path_buf(), + analysis: Analysis::new([0.; 20]), + album: Some(String::from("Album")), + artist: Some(String::from("Artist")), + track_number: Some(String::from("01")), + ..Default::default() + }; + + let second_song = Song { + path: Path::new("path-to-second").to_path_buf(), + analysis: Analysis::new([0.1; 20]), + album: Some(String::from("Another Album")), + artist: Some(String::from("Artist")), + track_number: Some(String::from("10")), + ..Default::default() + }; + + let third_song = Song { + path: Path::new("path-to-third").to_path_buf(), + analysis: Analysis::new([10.; 20]), + album: Some(String::from("Album")), + artist: Some(String::from("Another Artist")), + track_number: Some(String::from("02")), + ..Default::default() + }; + + let fourth_song = Song { + path: Path::new("path-to-fourth").to_path_buf(), + analysis: Analysis::new([20.; 20]), + album: Some(String::from("Another Album")), + artist: Some(String::from("Another Artist")), + track_number: Some(String::from("01")), + ..Default::default() + }; + let fifth_song = Song { + path: Path::new("path-to-fifth").to_path_buf(), + analysis: Analysis::new([40.; 20]), + artist: 
Some(String::from("Third Artist")), + album: None, + ..Default::default() + }; + + let pool = vec![ + first_song.to_owned(), + fourth_song.to_owned(), + third_song.to_owned(), + second_song.to_owned(), + fifth_song.to_owned(), + ]; + let group = vec![first_song.to_owned(), third_song.to_owned()]; + assert_eq!( + vec![ + first_song.to_owned(), + third_song.to_owned(), + fourth_song.to_owned(), + second_song.to_owned() + ], + closest_album_to_group(group, pool.to_owned()).unwrap(), + ); + } } diff --git a/src/song.rs b/src/song.rs index b4efe9b..c032fd8 100644 --- a/src/song.rs +++ b/src/song.rs @@ -13,8 +13,10 @@ extern crate ndarray; extern crate ndarray_npy; use crate::chroma::ChromaDesc; -use crate::distance::{euclidean_distance, DistanceMetric}; use crate::misc::LoudnessDesc; +#[cfg(doc)] +use crate::playlist; +use crate::playlist::{closest_to_first_song, dedup_playlist, euclidean_distance, DistanceMetric}; use crate::temporal::BPMDesc; use crate::timbral::{SpectralDesc, ZeroCrossingRateDesc}; use crate::{BlissError, BlissResult, SAMPLE_RATE}; @@ -228,6 +230,44 @@ impl Song { self.analysis.custom_distance(&other.analysis, distance) } + /// Orders songs in `pool` by proximity to `self`, using the distance + /// metric `distance` to compute the order. + /// Basically return a playlist from songs in `pool`, starting + /// from `self`, using `distance` (some distance metrics can + /// be found in the [playlist] module). + /// + /// Note that contrary to [Song::closest_from_pool], `self` is NOT added + /// to the beginning of the returned vector. + /// + /// No deduplication is run either; if you're looking for something easy + /// that works "out of the box", use [Song::closest_from_pool]. + pub fn closest_from_pool_custom( + &self, + pool: Vec, + distance: impl DistanceMetric, + ) -> Vec { + let mut pool = pool; + closest_to_first_song(self, &mut pool, distance); + pool + } + + /// Order songs in `pool` by proximity to `self`.
+ /// Convenience method to return a playlist from songs in `pool`, + /// starting from `self`. + /// + /// The distance is already chosen, deduplication is run, and the first song + /// is added to the top of the playlist, to make everything easier. + /// + /// If you want more control over which distance metric is chosen, + /// run deduplication manually, etc., use [Song::closest_from_pool_custom]. + pub fn closest_from_pool(&self, pool: Vec) -> Vec { + let mut playlist = vec![self.to_owned()]; + playlist.extend_from_slice(&pool); + closest_to_first_song(self, &mut playlist, euclidean_distance); + dedup_playlist(&mut playlist, None); + playlist + } + /// Returns a decoded [Song] given a file path, or an error if the song /// could not be analyzed for some reason. /// @@ -848,6 +888,7 @@ mod tests { fn dummy_distance(_: &Array1, _: &Array1) -> f32 { 0. } + #[test] fn test_custom_distance() { let mut a = Song::default(); @@ -865,6 +906,84 @@ mod tests { ]); assert_eq!(a.custom_distance(&b, dummy_distance), 0.); } + + #[test] + fn test_closest_from_pool() { + let song = Song { + path: Path::new("path-to-first").to_path_buf(), + analysis: Analysis::new([ + 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., + ]), + ..Default::default() + }; + let first_song_dupe = Song { + path: Path::new("path-to-dupe").to_path_buf(), + analysis: Analysis::new([ + 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., + ]), + ..Default::default() + }; + + let second_song = Song { + path: Path::new("path-to-second").to_path_buf(), + analysis: Analysis::new([ + 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 1.9, 1., 1., 1., + ]), + ..Default::default() + }; + let third_song = Song { + path: Path::new("path-to-third").to_path_buf(), + analysis: Analysis::new([ + 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.5, 1., 1., 1., + ]), + ..Default::default() + }; + let fourth_song = Song { + path:
Path::new("path-to-fourth").to_path_buf(), + analysis: Analysis::new([ + 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 0., 1., 1., 1., + ]), + ..Default::default() + }; + let fifth_song = Song { + path: Path::new("path-to-fifth").to_path_buf(), + analysis: Analysis::new([ + 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 0., 1., 1., 1., + ]), + ..Default::default() + }; + + let songs = vec![ + song.to_owned(), + first_song_dupe.to_owned(), + second_song.to_owned(), + third_song.to_owned(), + fourth_song.to_owned(), + fifth_song.to_owned(), + ]; + let playlist = song.closest_from_pool(songs.to_owned()); + assert_eq!( + playlist, + vec![ + song.to_owned(), + second_song.to_owned(), + fourth_song.to_owned(), + third_song.to_owned(), + ], + ); + let playlist = song.closest_from_pool_custom(songs, euclidean_distance); + assert_eq!( + playlist, + vec![ + song, + first_song_dupe, + second_song, + fourth_song, + fifth_song, + third_song + ], + ); + } } #[cfg(all(feature = "bench", test))]