From e4aec77f9ac05680d8efbdd9374145f5beca13df Mon Sep 17 00:00:00 2001 From: "NGnius (Graham)" Date: Sat, 18 Jun 2022 21:46:33 -0400 Subject: [PATCH] Add negation to like filter and improve string sanitisation (again) --- Cargo.lock | 7 ++ mps-interpreter/Cargo.toml | 3 +- .../vocabulary/filters/field_like_filter.rs | 36 +++++- mps-interpreter/src/lib.rs | 2 + .../src/processing/music_analysis.rs | 112 +++++++++++++++--- mps-interpreter/tests/single_line.rs | 5 + src/help.rs | 1 + 7 files changed, 143 insertions(+), 23 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c2e54be..bd473a6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1245,6 +1245,7 @@ dependencies = [ "rusqlite", "shellexpand", "symphonia 0.5.0", + "unidecode", ] [[package]] @@ -2607,6 +2608,12 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3ed742d4ea2bd1176e236172c8429aaf54486e7ac098db29ffe6529e0ce50973" +[[package]] +name = "unidecode" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "402bb19d8e03f1d1a7450e2bd613980869438e0666331be3e073089124aa1adc" + [[package]] name = "utf8-ranges" version = "0.1.3" diff --git a/mps-interpreter/Cargo.toml b/mps-interpreter/Cargo.toml index 02df3b3..abaaec1 100644 --- a/mps-interpreter/Cargo.toml +++ b/mps-interpreter/Cargo.toml @@ -17,6 +17,7 @@ rand = { version = "0.8" } shellexpand = { version = "2", optional = true } bliss-audio-symphonia = { version = "0.4", optional = true, path = "../bliss-rs" } mpd = { version = "0.0.12", optional = true } +unidecode = { version = "0.3.0", optional = true } [dev-dependencies] criterion = "0.3" @@ -28,5 +29,5 @@ harness = false [features] default = [ "music_library", "ergonomics", "advanced" ] music_library = [ "symphonia", "mpd" ] # song metadata parsing and database auto-population -ergonomics = ["shellexpand"] # niceties like ~ in pathes +ergonomics = ["shellexpand", "unidecode"] # niceties like ~ in paths and unicode string sanitisation advanced = ["bliss-audio-symphonia"] # advanced language features like bliss playlist generation diff --git a/mps-interpreter/src/lang/vocabulary/filters/field_like_filter.rs b/mps-interpreter/src/lang/vocabulary/filters/field_like_filter.rs index a9639de..a140f65 100644 --- a/mps-interpreter/src/lang/vocabulary/filters/field_like_filter.rs +++ b/mps-interpreter/src/lang/vocabulary/filters/field_like_filter.rs @@ -2,7 +2,7 @@ use std::collections::VecDeque; use std::fmt::{Debug, Display, Error, Formatter}; use super::field_filter::{FieldFilterErrorHandling, VariableOrValue}; -use crate::lang::utility::{assert_name, assert_token, assert_token_raw, check_name}; +use crate::lang::utility::{assert_token, assert_token_raw, check_name}; use crate::lang::MpsLanguageDictionary; use crate::lang::MpsTypePrimitive; use crate::lang::{MpsFilterFactory, MpsFilterPredicate, MpsFilterStatementFactory}; @@ -17,11 +17,14 @@ pub struct FieldLikeFilter { field_name: String, field_errors: FieldFilterErrorHandling, val: VariableOrValue, + negate: bool, } impl FieldLikeFilter { fn sanitise_string(s: &str) -> String { - s.replace(|c: char| c.is_whitespace() || c == '_' || c == '-', " ") + #[cfg(feature = "unidecode")] + let s = unidecode::unidecode(s); + s.replace(|c: char| c.is_whitespace() || c == '_' || c == '-', "") .replace(|c: char| !(c.is_whitespace() || c.is_alphanumeric()), "") .to_lowercase() } @@ -54,7 +57,12 @@ impl MpsFilterPredicate for FieldLikeFilter { if let Some(field) = music_item_lut.field(&self.field_name) { let field_str = Self::sanitise_string(&field.as_str()); let var_str = Self::sanitise_string(variable); - Ok(field_str.contains(&var_str)) + let matches = field_str.contains(&var_str); + if self.negate { + Ok(!matches) + } else { + Ok(matches) + } } else { match self.field_errors { FieldFilterErrorHandling::Error => Err(RuntimeMsg(format!( @@ -83,11 +91,11 @@ impl MpsFilterFactory for FieldLikeFilterFactory { let tokens_len = tokens.len(); (tokens_len >= 2 // field like variable && tokens[0].is_name() - && check_name("like", tokens[1])) + && (check_name("like", tokens[1]) || check_name("unlike", tokens[1]))) || (tokens_len >= 3 // field? like variable OR field! like variable && tokens[0].is_name() && (tokens[1].is_interrogation() || tokens[1].is_exclamation()) - && check_name("like", tokens[2])) + && (check_name("like", tokens[2]) || check_name("unlike", tokens[2]))) } fn build_filter( @@ -112,7 +120,21 @@ impl MpsFilterFactory for FieldLikeFilterFactory { } else { FieldFilterErrorHandling::Error }; - assert_name("like", tokens)?; + let name = assert_token( + |t| match t { + MpsToken::Name(s) => { + match &s as _ { + "unlike" | "like" => Some(s), + _ => None, + } + }, + _ => None + }, + MpsToken::Literal("like|unlike".into()), + tokens, + )?; + let is_negated = name == "unlike"; + //assert_name("like", tokens)?; if tokens[0].is_literal() { let literal = assert_token( |t| match t { @@ -128,6 +150,7 @@ impl MpsFilterFactory for FieldLikeFilterFactory { field_name: field, field_errors: error_handling, val: value, + negate: is_negated, }) } else { let variable = VariableOrValue::Variable(assert_token( @@ -143,6 +166,7 @@ impl MpsFilterFactory for FieldLikeFilterFactory { field_name: field, field_errors: FieldFilterErrorHandling::Error, val: variable, + negate: is_negated, }) } } diff --git a/mps-interpreter/src/lib.rs b/mps-interpreter/src/lib.rs index ae561e7..ffe0c3e 100644 --- a/mps-interpreter/src/lib.rs +++ b/mps-interpreter/src/lib.rs @@ -45,6 +45,8 @@ //! //! ### field like something //! +//! ### field unlike something +//! //! ### field matches some_regex //! //! ### field != something diff --git a/mps-interpreter/src/processing/music_analysis.rs b/mps-interpreter/src/processing/music_analysis.rs index 278490c..f0393d8 100644 --- a/mps-interpreter/src/processing/music_analysis.rs +++ b/mps-interpreter/src/processing/music_analysis.rs @@ -78,7 +78,18 @@ impl MpsDefaultAnalyzer { fn get_path(item: &MpsItem) -> Result<&str, RuntimeMsg> { if let Some(path) = item.field(PATH_FIELD) { if let MpsTypePrimitive::String(path) = path { - Ok(path) + if path.starts_with("file://") { + //println!("path guess: `{}`", path.get(7..).unwrap()); + Ok(path.get(7..).unwrap()) + } else if !path.contains("://") { + Ok(path) + } else { + Err(RuntimeMsg(format!( + "Field {} on item is not a supported URI, it's {}", + PATH_FIELD, path + ))) + } + } else { Err(RuntimeMsg(format!( "Field {} on item is not String, it's {}", @@ -116,17 +127,25 @@ impl MpsMusicAnalyzer for MpsDefaultAnalyzer { let path_from = Self::get_path(from)?; let path_to = Self::get_path(to)?; for response in self.responses.iter() { - if let ResponseType::Distance { - path1, - path2, - distance, - } = response - { - if path1 == path_from && path2 == path_to { - return match distance { - Ok(d) => Ok(d as f64), - Err(e) => Err(RuntimeMsg(format!("Bliss error: {}", e))), - }; + match response { + ResponseType::Distance { + path1, + path2, + distance, + } => { + //println!("Got distance from `{}` to `{}`: {}", path1, path2, distance.as_ref().ok().unwrap_or(&f32::INFINITY)); + if path1 == path_from && path2 == path_to { + return match distance { + Ok(d) => Ok(d as f64), + Err(e) => Err(RuntimeMsg(format!("Bliss error: {}", e))), + }; + } + } + ResponseType::Song { .. } => {}, + ResponseType::UnsupportedSong { path, msg } => { + if path == path_to || path == path_from { + return Err(RuntimeMsg(format!("Bliss error: {}", msg))); + } } } } @@ -191,6 +210,10 @@ enum ResponseType { path: String, song: Result, }, + UnsupportedSong { + path: String, + msg: String, + } } #[cfg(feature = "bliss-audio-symphonia")] @@ -225,10 +248,11 @@ impl CacheThread { distance, } => { self.insert_distance(path1, path2, distance); - } + }, ResponseType::Song { path, song } => { self.insert_song(path, song); - } + }, + ResponseType::UnsupportedSong { .. } => {}, } } } @@ -285,6 +309,12 @@ impl CacheThread { } else { self.insert_song(path2, song); } + }, + ResponseType::UnsupportedSong {path: unsupported_path, ..} => { + self.song_in_progress.remove(&unsupported_path); + if path == unsupported_path { + return None; + } } } } @@ -358,12 +388,18 @@ impl CacheThread { distance, } => { self.insert_distance(path1, path2, distance); - } + }, ResponseType::Song { path: path2, song } => { self.insert_song(path2.clone(), song.clone()); if self.song_in_progress.len() <= available_parallelism { break 'inner4; } + }, + ResponseType::UnsupportedSong {path: unsupported_path, ..} => { + self.song_in_progress.remove(&unsupported_path); + if self.song_in_progress.len() <= available_parallelism { + break 'inner4; + } } } } @@ -409,6 +445,18 @@ impl CacheThread { } ResponseType::Song { path, song } => { self.insert_song(path, song); + }, + ResponseType::UnsupportedSong { path: unsupported_path, msg } => { + self.song_in_progress.remove(&unsupported_path); + if let Err(_) = self.responses.send(ResponseType::UnsupportedSong { + path: unsupported_path.clone(), + msg: msg + }) { + return true; + } + if unsupported_path == key.0 || unsupported_path == key.1 { + break 'inner1; + } } } } @@ -424,6 +472,20 @@ impl CacheThread { worker_tx: &Sender, worker_results: &Receiver, ) -> bool { + let path = if path.starts_with("file://") { + //println!("path guess: `{}`", path.get(7..).unwrap()); + path.get(7..).unwrap().to_owned() + } else if !path.contains("://") { + path + } else { + if let Err(_) = self.responses.send(ResponseType::UnsupportedSong { + msg: format!("Song path is not a supported URI, it's `{}`", path), + path: path, + }) { + return true; + } + return false; + }; if let Some(song) = self.song_cache.get(&path) { if ack { let song = song.to_owned(); @@ -460,6 +522,12 @@ impl CacheThread { if self.song_in_progress.len() <= available_parallelism { break 'inner2; } + }, + ResponseType::UnsupportedSong { path, .. } => { + self.song_in_progress.remove(&path); + if self.song_in_progress.len() <= available_parallelism { + break 'inner2; + } } } } @@ -493,7 +561,19 @@ impl CacheThread { path: path, song: song, }) { - return false; + return true; + } + break 'inner3; + } + } + ResponseType::UnsupportedSong { path: unsupported_path, msg } => { + self.song_in_progress.remove(&unsupported_path); + if unsupported_path == path { + if let Err(_) = self.responses.send(ResponseType::UnsupportedSong { + path: unsupported_path, + msg: msg + }) { + return true; } break 'inner3; } diff --git a/mps-interpreter/tests/single_line.rs b/mps-interpreter/tests/single_line.rs index 8dedc2a..468cbb6 100644 --- a/mps-interpreter/tests/single_line.rs +++ b/mps-interpreter/tests/single_line.rs @@ -350,6 +350,11 @@ fn execute_likefilter_line() -> Result<(), MpsError> { "files(`~/Music/MusicFlac/Bruno Mars/24K Magic/`).(album like `24K Magic`)", false, true, + )?; + execute_single_line( + "files(`~/Music/MusicFlac/Bruno Mars/24K Magic/`).(album unlike `24K Magic`)", + true, + true, ) } diff --git a/src/help.rs b/src/help.rs index 2576e74..d06b739 100644 --- a/src/help.rs +++ b/src/help.rs @@ -61,6 +61,7 @@ Operations to reduce the items in an iterable: iterable.(filter) field == something field like something + field unlike something field matches some_regex field != something field >= something