Add negation to like filter and improve string sanitisation (again)

This commit is contained in:
NGnius (Graham) 2022-06-18 21:46:33 -04:00
parent e4535399f9
commit e4aec77f9a
7 changed files with 143 additions and 23 deletions

7
Cargo.lock generated
View file

@ -1245,6 +1245,7 @@ dependencies = [
"rusqlite", "rusqlite",
"shellexpand", "shellexpand",
"symphonia 0.5.0", "symphonia 0.5.0",
"unidecode",
] ]
[[package]] [[package]]
@ -2607,6 +2608,12 @@ version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3ed742d4ea2bd1176e236172c8429aaf54486e7ac098db29ffe6529e0ce50973" checksum = "3ed742d4ea2bd1176e236172c8429aaf54486e7ac098db29ffe6529e0ce50973"
[[package]]
name = "unidecode"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "402bb19d8e03f1d1a7450e2bd613980869438e0666331be3e073089124aa1adc"
[[package]] [[package]]
name = "utf8-ranges" name = "utf8-ranges"
version = "0.1.3" version = "0.1.3"

View file

@ -17,6 +17,7 @@ rand = { version = "0.8" }
shellexpand = { version = "2", optional = true } shellexpand = { version = "2", optional = true }
bliss-audio-symphonia = { version = "0.4", optional = true, path = "../bliss-rs" } bliss-audio-symphonia = { version = "0.4", optional = true, path = "../bliss-rs" }
mpd = { version = "0.0.12", optional = true } mpd = { version = "0.0.12", optional = true }
unidecode = { version = "0.3.0", optional = true }
[dev-dependencies] [dev-dependencies]
criterion = "0.3" criterion = "0.3"
@ -28,5 +29,5 @@ harness = false
[features] [features]
default = [ "music_library", "ergonomics", "advanced" ] default = [ "music_library", "ergonomics", "advanced" ]
music_library = [ "symphonia", "mpd" ] # song metadata parsing and database auto-population music_library = [ "symphonia", "mpd" ] # song metadata parsing and database auto-population
ergonomics = ["shellexpand"] # niceties like ~ in pathes ergonomics = ["shellexpand", "unidecode"] # niceties like ~ in paths and unicode string sanitisation
advanced = ["bliss-audio-symphonia"] # advanced language features like bliss playlist generation advanced = ["bliss-audio-symphonia"] # advanced language features like bliss playlist generation

View file

@ -2,7 +2,7 @@ use std::collections::VecDeque;
use std::fmt::{Debug, Display, Error, Formatter}; use std::fmt::{Debug, Display, Error, Formatter};
use super::field_filter::{FieldFilterErrorHandling, VariableOrValue}; use super::field_filter::{FieldFilterErrorHandling, VariableOrValue};
use crate::lang::utility::{assert_name, assert_token, assert_token_raw, check_name}; use crate::lang::utility::{assert_token, assert_token_raw, check_name};
use crate::lang::MpsLanguageDictionary; use crate::lang::MpsLanguageDictionary;
use crate::lang::MpsTypePrimitive; use crate::lang::MpsTypePrimitive;
use crate::lang::{MpsFilterFactory, MpsFilterPredicate, MpsFilterStatementFactory}; use crate::lang::{MpsFilterFactory, MpsFilterPredicate, MpsFilterStatementFactory};
@ -17,10 +17,13 @@ pub struct FieldLikeFilter {
field_name: String, field_name: String,
field_errors: FieldFilterErrorHandling, field_errors: FieldFilterErrorHandling,
val: VariableOrValue, val: VariableOrValue,
negate: bool,
} }
impl FieldLikeFilter { impl FieldLikeFilter {
fn sanitise_string(s: &str) -> String { fn sanitise_string(s: &str) -> String {
#[cfg(feature = "unidecode")]
let s = unidecode::unidecode(s);
s.replace(|c: char| c.is_whitespace() || c == '_' || c == '-', "") s.replace(|c: char| c.is_whitespace() || c == '_' || c == '-', "")
.replace(|c: char| !(c.is_whitespace() || c.is_alphanumeric()), "") .replace(|c: char| !(c.is_whitespace() || c.is_alphanumeric()), "")
.to_lowercase() .to_lowercase()
@ -54,7 +57,12 @@ impl MpsFilterPredicate for FieldLikeFilter {
if let Some(field) = music_item_lut.field(&self.field_name) { if let Some(field) = music_item_lut.field(&self.field_name) {
let field_str = Self::sanitise_string(&field.as_str()); let field_str = Self::sanitise_string(&field.as_str());
let var_str = Self::sanitise_string(variable); let var_str = Self::sanitise_string(variable);
Ok(field_str.contains(&var_str)) let matches = field_str.contains(&var_str);
if self.negate {
Ok(!matches)
} else {
Ok(matches)
}
} else { } else {
match self.field_errors { match self.field_errors {
FieldFilterErrorHandling::Error => Err(RuntimeMsg(format!( FieldFilterErrorHandling::Error => Err(RuntimeMsg(format!(
@ -83,11 +91,11 @@ impl MpsFilterFactory<FieldLikeFilter> for FieldLikeFilterFactory {
let tokens_len = tokens.len(); let tokens_len = tokens.len();
(tokens_len >= 2 // field like variable (tokens_len >= 2 // field like variable
&& tokens[0].is_name() && tokens[0].is_name()
&& check_name("like", tokens[1])) && (check_name("like", tokens[1]) || check_name("unlike", tokens[1])))
|| (tokens_len >= 3 // field? like variable OR field! like variable || (tokens_len >= 3 // field? like variable OR field! like variable
&& tokens[0].is_name() && tokens[0].is_name()
&& (tokens[1].is_interrogation() || tokens[1].is_exclamation()) && (tokens[1].is_interrogation() || tokens[1].is_exclamation())
&& check_name("like", tokens[2])) && (check_name("like", tokens[2]) || check_name("unlike", tokens[2])))
} }
fn build_filter( fn build_filter(
@ -112,7 +120,21 @@ impl MpsFilterFactory<FieldLikeFilter> for FieldLikeFilterFactory {
} else { } else {
FieldFilterErrorHandling::Error FieldFilterErrorHandling::Error
}; };
assert_name("like", tokens)?; let name = assert_token(
|t| match t {
MpsToken::Name(s) => {
match &s as _ {
"unlike" | "like" => Some(s),
_ => None,
}
},
_ => None
},
MpsToken::Literal("like|unlike".into()),
tokens,
)?;
let is_negated = name == "unlike";
//assert_name("like", tokens)?;
if tokens[0].is_literal() { if tokens[0].is_literal() {
let literal = assert_token( let literal = assert_token(
|t| match t { |t| match t {
@ -128,6 +150,7 @@ impl MpsFilterFactory<FieldLikeFilter> for FieldLikeFilterFactory {
field_name: field, field_name: field,
field_errors: error_handling, field_errors: error_handling,
val: value, val: value,
negate: is_negated,
}) })
} else { } else {
let variable = VariableOrValue::Variable(assert_token( let variable = VariableOrValue::Variable(assert_token(
@ -143,6 +166,7 @@ impl MpsFilterFactory<FieldLikeFilter> for FieldLikeFilterFactory {
field_name: field, field_name: field,
field_errors: FieldFilterErrorHandling::Error, field_errors: FieldFilterErrorHandling::Error,
val: variable, val: variable,
negate: is_negated,
}) })
} }
} }

View file

@ -45,6 +45,8 @@
//! //!
//! ### field like something //! ### field like something
//! //!
//! ### field unlike something
//!
//! ### field matches some_regex //! ### field matches some_regex
//! //!
//! ### field != something //! ### field != something

View file

@ -78,7 +78,18 @@ impl MpsDefaultAnalyzer {
fn get_path(item: &MpsItem) -> Result<&str, RuntimeMsg> { fn get_path(item: &MpsItem) -> Result<&str, RuntimeMsg> {
if let Some(path) = item.field(PATH_FIELD) { if let Some(path) = item.field(PATH_FIELD) {
if let MpsTypePrimitive::String(path) = path { if let MpsTypePrimitive::String(path) = path {
if path.starts_with("file://") {
//println!("path guess: `{}`", path.get(7..).unwrap());
Ok(path.get(7..).unwrap())
} else if !path.contains("://") {
Ok(path) Ok(path)
} else {
Err(RuntimeMsg(format!(
"Field {} on item is not a supported URI, it's {}",
PATH_FIELD, path
)))
}
} else { } else {
Err(RuntimeMsg(format!( Err(RuntimeMsg(format!(
"Field {} on item is not String, it's {}", "Field {} on item is not String, it's {}",
@ -116,12 +127,13 @@ impl MpsMusicAnalyzer for MpsDefaultAnalyzer {
let path_from = Self::get_path(from)?; let path_from = Self::get_path(from)?;
let path_to = Self::get_path(to)?; let path_to = Self::get_path(to)?;
for response in self.responses.iter() { for response in self.responses.iter() {
if let ResponseType::Distance { match response {
ResponseType::Distance {
path1, path1,
path2, path2,
distance, distance,
} = response } => {
{ //println!("Got distance from `{}` to `{}`: {}", path1, path2, distance.as_ref().ok().unwrap_or(&f32::INFINITY));
if path1 == path_from && path2 == path_to { if path1 == path_from && path2 == path_to {
return match distance { return match distance {
Ok(d) => Ok(d as f64), Ok(d) => Ok(d as f64),
@ -129,6 +141,13 @@ impl MpsMusicAnalyzer for MpsDefaultAnalyzer {
}; };
} }
} }
ResponseType::Song { .. } => {},
ResponseType::UnsupportedSong { path, msg } => {
if path == path_to || path == path_from {
return Err(RuntimeMsg(format!("Bliss error: {}", msg)));
}
}
}
} }
Err(RuntimeMsg( Err(RuntimeMsg(
"Channel closed without response: internal error".to_owned(), "Channel closed without response: internal error".to_owned(),
@ -191,6 +210,10 @@ enum ResponseType {
path: String, path: String,
song: Result<Song, BlissError>, song: Result<Song, BlissError>,
}, },
UnsupportedSong {
path: String,
msg: String,
}
} }
#[cfg(feature = "bliss-audio-symphonia")] #[cfg(feature = "bliss-audio-symphonia")]
@ -225,10 +248,11 @@ impl CacheThread {
distance, distance,
} => { } => {
self.insert_distance(path1, path2, distance); self.insert_distance(path1, path2, distance);
} },
ResponseType::Song { path, song } => { ResponseType::Song { path, song } => {
self.insert_song(path, song); self.insert_song(path, song);
} },
ResponseType::UnsupportedSong { .. } => {},
} }
} }
} }
@ -285,6 +309,12 @@ impl CacheThread {
} else { } else {
self.insert_song(path2, song); self.insert_song(path2, song);
} }
},
ResponseType::UnsupportedSong {path: unsupported_path, ..} => {
self.song_in_progress.remove(&unsupported_path);
if path == unsupported_path {
return None;
}
} }
} }
} }
@ -358,12 +388,18 @@ impl CacheThread {
distance, distance,
} => { } => {
self.insert_distance(path1, path2, distance); self.insert_distance(path1, path2, distance);
} },
ResponseType::Song { path: path2, song } => { ResponseType::Song { path: path2, song } => {
self.insert_song(path2.clone(), song.clone()); self.insert_song(path2.clone(), song.clone());
if self.song_in_progress.len() <= available_parallelism { if self.song_in_progress.len() <= available_parallelism {
break 'inner4; break 'inner4;
} }
},
ResponseType::UnsupportedSong {path: unsupported_path, ..} => {
self.song_in_progress.remove(&unsupported_path);
if self.song_in_progress.len() <= available_parallelism {
break 'inner4;
}
} }
} }
} }
@ -409,6 +445,18 @@ impl CacheThread {
} }
ResponseType::Song { path, song } => { ResponseType::Song { path, song } => {
self.insert_song(path, song); self.insert_song(path, song);
},
ResponseType::UnsupportedSong { path: unsupported_path, msg } => {
self.song_in_progress.remove(&unsupported_path);
if let Err(_) = self.responses.send(ResponseType::UnsupportedSong {
path: unsupported_path.clone(),
msg: msg
}) {
return true;
}
if unsupported_path == key.0 || unsupported_path == key.1 {
break 'inner1;
}
} }
} }
} }
@ -424,6 +472,20 @@ impl CacheThread {
worker_tx: &Sender<ResponseType>, worker_tx: &Sender<ResponseType>,
worker_results: &Receiver<ResponseType>, worker_results: &Receiver<ResponseType>,
) -> bool { ) -> bool {
let path = if path.starts_with("file://") {
//println!("path guess: `{}`", path.get(7..).unwrap());
path.get(7..).unwrap().to_owned()
} else if !path.contains("://") {
path
} else {
if let Err(_) = self.responses.send(ResponseType::UnsupportedSong {
msg: format!("Song path is not a supported URI, it's `{}`", path),
path: path,
}) {
return true;
}
return false;
};
if let Some(song) = self.song_cache.get(&path) { if let Some(song) = self.song_cache.get(&path) {
if ack { if ack {
let song = song.to_owned(); let song = song.to_owned();
@ -460,6 +522,12 @@ impl CacheThread {
if self.song_in_progress.len() <= available_parallelism { if self.song_in_progress.len() <= available_parallelism {
break 'inner2; break 'inner2;
} }
},
ResponseType::UnsupportedSong { path, .. } => {
self.song_in_progress.remove(&path);
if self.song_in_progress.len() <= available_parallelism {
break 'inner2;
}
} }
} }
} }
@ -493,7 +561,19 @@ impl CacheThread {
path: path, path: path,
song: song, song: song,
}) { }) {
return false; return true;
}
break 'inner3;
}
}
ResponseType::UnsupportedSong { path: unsupported_path, msg } => {
self.song_in_progress.remove(&unsupported_path);
if unsupported_path == path {
if let Err(_) = self.responses.send(ResponseType::UnsupportedSong {
path: unsupported_path,
msg: msg
}) {
return true;
} }
break 'inner3; break 'inner3;
} }

View file

@ -350,6 +350,11 @@ fn execute_likefilter_line() -> Result<(), MpsError> {
"files(`~/Music/MusicFlac/Bruno Mars/24K Magic/`).(album like `24K Magic`)", "files(`~/Music/MusicFlac/Bruno Mars/24K Magic/`).(album like `24K Magic`)",
false, false,
true, true,
)?;
execute_single_line(
"files(`~/Music/MusicFlac/Bruno Mars/24K Magic/`).(album unlike `24K Magic`)",
true,
true,
) )
} }

View file

@ -61,6 +61,7 @@ Operations to reduce the items in an iterable: iterable.(filter)
field == something field == something
field like something field like something
field unlike something
field matches some_regex field matches some_regex
field != something field != something
field >= something field >= something