From 19748d33acdfdad9fc73735461f5da335737a2d6 Mon Sep 17 00:00:00 2001 From: "NGnius (Graham)" Date: Wed, 2 Feb 2022 13:12:56 -0500 Subject: [PATCH] Add regex pattern field filter --- mps-interpreter/README.md | 3 + mps-interpreter/src/interpretor.rs | 1 + .../vocabulary/filters/field_match_filter.rs | 166 ++++++++++++++++++ .../src/lang/vocabulary/filters/mod.rs | 4 + mps-interpreter/src/lib.rs | 3 + mps-interpreter/tests/single_line.rs | 14 ++ src/help.rs | 1 + 7 files changed, 192 insertions(+) create mode 100644 mps-interpreter/src/lang/vocabulary/filters/field_match_filter.rs diff --git a/mps-interpreter/README.md b/mps-interpreter/README.md index 5a969a5..f6cf072 100644 --- a/mps-interpreter/README.md +++ b/mps-interpreter/README.md @@ -50,6 +50,8 @@ E.g. `files(folder="~/Music/", recursive=true).(title == "Romantic Traffic");` i #### field like something +#### field matches some_regex + #### field != something #### field >= something @@ -62,6 +64,7 @@ E.g. `files(folder="~/Music/", recursive=true).(title == "Romantic Traffic");` i Compare all items, keeping only those that match the condition. Valid field names are those of the MpsMusicItem (title, artist, album, genre, track, etc.), though this will change when proper object support is added. Optionally, a ? or ! can be added to the end of the field name to skip items whose field is missing/incomparable, or keep all items whose field is missing/incomparable (respectively). + #### start..end -- e.g. `iterable.(0..42);` Keep only the items that are at the start index up to the end index. Start and/or end may be omitted to start/stop at the iterable's existing start/end (respectively). This stops once the end condition is met, leaving the rest of the iterator unconsumed. 
diff --git a/mps-interpreter/src/interpretor.rs b/mps-interpreter/src/interpretor.rs index f4aeb95..6f2db4c 100644 --- a/mps-interpreter/src/interpretor.rs +++ b/mps-interpreter/src/interpretor.rs @@ -156,6 +156,7 @@ pub(crate) fn standard_vocab(vocabulary: &mut MpsLanguageDictionary) { .add(crate::lang::vocabulary::filters::index_filter()) .add(crate::lang::vocabulary::filters::range_filter()) .add(crate::lang::vocabulary::filters::field_like_filter()) + .add(crate::lang::vocabulary::filters::field_re_filter()) // sorters .add(crate::lang::vocabulary::sorters::empty_sort()) .add(crate::lang::vocabulary::sorters::shuffle_sort()) // accepts valid field ~(shuffle) diff --git a/mps-interpreter/src/lang/vocabulary/filters/field_match_filter.rs b/mps-interpreter/src/lang/vocabulary/filters/field_match_filter.rs new file mode 100644 index 0000000..786e707 --- /dev/null +++ b/mps-interpreter/src/lang/vocabulary/filters/field_match_filter.rs @@ -0,0 +1,166 @@ +use std::collections::VecDeque; +use std::fmt::{Debug, Display, Error, Formatter}; + +use regex::Regex; + +use super::field_filter::{FieldFilterErrorHandling, VariableOrValue}; +use crate::lang::utility::{assert_name, assert_token, assert_token_raw, check_name}; +use crate::lang::MpsLanguageDictionary; +use crate::lang::MpsTypePrimitive; +use crate::lang::{MpsFilterFactory, MpsFilterPredicate, MpsFilterStatementFactory}; +use crate::lang::{RuntimeMsg, SyntaxError}; +use crate::processing::general::MpsType; +use crate::tokens::MpsToken; +use crate::MpsContext; +use crate::MpsItem; + +#[derive(Debug, Clone)] +pub struct FieldRegexFilter { // filter predicate for `field matches regex` + field_name: String, // name of the item field whose value is tested against the regex + field_errors: FieldFilterErrorHandling, // what to return when the item has no such field: error, Ok(false) or Ok(true) + val: VariableOrValue, // source of the pattern: a string literal or the name of a variable holding one + regex_cache: Option<(String, Regex)>, // (pattern text the regex was compiled from, compiled regex) +} + +impl Display for FieldRegexFilter { + fn fmt(&self, f: &mut Formatter) -> Result<(), Error> { + match &self.val { + VariableOrValue::Variable(name) => write!(f, "{} matches {}", self.field_name, name), + VariableOrValue::Value(t) => write!(f, "{} 
matches {}", self.field_name, t), + } + } +} + +impl MpsFilterPredicate for FieldRegexFilter { + fn matches( + &mut self, + music_item_lut: &MpsItem, + ctx: &mut MpsContext, + ) -> Result<bool, RuntimeMsg> { // Ok(regex match result); Err for a bad variable/regex or (per field_errors) a missing field + let variable = match &self.val { + VariableOrValue::Variable(name) => match ctx.variables.get(name)? { + MpsType::Primitive(MpsTypePrimitive::String(s)) => Ok(s), + _ => Err(RuntimeMsg(format!("Variable {} is not comparable", name))), + }, + VariableOrValue::Value(MpsTypePrimitive::String(s)) => Ok(s), + // non-string values will be stopped at parse-time, so this should never occur + _ => Err(RuntimeMsg("Value is not type String".to_string())), + }?; + let pattern = if let Some((_, regex_c)) = self.regex_cache.as_ref().filter(|(cached, _)| cached == variable) { // cache hit only if it was compiled from the current pattern text + regex_c + } else { + let regex_c = Regex::new(variable).map_err(|e| RuntimeMsg(format!("Regex compile error: {}", e)))?; + self.regex_cache = Some((variable.clone(), regex_c)); + &self.regex_cache.as_ref().unwrap().1 + }; + if let Some(field) = music_item_lut.field(&self.field_name) { + let field_str = field.as_str(); + Ok(pattern.is_match(&field_str)) + } else { + match self.field_errors { + FieldFilterErrorHandling::Error => Err(RuntimeMsg(format!( + "Field {} does not exist", + &self.field_name + ))), + FieldFilterErrorHandling::Ignore => Ok(false), + FieldFilterErrorHandling::Include => Ok(true), + } + } + } + + fn is_complete(&self) -> bool { + false + } + + fn reset(&mut self) -> Result<(), RuntimeMsg> { + Ok(()) + } +} + +pub struct FieldRegexFilterFactory; + +impl MpsFilterFactory<FieldRegexFilter> for FieldRegexFilterFactory { + fn is_filter(&self, tokens: &VecDeque<&MpsToken>) -> bool { + let tokens_len = tokens.len(); + (tokens_len == 3 // field matches variable + && tokens[0].is_name() + && check_name("matches", tokens[1]) + && (tokens[2].is_name() || tokens[2].is_literal())) + || (tokens_len == 4 // field? matches variable OR field! 
matches variable + && tokens[0].is_name() + && (tokens[1].is_interrogation() || tokens[1].is_exclamation()) + && check_name("matches", tokens[2]) + && (tokens[3].is_name() || tokens[3].is_literal())) + } + + fn build_filter( + &self, + tokens: &mut VecDeque<MpsToken>, + _dict: &MpsLanguageDictionary, + ) -> Result<FieldRegexFilter, SyntaxError> { + let field = assert_token( + |t| match t { + MpsToken::Name(n) => Some(n), + _ => None, + }, + MpsToken::Name("field_name".into()), + tokens, + )?; + let error_handling = if tokens[0].is_interrogation() { // optional ?/! suffix selects the missing-field behaviour + assert_token_raw(MpsToken::Interrogation, tokens)?; + FieldFilterErrorHandling::Ignore + } else if tokens[0].is_exclamation() { + assert_token_raw(MpsToken::Exclamation, tokens)?; + FieldFilterErrorHandling::Include + } else { + FieldFilterErrorHandling::Error + }; + assert_name("matches", tokens)?; + if tokens[0].is_literal() { + let literal = assert_token( + |t| match t { + MpsToken::Literal(n) => Some(n), + _ => None, + }, + MpsToken::Literal("regex_string".into()), + tokens, + )?; + let regex_c = Regex::new(&literal).map_err(|_| SyntaxError { // literal patterns are compiled (and validated) at parse time + line: 0, + token: MpsToken::Literal("[valid regex]".to_string()), + got: Some(MpsToken::Literal(literal.clone())) + })?; + let compiled_cache = (literal.clone(), regex_c); + let value = VariableOrValue::Value(MpsTypePrimitive::String(literal)); + Ok(FieldRegexFilter { + field_name: field, + field_errors: error_handling, + val: value, + regex_cache: Some(compiled_cache), + }) + } else { + let variable = VariableOrValue::Variable(assert_token( + |t| match t { + MpsToken::Name(n) => Some(n), + _ => None, + }, + MpsToken::Name("variable_name".into()), + tokens, + )?); + Ok(FieldRegexFilter { + field_name: field, + field_errors: error_handling, // honour the ?/! suffix for variable patterns too + val: variable, + regex_cache: None, // compiled in matches(), once the variable's value is available + }) + } + } +} + +pub type FieldRegexFilterStatementFactory = + MpsFilterStatementFactory<FieldRegexFilter, FieldRegexFilterFactory>; + +#[inline(always)] +pub fn field_re_filter() -> FieldRegexFilterStatementFactory { + 
FieldRegexFilterStatementFactory::new(FieldRegexFilterFactory) +} diff --git a/mps-interpreter/src/lang/vocabulary/filters/mod.rs b/mps-interpreter/src/lang/vocabulary/filters/mod.rs index 520eeb7..e88817f 100644 --- a/mps-interpreter/src/lang/vocabulary/filters/mod.rs +++ b/mps-interpreter/src/lang/vocabulary/filters/mod.rs @@ -2,6 +2,7 @@ mod empty_filter; mod field_filter; mod field_filter_maybe; mod field_like_filter; +mod field_match_filter; mod index_filter; mod range_filter; pub(crate) mod utility; @@ -19,6 +20,9 @@ pub use field_filter_maybe::{ pub use field_like_filter::{ field_like_filter, FieldLikeFilterFactory, FieldLikeFilterStatementFactory, }; +pub use field_match_filter::{ + field_re_filter, FieldRegexFilterFactory, FieldRegexFilterStatementFactory, +}; pub use index_filter::{ index_filter, IndexFilter, IndexFilterFactory, IndexFilterStatementFactory, }; diff --git a/mps-interpreter/src/lib.rs b/mps-interpreter/src/lib.rs index aab43a7..42a5eb5 100644 --- a/mps-interpreter/src/lib.rs +++ b/mps-interpreter/src/lib.rs @@ -48,6 +48,8 @@ //! //! ### field like something //! +//! ### field matches some_regex +//! //! ### field != something //! //! ### field >= something @@ -60,6 +62,7 @@ //! //! Compare all items, keeping only those that match the condition. Valid field names are those of the MpsMusicItem (title, artist, album, genre, track, etc.), though this will change when proper object support is added. Optionally, a ? or ! can be added to the end of the field name to skip items whose field is missing/incomparable, or keep all items whose field is missing/incomparable (respectively). //! +//! //! ### start..end -- e.g. `iterable.(0..42);` //! //! Keep only the items that are at the start index up to the end index. Start and/or end may be omitted to start/stop at the iterable's existing start/end (respectively). This stops once the end condition is met, leaving the rest of the iterator unconsumed. 
diff --git a/mps-interpreter/tests/single_line.rs b/mps-interpreter/tests/single_line.rs index 5748959..7e1ece6 100644 --- a/mps-interpreter/tests/single_line.rs +++ b/mps-interpreter/tests/single_line.rs @@ -425,3 +425,17 @@ fn execute_unionfn_line() -> Result<(), Box> { true ) } + +#[test] +fn execute_regexfilter_line() -> Result<(), Box> { + execute_single_line( + "files(`~/Music/MusicFlac/Bruno Mars/24K Magic/`).(title matches `24K\\\\s+Magic`)", // note: quad-escape not required in scripts; the Rust string literal already halves the backslashes, and the MPS literal presumably unescapes once more (TODO confirm) + false, + true, + )?; + execute_single_line( + "files(`~/Music/MusicFlac/Bruno Mars/24K Magic/`).(artist? matches `Bruno Mars`)", + false, + true, + ) +} diff --git a/src/help.rs b/src/help.rs index 0f0396f..99de522 100644 --- a/src/help.rs +++ b/src/help.rs @@ -51,6 +51,7 @@ Operations to reduce the items in an iterable: iterable.(filter) field == something field like something + field matches some_regex field != something field >= something field > something