Add regex pattern field filter

This commit is contained in:
NGnius (Graham) 2022-02-02 13:12:56 -05:00
parent 4e7948d5ad
commit 19748d33ac
7 changed files with 192 additions and 0 deletions

View file

@ -50,6 +50,8 @@ E.g. `files(folder="~/Music/", recursive=true).(title == "Romantic Traffic");` i
#### field like something
#### field matches some_regex
#### field != something
#### field >= something
@ -62,6 +64,7 @@ E.g. `files(folder="~/Music/", recursive=true).(title == "Romantic Traffic");` i
Compare all items, keeping only those that match the condition. Valid field names are those of the MpsMusicItem (title, artist, album, genre, track, etc.), though this will change when proper object support is added. Optionally, a ? or ! can be added to the end of the field name to skip items whose field is missing/incomparable, or keep all items whose field is missing/incomparable (respectively).
#### start..end -- e.g. `iterable.(0..42);`
Keep only the items that are at the start index up to the end index. Start and/or end may be omitted to start/stop at the iterable's existing start/end (respectively). This stops once the end condition is met, leaving the rest of the iterator unconsumed.

View file

@ -156,6 +156,7 @@ pub(crate) fn standard_vocab(vocabulary: &mut MpsLanguageDictionary) {
.add(crate::lang::vocabulary::filters::index_filter())
.add(crate::lang::vocabulary::filters::range_filter())
.add(crate::lang::vocabulary::filters::field_like_filter())
.add(crate::lang::vocabulary::filters::field_re_filter())
// sorters
.add(crate::lang::vocabulary::sorters::empty_sort())
.add(crate::lang::vocabulary::sorters::shuffle_sort()) // accepts valid field ~(shuffle)

View file

@ -0,0 +1,166 @@
use std::collections::VecDeque;
use std::fmt::{Debug, Display, Error, Formatter};
use regex::Regex;
use super::field_filter::{FieldFilterErrorHandling, VariableOrValue};
use crate::lang::utility::{assert_name, assert_token, assert_token_raw, check_name};
use crate::lang::MpsLanguageDictionary;
use crate::lang::MpsTypePrimitive;
use crate::lang::{MpsFilterFactory, MpsFilterPredicate, MpsFilterStatementFactory};
use crate::lang::{RuntimeMsg, SyntaxError};
use crate::processing::general::MpsType;
use crate::tokens::MpsToken;
use crate::MpsContext;
use crate::MpsItem;
/// Filter predicate for `field matches pattern`: tests an item's field
/// against a regular expression.
#[derive(Debug, Clone)]
pub struct FieldRegexFilter {
/// Name of the item field whose value is tested against the regex.
field_name: String,
/// What to do when the item has no field named `field_name`.
field_errors: FieldFilterErrorHandling,
/// Source of the regex pattern: a variable name or a literal value.
val: VariableOrValue,
/// Compiled regex paired with the pattern string it was compiled from;
/// `None` until a pattern has been compiled.
regex_cache: Option<(String, Regex)>,
}
impl Display for FieldRegexFilter {
    /// Renders the filter as `<field> matches <variable name or literal value>`.
    fn fmt(&self, f: &mut Formatter) -> Result<(), Error> {
        // Both variants are printed with the same template, so pick the
        // right-hand operand first and format once.
        let rhs: &dyn Display = match &self.val {
            VariableOrValue::Variable(var_name) => var_name,
            VariableOrValue::Value(literal) => literal,
        };
        write!(f, "{} matches {}", self.field_name, rhs)
    }
}
impl MpsFilterPredicate for FieldRegexFilter {
fn matches(
&mut self,
music_item_lut: &MpsItem,
ctx: &mut MpsContext,
) -> Result<bool, RuntimeMsg> {
let variable = match &self.val {
VariableOrValue::Variable(name) => match ctx.variables.get(name)? {
MpsType::Primitive(MpsTypePrimitive::String(s)) => Ok(s),
_ => Err(RuntimeMsg(format!("Variable {} is not comparable", name))),
},
VariableOrValue::Value(MpsTypePrimitive::String(s)) => Ok(s),
// non-string values will be stopped at parse-time, so this should never occur
_ => Err(RuntimeMsg("Value is not type String".to_string())),
}?;
let pattern = if let Some((_, regex_c)) = &self.regex_cache {
regex_c
} else {
let regex_c = Regex::new(variable).map_err(|e| RuntimeMsg(format!("Regex compile error: {}", e)))?;
self.regex_cache = Some((variable.clone(), regex_c));
&self.regex_cache.as_ref().unwrap().1
};
if let Some(field) = music_item_lut.field(&self.field_name) {
let field_str = field.as_str();
Ok(pattern.is_match(&field_str))
} else {
match self.field_errors {
FieldFilterErrorHandling::Error => Err(RuntimeMsg(format!(
"Field {} does not exist",
&self.field_name
))),
FieldFilterErrorHandling::Ignore => Ok(false),
FieldFilterErrorHandling::Include => Ok(true),
}
}
}
fn is_complete(&self) -> bool {
false
}
fn reset(&mut self) -> Result<(), RuntimeMsg> {
Ok(())
}
}
pub struct FieldRegexFilterFactory;
impl MpsFilterFactory<FieldRegexFilter> for FieldRegexFilterFactory {
fn is_filter(&self, tokens: &VecDeque<&MpsToken>) -> bool {
let tokens_len = tokens.len();
(tokens_len == 3 // field like variable
&& tokens[0].is_name()
&& check_name("matches", tokens[1])
&& (tokens[2].is_name() || tokens[2].is_literal()))
|| (tokens_len == 4 // field? like variable OR field! like variable
&& tokens[0].is_name()
&& (tokens[1].is_interrogation() || tokens[1].is_exclamation())
&& check_name("matches", tokens[2])
&& (tokens[3].is_name() || tokens[3].is_literal()))
}
fn build_filter(
&self,
tokens: &mut VecDeque<MpsToken>,
_dict: &MpsLanguageDictionary,
) -> Result<FieldRegexFilter, SyntaxError> {
let field = assert_token(
|t| match t {
MpsToken::Name(n) => Some(n),
_ => None,
},
MpsToken::Name("field_name".into()),
tokens,
)?;
let error_handling = if tokens[0].is_interrogation() {
assert_token_raw(MpsToken::Interrogation, tokens)?;
FieldFilterErrorHandling::Ignore
} else if tokens[0].is_exclamation() {
assert_token_raw(MpsToken::Exclamation, tokens)?;
FieldFilterErrorHandling::Include
} else {
FieldFilterErrorHandling::Error
};
assert_name("matches", tokens)?;
if tokens[0].is_literal() {
let literal = assert_token(
|t| match t {
MpsToken::Literal(n) => Some(n),
_ => None,
},
MpsToken::Literal("regex_string".into()),
tokens,
)?;
let regex_c = Regex::new(&literal).map_err(|_| SyntaxError {
line: 0,
token: MpsToken::Literal("[valid regex]".to_string()),
got: Some(MpsToken::Literal(literal.clone()))
})?;
let compiled_cache = (literal.clone(), regex_c);
let value = VariableOrValue::Value(MpsTypePrimitive::String(literal));
Ok(FieldRegexFilter {
field_name: field,
field_errors: error_handling,
val: value,
regex_cache: Some(compiled_cache),
})
} else {
let variable = VariableOrValue::Variable(assert_token(
|t| match t {
MpsToken::Name(n) => Some(n),
_ => None,
},
MpsToken::Name("variable_name".into()),
tokens,
)?);
Ok(FieldRegexFilter {
field_name: field,
field_errors: FieldFilterErrorHandling::Error,
val: variable,
regex_cache: None,
})
}
}
}
/// Statement factory specialised for the regex field filter.
pub type FieldRegexFilterStatementFactory =
    MpsFilterStatementFactory<FieldRegexFilter, FieldRegexFilterFactory>;

/// Creates the statement factory for `field matches regex` filters.
#[inline(always)]
pub fn field_re_filter() -> FieldRegexFilterStatementFactory {
    let filter_factory = FieldRegexFilterFactory;
    FieldRegexFilterStatementFactory::new(filter_factory)
}

View file

@ -2,6 +2,7 @@ mod empty_filter;
mod field_filter;
mod field_filter_maybe;
mod field_like_filter;
mod field_match_filter;
mod index_filter;
mod range_filter;
pub(crate) mod utility;
@ -19,6 +20,9 @@ pub use field_filter_maybe::{
pub use field_like_filter::{
field_like_filter, FieldLikeFilterFactory, FieldLikeFilterStatementFactory,
};
pub use field_match_filter::{
field_re_filter, FieldRegexFilterFactory, FieldRegexFilterStatementFactory,
};
pub use index_filter::{
index_filter, IndexFilter, IndexFilterFactory, IndexFilterStatementFactory,
};

View file

@ -48,6 +48,8 @@
//!
//! ### field like something
//!
//! ### field matches some_regex
//!
//! ### field != something
//!
//! ### field >= something
@ -60,6 +62,7 @@
//!
//! Compare all items, keeping only those that match the condition. Valid field names are those of the MpsMusicItem (title, artist, album, genre, track, etc.), though this will change when proper object support is added. Optionally, a ? or ! can be added to the end of the field name to skip items whose field is missing/incomparable, or keep all items whose field is missing/incomparable (respectively).
//!
//!
//! ### start..end -- e.g. `iterable.(0..42);`
//!
//! Keep only the items that are at the start index up to the end index. Start and/or end may be omitted to start/stop at the iterable's existing start/end (respectively). This stops once the end condition is met, leaving the rest of the iterator unconsumed.

View file

@ -425,3 +425,17 @@ fn execute_unionfn_line() -> Result<(), Box<dyn MpsLanguageError>> {
true
)
}
/// Runs the regex field filter with a plain field and with the `?` modifier.
#[test]
fn execute_regexfilter_line() -> Result<(), Box<dyn MpsLanguageError>> {
    let scripts = [
        // note: quad-escape not required in scripts
        "files(`~/Music/MusicFlac/Bruno Mars/24K Magic/`).(title matches `24K\\\\s+Magic`)",
        "files(`~/Music/MusicFlac/Bruno Mars/24K Magic/`).(artist? matches `Bruno Mars`)",
    ];
    // Stop at the first failing script, exactly like sequential `?` calls.
    for script in scripts.iter() {
        execute_single_line(*script, false, true)?;
    }
    Ok(())
}

View file

@ -51,6 +51,7 @@ Operations to reduce the items in an iterable: iterable.(filter)
field == something
field like something
field matches some_regex
field != something
field >= something
field > something