Add sorting support to the MPS interpreter.

Introduces the `~` token, `PseudoOp::try_real_ref`, a generic sort statement
(`iterable~(sorter)` / `iterable.sort(sorter)`) with a pass-through "empty"
sorter, and registers that sorter in the standard vocabulary. In `FileIter`
the regex compilation is hoisted into a local and the capture-name loop now
keeps going past unnamed groups instead of stopping at the first one.

Review notes on this copy of the patch:
 * Generic arguments (e.g. `VecDeque<MpsToken>`, `Box<dyn MpsOp>`) were missing
   and have been reconstructed from usage. `Box<dyn MpsLanguageError>` in
   tests/single_line.rs is the least certain one -- TODO confirm.
 * Doc comments and three small code fixes (`saturating_sub`, `is_empty`,
   `Option::map`) were made on `+` lines; hunk line counts were adjusted, and
   the `index` blob hashes no longer describe the resulting files.
 * The position of the blank context line closing the second lang/mod.rs hunk
   is assumed -- TODO confirm.

diff --git a/mps-interpreter/src/interpretor.rs b/mps-interpreter/src/interpretor.rs
index 3029cc0..9dfbdf7 100644
--- a/mps-interpreter/src/interpretor.rs
+++ b/mps-interpreter/src/interpretor.rs
@@ -156,11 +156,15 @@ fn box_error_with_ctx(
 /// Builder function to add the standard statements of MPS.
 pub(crate) fn standard_vocab(vocabulary: &mut MpsLanguageDictionary) {
     vocabulary
+        // filters
         .add(crate::lang::vocabulary::filters::empty_filter())
         .add(crate::lang::vocabulary::filters::field_filter())
         .add(crate::lang::vocabulary::filters::field_filter_maybe())
         .add(crate::lang::vocabulary::filters::index_filter())
         .add(crate::lang::vocabulary::filters::range_filter())
+        // sorters
+        .add(crate::lang::vocabulary::sorters::empty_sort())
+        // functions and misc
         .add(crate::lang::vocabulary::sql_function_factory())
         .add(crate::lang::vocabulary::simple_sql_function_factory())
         .add(crate::lang::vocabulary::CommentStatementFactory)
diff --git a/mps-interpreter/src/lang/mod.rs b/mps-interpreter/src/lang/mod.rs
index 3be9050..46b7e8b 100644
--- a/mps-interpreter/src/lang/mod.rs
+++ b/mps-interpreter/src/lang/mod.rs
@@ -9,6 +9,7 @@ mod operation;
 mod pseudo_op;
 mod repeated_meme;
 mod single_op;
+mod sorter;
 //mod statement;
 mod type_primitives;
 pub(crate) mod utility;
@@ -25,6 +26,7 @@ pub use operation::{BoxedMpsOpFactory, MpsOp, MpsOpFactory, SimpleMpsOpFactory,
 pub use pseudo_op::PseudoOp;
 pub use repeated_meme::{repeated_tokens, RepeatedTokens};
 pub use single_op::SingleItem;
+pub use sorter::{MpsSorterFactory, MpsSorter, MpsSortStatement, MpsSortStatementFactory};
 //pub(crate) use statement::MpsStatement;
 pub use type_primitives::MpsTypePrimitive;
 
diff --git a/mps-interpreter/src/lang/pseudo_op.rs b/mps-interpreter/src/lang/pseudo_op.rs
index bb0229c..6055d97 100644
--- a/mps-interpreter/src/lang/pseudo_op.rs
+++ b/mps-interpreter/src/lang/pseudo_op.rs
@@ -22,6 +22,19 @@ impl PseudoOp {
         }
     }
 
+    /// Borrows the wrapped operation without requiring `&mut self`.
+    /// Returns a `RuntimeError` when this is a `PseudoOp::Fake`.
+    pub fn try_real_ref(&self) -> Result<&Box<dyn MpsOp>, RuntimeError> {
+        match self {
+            Self::Real(op) => Ok(op),
+            Self::Fake(_) => Err(RuntimeError {
+                line: 0,
+                op: self.clone(),
+                msg: "PseudoOp::Fake is not a real MpsOp".into(),
+            }),
+        }
+    }
+
     pub fn unwrap_real(self) -> Result<Box<dyn MpsOp>, RuntimeError> {
         match self {
             Self::Real(op) => {
diff --git a/mps-interpreter/src/lang/sorter.rs b/mps-interpreter/src/lang/sorter.rs
new file mode 100644
index 0000000..5925b65
--- /dev/null
+++ b/mps-interpreter/src/lang/sorter.rs
@@ -0,0 +1,241 @@
+use std::collections::VecDeque;
+use std::fmt::{Debug, Display, Error, Formatter};
+use std::iter::Iterator;
+use std::marker::PhantomData;
+
+use crate::lang::utility::{assert_token_raw, check_name, assert_name};
+use crate::lang::MpsLanguageDictionary;
+use crate::lang::{BoxedMpsOpFactory, MpsOp, PseudoOp, MpsIteratorItem};
+use crate::lang::{RuntimeError, SyntaxError};
+use crate::tokens::MpsToken;
+use crate::MpsContext;
+
+/// Initial capacity of the item buffer of a freshly built sort statement.
+const SORTER_ITEM_CACHE_SIZE: usize = 8;
+
+/// Ordering strategy used by [`MpsSortStatement`].
+pub trait MpsSorter: Clone + Debug + Display {
+    /// Pulls items from `iterator` and appends them to `item_buf` in output order.
+    ///
+    /// `MpsSortStatement::next` calls this once per step and then pops the front of
+    /// `item_buf`, so leaving the buffer empty ends the iteration.
+    fn sort(&mut self, iterator: &mut dyn MpsOp, item_buf: &mut VecDeque<MpsIteratorItem>) -> Result<(), RuntimeError>;
+}
+
+/// Recognises and builds one kind of [`MpsSorter`] from the tokens inside the brackets.
+pub trait MpsSorterFactory<S: MpsSorter + 'static> {
+    /// Returns true when `tokens` (everything between the brackets) describe this sorter.
+    fn is_sorter(&self, tokens: &VecDeque<&MpsToken>) -> bool;
+
+    /// Builds the sorter from `tokens`; the brackets themselves are handled by the caller.
+    fn build_sorter(
+        &self,
+        tokens: &mut VecDeque<MpsToken>,
+        dict: &MpsLanguageDictionary,
+    ) -> Result<S, SyntaxError>;
+}
+
+/// Statement of the form `iterable~(sorter)` or `iterable.sort(sorter)`.
+#[derive(Debug)]
+pub struct MpsSortStatement<S: MpsSorter + 'static> {
+    orderer: S,
+    iterable: PseudoOp,
+    // state
+    item_cache: VecDeque<MpsIteratorItem>,
+}
+
+// Manual impl: a clone copies the sorter and iterable but starts with an empty item cache.
+impl<S: MpsSorter + 'static> std::clone::Clone for MpsSortStatement<S> {
+    fn clone(&self) -> Self {
+        Self {
+            orderer: self.orderer.clone(),
+            iterable: self.iterable.clone(),
+            item_cache: VecDeque::new(),
+        }
+    }
+}
+
+impl<S: MpsSorter + 'static> Display for MpsSortStatement<S> {
+    fn fmt(&self, f: &mut Formatter) -> Result<(), Error> {
+        write!(f, "{}~({})", self.iterable, self.orderer)
+    }
+}
+
+impl<S: MpsSorter + 'static> MpsOp for MpsSortStatement<S> {
+    // NOTE(review): enter/escape panic if `try_real()` fails (presumably on a PseudoOp::Fake).
+    fn enter(&mut self, ctx: MpsContext) {
+        self.iterable.try_real().unwrap().enter(ctx)
+    }
+
+    fn escape(&mut self) -> MpsContext {
+        self.iterable.try_real().unwrap().escape()
+    }
+
+    fn is_resetable(&self) -> bool {
+        if let Ok(iter) = self.iterable.try_real_ref() {
+            iter.is_resetable()
+        } else {false}
+    }
+
+    fn reset(&mut self) -> Result<(), RuntimeError> {
+        self.item_cache.clear();
+        self.iterable.try_real()?.reset()
+    }
+}
+
+impl<S: MpsSorter + 'static> Iterator for MpsSortStatement<S> {
+    type Item = MpsIteratorItem;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        let real_op = match self.iterable.try_real() {
+            Ok(op) => op,
+            Err(e) => return Some(Err(e)),
+        };
+        // let the sorter top up the cache, then emit the oldest cached item
+        match self.orderer.sort(real_op.as_mut(), &mut self.item_cache) {
+            Ok(_) => {},
+            Err(e) => return Some(Err(e)),
+        }
+        self.item_cache.pop_front()
+    }
+}
+
+/// Adapts an [`MpsSorterFactory`] into a [`BoxedMpsOpFactory`] for sort statements.
+pub struct MpsSortStatementFactory<S: MpsSorter + 'static, F: MpsSorterFactory<S> + 'static> {
+    sort_factory: F,
+    idc: PhantomData<S>,
+}
+
+impl<S: MpsSorter + 'static, F: MpsSorterFactory<S> + 'static>
+    MpsSortStatementFactory<S, F>
+{
+    /// Wraps `factory` so it can be added to the language dictionary.
+    pub fn new(factory: F) -> Self {
+        Self {
+            sort_factory: factory,
+            idc: PhantomData::<S>,
+        }
+    }
+}
+
+impl<S: MpsSorter + 'static, F: MpsSorterFactory<S> + 'static> BoxedMpsOpFactory
+    for MpsSortStatementFactory<S, F>
+{
+    fn is_op_boxed(&self, tokens: &VecDeque<MpsToken>) -> bool {
+        let tokens_len = tokens.len();
+        if let Some(tilde_location) = last_tilde(tokens, 0) {
+            // iterable~(sorter)
+            if tokens_len > tilde_location + 2 {
+                let tokens2: VecDeque<&MpsToken> =
+                    VecDeque::from_iter(tokens.range(tilde_location+2..tokens_len-1));
+                tokens[tokens_len-1].is_close_bracket()
+                    && self.sort_factory.is_sorter(&tokens2)
+            } else {false}
+        } else if let Some(dot_location) = last_dot_sort(tokens, 1) {
+            // iterable.sort(sorter)
+            if tokens_len > dot_location + 3 {
+                let tokens2: VecDeque<&MpsToken> =
+                    VecDeque::from_iter(tokens.range(dot_location+3..tokens_len-1));
+                tokens[tokens_len-1].is_close_bracket()
+                    && self.sort_factory.is_sorter(&tokens2)
+            } else {false}
+        } else {
+            false
+        }
+    }
+
+    fn build_op_boxed(
+        &self,
+        tokens: &mut VecDeque<MpsToken>,
+        dict: &MpsLanguageDictionary,
+    ) -> Result<Box<dyn MpsOp>, SyntaxError> {
+        let inner_op;
+        if let Some(tilde_location) = last_tilde(tokens, 0) {
+            let end_tokens = tokens.split_off(tilde_location);
+            inner_op = dict.try_build_statement(tokens)?;
+            tokens.extend(end_tokens);
+            assert_token_raw(MpsToken::Tilde, tokens)?;
+        } else if let Some(dot_location) = last_dot_sort(tokens, 1) {
+            let end_tokens = tokens.split_off(dot_location);
+            inner_op = dict.try_build_statement(tokens)?;
+            tokens.extend(end_tokens);
+            assert_token_raw(MpsToken::Dot, tokens)?;
+            assert_name("sort", tokens)?;
+        } else {
+            return Err(SyntaxError {
+                line: 0,
+                token: MpsToken::Name(".|~".into()),
+                got: tokens.pop_front()
+            })
+        }
+        assert_token_raw(MpsToken::OpenBracket, tokens)?;
+        // Set the last token (expected to be `)`) aside. saturating_sub avoids a usize
+        // underflow panic when nothing follows `(`, leaving it to the CloseBracket check below.
+        let end_tokens = tokens.split_off(tokens.len().saturating_sub(1));
+        let sorter = self.sort_factory.build_sorter(tokens, dict)?;
+        tokens.extend(end_tokens);
+        assert_token_raw(MpsToken::CloseBracket, tokens)?;
+        Ok(Box::new(
+            MpsSortStatement {
+                orderer: sorter,
+                iterable: inner_op.into(),
+                item_cache: VecDeque::with_capacity(SORTER_ITEM_CACHE_SIZE),
+            }
+        ))
+    }
+}
+
+/// Index of the right-most `~` found at bracket depth `target_depth`, scanning from the
+/// end of `tokens`; depth counts close brackets not yet matched by an open bracket.
+fn last_tilde(tokens: &VecDeque<MpsToken>, target_depth: usize) -> Option<usize> {
+    let mut bracket_depth = 0;
+    for i in (0..tokens.len()).rev() {
+        let current_token = &tokens[i];
+        if current_token.is_close_bracket() {
+            bracket_depth += 1;
+        } else if current_token.is_open_bracket() && bracket_depth != 0 {
+            bracket_depth -= 1;
+        } else if current_token.is_tilde() && bracket_depth == target_depth {
+            return Some(i)
+        }
+    }
+    None
+}
+
+/// Index of the `.` of a trailing `.sort(`, scanning from the end of `tokens`: the `(`
+/// must be met at bracket depth `target_depth` and be preceded by the name `sort`.
+/// Returns `None` as soon as such a `sort(` is found without a `.` in front of it.
+fn last_dot_sort(tokens: &VecDeque<MpsToken>, target_depth: usize) -> Option<usize> {
+    let mut bracket_depth = 0;
+    let mut sort_found = false;
+    let mut bracket_found = false;
+    for i in (0..tokens.len()).rev() {
+        let current_token = &tokens[i];
+        if sort_found {
+            return {
+                if current_token.is_dot() {
+                    Some(i)
+                } else {
+                    None
+                }
+            }
+        } else if bracket_found {
+            if check_name("sort", current_token) {
+                sort_found = true;
+            } else {
+                bracket_found = false;
+            }
+        }
+        if current_token.is_close_bracket() {
+            bracket_depth += 1;
+        } else if current_token.is_open_bracket() {
+            if target_depth == bracket_depth {
+                bracket_found = true;
+            }
+            if bracket_depth != 0 {
+                bracket_depth -= 1;
+            }
+        }
+    }
+    None
+}
diff --git a/mps-interpreter/src/lang/vocabulary/mod.rs b/mps-interpreter/src/lang/vocabulary/mod.rs
index a0fdc23..7dd0dba 100644
--- a/mps-interpreter/src/lang/vocabulary/mod.rs
+++ b/mps-interpreter/src/lang/vocabulary/mod.rs
@@ -13,4 +13,6 @@ pub use sql_init::{sql_init_function_factory, SqlInitStatementFactory};
 pub use sql_query::{sql_function_factory, SqlStatementFactory};
 pub use sql_simple_query::{simple_sql_function_factory, SimpleSqlStatementFactory};
 pub use variable_assign::{AssignStatement, AssignStatementFactory};
+
 pub mod filters;
+pub mod sorters;
diff --git a/mps-interpreter/src/lang/vocabulary/sorters/empty_sorter.rs b/mps-interpreter/src/lang/vocabulary/sorters/empty_sorter.rs
new file mode 100644
index 0000000..a12ca4b
--- /dev/null
+++ b/mps-interpreter/src/lang/vocabulary/sorters/empty_sorter.rs
@@ -0,0 +1,51 @@
+use std::collections::VecDeque;
+use std::fmt::{Debug, Display, Error, Formatter};
+
+use crate::lang::{MpsSorter, MpsSorterFactory, MpsSortStatementFactory};
+use crate::lang::{MpsLanguageDictionary, MpsIteratorItem, MpsOp};
+use crate::lang::{RuntimeError, SyntaxError};
+use crate::tokens::MpsToken;
+
+/// Sorter that keeps the source order: each call forwards at most one item.
+#[derive(Debug, Clone)]
+pub struct EmptySorter;
+
+impl MpsSorter for EmptySorter {
+    fn sort(&mut self, iterator: &mut dyn MpsOp, item_buf: &mut VecDeque<MpsIteratorItem>) -> Result<(), RuntimeError> {
+        if let Some(item) = iterator.next() {
+            item_buf.push_back(item)
+        }
+        Ok(())
+    }
+}
+
+impl Display for EmptySorter {
+    fn fmt(&self, f: &mut Formatter) -> Result<(), Error> {
+        write!(f, "[empty]")
+    }
+}
+
+/// Factory for [`EmptySorter`]; matches an empty bracket pair such as `.sort()`.
+pub struct EmptySorterFactory;
+
+impl MpsSorterFactory<EmptySorter> for EmptySorterFactory {
+    fn is_sorter(&self, tokens: &VecDeque<&MpsToken>) -> bool {
+        tokens.is_empty()
+    }
+
+    fn build_sorter(
+        &self,
+        _tokens: &mut VecDeque<MpsToken>,
+        _dict: &MpsLanguageDictionary,
+    ) -> Result<EmptySorter, SyntaxError> {
+        Ok(EmptySorter)
+    }
+}
+
+pub type EmptySorterStatementFactory = MpsSortStatementFactory<EmptySorter, EmptySorterFactory>;
+
+/// Builds the statement factory for the empty sorter (`iterable~()` / `iterable.sort()`).
+#[inline(always)]
+pub fn empty_sort() -> EmptySorterStatementFactory {
+    EmptySorterStatementFactory::new(EmptySorterFactory)
+}
diff --git a/mps-interpreter/src/lang/vocabulary/sorters/mod.rs b/mps-interpreter/src/lang/vocabulary/sorters/mod.rs
new file mode 100644
index 0000000..26a771a
--- /dev/null
+++ b/mps-interpreter/src/lang/vocabulary/sorters/mod.rs
@@ -0,0 +1,3 @@
+mod empty_sorter;
+
+pub use empty_sorter::{empty_sort, EmptySorter, EmptySorterFactory, EmptySorterStatementFactory};
diff --git a/mps-interpreter/src/processing/filesystem.rs b/mps-interpreter/src/processing/filesystem.rs
index 4e4182c..25e57fe 100644
--- a/mps-interpreter/src/processing/filesystem.rs
+++ b/mps-interpreter/src/processing/filesystem.rs
@@ -100,13 +100,14 @@ impl FileIter {
         } else {
             Vec::with_capacity(DEFAULT_VEC_CACHE_SIZE)
         };
+        let pattern_re = Regex::new(pattern.unwrap_or(DEFAULT_REGEX)).map_err(|e| RuntimeError {
+            line: 0,
+            op: op(),
+            msg: format!("Regex compile error: {}", e),
+        })?;
         Ok(Self {
             root: root_path,
-            pattern: Regex::new(pattern.unwrap_or(DEFAULT_REGEX)).map_err(|e| RuntimeError {
-                line: 0,
-                op: op(),
-                msg: format!("Regex compile error: {}", e),
-            })?,
+            pattern: pattern_re,
             recursive: recurse,
             dir_iters: dir_vec,
             is_complete: false,
@@ -211,9 +212,11 @@ impl FileIter {
         mut capture_names: regex::CaptureNames,
     ) {
         // populates fields from named capture groups
-        while let Some(Some(name)) = capture_names.next() {
-            if let Some(value) = captures.name(name).and_then(|m| Some(m.as_str().to_string())) {
-                item.set_field(name, MpsTypePrimitive::parse(value));
+        while let Some(name_maybe) = capture_names.next() {
+            if let Some(name) = name_maybe {
+                if let Some(value) = captures.name(name).map(|m| m.as_str().to_string()) {
+                    item.set_field(name, MpsTypePrimitive::parse(value));
+                }
             }
         }
         item.set_field("filename", path_str.to_string().into());
diff --git a/mps-interpreter/src/tokens/token_enum.rs b/mps-interpreter/src/tokens/token_enum.rs
index cdd4ba0..f46daa8 100644
--- a/mps-interpreter/src/tokens/token_enum.rs
+++ b/mps-interpreter/src/tokens/token_enum.rs
@@ -20,6 +20,7 @@ pub enum MpsToken {
     Pipe,
     Ampersand,
     Colon,
+    Tilde,
 }
 
 impl MpsToken {
@@ -40,6 +41,7 @@ impl MpsToken {
             "|" => Ok(Self::Pipe),
             "&" => Ok(Self::Ampersand),
             ":" => Ok(Self::Colon),
+            "~" => Ok(Self::Tilde),
             _ => {
                 // name validation
                 let mut ok = true;
@@ -183,6 +185,13 @@ impl MpsToken {
             _ => false,
         }
     }
+
+    pub fn is_tilde(&self) -> bool {
+        match self {
+            Self::Tilde => true,
+            _ => false,
+        }
+    }
 }
 
 impl Display for MpsToken {
@@ -205,7 +214,8 @@ impl Display for MpsToken {
             Self::Interrogation => write!(f, "?"),
             Self::Pipe => write!(f, "|"),
             Self::Ampersand => write!(f, "&"),
-            Self::Colon => write!(f, ":")
+            Self::Colon => write!(f, ":"),
+            Self::Tilde => write!(f, "~"),
         }
     }
 }
diff --git a/mps-interpreter/tests/single_line.rs b/mps-interpreter/tests/single_line.rs
index fe894b0..1040622 100644
--- a/mps-interpreter/tests/single_line.rs
+++ b/mps-interpreter/tests/single_line.rs
@@ -286,3 +286,17 @@ fn execute_replacefilter_line() -> Result<(), Box<dyn MpsLanguageError>> {
         true,
     )
 }
+
+#[test]
+fn execute_emptysort_line() -> Result<(), Box<dyn MpsLanguageError>> {
+    execute_single_line(
+        "files(`~/Music/MusicFlac/Bruno Mars/24K Magic/`).sort()",
+        false,
+        true,
+    )?;
+    execute_single_line(
+        "files(`~/Music/MusicFlac/Bruno Mars/24K Magic/`)~()",
+        false,
+        true,
+    )
+}