Improve tag processing and filtering with ?? filter

This commit is contained in:
NGnius (Graham) 2022-05-14 11:10:03 -04:00
parent c2f93faf69
commit 34487c02eb
11 changed files with 187 additions and 42 deletions

View file

@ -13,6 +13,7 @@ pub(crate) fn standard_vocab(vocabulary: &mut MpsLanguageDictionary) {
.add(crate::lang::vocabulary::filters::field_like_filter())
.add(crate::lang::vocabulary::filters::field_re_filter())
.add(crate::lang::vocabulary::filters::unique_field_filter())
.add(crate::lang::vocabulary::filters::nonempty_filter())
// sorters
.add(crate::lang::vocabulary::sorters::empty_sort())
.add(crate::lang::vocabulary::sorters::shuffle_sort()) // accepts ~(shuffle)

View file

@ -4,6 +4,7 @@ mod field_filter_maybe;
mod field_like_filter;
mod field_match_filter;
mod index_filter;
mod nonempty_filter;
mod range_filter;
mod unique;
pub(crate) mod utility;
@ -27,6 +28,9 @@ pub use field_match_filter::{
pub use index_filter::{
index_filter, IndexFilter, IndexFilterFactory, IndexFilterStatementFactory,
};
pub use nonempty_filter::{
nonempty_filter, NonEmptyFilter, NonEmptyFilterFactory, NonEmptyFilterStatementFactory,
};
pub use range_filter::{
range_filter, RangeFilter, RangeFilterFactory, RangeFilterStatementFactory,
};

View file

@ -0,0 +1,65 @@
use std::collections::VecDeque;
use std::fmt::{Debug, Display, Error, Formatter};
use crate::lang::MpsLanguageDictionary;
use crate::lang::{MpsFilterFactory, MpsFilterPredicate, MpsFilterStatementFactory};
use crate::lang::{RuntimeMsg, SyntaxError, utility::assert_token_raw};
use crate::tokens::MpsToken;
use crate::MpsContext;
use crate::MpsItem;
/// Predicate behind the `??` filter.
///
/// The type carries no state; all of its behaviour lives in its trait implementations.
#[derive(Debug, Clone)]
pub struct NonEmptyFilter;

impl Display for NonEmptyFilter {
    /// Writes the filter's fixed textual label.
    fn fmt(&self, f: &mut Formatter) -> Result<(), Error> {
        f.write_str("[empty]")
    }
}
impl MpsFilterPredicate for NonEmptyFilter {
    /// Decides whether `item` should be kept.
    ///
    /// An item with no fields is rejected, and so is an item whose only field is
    /// `filename`. Every other item is accepted. This never returns an error.
    fn matches(&mut self, item: &MpsItem, _ctx: &mut MpsContext) -> Result<bool, RuntimeMsg> {
        let keep = match item.len() {
            0 => false,
            // ignore filename field, since that almost always exists
            1 => item.field("filename").is_none(),
            _ => true,
        };
        Ok(keep)
    }

    /// Always reports `false`.
    fn is_complete(&self) -> bool {
        false
    }

    /// There is no state to reset, so this always succeeds.
    fn reset(&mut self) -> Result<(), RuntimeMsg> {
        Ok(())
    }
}
/// Factory that recognises a leading pair of interrogation tokens (`??`) and
/// produces a [`NonEmptyFilter`].
pub struct NonEmptyFilterFactory;

impl MpsFilterFactory<NonEmptyFilter> for NonEmptyFilterFactory {
    /// Returns `true` when there are at least two tokens and the first two are
    /// both interrogation tokens.
    fn is_filter(&self, tokens: &VecDeque<&MpsToken>) -> bool {
        tokens.len() >= 2
            && tokens
                .iter()
                .take(2)
                .all(|token| token.is_interrogation())
    }

    /// Asserts two interrogation tokens in sequence on `tokens`, then returns the filter.
    ///
    /// # Errors
    /// Propagates the `SyntaxError` from `assert_token_raw` if either assertion fails.
    fn build_filter(
        &self,
        tokens: &mut VecDeque<MpsToken>,
        _dict: &MpsLanguageDictionary,
    ) -> Result<NonEmptyFilter, SyntaxError> {
        // the filter is spelled `??`, i.e. two interrogation tokens in a row
        for _ in 0..2 {
            assert_token_raw(MpsToken::Interrogation, tokens)?;
        }
        Ok(NonEmptyFilter)
    }
}
/// Statement factory specialisation that pairs [`NonEmptyFilter`] with [`NonEmptyFilterFactory`].
pub type NonEmptyFilterStatementFactory = MpsFilterStatementFactory<NonEmptyFilter, NonEmptyFilterFactory>;
/// Creates a new statement factory wrapping a [`NonEmptyFilterFactory`] (the `??` filter).
#[inline(always)]
pub fn nonempty_filter() -> NonEmptyFilterStatementFactory {
NonEmptyFilterStatementFactory::new(NonEmptyFilterFactory)
}

View file

@ -89,6 +89,10 @@
//!
//! Keep only items which do not duplicate another item, or keep only items whose specified field does not duplicate another item's same field. The first non-duplicated instance of an item is always the one that is kept.
//!
//! ### ?? -- e.g. `iterable.(??);`
//!
//! Keep only the items that contain at least one field (not including the filename field).
//!
//! ## Functions
//! Similar to most other languages: `function_name(param1, param2, etc.);`.
//! These always return an iterable which can be manipulated with other syntax (filters, sorters, etc.).

View file

@ -70,6 +70,15 @@ impl Tags {
.map(|s| s.to_string())
}
/// Looks up the `ALBUMARTIST` tag and returns its string form as an owned `String`.
///
/// A missing tag is treated as `TagType::Unknown`; the result is whatever `str()`
/// yields for the selected tag, copied into a new `String`.
#[inline]
pub fn albumartist_name(&self) -> Option<String> {
    let fallback = TagType::Unknown;
    let tag = self.data.get("ALBUMARTIST").unwrap_or(&fallback);
    tag.str().map(String::from)
}
#[inline]
pub fn genre_title(&self) -> Option<String> {
self.data

View file

@ -163,35 +163,34 @@ impl FileIter {
match crate::music::MpsLibrary::read_media_tags(path) {
Ok(tags) => {
let mut item = MpsItem::new();
self.populate_item_impl_simple(&mut item, path_str, captures, capture_names);
if item.field("title").is_none() {
item.set_field("title", tags.track_title().into());
}
if item.field("artist").is_none() {
if let Some(artist) = tags.artist_name() {
item.set_field("artist", artist.into());
}
if let Some(albumartist) = tags.albumartist_name() {
item.set_field("albumartist", albumartist.clone().into());
if let Some(MpsTypePrimitive::String(artist)) = item.field("artist") {
if albumartist.trim() != artist.trim() {
let new_artist = format!("{},{}", artist, albumartist.as_str());
item.set_field("artist", new_artist.into());
}
} else {
item.set_field("artist", albumartist.into());
}
}
if item.field("album").is_none() {
if let Some(album) = tags.album_title() {
item.set_field("album", album.into());
}
}
if item.field("genre").is_none() {
if let Some(genre) = tags.genre_title() {
item.set_field("genre", genre.into());
}
}
if item.field("track").is_none() {
if let Some(track) = tags.track_number() {
item.set_field("track", track.into());
}
}
if item.field("year").is_none() {
if let Some(year) = tags.track_date() {
item.set_field("year", year.into());
}
}
self.populate_item_impl_simple(&mut item, path_str, captures, capture_names);
Some(item)
}
Err(_) => {
@ -227,7 +226,9 @@ impl FileIter {
if let Some(captures) = captures {
for name_maybe in capture_names {
if let Some(name) = name_maybe {
if let Some(value) = captures.name(name).map(|m| m.as_str().to_string()) {
if item.field(name).is_some() {
// do nothing
} else if let Some(value) = captures.name(name).map(|m| m.as_str().to_string()) {
item.set_field(name, MpsTypePrimitive::parse(value));
}
}

View file

@ -13,10 +13,10 @@ use bliss_audio_symphonia::{BlissError, Song};
const DEFAULT_PARALLELISM: usize = 2;
// maximum length of song cache (song objects take up a lot of memory)
const MAX_SONG_CACHE_SIZE: usize = 1000;
const MAX_SONG_CACHE_SIZE: usize = 10000;
// maximum length of distance cache (takes up significantly less memory than songs)
const MAX_DISTANCE_CACHE_SIZE: usize = MAX_SONG_CACHE_SIZE * 10;
const MAX_DISTANCE_CACHE_SIZE: usize = MAX_SONG_CACHE_SIZE * MAX_SONG_CACHE_SIZE;
use crate::lang::RuntimeMsg;
use crate::MpsItem;
@ -252,7 +252,7 @@ impl CacheThread {
self.distance_in_progress.remove(&key);
if self.distance_cache.len() > MAX_DISTANCE_CACHE_SIZE {
// avoid using too much memory
self.song_cache.clear();
self.distance_cache.clear();
}
self.distance_cache.insert(key, distance_result);
}
@ -560,6 +560,9 @@ fn worker_distance(
song: new_song2.clone(),
})
.unwrap_or(());
if new_song2.is_err() {
eprintln!("Song error on `{}`: {}", path2, new_song2.clone().err().unwrap());
}
new_song2?
};
Ok(song1.distance(&song2))

View file

@ -1,3 +1,5 @@
//! Integration tests for every syntax feature
use mps_interpreter::tokens::{MpsToken, MpsTokenizer, ParseError};
use mps_interpreter::*;
use std::collections::VecDeque;
@ -813,3 +815,17 @@ fn execute_emptiesop_line() -> Result<(), MpsError> {
)?;
execute_single_line("empties(0)", true, true)
}
/// Runs the `??` filter over `files()` and over `empties(42)`.
#[test]
fn execute_nonemptyfilter_line() -> Result<(), MpsError> {
    // (statement, second argument handed to `execute_single_line`)
    // NOTE(review): the second argument presumably flags whether the result should be empty — confirm
    let cases = [("files().(??)", false), ("empties(42).(??)", true)];
    for &(statement, flag) in &cases {
        execute_single_line(statement, flag, true)?;
    }
    Ok(())
}

View file

@ -11,7 +11,7 @@ use std::path::Path;
use m3u8_rs::{MediaPlaylist, MediaSegment};
use mps_interpreter::MpsRunner;
use mps_interpreter::{MpsFaye, MpsItem};
fn main() {
let args = cli::parse();
@ -29,16 +29,16 @@ fn main() {
println!("Executing: {}", &args.input);
let in_file = Cursor::new(&args.input);
let runner = MpsRunner::with_stream(in_file);
let runner = MpsFaye::with_stream(in_file);
for item in runner {
match item {
Ok(music) => {
if let Some(filename) =
music.field("filename").and_then(|x| x.to_owned().to_str())
music_filename(&music)
{
playlist.segments.push(MediaSegment {
uri: filename,
title: music.field("title").and_then(|x| x.to_owned().to_str()),
title: music_title(&music),
..Default::default()
});
} else {
@ -52,16 +52,16 @@ fn main() {
let in_path = Path::new(&args.input);
let in_file = BufReader::new(File::open(in_path).expect("Invalid/missing input file"));
let runner = MpsRunner::with_stream(in_file);
let runner = MpsFaye::with_stream(in_file);
for item in runner {
match item {
Ok(music) => {
if let Some(filename) =
music.field("filename").and_then(|x| x.to_owned().to_str())
music_filename(&music)
{
playlist.segments.push(MediaSegment {
uri: filename,
title: music.field("title").and_then(|x| x.to_owned().to_str()),
title: music_title(&music),
..Default::default()
});
} else {
@ -82,3 +82,22 @@ fn main() {
eprintln!("Playlist save error: {}", e);
}
}
/// Returns the item's `title` field converted through `to_str()`, or `None` when
/// the field is absent or the conversion yields nothing.
fn music_title(item: &MpsItem) -> Option<String> {
    let title = item.field("title")?;
    title.to_owned().to_str()
}
/// Returns the item's `filename` field with occurrences of the current working
/// directory replaced by `./`.
///
/// Returns `None` when the item has no `filename` field. When the working
/// directory cannot be determined, or cannot serve as a search pattern (it is
/// empty or not valid UTF-8), the filename is returned without any rewriting.
fn music_filename(item: &MpsItem) -> Option<String> {
    let filename = item.field("filename")?;
    let cwd = std::env::current_dir().ok();
    // An empty pattern makes `replace` insert "./" between every character of the
    // filename, so only a non-empty UTF-8 working directory is used for rewriting.
    let prefix = cwd
        .as_deref()
        .and_then(Path::to_str)
        .filter(|dir| !dir.is_empty());
    Some(match prefix {
        Some(dir) => filename.as_str().replace(dir, "./"),
        // NOTE(review): this fallback renders through `to_string()` while the main
        // path uses `as_str()` — confirm both produce the same text for filenames.
        None => filename.to_string(),
    })
}

View file

@ -155,11 +155,12 @@ impl<'a, T: MpsTokenReader + 'a> MpsPlayer<'a, T> {
match item {
Ok(music) => {
if let Some(filename) =
music.field("filename").and_then(|x| x.to_owned().to_str())
music_filename(&music)
{
println!("Adding file `{}` to playlist", filename);
playlist.segments.push(MediaSegment {
uri: filename,
title: music.field("title").and_then(|x| x.to_owned().to_str()),
title: music_title(&music),
..Default::default()
});
Ok(())
@ -198,6 +199,25 @@ impl<'a, T: MpsTokenReader + 'a> MpsPlayer<'a, T> {
}
}
/// Returns the item's `title` field converted through `to_str()`, or `None` when
/// the field is absent or the conversion yields nothing.
#[inline]
fn music_title(item: &MpsItem) -> Option<String> {
    let title = item.field("title")?;
    title.to_owned().to_str()
}
/// Returns the item's `filename` field with occurrences of the current working
/// directory replaced by `./`.
///
/// Returns `None` when the item has no `filename` field. When the working
/// directory cannot be determined, or cannot serve as a search pattern (it is
/// empty or not valid UTF-8), the filename is returned without any rewriting.
#[inline]
fn music_filename(item: &MpsItem) -> Option<String> {
    let filename = item.field("filename")?;
    let cwd = std::env::current_dir().ok();
    // An empty pattern makes `replace` insert "./" between every character of the
    // filename, so only a non-empty UTF-8 working directory is used for rewriting.
    let prefix = cwd
        .as_deref()
        .and_then(std::path::Path::to_str)
        .filter(|dir| !dir.is_empty());
    Some(match prefix {
        Some(dir) => filename.as_str().replace(dir, "./"),
        // NOTE(review): this fallback renders through `to_string()` while the main
        // path uses `as_str()` — confirm both produce the same text for filenames.
        None => filename.to_string(),
    })
}
#[cfg(test)]
mod tests {
use super::*;

View file

@ -85,7 +85,10 @@ Operations to reduce the items in an iterable: iterable.(filter)
unique
unique field -- e.g. iterable.(unique title)
Keep only items which are do not duplicate another item, or keep only items whoes specified field does not duplicate another item's same field. The first non-duplicated instance of an item is always the one that is kept.";
Keep only items which are do not duplicate another item, or keep only items whoes specified field does not duplicate another item's same field. The first non-duplicated instance of an item is always the one that is kept.
??
Keep only the items that contain at least one field (not including the filename field).";
pub const SORTERS: &str =
"SORTERS (?sorters)