Improve tag processing and filtering with ?? filter

This commit is contained in:
NGnius (Graham) 2022-05-14 11:10:03 -04:00
parent c2f93faf69
commit 34487c02eb
11 changed files with 187 additions and 42 deletions

View file

@ -13,6 +13,7 @@ pub(crate) fn standard_vocab(vocabulary: &mut MpsLanguageDictionary) {
.add(crate::lang::vocabulary::filters::field_like_filter()) .add(crate::lang::vocabulary::filters::field_like_filter())
.add(crate::lang::vocabulary::filters::field_re_filter()) .add(crate::lang::vocabulary::filters::field_re_filter())
.add(crate::lang::vocabulary::filters::unique_field_filter()) .add(crate::lang::vocabulary::filters::unique_field_filter())
.add(crate::lang::vocabulary::filters::nonempty_filter())
// sorters // sorters
.add(crate::lang::vocabulary::sorters::empty_sort()) .add(crate::lang::vocabulary::sorters::empty_sort())
.add(crate::lang::vocabulary::sorters::shuffle_sort()) // accepts ~(shuffle) .add(crate::lang::vocabulary::sorters::shuffle_sort()) // accepts ~(shuffle)

View file

@ -4,6 +4,7 @@ mod field_filter_maybe;
mod field_like_filter; mod field_like_filter;
mod field_match_filter; mod field_match_filter;
mod index_filter; mod index_filter;
mod nonempty_filter;
mod range_filter; mod range_filter;
mod unique; mod unique;
pub(crate) mod utility; pub(crate) mod utility;
@ -27,6 +28,9 @@ pub use field_match_filter::{
pub use index_filter::{ pub use index_filter::{
index_filter, IndexFilter, IndexFilterFactory, IndexFilterStatementFactory, index_filter, IndexFilter, IndexFilterFactory, IndexFilterStatementFactory,
}; };
pub use nonempty_filter::{
nonempty_filter, NonEmptyFilter, NonEmptyFilterFactory, NonEmptyFilterStatementFactory,
};
pub use range_filter::{ pub use range_filter::{
range_filter, RangeFilter, RangeFilterFactory, RangeFilterStatementFactory, range_filter, RangeFilter, RangeFilterFactory, RangeFilterStatementFactory,
}; };

View file

@ -0,0 +1,65 @@
use std::collections::VecDeque;
use std::fmt::{Debug, Display, Error, Formatter};
use crate::lang::MpsLanguageDictionary;
use crate::lang::{MpsFilterFactory, MpsFilterPredicate, MpsFilterStatementFactory};
use crate::lang::{RuntimeMsg, SyntaxError, utility::assert_token_raw};
use crate::tokens::MpsToken;
use crate::MpsContext;
use crate::MpsItem;
/// Predicate type behind the `??` filter.
///
/// The struct carries no data, so every instance is interchangeable.
#[derive(Debug, Clone)]
pub struct NonEmptyFilter;

impl Display for NonEmptyFilter {
    /// Writes the fixed text `[empty]` to the formatter.
    // NOTE(review): the filter itself is spelled `??` in scripts; confirm that
    // `[empty]` is the intended rendering and not a leftover from another filter.
    fn fmt(&self, f: &mut Formatter) -> Result<(), Error> {
        f.write_str("[empty]")
    }
}
impl MpsFilterPredicate for NonEmptyFilter {
    /// Decides whether `item` counts as non-empty.
    ///
    /// An item with no fields is rejected, and so is an item whose single field
    /// is `filename`; every other item is accepted. The context is not
    /// consulted and this implementation never returns an error.
    fn matches(&mut self, item: &MpsItem, _ctx: &mut MpsContext) -> Result<bool, RuntimeMsg> {
        let keep = match item.len() {
            0 => false,
            // ignore filename field, since that almost always exists
            1 => item.field("filename").is_none(),
            _ => true,
        };
        Ok(keep)
    }

    /// Always reports `false`.
    fn is_complete(&self) -> bool {
        false
    }

    /// Nothing to reset: the predicate holds no state.
    fn reset(&mut self) -> Result<(), RuntimeMsg> {
        Ok(())
    }
}
/// Factory that recognises the `??` token pair and builds a [`NonEmptyFilter`].
pub struct NonEmptyFilterFactory;

impl MpsFilterFactory<NonEmptyFilter> for NonEmptyFilterFactory {
    /// Returns `true` when there are at least two tokens and the first two are
    /// both interrogation tokens.
    fn is_filter(&self, tokens: &VecDeque<&MpsToken>) -> bool {
        tokens.len() >= 2 && tokens.iter().take(2).all(|token| token.is_interrogation())
    }

    /// Consumes the two leading interrogation tokens and returns the filter.
    ///
    /// Any error from `assert_token_raw` is propagated unchanged; the
    /// dictionary is not used.
    fn build_filter(
        &self,
        tokens: &mut VecDeque<MpsToken>,
        _dict: &MpsLanguageDictionary,
    ) -> Result<NonEmptyFilter, SyntaxError> {
        // The filter is written as two consecutive `?` tokens.
        for _ in 0..2 {
            assert_token_raw(MpsToken::Interrogation, tokens)?;
        }
        Ok(NonEmptyFilter)
    }
}
/// Statement factory for the `??` filter, pairing [`NonEmptyFilter`] with [`NonEmptyFilterFactory`].
pub type NonEmptyFilterStatementFactory = MpsFilterStatementFactory<NonEmptyFilter, NonEmptyFilterFactory>;
/// Creates the statement factory for the `??` filter.
#[inline(always)]
pub fn nonempty_filter() -> NonEmptyFilterStatementFactory {
    let factory = NonEmptyFilterFactory;
    NonEmptyFilterStatementFactory::new(factory)
}

View file

@ -89,6 +89,10 @@
//! //!
//! Keep only items which do not duplicate another item, or keep only items whose specified field does not duplicate another item's same field. The first non-duplicated instance of an item is always the one that is kept. //! Keep only items which do not duplicate another item, or keep only items whose specified field does not duplicate another item's same field. The first non-duplicated instance of an item is always the one that is kept.
//! //!
//! ### ?? -- e.g. `iterable.(??);`
//!
//! Keep only the items that contain at least one field (not including the filename field).
//!
//! ## Functions //! ## Functions
//! Similar to most other languages: `function_name(param1, param2, etc.);`. //! Similar to most other languages: `function_name(param1, param2, etc.);`.
//! These always return an iterable which can be manipulated with other syntax (filters, sorters, etc.). //! These always return an iterable which can be manipulated with other syntax (filters, sorters, etc.).

View file

@ -70,6 +70,15 @@ impl Tags {
.map(|s| s.to_string()) .map(|s| s.to_string())
} }
/// Returns the `ALBUMARTIST` tag as an owned string.
///
/// A missing tag is treated as `TagType::Unknown`; the result is whatever
/// `str()` yields for the chosen tag, copied into a `String`.
#[inline]
pub fn albumartist_name(&self) -> Option<String> {
    let fallback = TagType::Unknown;
    let tag = self.data.get("ALBUMARTIST").unwrap_or(&fallback);
    let text = tag.str();
    text.map(|value| value.to_string())
}
#[inline] #[inline]
pub fn genre_title(&self) -> Option<String> { pub fn genre_title(&self) -> Option<String> {
self.data self.data

View file

@ -163,35 +163,34 @@ impl FileIter {
match crate::music::MpsLibrary::read_media_tags(path) { match crate::music::MpsLibrary::read_media_tags(path) {
Ok(tags) => { Ok(tags) => {
let mut item = MpsItem::new(); let mut item = MpsItem::new();
item.set_field("title", tags.track_title().into());
if let Some(artist) = tags.artist_name() {
item.set_field("artist", artist.into());
}
if let Some(albumartist) = tags.albumartist_name() {
item.set_field("albumartist", albumartist.clone().into());
if let Some(MpsTypePrimitive::String(artist)) = item.field("artist") {
if albumartist.trim() != artist.trim() {
let new_artist = format!("{},{}", artist, albumartist.as_str());
item.set_field("artist", new_artist.into());
}
} else {
item.set_field("artist", albumartist.into());
}
}
if let Some(album) = tags.album_title() {
item.set_field("album", album.into());
}
if let Some(genre) = tags.genre_title() {
item.set_field("genre", genre.into());
}
if let Some(track) = tags.track_number() {
item.set_field("track", track.into());
}
if let Some(year) = tags.track_date() {
item.set_field("year", year.into());
}
self.populate_item_impl_simple(&mut item, path_str, captures, capture_names); self.populate_item_impl_simple(&mut item, path_str, captures, capture_names);
if item.field("title").is_none() {
item.set_field("title", tags.track_title().into());
}
if item.field("artist").is_none() {
if let Some(artist) = tags.artist_name() {
item.set_field("artist", artist.into());
}
}
if item.field("album").is_none() {
if let Some(album) = tags.album_title() {
item.set_field("album", album.into());
}
}
if item.field("genre").is_none() {
if let Some(genre) = tags.genre_title() {
item.set_field("genre", genre.into());
}
}
if item.field("track").is_none() {
if let Some(track) = tags.track_number() {
item.set_field("track", track.into());
}
}
if item.field("year").is_none() {
if let Some(year) = tags.track_date() {
item.set_field("year", year.into());
}
}
Some(item) Some(item)
} }
Err(_) => { Err(_) => {
@ -227,7 +226,9 @@ impl FileIter {
if let Some(captures) = captures { if let Some(captures) = captures {
for name_maybe in capture_names { for name_maybe in capture_names {
if let Some(name) = name_maybe { if let Some(name) = name_maybe {
if let Some(value) = captures.name(name).map(|m| m.as_str().to_string()) { if item.field(name).is_some() {
// do nothing
} else if let Some(value) = captures.name(name).map(|m| m.as_str().to_string()) {
item.set_field(name, MpsTypePrimitive::parse(value)); item.set_field(name, MpsTypePrimitive::parse(value));
} }
} }

View file

@ -13,10 +13,10 @@ use bliss_audio_symphonia::{BlissError, Song};
const DEFAULT_PARALLELISM: usize = 2; const DEFAULT_PARALLELISM: usize = 2;
// maximum length of song cache (song objects take up a lot of memory) // maximum length of song cache (song objects take up a lot of memory)
const MAX_SONG_CACHE_SIZE: usize = 1000; const MAX_SONG_CACHE_SIZE: usize = 10000;
// maximum length of distance cache (takes up significantly less memory than songs) // maximum length of distance cache (takes up significantly less memory than songs)
const MAX_DISTANCE_CACHE_SIZE: usize = MAX_SONG_CACHE_SIZE * 10; const MAX_DISTANCE_CACHE_SIZE: usize = MAX_SONG_CACHE_SIZE * MAX_SONG_CACHE_SIZE;
use crate::lang::RuntimeMsg; use crate::lang::RuntimeMsg;
use crate::MpsItem; use crate::MpsItem;
@ -252,7 +252,7 @@ impl CacheThread {
self.distance_in_progress.remove(&key); self.distance_in_progress.remove(&key);
if self.distance_cache.len() > MAX_DISTANCE_CACHE_SIZE { if self.distance_cache.len() > MAX_DISTANCE_CACHE_SIZE {
// avoid using too much memory // avoid using too much memory
self.song_cache.clear(); self.distance_cache.clear();
} }
self.distance_cache.insert(key, distance_result); self.distance_cache.insert(key, distance_result);
} }
@ -560,6 +560,9 @@ fn worker_distance(
song: new_song2.clone(), song: new_song2.clone(),
}) })
.unwrap_or(()); .unwrap_or(());
if new_song2.is_err() {
eprintln!("Song error on `{}`: {}", path2, new_song2.clone().err().unwrap());
}
new_song2? new_song2?
}; };
Ok(song1.distance(&song2)) Ok(song1.distance(&song2))

View file

@ -1,3 +1,5 @@
//! Integration tests for every syntax feature
use mps_interpreter::tokens::{MpsToken, MpsTokenizer, ParseError}; use mps_interpreter::tokens::{MpsToken, MpsTokenizer, ParseError};
use mps_interpreter::*; use mps_interpreter::*;
use std::collections::VecDeque; use std::collections::VecDeque;
@ -813,3 +815,17 @@ fn execute_emptiesop_line() -> Result<(), MpsError> {
)?; )?;
execute_single_line("empties(0)", true, true) execute_single_line("empties(0)", true, true)
} }
/// Runs the `??` filter over `files()` and over `empties(42)`.
// NOTE(review): the two boolean arguments are passed through exactly as before;
// their meaning is defined by `execute_single_line` and is not visible here.
#[test]
fn execute_nonemptyfilter_line() -> Result<(), MpsError> {
    let over_files = "files().(??)";
    let over_empties = "empties(42).(??)";
    execute_single_line(over_files, false, true)?;
    execute_single_line(over_empties, true, true)
}

View file

@ -11,7 +11,7 @@ use std::path::Path;
use m3u8_rs::{MediaPlaylist, MediaSegment}; use m3u8_rs::{MediaPlaylist, MediaSegment};
use mps_interpreter::MpsRunner; use mps_interpreter::{MpsFaye, MpsItem};
fn main() { fn main() {
let args = cli::parse(); let args = cli::parse();
@ -29,16 +29,16 @@ fn main() {
println!("Executing: {}", &args.input); println!("Executing: {}", &args.input);
let in_file = Cursor::new(&args.input); let in_file = Cursor::new(&args.input);
let runner = MpsRunner::with_stream(in_file); let runner = MpsFaye::with_stream(in_file);
for item in runner { for item in runner {
match item { match item {
Ok(music) => { Ok(music) => {
if let Some(filename) = if let Some(filename) =
music.field("filename").and_then(|x| x.to_owned().to_str()) music_filename(&music)
{ {
playlist.segments.push(MediaSegment { playlist.segments.push(MediaSegment {
uri: filename, uri: filename,
title: music.field("title").and_then(|x| x.to_owned().to_str()), title: music_title(&music),
..Default::default() ..Default::default()
}); });
} else { } else {
@ -52,16 +52,16 @@ fn main() {
let in_path = Path::new(&args.input); let in_path = Path::new(&args.input);
let in_file = BufReader::new(File::open(in_path).expect("Invalid/missing input file")); let in_file = BufReader::new(File::open(in_path).expect("Invalid/missing input file"));
let runner = MpsRunner::with_stream(in_file); let runner = MpsFaye::with_stream(in_file);
for item in runner { for item in runner {
match item { match item {
Ok(music) => { Ok(music) => {
if let Some(filename) = if let Some(filename) =
music.field("filename").and_then(|x| x.to_owned().to_str()) music_filename(&music)
{ {
playlist.segments.push(MediaSegment { playlist.segments.push(MediaSegment {
uri: filename, uri: filename,
title: music.field("title").and_then(|x| x.to_owned().to_str()), title: music_title(&music),
..Default::default() ..Default::default()
}); });
} else { } else {
@ -82,3 +82,22 @@ fn main() {
eprintln!("Playlist save error: {}", e); eprintln!("Playlist save error: {}", e);
} }
} }
/// Returns the item's `title` field converted with `to_str`, or `None` when
/// the field is absent or the conversion yields nothing.
fn music_title(item: &MpsItem) -> Option<String> {
    let title = item.field("title")?;
    title.to_owned().to_str()
}
/// Returns the item's `filename` field as a playlist entry, rewritten relative
/// to the current working directory when the file lives underneath it.
///
/// * `None` when the item has no `filename` field.
/// * The field's `to_string()` form when the working directory cannot be read.
/// * `./<rest>` when the filename starts with the working directory.
/// * The filename unchanged otherwise.
fn music_filename(item: &MpsItem) -> Option<String> {
    let filename = item.field("filename")?;
    let cwd = match std::env::current_dir() {
        Ok(dir) => dir,
        // Without a working directory there is nothing to relativize against.
        Err(_) => return Some(filename.to_string()),
    };
    let owned = filename.as_str();
    let full: &str = &owned;
    // Strip the working directory by path component instead of by substring
    // replacement: a substring replace also rewrites matches in the middle of
    // the name, turns every separator into "./" when the cwd is "/", and
    // inserts "./" between all characters when the cwd is not valid UTF-8
    // (empty search pattern).
    match Path::new(full).strip_prefix(&cwd) {
        Ok(relative) => Some(format!("./{}", relative.display())),
        Err(_) => Some(full.to_owned()),
    }
}

View file

@ -155,11 +155,12 @@ impl<'a, T: MpsTokenReader + 'a> MpsPlayer<'a, T> {
match item { match item {
Ok(music) => { Ok(music) => {
if let Some(filename) = if let Some(filename) =
music.field("filename").and_then(|x| x.to_owned().to_str()) music_filename(&music)
{ {
println!("Adding file `{}` to playlist", filename);
playlist.segments.push(MediaSegment { playlist.segments.push(MediaSegment {
uri: filename, uri: filename,
title: music.field("title").and_then(|x| x.to_owned().to_str()), title: music_title(&music),
..Default::default() ..Default::default()
}); });
Ok(()) Ok(())
@ -198,6 +199,25 @@ impl<'a, T: MpsTokenReader + 'a> MpsPlayer<'a, T> {
} }
} }
/// Returns the item's `title` field converted with `to_str`, or `None` when
/// the field is absent or the conversion yields nothing.
#[inline]
fn music_title(item: &MpsItem) -> Option<String> {
    let title = item.field("title")?;
    title.to_owned().to_str()
}
/// Returns the item's `filename` field as a playlist entry, rewritten relative
/// to the current working directory when the file lives underneath it.
///
/// * `None` when the item has no `filename` field.
/// * The field's `to_string()` form when the working directory cannot be read.
/// * `./<rest>` when the filename starts with the working directory.
/// * The filename unchanged otherwise.
#[inline]
fn music_filename(item: &MpsItem) -> Option<String> {
    let filename = item.field("filename")?;
    let cwd = match std::env::current_dir() {
        Ok(dir) => dir,
        // Without a working directory there is nothing to relativize against.
        Err(_) => return Some(filename.to_string()),
    };
    let owned = filename.as_str();
    let full: &str = &owned;
    // Strip the working directory by path component instead of by substring
    // replacement: a substring replace also rewrites matches in the middle of
    // the name, turns every separator into "./" when the cwd is "/", and
    // inserts "./" between all characters when the cwd is not valid UTF-8
    // (empty search pattern).
    match std::path::Path::new(full).strip_prefix(&cwd) {
        Ok(relative) => Some(format!("./{}", relative.display())),
        Err(_) => Some(full.to_owned()),
    }
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;

View file

@ -85,7 +85,10 @@ Operations to reduce the items in an iterable: iterable.(filter)
unique unique
unique field -- e.g. iterable.(unique title) unique field -- e.g. iterable.(unique title)
Keep only items which are do not duplicate another item, or keep only items whoes specified field does not duplicate another item's same field. The first non-duplicated instance of an item is always the one that is kept."; Keep only items which are do not duplicate another item, or keep only items whoes specified field does not duplicate another item's same field. The first non-duplicated instance of an item is always the one that is kept.
??
Keep only the items that contain at least one field (not including the filename field).";
pub const SORTERS: &str = pub const SORTERS: &str =
"SORTERS (?sorters) "SORTERS (?sorters)