Create initial language parser

This commit is contained in:
NGnius (Graham) 2024-05-30 20:03:56 -04:00
commit 453c48b686
17 changed files with 2062 additions and 0 deletions

1
.gitignore vendored Normal file
View file

@ -0,0 +1 @@
/target

129
Cargo.lock generated Normal file
View file

@ -0,0 +1,129 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "beef"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3a8241f3ebb85c056b509d4327ad0358fbbba6ffb340bf388f26350aeda225b1"
[[package]]
name = "diff"
version = "0.1.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8"
[[package]]
name = "fnv"
version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
[[package]]
name = "lazy_static"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]]
name = "logos"
version = "0.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "161971eb88a0da7ae0c333e1063467c5b5727e7fb6b710b8db4814eade3a42e8"
dependencies = [
"logos-derive",
]
[[package]]
name = "logos-codegen"
version = "0.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e31badd9de5131fdf4921f6473d457e3dd85b11b7f091ceb50e4df7c3eeb12a"
dependencies = [
"beef",
"fnv",
"lazy_static",
"proc-macro2",
"quote",
"regex-syntax",
"syn",
]
[[package]]
name = "logos-derive"
version = "0.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1c2a69b3eb68d5bd595107c9ee58d7e07fe2bb5e360cc85b0f084dedac80de0a"
dependencies = [
"logos-codegen",
]
[[package]]
name = "muss2"
version = "0.1.0"
[[package]]
name = "muss2-lang"
version = "0.1.0"
dependencies = [
"logos",
"pretty_assertions",
]
[[package]]
name = "pretty_assertions"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "af7cee1a6c8a5b9208b3cb1061f10c0cb689087b3d8ce85fb9d2dd7a29b6ba66"
dependencies = [
"diff",
"yansi",
]
[[package]]
name = "proc-macro2"
version = "1.0.79"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e835ff2298f5721608eb1a980ecaee1aef2c132bf95ecc026a11b7bf3c01c02e"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef"
dependencies = [
"proc-macro2",
]
[[package]]
name = "regex-syntax"
version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56"
[[package]]
name = "syn"
version = "2.0.55"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "002a1b3dbf967edfafc32655d0f377ab0bb7b994aa1d32c8cc7e9b8bf3ebb8f0"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "unicode-ident"
version = "1.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
[[package]]
name = "yansi"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec"

13
Cargo.toml Normal file
View file

@ -0,0 +1,13 @@
[package]
name = "muss2"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
[workspace]
members = [
"crates/lang"
]

12
crates/lang/Cargo.toml Normal file
View file

@ -0,0 +1,12 @@
[package]
name = "muss2-lang"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
logos = { version = "0.14" }
[dev-dependencies]
pretty_assertions = "1.3.0"

View file

@ -0,0 +1,16 @@
/// Errors that can occur while tokenizing source text.
#[derive(Debug, Default, PartialEq, Eq, Clone, Copy)]
pub enum LexError {
    /// The input matched no known token pattern.
    #[default]
    UnrecognizedToken,
}

impl core::fmt::Display for LexError {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let message = match self {
            Self::UnrecognizedToken => "Unrecognized token",
        };
        f.write_str(message)
    }
}

impl std::error::Error for LexError {}

View file

@ -0,0 +1,68 @@
mod errors;
pub use errors::LexError;
mod tokens;
pub use tokens::{Token, TokenInfo};
#[cfg(test)]
mod test {
    use super::*;
    use pretty_assertions::assert_eq;

    // One occurrence of (nearly) every token kind, separated by single spaces.
    // Includes multi-line constructs (long comment, short comment, bare
    // newlines) so that line/column bookkeeping is exercised as well.
    const ALL_TOKENS_STR: &str = "u n + - * / && || => x> ~> = . n_u_ :: is_a_ :: VaR1AbLe ( ) { } : ; -12345 12345.6789 \"char[]\" /* long\ncomment */ // short comment \n \n <>";

    /// Lexes ALL_TOKENS_STR and checks every produced token — variant,
    /// payload, and line/column/index metadata — against hand-computed values.
    #[test]
    fn parse_everything() {
        let expected = vec![
            Token::Union(TokenInfo { line: 0, column: 0..1, index: 0..1 }),
            Token::Intersection(TokenInfo { line: 0, column: 2..3, index: 2..3 }),
            Token::Plus(TokenInfo { line: 0, column: 4..5, index: 4..5 }),
            Token::Minus(TokenInfo { line: 0, column: 6..7, index: 6..7 }),
            Token::Multiply(TokenInfo { line: 0, column: 8..9, index: 8..9 }),
            Token::Divide(TokenInfo { line: 0, column: 10..11, index: 10..11 }),
            Token::And(TokenInfo { line: 0, column: 12..14, index: 12..14 }),
            Token::Or(TokenInfo { line: 0, column: 15..17, index: 15..17 }),
            Token::Map(TokenInfo { line: 0, column: 18..20, index: 18..20 }),
            Token::Filter(TokenInfo { line: 0, column: 21..23, index: 21..23 }),
            Token::Sort(TokenInfo { line: 0, column: 24..26, index: 24..26 }),
            Token::Equal(TokenInfo { line: 0, column: 27..28, index: 27..28 }),
            Token::Dot(TokenInfo { line: 0, column: 29..30, index: 29..30 }),
            Token::Variable(("n_u_".into(), TokenInfo { line: 0, column: 31..35, index: 31..35 })),
            Token::PathSeparator(TokenInfo { line: 0, column: 36..38, index: 36..38 }),
            Token::Variable(("is_a_".into(), TokenInfo { line: 0, column: 39..44, index: 39..44 })),
            Token::PathSeparator(TokenInfo { line: 0, column: 45..47, index: 45..47 }),
            Token::Variable(("VaR1AbLe".into(), TokenInfo { line: 0, column: 48..56, index: 48..56 })),
            Token::OpenRoundBracket(TokenInfo { line: 0, column: 57..58, index: 57..58 }),
            Token::CloseRoundBracket(TokenInfo { line: 0, column: 59..60, index: 59..60 }),
            Token::OpenCurlyBracket(TokenInfo { line: 0, column: 61..62, index: 61..62 }),
            Token::CloseCurlyBracket(TokenInfo { line: 0, column: 63..64, index: 63..64 }),
            Token::Colon(TokenInfo { line: 0, column: 65..66, index: 65..66 }),
            Token::Semicolon(TokenInfo { line: 0, column: 67..68, index: 67..68 }),
            Token::Integer((-12345, TokenInfo { line: 0, column: 69..75, index: 69..75 })),
            Token::Float((12345.6789, TokenInfo { line: 0, column: 76..86, index: 76..86 })),
            Token::String(("char[]".into(), TokenInfo { line: 0, column: 87..95, index: 87..95 })),
            Token::LongComment((" long\ncomment ".into(), TokenInfo { line: 0, column: 96..114, index: 96..114 })),
            Token::ShortComment((" short comment ".into(), TokenInfo { line: 1, column: 11..29, index: 115..133 })),
            Token::Newline(TokenInfo { line: 2, column: 1..2, index: 134..135 }),
            Token::Generate(TokenInfo { line: 3, column: 1..3, index: 136..138 }),
        ];
        let mut actual = Vec::new();
        // Assert each token individually first so a failure pinpoints which
        // token broke, before the final whole-vector comparison.
        for (index, token_result) in Token::tokenify(ALL_TOKENS_STR).enumerate() {
            assert!(token_result.is_ok(), "Token #{} (expected: {:?}) failed to parse: {:?}", index, expected[index], token_result.err());
            actual.push(token_result.unwrap());
        }
        assert_eq!(actual, expected)
    }

    /// Round-trip check: stringify(tokenify(s)) reproduces the input.
    /// The input is padded with one trailing space because stringify appends
    /// a single space after every token.
    #[test]
    fn parse_reversability() {
        let expected = format!("{} ", ALL_TOKENS_STR);
        let actual = Token::stringify(Token::tokenify(&expected).map(|token_result| token_result.unwrap()));
        assert_eq!(actual, expected)
    }
}

View file

@ -0,0 +1,240 @@
use logos::Logos;
/// Mutable state threaded through the logos lexer (via `#[logos(extras)]`)
/// to track line numbers and byte offsets for building `TokenInfo` values.
#[derive(Default, PartialEq, Clone)]
pub struct ExtraState {
    /// Zero-based line counter.
    line: usize,
    /// Byte offset at which the current line begins.
    line_start: usize,
    /// Byte offset of the most recently synced token's start.
    start: usize,
    /// Byte offset one past the most recently synced token's end.
    end: usize,
}

impl ExtraState {
    /// Copy the lexer's current span into the extra state.
    fn lexer_sync(lex: &mut logos::Lexer<Token>) {
        let current = lex.span();
        lex.extras.start = current.start;
        lex.extras.end = current.end;
    }

    /// Handle a newline token: produce its `TokenInfo` first, then advance
    /// the line counter and remember where the next line begins.
    fn newline(lex: &mut logos::Lexer<Token>) -> TokenInfo {
        Self::lexer_sync(lex);
        let info = lex.extras.token_info();
        lex.extras.line += 1;
        lex.extras.line_start = lex.span().end;
        info
    }

    /// Build a `TokenInfo` describing the most recently synced token.
    fn token_info(&self) -> TokenInfo {
        let column_start = self.start - self.line_start;
        let column_end = self.end - self.line_start;
        TokenInfo {
            line: self.line,
            column: column_start..column_end,
            index: self.start..self.end,
        }
    }
}
/// Source-location metadata attached to every lexed token.
#[derive(Debug, PartialEq, Clone)]
pub struct TokenInfo {
    /// Zero-based line number on which the token starts.
    pub line: usize,
    /// Byte-offset range of the token relative to the start of its line.
    pub column: core::ops::Range<usize>,
    /// Absolute byte-offset range of the token within the whole input.
    pub index: core::ops::Range<usize>,
}
/// All lexical tokens of the language, produced by the logos-derived lexer.
///
/// Every variant carries a `TokenInfo` locating it in the source; the
/// value-bearing variants (identifiers, literals, comments) also carry
/// their parsed payload.
#[derive(Logos, Debug, PartialEq, Clone)]
#[logos(skip r"[ \t\f]+")] // Ignore this regex pattern between tokens
#[logos(error = super::LexError)]
#[logos(extras = ExtraState)]
pub enum Token {
    // Operands
    // Set operations
    // `u` / `n` take priority 99 so they beat the identifier regex below.
    #[token("u", priority = 99, callback = all_cb)]
    Union(TokenInfo),
    #[token("n", priority = 99, callback = all_cb)]
    Intersection(TokenInfo),
    // Arithmetic operations (also applicable to sets)
    #[token("+", callback = all_cb)]
    Plus(TokenInfo),
    #[token("-", callback = all_cb)]
    Minus(TokenInfo),
    #[token("*", callback = all_cb)]
    Multiply(TokenInfo),
    #[token("/", callback = all_cb)]
    Divide(TokenInfo),
    // Logical operations
    #[token("&&", callback = all_cb)]
    And(TokenInfo),
    #[token("||", callback = all_cb)]
    Or(TokenInfo),
    // Functional
    #[token("=>", callback = all_cb)]
    Map(TokenInfo),
    #[token("x>", callback = all_cb)]
    Filter(TokenInfo),
    #[token("~>", callback = all_cb)]
    Sort(TokenInfo),
    #[token("<>", callback = all_cb)]
    Generate(TokenInfo),
    // Declarations
    // Basics
    #[token("=", callback = all_cb)]
    Equal(TokenInfo),
    #[token("::", callback = all_cb)]
    PathSeparator(TokenInfo),
    #[token(".", callback = all_cb)]
    Dot(TokenInfo),
    // Identifier: letter or underscore, then letters/digits/underscores.
    #[regex("[a-zA-Z_][a-zA-Z_0-9]*", priority = 1, callback = variable_cb)]
    Variable((String, TokenInfo)),
    #[token("(", callback = all_cb)]
    OpenRoundBracket(TokenInfo),
    #[token(")", callback = all_cb)]
    CloseRoundBracket(TokenInfo),
    #[token("{", callback = all_cb)]
    OpenCurlyBracket(TokenInfo),
    #[token("}", callback = all_cb)]
    CloseCurlyBracket(TokenInfo),
    #[token(":", callback = all_cb)]
    Colon(TokenInfo),
    #[token(";", callback = all_cb)]
    Semicolon(TokenInfo),
    // Literals
    // NOTE(review): both numeric patterns require a leading non-zero digit,
    // so the literals `0`, `-0`, and `0.5` cannot be lexed — confirm whether
    // that is intentional.
    #[regex("-?[1-9][0-9]*", priority = 1, callback = integer_cb)]
    Integer((i64, TokenInfo)),
    #[regex("-?[1-9][0-9]*\\.[0-9]+", priority = 99, callback = float_cb)]
    Float((f64, TokenInfo)),
    #[regex(r#""([^"\\]|\\["\\bnfrt]|u[a-fA-F0-9]{4})*""#, priority = 1, callback = string_cb)]
    String((String, TokenInfo)),
    /// Comments
    #[regex(r#"\/\*([^\*]+(\*[^\/])?)*\*\/"#, priority = 1, callback = multiline_comment_cb)]
    LongComment((String, TokenInfo)),
    // NOTE(review): the pattern requires a trailing '\n', so a `//` comment
    // on the final line of input (with no newline) will not match — confirm.
    #[regex("\\/\\/[^\n]*\n", priority = 1, callback = oneline_comment_cb)]
    ShortComment((String, TokenInfo)),
    /// Ignore
    #[regex(r"\n", newline_cb)]
    Newline(TokenInfo),
}
/// Shared callback: sync the span-tracking state with the lexer's current
/// span and produce the corresponding `TokenInfo`.
fn all_cb(lex: &mut logos::Lexer<Token>) -> TokenInfo {
    ExtraState::lexer_sync(lex);
    lex.extras.token_info()
}

/// Callback for identifiers: captures the matched text verbatim.
fn variable_cb(lex: &mut logos::Lexer<Token>) -> (String, TokenInfo) {
    let slice = lex.slice();
    (slice.to_owned(), all_cb(lex))
}

/// Callback for integer literals.
/// NOTE(review): `parse().unwrap()` panics if the literal overflows i64;
/// consider surfacing a LexError instead — confirm intended behavior.
fn integer_cb(lex: &mut logos::Lexer<Token>) -> (i64, TokenInfo) {
    let slice = lex.slice();
    (slice.parse().unwrap(), all_cb(lex))
}

/// Callback for float literals.
fn float_cb(lex: &mut logos::Lexer<Token>) -> (f64, TokenInfo) {
    let slice = lex.slice();
    (slice.parse().unwrap(), all_cb(lex))
}

/// Callback for string literals: strips the surrounding double quotes.
fn string_cb(lex: &mut logos::Lexer<Token>) -> (String, TokenInfo) {
    let slice = lex.slice();
    // TODO handle escaped chars
    (slice[1..slice.len()-1].to_owned(), all_cb(lex))
}
/// Callback for `/* ... */` comments: strips the two-character delimiters
/// and updates the line-tracking state for every newline inside the comment.
fn multiline_comment_cb(lex: &mut logos::Lexer<Token>) -> (String, TokenInfo) {
    let slice = lex.slice();
    let info = all_cb(lex);
    // Bug fix: use char_indices() so `i` is a byte offset into the slice.
    // The previous chars().enumerate() yielded char *counts*, which desync
    // `line_start` (a byte offset) after any multi-byte (non-ASCII)
    // character preceding a newline in the comment body.
    for (i, c) in slice.char_indices() {
        if c == '\n' {
            lex.extras.line += 1;
            lex.extras.line_start = lex.span().start + i + 1;
        }
    }
    (slice[2..slice.len()-2].to_owned(), info)
}
/// Callback for `// ...` comments: strips the leading `//` and the trailing
/// newline, and advances the line-tracking state past that newline.
fn oneline_comment_cb(lex: &mut logos::Lexer<Token>) -> (String, TokenInfo) {
    let slice = lex.slice();
    let info = all_cb(lex);
    // The regex guarantees the match ends with '\n', so the next line
    // starts exactly at the end of this span.
    lex.extras.line += 1;
    lex.extras.line_start = lex.span().end;
    (slice[2..slice.len()-1].to_owned(), info)
}

/// Callback for bare newline tokens; delegates to `ExtraState::newline`.
fn newline_cb(lex: &mut logos::Lexer<Token>) -> TokenInfo {
    ExtraState::newline(lex)
}
impl Token {
    /// Create a lexer over `s` that yields `Result<Token, LexError>` items.
    pub fn tokenify<'a>(s: &'a str) -> logos::Lexer<'a, Self> {
        Token::lexer(s)
    }

    /// Render an owned token stream back into source text.
    /// Every token is followed by a single space, so round-tripping input
    /// produces the original text plus one trailing space.
    pub fn stringify<'a>(tokens: impl core::iter::Iterator<Item=Self> + 'a) -> String {
        let mut result = String::new();
        for t in tokens {
            t.append_to(&mut result);
        }
        result
    }

    /// Render a borrowed token stream back into source text.
    /// Same output format as [`Self::stringify`].
    pub fn stringify_ref<'a, 'b>(tokens: impl core::iter::Iterator<Item=&'b Self> + 'a) -> String {
        let mut result = String::new();
        for t in tokens {
            t.append_to(&mut result);
        }
        result
    }

    /// Append this token's source text plus one trailing space to `result`.
    /// Shared by `stringify` and `stringify_ref`, which previously duplicated
    /// this loop body.
    fn append_to(&self, result: &mut String) {
        use core::fmt::Write;
        // Writing to a String cannot fail, so unwrap is safe here.
        self.write_str(result).unwrap();
        write!(result, " ").unwrap();
    }

    /// Render this single token as an owned String (no trailing space).
    /// NOTE(review): by Rust naming convention `as_*` returns a borrow;
    /// the name is kept because other modules already call it.
    pub fn as_str(&self) -> String {
        let mut s = String::new();
        self.write_str(&mut s).unwrap();
        s
    }

    /// Write this token's canonical source text into `result`.
    /// This is the inverse of lexing for every variant (comments regain
    /// their delimiters, strings their quotes).
    pub fn write_str(&self, result: &mut String) -> std::fmt::Result {
        use core::fmt::Write;
        match self {
            Self::Union(_) => write!(result, "u"),
            Self::Intersection(_) => write!(result, "n"),
            Self::Plus(_) => write!(result, "+"),
            Self::Minus(_) => write!(result, "-"),
            Self::Multiply(_) => write!(result, "*"),
            Self::Divide(_) => write!(result, "/"),
            Self::And(_) => write!(result, "&&"),
            Self::Or(_) => write!(result, "||"),
            Self::Map(_) => write!(result, "=>"),
            Self::Filter(_) => write!(result, "x>"),
            Self::Sort(_) => write!(result, "~>"),
            Self::Generate(_) => write!(result, "<>"),
            Self::Equal(_) => write!(result, "="),
            Self::PathSeparator(_) => write!(result, "::"),
            Self::Dot(_) => write!(result, "."),
            Self::Variable((name, _)) => write!(result, "{}", name),
            Self::OpenRoundBracket(_) => write!(result, "("),
            Self::CloseRoundBracket(_) => write!(result, ")"),
            Self::OpenCurlyBracket(_) => write!(result, "{{"),
            Self::CloseCurlyBracket(_) => write!(result, "}}"),
            Self::Colon(_) => write!(result, ":"),
            Self::Semicolon(_) => write!(result, ";"),
            Self::Integer((int, _)) => write!(result, "{}", int),
            Self::Float((float, _)) => write!(result, "{}", float),
            Self::String((s, _)) => write!(result, "\"{}\"", s),
            Self::LongComment((c, _)) => write!(result, "/*{}*/", c),
            Self::ShortComment((c, _)) => write!(result, "//{}\n", c),
            Self::Newline(_) => write!(result, "\n"),
        }
    }

    /// True for tokens that carry no semantic content (currently newlines).
    pub fn is_ignore(&self) -> bool {
        matches!(self, Self::Newline(_))
    }
}

7
crates/lang/src/lib.rs Normal file
View file

@ -0,0 +1,7 @@
//! Language specification
//!
//! Parsing order: lexer -> syntax -> statement

/// First stage: turns raw source text into tokens.
pub mod lexer;
/// Final stage: turns syntax tokens into statements.
pub mod statement;
/// Middle stage: groups lexer tokens into syntax tokens.
pub mod syntax;

View file

@ -0,0 +1,32 @@
/// Errors produced while turning syntax tokens into statements.
#[derive(Debug, PartialEq, Clone)]
pub enum LanguageError {
    /// A lexer token sequence that could not be grouped into valid syntax.
    InvalidSequence(Vec<crate::lexer::Token>),
    /// A syntax token sequence that does not form a valid statement;
    /// the offending token is conventionally last in the Vec.
    InvalidSyntax(Vec<crate::syntax::SyntaxToken>),
    /// Input ended in the middle of a construct; holds the tokens seen so far.
    UnexpectedEnd(Vec<crate::syntax::SyntaxToken>),
    /// A well-formed token appeared where the grammar does not allow it.
    UnexpectedToken(crate::syntax::SyntaxToken),
    /// Propagated from the lower stage: input matched no token at all.
    UnrecognizedToken,
}
impl core::fmt::Display for LanguageError {
    /// Human-readable message; token sequences are re-stringified so the
    /// offending source fragment appears in the output.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        match self {
            Self::InvalidSequence(seq) => write!(f, "Invalid sequence {:?}", seq.as_slice()),
            Self::InvalidSyntax(seq) => write!(f, "Invalid syntax {}", crate::syntax::SyntaxToken::stringify_ref(seq.iter())),
            Self::UnexpectedEnd(seq) => write!(f, "Unexpected end of file {} <EOF>", crate::syntax::SyntaxToken::stringify_ref(seq.iter())),
            Self::UnexpectedToken(token) => write!(f, "Unexpected token {}", token.as_str()),
            Self::UnrecognizedToken => write!(f, "Unrecognized token"),
        }
    }
}

impl std::error::Error for LanguageError {}
impl From<crate::syntax::SyntaxError> for LanguageError {
fn from(value: crate::syntax::SyntaxError) -> Self {
match value {
crate::syntax::SyntaxError::UnrecognizedToken => Self::UnrecognizedToken,
crate::syntax::SyntaxError::InvalidSequence(seq) => Self::InvalidSequence(seq),
}
}
}

View file

@ -0,0 +1,133 @@
//! High-level language
mod errors;
pub use errors::LanguageError;
mod parser;
pub use parser::LanguageParser;
mod tree;
pub use tree::{Statement, Notification, Param, Declare, DeclareFun, DeclareType, Module, Op, DeclareAssignVar, DeclareVar, AssignVar, Dyadic, CallFun};
// my_namespace { <>generate_fn()=>map_fn()x>filter_fn(_)~>sort_fn(_) }
#[cfg(test)]
mod test {
    use super::*;
    use pretty_assertions::assert_eq;

    /// Print the source location of the syntax token that triggered an error.
    /// Extracted because the three error arms below previously contained
    /// byte-identical copy-pasted `eprintln!` blocks.
    fn eprint_location(e_display: &str, bad_syntax: &crate::syntax::SyntaxToken, i: usize) {
        let bad_lex = bad_syntax.info.last().expect("Empty token info on syntax token");
        eprintln!(
            "{} @ line {}, column {} to {} (index {} to {}), token #{}",
            e_display, bad_lex.line,
            bad_lex.column.start, bad_lex.column.end,
            bad_lex.index.start, bad_lex.index.end, i
        );
    }

    /// Drain the parser, panicking with location details on the first error;
    /// returns every non-ignored statement.
    fn assert_no_errors(iter: impl Iterator<Item=Result<Statement, LanguageError>>) -> Vec<Statement> {
        let mut statements = Vec::new();
        for (i, res) in iter.enumerate() {
            match res {
                Ok(statement) => if !statement.is_ignore() { statements.push(statement); },
                Err(e) => {
                    let e_display = e.to_string();
                    match e {
                        LanguageError::InvalidSequence(_seq) => {},
                        LanguageError::InvalidSyntax(seq) => {
                            let bad_syntax = seq.last().expect("Empty invalid syntax token sequence");
                            eprint_location(&e_display, bad_syntax, i);
                        },
                        LanguageError::UnexpectedEnd(seq) => {
                            let bad_syntax = seq.last().expect("Empty unexpected end token sequence");
                            eprint_location(&e_display, bad_syntax, i);
                        },
                        LanguageError::UnexpectedToken(bad_syntax) => {
                            eprint_location(&e_display, &bad_syntax, i);
                        },
                        LanguageError::UnrecognizedToken => {
                            eprintln!("Unrecognized token #{} ?!?!", i);
                        }
                    }
                    panic!("{} for token #{}", e_display, i);
                }
            }
        }
        statements
    }

    /// An empty module body is the smallest valid module statement.
    #[test]
    fn parse_minimum_module() {
        let parser = LanguageParser::lex("my_module {}");
        let parsed = assert_no_errors(parser);
        assert_eq!(vec![
            Statement::Module(Module {
                name: crate::syntax::Path(vec![
                    "my_module".into(),
                ]),
                inner: Vec::new(),
            }),
        ], parsed);
    }

    /// A generator function with no params and an empty body.
    #[test]
    fn parse_minimum_function_declaration() {
        let parser = LanguageParser::lex("my_generator () <> {}");
        let parsed = assert_no_errors(parser);
        assert_eq!(vec![
            Statement::Declare(Declare::Function(DeclareFun {
                name: crate::syntax::Path(vec![
                    "my_generator".into(),
                ]),
                params: Vec::new(),
                type_: crate::syntax::Functional::Generate,
                ops: Vec::new(),
            }))
        ], parsed);
    }

    /// A type declaration with an empty field list.
    #[test]
    fn parse_minimum_type_declaration() {
        let parser = LanguageParser::lex("my_type = {}");
        let parsed = assert_no_errors(parser);
        assert_eq!(vec![
            Statement::Declare(Declare::Type(DeclareType {
                name: crate::syntax::Path(vec![
                    "my_type".into(),
                ]),
                params: Vec::new(),
            }))
        ], parsed);
    }

    /// A top-level generator invocation (program entrypoint).
    #[test]
    fn parse_minimum_entrypoint() {
        let parser = LanguageParser::lex("<> my_generator ()");
        let parsed = assert_no_errors(parser);
        assert_eq!(vec![
            Statement::Entrypoint(CallFun {
                type_: crate::syntax::Functional::Generate,
                var: crate::syntax::Path(vec![
                    "my_generator".into(),
                ]),
                params: Vec::new(),
            })
        ], parsed);
    }
}

View file

@ -0,0 +1,769 @@
/// Streaming statement parser over a fallible iterator of syntax tokens.
pub struct LanguageParser<'a, I: core::iter::Iterator<Item=Result<crate::syntax::SyntaxToken, crate::syntax::SyntaxError>> + 'a> {
    // Marker tying the parser to the 'a lifetime of the token source.
    _idc: core::marker::PhantomData<&'a ()>,
    // Upstream syntax-token stream.
    iter: I,
    // Single-token lookahead buffer, filled when a parse step over-reads.
    lookahead: Option<crate::syntax::SyntaxToken>,
    // Modules whose bodies are still being parsed.
    // NOTE(review): not used in the code visible here — confirm purpose.
    incomplete_modules: Vec<super::Module>,
}
impl <'a, I: core::iter::Iterator<Item=Result<crate::syntax::SyntaxToken, crate::syntax::SyntaxError>> + 'a> LanguageParser<'a, I> {
pub fn new(tokens_in: I) -> Self {
Self {
_idc: Default::default(),
iter: tokens_in,
lookahead: None,
incomplete_modules: Vec::new(),
}
}
fn parse_incomplete_module(&mut self, name: crate::syntax::Path) -> Result<super::Module, super::LanguageError> {
Ok(super::Module { name, inner: Vec::new() })
}
fn parse_function_decl(&mut self, name: crate::syntax::Path) -> Result<super::DeclareFun, super::LanguageError> {
// `Name (` (first 2 tokens) are already consumed
let mut lookahead = if let Some(lookahead) = self.lookahead.take() {
lookahead
} else {
match self.iter.next() {
Some(Err(e)) => return Err(e.into()),
Some(Ok(t)) => t,
None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
}
};
// function params
let mut params = Vec::new();
while !matches!(lookahead.token, crate::syntax::Token::CloseRoundBracket) {
self.lookahead = Some(lookahead);
params.push(self.parse_param()?);
lookahead = match self.iter.next() {
Some(Err(e)) => return Err(e.into()),
Some(Ok(t)) => t,
None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
};
}
// function type
let token1 = match self.iter.next() {
Some(Err(e)) => return Err(e.into()),
Some(Ok(t)) => t,
None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
};
let fn_type = if let crate::syntax::Token::Functional(fn_type) = token1.token {
fn_type
} else {
return Err(super::LanguageError::InvalidSyntax(vec![token1]));
};
// operations
let token2 = match self.iter.next() {
Some(Err(e)) => return Err(e.into()),
Some(Ok(t)) => t,
None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
};
if !matches!(token2.token, crate::syntax::Token::OpenCurlyBracket) {
return Err(super::LanguageError::InvalidSyntax(vec![
crate::syntax::SyntaxToken {
token: crate::syntax::Token::Functional(fn_type),
info: token1.info,
},
token2
]));
}
lookahead = match self.iter.next() {
Some(Err(e)) => return Err(e.into()),
Some(Ok(t)) => t,
None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
};
let mut ops = Vec::new();
while !matches!(lookahead.token, crate::syntax::Token::CloseCurlyBracket) {
self.lookahead = Some(lookahead);
ops.push(self.parse_op()?);
lookahead = match self.iter.next() {
Some(Err(e)) => return Err(e.into()),
Some(Ok(t)) => t,
None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
};
}
Ok(super::DeclareFun {
name,
params,
type_: fn_type,
ops,
})
}
fn parse_type_decl(&mut self, name: crate::syntax::Path) -> Result<super::DeclareType, super::LanguageError> {
// `Name =` (first 2 tokens) are already consumed
let token0 = if let Some(lookahead) = self.lookahead.take() {
lookahead
} else {
match self.iter.next() {
Some(Err(e)) => return Err(e.into()),
Some(Ok(t)) => t,
None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
}
};
match token0.token {
crate::syntax::Token::OpenCurlyBracket => {
let mut lookahead = match self.iter.next() {
Some(Err(e)) => return Err(e.into()),
Some(Ok(t)) => t,
None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
};
let mut params = Vec::new();
while !matches!(lookahead.token, crate::syntax::Token::CloseCurlyBracket) {
self.lookahead = Some(lookahead);
params.push(self.parse_param()?);
lookahead = match self.iter.next() {
Some(Err(e)) => return Err(e.into()),
Some(Ok(t)) => t,
None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
};
}
Ok(super::DeclareType {
name,
params,
})
}
t => Err(super::LanguageError::InvalidSyntax(vec![
crate::syntax::SyntaxToken {
token: t,
info: token0.info,
}
]))
}
}
fn parse_function_call(&mut self, fun: crate::syntax::Functional) -> Result<super::CallFun, super::LanguageError> {
// `Func` (first token) is already consumed
let token0 = if let Some(lookahead) = self.lookahead.take() {
lookahead
} else {
match self.iter.next() {
Some(Err(e)) => return Err(e.into()),
Some(Ok(t)) => t,
None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
}
};
let name = if let crate::syntax::Token::Path(name) = token0.token {
name
} else {
return Err(super::LanguageError::UnexpectedToken(token0));
};
let token1 = match self.iter.next() {
Some(Err(e)) => return Err(e.into()),
Some(Ok(t)) => t,
None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
};
if !matches!(token1.token, crate::syntax::Token::OpenRoundBracket) {
return Err(super::LanguageError::UnexpectedToken(token1));
}
let op_params = match self.parse_op_params() {
Ok(ops) => ops,
Err(e) => {
return Err(Self::extend_err_tokens(vec![
crate::syntax::SyntaxToken {
token: crate::syntax::Token::Path(name),
info: token0.info,
},
crate::syntax::SyntaxToken {
token: crate::syntax::Token::OpenRoundBracket,
info: token1.info,
},
], e));
}
};
let token_last = if let Some(lookahead) = self.lookahead.take() {
lookahead
} else {
match self.iter.next() {
Some(Err(e)) => return Err(e.into()),
Some(Ok(t)) => t,
None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
}
};
if !matches!(token_last.token, crate::syntax::Token::CloseRoundBracket) {
return Err(super::LanguageError::UnexpectedToken(token1));
}
Ok(super::CallFun {
type_: fun,
var: name,
params: op_params,
})
}
fn parse_op(&mut self) -> Result<super::Op, super::LanguageError> {
let op = self.parse_inner_op(0)?;
let token_last = if let Some(lookahead) = self.lookahead.take() {
lookahead
} else {
match self.iter.next() {
Some(Err(e)) => return Err(e.into()),
Some(Ok(t)) => t,
None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
}
};
if let crate::syntax::Token::Semicolon = token_last.token {
Ok(op)
} else {
Err(super::LanguageError::UnexpectedToken(token_last))
}
}
fn extend_err_tokens(mut tokens: Vec<crate::syntax::SyntaxToken>, err: super::LanguageError) -> super::LanguageError {
match err {
super::LanguageError::InvalidSyntax(mut seq) => {
tokens.append(&mut seq);
super::LanguageError::InvalidSyntax(tokens)
},
super::LanguageError::UnexpectedEnd(mut seq) => {
tokens.append(&mut seq);
super::LanguageError::UnexpectedEnd(tokens)
}
e => e
}
}
fn parse_inner_op(&mut self, recursion_level: usize) -> Result<super::Op, super::LanguageError> {
let token0 = if let Some(lookahead) = self.lookahead.take() {
lookahead
} else {
match self.iter.next() {
Some(Err(e)) => return Err(e.into()),
Some(Ok(t)) => t,
None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
}
};
let op0 = match token0.token {
crate::syntax::Token::Path(var_name) => {
// variable-oriented operations
let token1 = match self.iter.next() {
Some(Err(e)) => return Err(e.into()),
Some(Ok(t)) => t,
None => return Err(super::LanguageError::UnexpectedEnd(vec![
crate::syntax::SyntaxToken {
token: crate::syntax::Token::Path(var_name),
info: token0.info,
}
])),
};
match token1.token {
crate::syntax::Token::Colon => {
// Declare-assign or declare
let mut lookahead = match self.iter.next() {
Some(Err(e)) => return Err(e.into()),
Some(Ok(t)) => t,
None => return Err(super::LanguageError::UnexpectedEnd(vec![
crate::syntax::SyntaxToken {
token: crate::syntax::Token::Path(var_name),
info: token0.info,
},
crate::syntax::SyntaxToken {
token: crate::syntax::Token::Colon,
info: token1.info,
},
])),
};
let type_name = if let crate::syntax::Token::Path(type_name) = lookahead.token {
let type_token_info = lookahead.info;
lookahead = match self.iter.next() {
Some(Err(e)) => return Err(e.into()),
Some(Ok(t)) => t,
None => return Err(super::LanguageError::UnexpectedEnd(vec![
crate::syntax::SyntaxToken {
token: crate::syntax::Token::Path(var_name),
info: token0.info,
},
crate::syntax::SyntaxToken {
token: crate::syntax::Token::Colon,
info: token1.info,
},
crate::syntax::SyntaxToken {
token: crate::syntax::Token::Path(type_name),
info: type_token_info,
},
])),
};
Some((type_name, type_token_info))
} else {
None
};
if let crate::syntax::Token::Equal = lookahead.token {
// Declare-Assign
let inner_op = match self.parse_inner_op(recursion_level + 1) {
Ok(op) => op,
Err(e) => {
// roughly equivalent to self.parse_inner_op(...).map_err(|e| { ... })
// (the closure captures variables which the compiler can't prove aren't used in this fn after)
let tokens = if let Some((type_name, type_token_info)) = type_name {
vec![
crate::syntax::SyntaxToken {
token: crate::syntax::Token::Path(var_name),
info: token0.info,
},
crate::syntax::SyntaxToken {
token: crate::syntax::Token::Colon,
info: token1.info,
},
crate::syntax::SyntaxToken {
token: crate::syntax::Token::Path(type_name),
info: type_token_info,
},
crate::syntax::SyntaxToken {
token: crate::syntax::Token::Equal,
info: lookahead.info,
},
]
} else {
vec![
crate::syntax::SyntaxToken {
token: crate::syntax::Token::Path(var_name),
info: token0.info,
},
crate::syntax::SyntaxToken {
token: crate::syntax::Token::Colon,
info: token1.info,
},
crate::syntax::SyntaxToken {
token: crate::syntax::Token::Equal,
info: lookahead.info,
},
]
};
return Err(Self::extend_err_tokens(tokens, e));
}
};
super::Op::DeclareAssign(super::tree::DeclareAssignVar {
var: var_name,
type_: type_name.map(|x| x.0),
op: Box::new(inner_op),
})
} else {
// declare
self.lookahead = Some(lookahead);
super::Op::Declare(super::tree::DeclareVar {
var: var_name,
type_: type_name.map(|x| x.0),
})
}
},
crate::syntax::Token::Equal => {
// Assign
let token2 = match self.iter.next() {
Some(Err(e)) => return Err(e.into()),
Some(Ok(t)) => t,
None => return Err(super::LanguageError::UnexpectedEnd(vec![
crate::syntax::SyntaxToken {
token: crate::syntax::Token::Path(var_name),
info: token0.info,
},
crate::syntax::SyntaxToken {
token: crate::syntax::Token::Equal,
info: token1.info,
}
])),
};
if let crate::syntax::Token::Field(f) = token2.token {
let inner_op = self.parse_inner_op(recursion_level + 1)?;
super::Op::Assign(crate::statement::tree::AssignVar {
var: var_name,
field: Some(f),
op: Box::new(inner_op),
})
} else {
self.lookahead = Some(token2);
let inner_op = self.parse_inner_op(recursion_level + 1)?;
super::Op::Assign(crate::statement::tree::AssignVar {
var: var_name,
field: None,
op: Box::new(inner_op),
})
}
},
/*crate::syntax::Token::OpenRoundBracket => {
// Call
let op_params = match self.parse_op_params() {
Ok(ops) => ops,
Err(e) => {
return Err(Self::extend_err_tokens(vec![
crate::syntax::SyntaxToken {
token: crate::syntax::Token::Path(var_name),
info: token0.info,
},
crate::syntax::SyntaxToken {
token: crate::syntax::Token::OpenRoundBracket,
info: token1.info,
},
], e));
}
};
self.lookahead.take().unwrap(); // always a closing round bracket; no need to verify
let token_filter = match self.iter.next() {
Some(Err(e)) => return Err(e.into()),
Some(Ok(t)) => t,
None => return Err(super::LanguageError::UnexpectedEnd(vec![
crate::syntax::SyntaxToken {
token: crate::syntax::Token::Path(var_name),
info: token0.info,
},
crate::syntax::SyntaxToken {
token: crate::syntax::Token::Equal,
info: token1.info,
},
// TODO include tokens from op_params
])),
};
if let crate::syntax::Token::Functional(crate::syntax::Functional::Filter) = token_filter.token {
let filter_op = match self.parse_inner_op(recursion_level + 1) {
Ok(x) => x,
Err(e) => {
return Err(Self::extend_err_tokens(vec![
crate::syntax::SyntaxToken {
token: crate::syntax::Token::Path(var_name),
info: token0.info,
},
crate::syntax::SyntaxToken {
token: crate::syntax::Token::Equal,
info: token1.info,
},
// TODO include tokens from op_params
], e))
}
};
super::Op::Call(super::CallVar {
var: var_name,
params: op_params,
})
} else {
return Err(super::LanguageError::UnexpectedToken(token_filter));
}
}*/
t => {
// Retrieve
self.lookahead = Some(crate::syntax::SyntaxToken {
token: t,
info: token1.info,
});
super::Op::Retrieve(var_name)
}
}
},
crate::syntax::Token::Functional(fun) => {
// Call
match self.parse_function_call(fun.clone()) {
Ok(x) => super::Op::Call(x),
Err(e) => {
return Err(Self::extend_err_tokens(vec![
crate::syntax::SyntaxToken {
token: crate::syntax::Token::Functional(fun),
info: token0.info,
}
], e));
}
}
}
crate::syntax::Token::Operation(unary_op) => {
// Unary operation
let inner_op = match self.parse_inner_op(recursion_level + 1) {
Ok(op) => op,
Err(e) => {
return Err(Self::extend_err_tokens(vec![
crate::syntax::SyntaxToken {
token: crate::syntax::Token::Operation(unary_op),
info: token0.info,
}
], e));
}
};
super::Op::Unary(super::tree::Unary {
first: Box::new(inner_op),
op: unary_op,
})
},
crate::syntax::Token::Literal(literal) => {
super::Op::Literal(literal)
},
crate::syntax::Token::OpenRoundBracket => {
// Operation surrounded by brackets
let inner_op = match self.parse_inner_op(recursion_level + 1) {
Ok(op) => op,
Err(e) => {
return Err(Self::extend_err_tokens(vec![
crate::syntax::SyntaxToken {
token: crate::syntax::Token::OpenRoundBracket,
info: token0.info,
}
], e));
}
};
let token_last = if let Some(lookahead) = self.lookahead.take() {
lookahead
} else {
match self.iter.next() {
Some(Err(e)) => return Err(e.into()),
Some(Ok(t)) => t,
// TODO include all tokens from inner_op
None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
}
};
if let crate::syntax::Token::CloseRoundBracket = token_last.token {
super::Op::Bracketed(Box::new(inner_op))
} else {
// TODO maybe? include all tokens from inner_op
return Err(super::LanguageError::UnexpectedToken(token_last));
}
}
t => {
return Err(super::LanguageError::InvalidSyntax(vec![
crate::syntax::SyntaxToken {
token: t,
info: token0.info,
}
]));
}
};
// check if operation continues (i.e. is dyadic)
let lookahead = if let Some(lookahead) = self.lookahead.take() {
lookahead
} else {
match self.iter.next() {
Some(Err(e)) => return Err(e.into()),
Some(Ok(t)) => t,
None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
}
};
if let crate::syntax::Token::Operation(dyadic_op) = lookahead.token {
let op1 = match self.parse_inner_op(recursion_level + 1) {
Ok(op) => op,
Err(e) => {
return Err(Self::extend_err_tokens(vec![
// TODO add tokens of op0 too
crate::syntax::SyntaxToken {
token: crate::syntax::Token::Operation(dyadic_op),
info: token0.info,
}
], e));
}
};
Ok(super::Op::Dyadic(super::Dyadic {
first: Box::new(op0),
op: dyadic_op,
second: Box::new(op1),
}))
} else {
self.lookahead = Some(lookahead);
Ok(op0)
}
}
fn parse_op_params(&mut self) -> Result<Vec<super::Op>, super::LanguageError> {
let mut lookahead = if let Some(lookahead) = self.lookahead.take() {
lookahead
} else {
match self.iter.next() {
Some(Err(e)) => return Err(e.into()),
Some(Ok(t)) => t,
None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
}
};
let mut ops = Vec::new();
while !matches!(lookahead.token, crate::syntax::Token::CloseRoundBracket) {
self.lookahead = Some(lookahead);
ops.push(self.parse_op()?);
lookahead = if let Some(lookahead) = self.lookahead.take() {
lookahead
} else {
match self.iter.next() {
Some(Err(e)) => return Err(e.into()),
Some(Ok(t)) => t,
// TODO add tokens of previous op(s)
None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
}
};
}
self.lookahead = Some(lookahead);
Ok(ops)
}
    // [!] no unhandled lookaheads
    /// Parse a single parameter declaration: `name: Type` (typed) or `name;` (untyped).
    ///
    /// Uses any pending lookahead as the first token; never leaves a lookahead behind.
    fn parse_param(&mut self) -> Result<super::Param, super::LanguageError> {
        // first token: the parameter name (must be a path)
        let token0 = if let Some(lookahead) = self.lookahead.take() {
            lookahead
        } else {
            match self.iter.next() {
                Some(Err(e)) => return Err(e.into()),
                Some(Ok(t)) => t,
                None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
            }
        };
        if let crate::syntax::Token::Path(var_name) = token0.token {
            // second token decides the form: `:` (typed) or `;` (untyped)
            let token1 = match self.iter.next() {
                Some(Err(e)) => return Err(e.into()),
                Some(Ok(t)) => t,
                None => return Err(super::LanguageError::UnexpectedEnd(vec![
                    crate::syntax::SyntaxToken {
                        token: crate::syntax::Token::Path(var_name),
                        info: token0.info,
                    }
                ])),
            };
            match token1.token {
                crate::syntax::Token::Colon => {
                    // with type declaration
                    let token2 = match self.iter.next() {
                        Some(Err(e)) => return Err(e.into()),
                        Some(Ok(t)) => t,
                        None => return Err(super::LanguageError::UnexpectedEnd(vec![
                            crate::syntax::SyntaxToken {
                                token: crate::syntax::Token::Path(var_name),
                                info: token0.info,
                            },
                            crate::syntax::SyntaxToken {
                                token: crate::syntax::Token::Colon,
                                info: token1.info,
                            }
                        ])),
                    };
                    if let crate::syntax::Token::Path(ty_name) = token2.token {
                        // NOTE(review): the typed form does not consume a trailing
                        // separator, while the untyped form below consumes the `;`
                        // itself — confirm callers expect this asymmetry
                        Ok(super::Param {
                            name: var_name,
                            type_: Some(ty_name),
                        })
                    } else {
                        // rebuild the consumed tokens for the error report
                        Err(super::LanguageError::InvalidSyntax(vec![
                            crate::syntax::SyntaxToken {
                                token: crate::syntax::Token::Path(var_name),
                                info: token0.info,
                            },
                            crate::syntax::SyntaxToken {
                                token: crate::syntax::Token::Colon,
                                info: token1.info,
                            },
                            token2,
                        ]))
                    }
                },
                crate::syntax::Token::Semicolon => {
                    // without type declaration
                    Ok(super::Param {
                        name: var_name,
                        type_: None,
                    })
                },
                t => Err(super::LanguageError::InvalidSyntax(vec![
                    crate::syntax::SyntaxToken {
                        token: crate::syntax::Token::Path(var_name),
                        info: token0.info,
                    },
                    crate::syntax::SyntaxToken {
                        token: t,
                        info: token1.info,
                    }
                ]))
            }
        } else {
            // parameter declarations must start with a name path
            Err(super::LanguageError::InvalidSyntax(vec![token0]))
        }
    }
}
impl <'a> LanguageParser<'a, crate::syntax::TokenParser<'a, logos::Lexer<'a, crate::lexer::Token>>> {
    /// Build the full lexer -> syntax-token -> language parsing pipeline directly from source text.
    pub fn lex(s: &'a str) -> Self {
        Self::new(crate::syntax::TokenParser::new(crate::lexer::Token::tokenify(s)))
    }
}
impl <'a, I: core::iter::Iterator<Item=Result<crate::syntax::SyntaxToken, crate::syntax::SyntaxError>> + 'a> core::iter::Iterator for LanguageParser<'a, I> {
    type Item = Result<super::Statement, super::LanguageError>;
    /// Parse one top-level statement per iteration.
    ///
    /// Module bodies are built iteratively rather than recursively: an open
    /// module lives on `self.incomplete_modules` and absorbs subsequent
    /// statements, while `Notification` pseudo-statements are yielded so the
    /// caller can observe progress (see `Statement::is_ignore`).
    fn next(&mut self) -> Option<Self::Item> {
        // prefer a stashed lookahead token over pulling a fresh one
        let opt_next_token = if let Some(token) = self.lookahead.take() {
            Some(token)
        } else {
            match self.iter.next() {
                Some(Err(e)) => return Some(Err(e.into())),
                Some(Ok(t)) => Some(t),
                None => None,
            }
        };
        if let Some(token) = opt_next_token {
            let statement = match token.token {
                crate::syntax::Token::Path(p0) => {
                    // a leading path may start a module, a function declaration,
                    // or a type declaration; disambiguate on the following token
                    let next_token = match self.iter.next() {
                        Some(Err(e)) => return Some(Err(e.into())),
                        Some(Ok(t)) => t,
                        None => return Some(Err(super::LanguageError::UnexpectedEnd(vec![
                            crate::syntax::SyntaxToken {
                                token: crate::syntax::Token::Path(p0),
                                info: token.info,
                            }
                        ]))),
                    };
                    match next_token.token {
                        crate::syntax::Token::OpenCurlyBracket => {
                            // module
                            match self.parse_incomplete_module(p0.clone()) {
                                Ok(module) => self.incomplete_modules.push(module),
                                Err(e) => return Some(Err(e)),
                            }
                            // skip capturing by module by immediately returning
                            return Some(Ok(super::Statement::Notification(super::Notification::EnteringModule(p0))));
                        },
                        crate::syntax::Token::OpenRoundBracket => {
                            // function declaration
                            match self.parse_function_decl(p0) {
                                Ok(fn_decl) => super::Statement::Declare(super::Declare::Function(fn_decl)),
                                Err(e) => return Some(Err(e)),
                            }
                        },
                        crate::syntax::Token::Equal => {
                            // type declaration
                            match self.parse_type_decl(p0) {
                                Ok(ty_decl) => super::Statement::Declare(super::Declare::Type(ty_decl)),
                                Err(e) => return Some(Err(e)),
                            }
                        },
                        unrecognized => return Some(Err(super::LanguageError::InvalidSyntax(vec![
                            crate::syntax::SyntaxToken {
                                token: crate::syntax::Token::Path(p0),
                                info: token.info,
                            },
                            crate::syntax::SyntaxToken {
                                token: unrecognized,
                                info: next_token.info,
                            }
                        ]))),
                    }
                },
                crate::syntax::Token::Functional(fun) => {
                    // a functional arrow at statement level starts an entrypoint call
                    match self.parse_function_call(fun) {
                        Ok(f) => super::Statement::Entrypoint(f),
                        Err(e) => return Some(Err(e))
                    }
                }
                crate::syntax::Token::CloseCurlyBracket => {
                    // `}` completes the innermost open module
                    if let Some(module) = self.incomplete_modules.pop() {
                        super::Statement::Module(module)
                    } else {
                        // `}` with no module open is a syntax error
                        return Some(Err(super::LanguageError::UnexpectedToken(crate::syntax::SyntaxToken {
                            token: crate::syntax::Token::CloseCurlyBracket,
                            info: token.info
                        })));
                    }
                }
                t => return Some(Err(super::LanguageError::UnexpectedToken(crate::syntax::SyntaxToken {
                    token: t,
                    info: token.info
                }))),
            };
            // statements parsed while a module is open are captured by that
            // module (including a just-completed nested module) instead of
            // being yielded directly
            if let Some(mut module) = self.incomplete_modules.pop() {
                module.inner.push(statement.clone());
                self.incomplete_modules.push(module);
                Some(Ok(super::Statement::Notification(super::Notification::CapturedByModule(Box::new(statement)))))
            } else {
                Some(Ok(statement))
            }
        } else {
            None
        }
    }
}

View file

@ -0,0 +1,176 @@
/// Statement declaration
///
/// Statement -> Declare
/// Statement -> Module
/// Statement -> CallFun [entrypoint]
///
/// Statements -> Statement Statements
/// Statements -> DONE
#[derive(Debug, PartialEq, Clone)]
pub enum Statement {
    /// Function or type declaration
    Declare(Declare),
    /// Completed module and the statements it captured
    Module(Module),
    /// Top-level function invocation
    Entrypoint(CallFun),
    /// Parser bookkeeping pseudo-statement (not real syntax)
    Notification(Notification),
}
impl Statement {
    /// True for parser-emitted pseudo-statements that consumers should skip.
    pub fn is_ignore(&self) -> bool {
        matches!(self, Self::Notification(_))
    }
}
/// Fake tokens emitted by parser to avoid excessive recursion
#[derive(Debug, PartialEq, Clone)]
pub enum Notification {
    /// A module body was opened; following statements will be captured by it
    EnteringModule(crate::syntax::Path),
    /// A statement was absorbed into the currently-open module
    CapturedByModule(Box<Statement>),
}
/// Param declaration
///
/// Param -> Variable: Type
/// OR (depending on context)
/// Param -> Variable
///
/// Params -> Param; Params [semicolon-separated]
/// Params -> ;
/// Params -> Param
#[derive(Debug, PartialEq, Clone)]
pub struct Param {
    // parameter (or field) name
    pub name: crate::syntax::Path,
    // None when the context allows omitting the type annotation
    pub type_: Option<crate::syntax::Path>,
}
/// Function or Type declaration
///
/// Declare -> DeclareFun
/// Declare -> DeclareType
#[derive(Debug, PartialEq, Clone)]
pub enum Declare {
    Function(DeclareFun),
    Type(DeclareType),
}
/// Function declaration
///
/// DeclareFun -> Name (Params) Func { Ops }
/// Func -> => [map]
/// Func -> x> [filter]
/// Func -> ~> [sort]
/// Func -> <> [generator]
#[derive(Debug, PartialEq, Clone)]
pub struct DeclareFun {
    pub name: crate::syntax::Path,
    pub params: Vec<Param>,
    // which functional arrow (map/filter/sort/generate) declared this function
    pub type_: crate::syntax::Functional,
    // function body: the operations between the curly brackets
    pub ops: Vec<Op>,
}
/// Type declaration
///
/// DeclareType -> Name = { Params }
#[derive(Debug, PartialEq, Clone)]
pub struct DeclareType {
    pub name: crate::syntax::Path,
    // the type's fields, declared like parameters
    pub params: Vec<Param>,
}
/// Module declaration
///
/// Module -> Variable { Statements }
#[derive(Debug, PartialEq, Clone)]
pub struct Module {
    pub name: crate::syntax::Path,
    // statements captured while the module body was open
    pub inner: Vec<Statement>,
}
/// Operation declaration
///
/// Op -> Variable := Op [declare-assign]
/// Op -> Variable: Type [declare]
/// Op -> Variable = Op [assign]
/// Op -> Variable Fields [retrieve]
/// Op -> CallFun [invoke]
/// Op -> Op DualOp Op [dyadic]
/// Op -> UnaryOp Op [unary]
/// Op -> Literal [literal]
/// Op -> Bracketed [bracketed]
/// Fields ->
/// Fields -> .Name Fields
/// DualOp -> SetOp
/// DualOp -> n
/// DualOp -> u
/// DualOp -> +
/// DualOp -> -
/// DualOp -> *
/// DualOp -> /
/// DualOp -> &&
/// DualOp -> ||
/// SetOp -> n
/// SetOp -> u
/// UnaryOp -> -
/// Literal -> "Name"
/// Literal -> Integer
/// Literal -> Float
/// Bracketed -> (Op)
///
///
/// Ops -> Op; Ops [semicolon-separated]
/// Ops -> ;
/// Ops -> DONE
#[derive(Debug, PartialEq, Clone)]
pub enum Op {
    /// `x := <op>` — declare and assign in one step
    DeclareAssign(DeclareAssignVar),
    /// `x: Type` — declare without assigning
    Declare(DeclareVar),
    /// `x = <op>` — assign to an existing variable
    Assign(AssignVar),
    /// Bare variable reference (field access handled via AssignVar/Field)
    Retrieve(crate::syntax::Path),
    /// Function invocation
    Call(CallFun),
    /// Two operands joined by a binary operator
    Dyadic(Dyadic),
    /// Prefix operator applied to one operand
    Unary(Unary),
    /// Literal value
    Literal(crate::syntax::Literal),
    /// `(<op>)` — explicit grouping
    Bracketed(Box<Op>),
}
/// `var[: Type] := op` — combined declaration and assignment.
#[derive(Debug, PartialEq, Clone)]
pub struct DeclareAssignVar {
    pub var: crate::syntax::Path,
    // optional explicit type annotation
    pub type_: Option<crate::syntax::Path>,
    pub op: Box<Op>,
}
/// `var[: Type]` — declaration without an initial value.
#[derive(Debug, PartialEq, Clone)]
pub struct DeclareVar {
    pub var: crate::syntax::Path,
    pub type_: Option<crate::syntax::Path>,
}
/// `var[.field] = op` — assignment to a variable or one of its fields.
#[derive(Debug, PartialEq, Clone)]
pub struct AssignVar {
    pub var: crate::syntax::Path,
    // None when assigning to the variable itself rather than a field
    pub field: Option<crate::syntax::Field>,
    pub op: Box<Op>,
}
/// `first op second` — binary operation.
#[derive(Debug, PartialEq, Clone)]
pub struct Dyadic {
    pub first: Box<Op>,
    pub op: crate::syntax::Op,
    pub second: Box<Op>,
}
/// `op first` — prefix unary operation.
#[derive(Debug, PartialEq, Clone)]
pub struct Unary {
    pub first: Box<Op>,
    pub op: crate::syntax::Op,
}
/// Function call declaration
///
/// CallFun -> Func Name (Params)
#[derive(Debug, PartialEq, Clone)]
pub struct CallFun {
    // the functional arrow used at the call site
    pub type_: crate::syntax::Functional,
    pub var: crate::syntax::Path,
    // argument expressions
    pub params: Vec<Op>,
}

View file

@ -0,0 +1,25 @@
/// Errors produced while merging lexer tokens into syntax tokens.
#[derive(Debug, PartialEq, Clone)]
pub enum SyntaxError {
    /// The given run of lexer tokens is not a valid syntax-token sequence
    InvalidSequence(Vec<crate::lexer::Token>),
    /// The lexer could not recognize the input at all
    UnrecognizedToken,
}

impl core::fmt::Display for SyntaxError {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        match self {
            Self::InvalidSequence(tokens) => write!(f, "Invalid sequence {:?}", tokens.as_slice()),
            Self::UnrecognizedToken => f.write_str("Unrecognized token"),
        }
    }
}

impl std::error::Error for SyntaxError {}

impl From<crate::lexer::LexError> for SyntaxError {
    fn from(value: crate::lexer::LexError) -> Self {
        match value {
            crate::lexer::LexError::UnrecognizedToken => Self::UnrecognizedToken,
        }
    }
}

View file

@ -0,0 +1,66 @@
//! High-level syntax
mod errors;
pub use errors::SyntaxError;
mod parser;
pub(crate) use parser::TokenParser;
mod tokens;
pub use tokens::{SyntaxToken, Token, Literal, Op, Functional, Field, Path, Comment};
#[cfg(test)]
mod test {
    use super::*;
    use pretty_assertions::assert_eq;
    // one of every token kind, in a known order, for round-trip testing
    const ALL_TOKENS_STR: &str = "u n + - * / && || => x> ~> <> = n_u_::is_a_::VaR1AbLe .th1s.is_A_.f13Ld ( ) { } ; -404 -1234.5 \"\" /* block comment */ // line comment \n";
    /// Every token kind in ALL_TOKENS_STR should parse to the expected syntax token.
    #[test]
    fn parse_everything() {
        let expected = vec![
            Token::Operation(Op::Union),
            Token::Operation(Op::Intersection),
            Token::Operation(Op::Plus),
            Token::Operation(Op::Minus),
            Token::Operation(Op::Multiply),
            Token::Operation(Op::Divide),
            Token::Operation(Op::And),
            Token::Operation(Op::Or),
            Token::Functional(Functional::Map),
            Token::Functional(Functional::Filter),
            Token::Functional(Functional::Sort),
            Token::Functional(Functional::Generate),
            Token::Equal,
            Token::Path(Path(vec!["n_u_".into(), "is_a_".into(), "VaR1AbLe".into()])),
            Token::Field(Field(vec!["th1s".into(), "is_A_".into(), "f13Ld".into()])),
            Token::OpenRoundBracket,
            Token::CloseRoundBracket,
            Token::OpenCurlyBracket,
            Token::CloseCurlyBracket,
            Token::Semicolon,
            Token::Literal(Literal::Integer(-404)),
            Token::Literal(Literal::Float(-1234.5)),
            Token::Literal(Literal::String("".into())),
            Token::Comment(Comment::Block(" block comment ".into())),
            Token::Comment(Comment::Line(" line comment ".into())),
        ];
        let mut actual = Vec::new();
        for (index, token_result) in SyntaxToken::tokenify(ALL_TOKENS_STR).enumerate() {
            assert!(token_result.is_ok(), "Token #{} (expected: {:?}) failed to parse: {:?}", index, expected[index], token_result.err());
            actual.push(token_result.unwrap().token);
        }
        assert_eq!(actual, expected)
    }
    /// tokenify followed by stringify should reproduce the input text exactly.
    #[test]
    fn parse_reversability() {
        // stringify appends a trailing space after every token, so compare
        // against the input with one extra trailing space
        let expected = format!("{} ", ALL_TOKENS_STR);
        let actual = SyntaxToken::stringify(SyntaxToken::tokenify(&expected).map(|token_result| token_result.unwrap()));
        assert_eq!(actual, expected)
    }
}

View file

@ -0,0 +1,184 @@
/// Adapter that merges raw lexer tokens into higher-level syntax tokens
/// (e.g. collapsing `Variable :: Variable` runs into a single `Path` token).
pub(crate) struct TokenParser<'a, I: core::iter::Iterator<Item=Result<crate::lexer::Token, crate::lexer::LexError>> + 'a> {
    // ties the otherwise-unused 'a lifetime to the struct
    _idc: core::marker::PhantomData<&'a ()>,
    iter: I,
    // single-token pushback buffer used by the merging logic in `next`
    lookahead: Option<crate::lexer::Token>,
}
impl <'a, I: core::iter::Iterator<Item=Result<crate::lexer::Token, crate::lexer::LexError>> + 'a> TokenParser<'a, I> {
    /// Wrap a lexer-token iterator in a syntax-token parser.
    pub fn new(tokens_in: I) -> Self {
        Self {
            _idc: core::marker::PhantomData,
            iter: tokens_in,
            lookahead: None,
        }
    }
    /// Reconstruct the original `[Dot, Variable, Dot, Variable, ...]` lexer-token
    /// sequence from collected field parts, consuming `infos` entries in order.
    fn rebuild_field_token_sequence(parts: Vec<String>, infos: &mut Vec<Option<crate::lexer::TokenInfo>>) -> Vec<crate::lexer::Token> {
        let mut sequence = Vec::with_capacity(parts.len() * 2);
        for part in parts {
            // order matters: each dot's info precedes its variable's info
            sequence.push(crate::lexer::Token::Dot(Self::take_first_some(infos).unwrap()));
            sequence.push(crate::lexer::Token::Variable((part, Self::take_first_some(infos).unwrap())));
        }
        sequence
    }
    /// Reconstruct the original `[Variable, PathSeparator, Variable, ...]`
    /// lexer-token sequence from collected path parts.
    fn rebuild_path_token_sequence(parts: Vec<String>, infos: &mut Vec<Option<crate::lexer::TokenInfo>>) -> Vec<crate::lexer::Token> {
        let mut sequence = Vec::with_capacity(parts.len() * 2);
        for part in parts {
            sequence.push(crate::lexer::Token::Variable((part, Self::take_first_some(infos).unwrap())));
            sequence.push(crate::lexer::Token::PathSeparator(Self::take_first_some(infos).unwrap()));
        }
        // drop the trailing path separator (no-op when the sequence is empty)
        sequence.pop();
        sequence
    }
    /// Take the first `Some` entry out of `items`, leaving `None` in its place.
    fn take_first_some<T>(items: &mut Vec<Option<T>>) -> Option<T> {
        items.iter_mut().find_map(|slot| slot.take())
    }
}
impl <'a, I: core::iter::Iterator<Item=Result<crate::lexer::Token, crate::lexer::LexError>> + 'a> core::iter::Iterator for TokenParser<'a, I> {
    type Item = Result<super::SyntaxToken, super::SyntaxError>;
    /// Translate one lexer token (or a merged run of them) into a syntax token.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        // prefer the pushback buffer over pulling a fresh lexer token
        let opt_next_lex_token = if let Some(lex_token) = self.lookahead.take() {
            Some(lex_token)
        } else {
            match self.iter.next() {
                Some(Err(e)) => return Some(Err(e.into())),
                Some(Ok(t)) => Some(t),
                None => None,
            }
        };
        if let Some(lex_token) = opt_next_lex_token {
            let translated = match lex_token {
                crate::lexer::Token::Union(info) => super::Token::Operation(super::Op::Union).with(info),
                crate::lexer::Token::Intersection(info) => super::Token::Operation(super::Op::Intersection).with(info),
                crate::lexer::Token::Plus(info) => super::Token::Operation(super::Op::Plus).with(info),
                crate::lexer::Token::Minus(info) => super::Token::Operation(super::Op::Minus).with(info),
                crate::lexer::Token::Multiply(info) => super::Token::Operation(super::Op::Multiply).with(info),
                crate::lexer::Token::Divide(info) => super::Token::Operation(super::Op::Divide).with(info),
                crate::lexer::Token::And(info) => super::Token::Operation(super::Op::And).with(info),
                crate::lexer::Token::Or(info) => super::Token::Operation(super::Op::Or).with(info),
                crate::lexer::Token::Map(info) => super::Token::Functional(super::Functional::Map).with(info),
                crate::lexer::Token::Filter(info) => super::Token::Functional(super::Functional::Filter).with(info),
                crate::lexer::Token::Sort(info) => super::Token::Functional(super::Functional::Sort).with(info),
                crate::lexer::Token::Generate(info) => super::Token::Functional(super::Functional::Generate).with(info),
                crate::lexer::Token::Equal(info) => super::Token::Equal.with(info),
                // a path separator is only valid after a variable; standalone ones are invalid
                crate::lexer::Token::PathSeparator(info) => return Some(Err(super::SyntaxError::InvalidSequence(vec![crate::lexer::Token::PathSeparator(info)]))),
                crate::lexer::Token::Dot(info) => {
                    // read all incoming dots and variable combos into a single path token
                    // e.g. [Dot, Variable("x"), Dot, Variable("y"), Dot, Variable("z")] becomes Field(["x", "y", "z"])
                    let mut parts = Vec::new();
                    let mut infos = Vec::new();
                    infos.push(Some(info));
                    loop {
                        // a dot must be followed by a variable name
                        let next_lex_token = match self.iter.next() {
                            Some(Err(e)) => return Some(Err(e.into())),
                            Some(Ok(t)) => Some(t),
                            None => None,
                        };
                        if let Some(next_lex_token) = next_lex_token {
                            match next_lex_token {
                                crate::lexer::Token::Variable((part, info)) => {
                                    parts.push(part);
                                    infos.push(Some(info));
                                },
                                invalid_token => {
                                    // rebuild the consumed lexer tokens for the error report
                                    let last_dot = crate::lexer::Token::Dot(infos.pop().unwrap().unwrap());
                                    let mut sequence = Self::rebuild_field_token_sequence(parts, &mut infos);
                                    sequence.push(last_dot);
                                    sequence.push(invalid_token);
                                    return Some(Err(super::SyntaxError::InvalidSequence(sequence)));
                                }
                            }
                        } else {
                            // input ended on a trailing dot
                            let last_dot = crate::lexer::Token::Dot(infos.pop().unwrap().unwrap());
                            let mut sequence = Self::rebuild_field_token_sequence(parts, &mut infos);
                            sequence.push(last_dot);
                            return Some(Err(super::SyntaxError::InvalidSequence(sequence)));
                        }
                        self.lookahead = match self.iter.next() {
                            Some(Err(e)) => return Some(Err(e.into())),
                            Some(Ok(t)) => Some(t),
                            None => None,
                        };
                        if let Some(crate::lexer::Token::Dot(info)) = &self.lookahead {
                            // another field segment follows; keep merging
                            infos.push(Some(info.to_owned()));
                            self.lookahead = None;
                        } else {
                            break;
                        }
                    }
                    super::SyntaxToken {
                        token: super::Token::Field(super::Field(parts)),
                        info: infos.into_iter().map(|x| x.unwrap()).collect(),
                    }
                }
                crate::lexer::Token::Variable((root, info)) => {
                    // read all incoming path separators and variable combos into a single path token
                    // e.g. [Variable("x"), PathSeparator, Variable("y"), PathSeparator, Variable("z")] becomes Path(["x", "y", "z"])
                    self.lookahead = match self.iter.next() {
                        Some(Err(e)) => return Some(Err(e.into())),
                        Some(Ok(t)) => Some(t),
                        None => None,
                    };
                    let mut parts = Vec::new();
                    let mut infos = Vec::new();
                    parts.push(root);
                    infos.push(Some(info));
                    while let Some(crate::lexer::Token::PathSeparator(path_info)) = &self.lookahead {
                        infos.push(Some(path_info.to_owned()));
                        // a separator must be followed by another variable segment
                        let next_lex_token = match self.iter.next() {
                            Some(Err(e)) => return Some(Err(e.into())),
                            Some(Ok(t)) => Some(t),
                            None => None,
                        };
                        if let Some(crate::lexer::Token::Variable((part, info))) = next_lex_token {
                            parts.push(part);
                            infos.push(Some(info));
                        } else {
                            // rebuild the consumed lexer tokens for the error report
                            let last_sep = crate::lexer::Token::PathSeparator(infos.pop().unwrap().unwrap());
                            let mut sequence = Self::rebuild_path_token_sequence(parts, &mut infos);
                            sequence.push(last_sep);
                            if let Some(lex_token) = next_lex_token {
                                sequence.push(lex_token);
                                return Some(Err(super::SyntaxError::InvalidSequence(sequence)))
                            } else {
                                return Some(Err(super::SyntaxError::InvalidSequence(sequence)));
                            }
                        }
                        self.lookahead = match self.iter.next() {
                            Some(Err(e)) => return Some(Err(e.into())),
                            Some(Ok(t)) => Some(t),
                            None => None,
                        };
                    }
                    super::SyntaxToken {
                        token: super::Token::Path(super::Path(parts)),
                        info: infos.into_iter().map(|x| x.unwrap()).collect(),
                    }
                },
                crate::lexer::Token::OpenRoundBracket(info) => super::Token::OpenRoundBracket.with(info),
                crate::lexer::Token::CloseRoundBracket(info) => super::Token::CloseRoundBracket.with(info),
                crate::lexer::Token::OpenCurlyBracket(info) => super::Token::OpenCurlyBracket.with(info),
                crate::lexer::Token::CloseCurlyBracket(info) => super::Token::CloseCurlyBracket.with(info),
                crate::lexer::Token::Colon(info) => super::Token::Colon.with(info),
                crate::lexer::Token::Semicolon(info) => super::Token::Semicolon.with(info),
                crate::lexer::Token::Integer((int, info)) => super::Token::Literal(super::Literal::Integer(int)).with(info),
                crate::lexer::Token::Float((float, info)) => super::Token::Literal(super::Literal::Float(float)).with(info),
                crate::lexer::Token::String((s, info)) => super::Token::Literal(super::Literal::String(s)).with(info),
                crate::lexer::Token::LongComment((c, info)) => super::Token::Comment(super::tokens::Comment::Block(c)).with(info),
                crate::lexer::Token::ShortComment((c, info)) => super::Token::Comment(super::tokens::Comment::Line(c)).with(info),
                // NOTE(review): the lexer is presumably configured to filter newlines
                // out before they reach this stage — confirm against the lexer setup
                crate::lexer::Token::Newline(_) => panic!("Got non-ignored newline"),
            };
            Some(Ok(translated))
        } else {
            None
        }
    }
}

View file

@ -0,0 +1,188 @@
/// A syntax token together with the source metadata of every lexer token
/// that was merged to produce it.
#[derive(Debug, PartialEq, Clone)]
pub struct SyntaxToken {
    pub token: Token,
    // one entry per underlying lexer token (Path/Field tokens merge several)
    pub info: Vec<crate::lexer::TokenInfo>,
}
/// Syntax-level token kinds produced by the token parser.
#[derive(Debug, PartialEq, Clone)]
pub enum Token {
    Operation(Op),
    Functional(Functional),
    // Basics
    Equal,
    Field(Field),
    Path(Path),
    OpenRoundBracket,
    CloseRoundBracket,
    OpenCurlyBracket,
    CloseCurlyBracket,
    Colon,
    Semicolon,
    Literal(Literal),
    Comment(Comment),
}
impl Token {
    /// Attach a single lexer token's metadata to this token.
    pub(super) fn with(self, info: crate::lexer::TokenInfo) -> SyntaxToken {
        SyntaxToken { token: self, info: vec![info] }
    }
}
/// Dot-prefixed field-access segments, e.g. `.a.b.c` -> `["a", "b", "c"]`.
#[derive(Debug, PartialEq, Clone)]
pub struct Field(pub Vec<String>);
/// `::`-separated path segments, e.g. `a::b::c` -> `["a", "b", "c"]`.
#[derive(Debug, PartialEq, Clone)]
pub struct Path(pub Vec<String>);
/// Binary/unary operator tokens.
#[derive(Debug, PartialEq, Clone)]
pub enum Op {
    // Set operations
    Union,
    Intersection,
    // Arithmetic operations (also applicable to sets)
    Plus,
    Minus,
    Multiply,
    Divide,
    // Logical operations
    And,
    Or,
}
impl Op {
    /// The exact source-text spelling of this operator.
    pub fn as_str(&self) -> &'static str {
        match self {
            Self::Union => "u",
            Self::Intersection => "n",
            Self::Plus => "+",
            Self::Minus => "-",
            Self::Multiply => "*",
            Self::Divide => "/",
            Self::And => "&&",
            Self::Or => "||",
        }
    }
}
/// Function-kind arrow tokens: map/filter/sort/generate.
#[derive(Debug, PartialEq, Clone)]
pub enum Functional {
    Map,
    Filter,
    Sort,
    Generate,
}
impl Functional {
    /// The exact source-text spelling of this functional arrow.
    pub fn as_str(&self) -> &'static str {
        match self {
            Self::Map => "=>",
            Self::Filter => "x>",
            Self::Sort => "~>",
            Self::Generate => "<>",
        }
    }
}
/// Literal values embedded in source text.
#[derive(Debug, PartialEq, Clone)]
pub enum Literal {
    /// Integer literal, e.g. `-404`
    Integer(i64),
    /// Float literal, e.g. `-1234.5`
    Float(f64),
    /// String literal; stored without the surrounding quotes
    String(String),
}
impl Literal {
    /// Render the literal back to its source-text form.
    ///
    /// Note: allocates a fresh `String` on every call despite the `as_` name.
    pub fn as_str(&self) -> String {
        match self {
            // `to_string` uses the same `Display` impls as `format!("{}", ..)`
            // but states the intent directly without the format machinery
            Self::Integer(int) => int.to_string(),
            Self::Float(float) => float.to_string(),
            // re-add the quotes stripped during lexing
            Self::String(s) => format!("\"{}\"", s),
        }
    }
}
/// Source comments, preserved so token streams can round-trip to text.
#[derive(Debug, PartialEq, Clone)]
pub enum Comment {
    /// `// ...` comment (text excludes the `//` and trailing newline)
    Line(String),
    /// `/* ... */` comment (text excludes the delimiters)
    Block(String),
}
impl Comment {
    /// Render the comment back to its source-text form, delimiters included.
    pub fn as_str(&self) -> String {
        let (prefix, body, suffix) = match self {
            Self::Line(text) => ("//", text, "\n"),
            Self::Block(text) => ("/*", text, "*/"),
        };
        let mut out = String::with_capacity(prefix.len() + body.len() + suffix.len());
        out.push_str(prefix);
        out.push_str(body);
        out.push_str(suffix);
        out
    }
}
impl Token {
    /// Append this token's source-text form to `result`.
    ///
    /// # Errors
    /// Propagates any formatting error from the underlying writes
    /// (previously these were `unwrap`ed, turning a write failure into a panic).
    pub fn write_str(&self, result: &mut String) -> std::fmt::Result {
        use core::fmt::Write;
        match self {
            Self::Operation(op) => write!(result, "{}", op.as_str()),
            Self::Functional(fun) => write!(result, "{}", fun.as_str()),
            Self::Equal => write!(result, "="),
            Self::Field(parts) => {
                // fields render with a leading dot per segment: `.a.b.c`
                for p in parts.0.iter() {
                    write!(result, ".{}", p)?;
                }
                Ok(())
            }
            Self::Path(parts) => {
                // paths are `::`-separated with no leading or trailing separator
                for (i, p) in parts.0.iter().enumerate() {
                    if i != 0 {
                        write!(result, "::")?;
                    }
                    write!(result, "{}", p)?;
                }
                Ok(())
            }
            Self::OpenRoundBracket => write!(result, "("),
            Self::CloseRoundBracket => write!(result, ")"),
            Self::OpenCurlyBracket => write!(result, "{{"),
            Self::CloseCurlyBracket => write!(result, "}}"),
            Self::Colon => write!(result, ":"),
            Self::Semicolon => write!(result, ";"),
            Self::Literal(l) => write!(result, "{}", l.as_str()),
            Self::Comment(c) => write!(result, "{}", c.as_str()),
        }
    }
}
impl SyntaxToken {
    /// Lex and parse `s` into a stream of syntax tokens.
    pub fn tokenify<'a>(s: &'a str) -> impl core::iter::Iterator<Item=Result<Self, super::SyntaxError>> + 'a {
        super::TokenParser::new(crate::lexer::Token::tokenify(s))
    }
    /// Render an owned token stream back to source text, one trailing space per token.
    pub fn stringify<'a>(tokens: impl core::iter::Iterator<Item=Self> + 'a) -> String {
        let mut result = String::new();
        for token in tokens {
            // writing into a String cannot fail
            token.write_str(&mut result).unwrap();
            result.push(' ');
        }
        result
    }
    /// Same as `stringify`, but for a stream of borrowed tokens.
    pub fn stringify_ref<'a, 'b>(tokens: impl core::iter::Iterator<Item=&'b Self> + 'a) -> String {
        let mut result = String::new();
        for token in tokens {
            token.write_str(&mut result).unwrap();
            result.push(' ');
        }
        result
    }
    /// Render just this token to a fresh string (no trailing space).
    pub fn as_str(&self) -> String {
        let mut s = String::new();
        self.write_str(&mut s).unwrap();
        s
    }
    /// Append this token's source-text form to `result`; metadata is not rendered.
    pub fn write_str(&self, result: &mut String) -> std::fmt::Result {
        self.token.write_str(result)
    }
}

3
src/main.rs Normal file
View file

@ -0,0 +1,3 @@
/// Binary entrypoint; currently just a placeholder.
fn main() {
    println!("Hello, world!");
}