Create initial language parser

Author: NGnius (Graham), 2024-05-30 20:03:56 -04:00
Commit: 453c48b686
17 changed files with 2062 additions and 0 deletions

.gitignore (vendored, new file, 1 line)

@@ -0,0 +1 @@
/target

Cargo.lock (generated, new file, 129 lines)

@@ -0,0 +1,129 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "beef"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3a8241f3ebb85c056b509d4327ad0358fbbba6ffb340bf388f26350aeda225b1"
[[package]]
name = "diff"
version = "0.1.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8"
[[package]]
name = "fnv"
version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
[[package]]
name = "lazy_static"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]]
name = "logos"
version = "0.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "161971eb88a0da7ae0c333e1063467c5b5727e7fb6b710b8db4814eade3a42e8"
dependencies = [
"logos-derive",
]
[[package]]
name = "logos-codegen"
version = "0.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e31badd9de5131fdf4921f6473d457e3dd85b11b7f091ceb50e4df7c3eeb12a"
dependencies = [
"beef",
"fnv",
"lazy_static",
"proc-macro2",
"quote",
"regex-syntax",
"syn",
]
[[package]]
name = "logos-derive"
version = "0.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1c2a69b3eb68d5bd595107c9ee58d7e07fe2bb5e360cc85b0f084dedac80de0a"
dependencies = [
"logos-codegen",
]
[[package]]
name = "muss2"
version = "0.1.0"
[[package]]
name = "muss2-lang"
version = "0.1.0"
dependencies = [
"logos",
"pretty_assertions",
]
[[package]]
name = "pretty_assertions"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "af7cee1a6c8a5b9208b3cb1061f10c0cb689087b3d8ce85fb9d2dd7a29b6ba66"
dependencies = [
"diff",
"yansi",
]
[[package]]
name = "proc-macro2"
version = "1.0.79"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e835ff2298f5721608eb1a980ecaee1aef2c132bf95ecc026a11b7bf3c01c02e"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef"
dependencies = [
"proc-macro2",
]
[[package]]
name = "regex-syntax"
version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56"
[[package]]
name = "syn"
version = "2.0.55"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "002a1b3dbf967edfafc32655d0f377ab0bb7b994aa1d32c8cc7e9b8bf3ebb8f0"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "unicode-ident"
version = "1.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
[[package]]
name = "yansi"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec"

Cargo.toml (new file, 13 lines)

@@ -0,0 +1,13 @@
[package]
name = "muss2"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
[workspace]
members = [
"crates/lang"
]

crates/lang/Cargo.toml (new file, 12 lines)

@@ -0,0 +1,12 @@
[package]
name = "muss2-lang"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
logos = { version = "0.14" }
[dev-dependencies]
pretty_assertions = "1.3.0"

crates/lang/src/lexer/errors.rs (new file, 16 lines)

@@ -0,0 +1,16 @@
#[derive(Debug, Default, PartialEq, Eq, Clone, Copy)]
pub enum LexError {
#[default]
UnrecognizedToken,
}
impl core::fmt::Display for LexError {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
//use core::fmt::Write;
match self {
Self::UnrecognizedToken => write!(f, "Unrecognized token"),
}
}
}
impl std::error::Error for LexError {}

crates/lang/src/lexer/mod.rs (new file, 68 lines)

@@ -0,0 +1,68 @@
mod errors;
pub use errors::LexError;
mod tokens;
pub use tokens::{Token, TokenInfo};
#[cfg(test)]
mod test {
use super::*;
use pretty_assertions::assert_eq;
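// Exercises every token variant; shared by both tests below.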
const ALL_TOKENS_STR: &str = "u n + - * / && || => x> ~> = . n_u_ :: is_a_ :: VaR1AbLe ( ) { } : ; -12345 12345.6789 \"char[]\" /* long\ncomment */ // short comment \n \n <>";
#[test]
fn parse_everything() {
let expected = vec![
Token::Union(TokenInfo { line: 0, column: 0..1, index: 0..1 }),
Token::Intersection(TokenInfo { line: 0, column: 2..3, index: 2..3 }),
Token::Plus(TokenInfo { line: 0, column: 4..5, index: 4..5 }),
Token::Minus(TokenInfo { line: 0, column: 6..7, index: 6..7 }),
Token::Multiply(TokenInfo { line: 0, column: 8..9, index: 8..9 }),
Token::Divide(TokenInfo { line: 0, column: 10..11, index: 10..11 }),
Token::And(TokenInfo { line: 0, column: 12..14, index: 12..14 }),
Token::Or(TokenInfo { line: 0, column: 15..17, index: 15..17 }),
Token::Map(TokenInfo { line: 0, column: 18..20, index: 18..20 }),
Token::Filter(TokenInfo { line: 0, column: 21..23, index: 21..23 }),
Token::Sort(TokenInfo { line: 0, column: 24..26, index: 24..26 }),
Token::Equal(TokenInfo { line: 0, column: 27..28, index: 27..28 }),
Token::Dot(TokenInfo { line: 0, column: 29..30, index: 29..30 }),
Token::Variable(("n_u_".into(), TokenInfo { line: 0, column: 31..35, index: 31..35 })),
Token::PathSeparator(TokenInfo { line: 0, column: 36..38, index: 36..38 }),
Token::Variable(("is_a_".into(), TokenInfo { line: 0, column: 39..44, index: 39..44 })),
Token::PathSeparator(TokenInfo { line: 0, column: 45..47, index: 45..47 }),
Token::Variable(("VaR1AbLe".into(), TokenInfo { line: 0, column: 48..56, index: 48..56 })),
Token::OpenRoundBracket(TokenInfo { line: 0, column: 57..58, index: 57..58 }),
Token::CloseRoundBracket(TokenInfo { line: 0, column: 59..60, index: 59..60 }),
Token::OpenCurlyBracket(TokenInfo { line: 0, column: 61..62, index: 61..62 }),
Token::CloseCurlyBracket(TokenInfo { line: 0, column: 63..64, index: 63..64 }),
Token::Colon(TokenInfo { line: 0, column: 65..66, index: 65..66 }),
Token::Semicolon(TokenInfo { line: 0, column: 67..68, index: 67..68 }),
Token::Integer((-12345, TokenInfo { line: 0, column: 69..75, index: 69..75 })),
Token::Float((12345.6789, TokenInfo { line: 0, column: 76..86, index: 76..86 })),
Token::String(("char[]".into(), TokenInfo { line: 0, column: 87..95, index: 87..95 })),
Token::LongComment((" long\ncomment ".into(), TokenInfo { line: 0, column: 96..114, index: 96..114 })),
Token::ShortComment((" short comment ".into(), TokenInfo { line: 1, column: 11..29, index: 115..133 })),
Token::Newline(TokenInfo { line: 2, column: 1..2, index: 134..135 }),
Token::Generate(TokenInfo { line: 3, column: 1..3, index: 136..138 }),
];
let mut actual = Vec::new();
for (index, token_result) in Token::tokenify(ALL_TOKENS_STR).enumerate() {
assert!(token_result.is_ok(), "Token #{} (expected: {:?}) failed to parse: {:?}", index, expected[index], token_result.err());
actual.push(token_result.unwrap());
}
assert_eq!(actual, expected)
}
#[test]
fn parse_reversibility() {
let expected = format!("{} ", ALL_TOKENS_STR);
let actual = Token::stringify(Token::tokenify(&expected).map(|token_result| token_result.unwrap()));
assert_eq!(actual, expected)
}
}

crates/lang/src/lexer/tokens.rs (new file, 240 lines)

@@ -0,0 +1,240 @@
use logos::Logos;
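/// Mutable lexer state threaded through the token callbacks: tracks the
/// current line number and the byte offsets needed to turn spans into
/// line/column positions.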
#[derive(Default, PartialEq, Clone)]
pub struct ExtraState {
line: usize,
line_start: usize,
start: usize,
end: usize,
}
impl ExtraState {
fn lexer_sync(lex: &mut logos::Lexer<Token>) {
let span = lex.span();
lex.extras.start = span.start;
lex.extras.end = span.end;
}
fn newline(lex: &mut logos::Lexer<Token>) -> TokenInfo {
Self::lexer_sync(lex);
let info = lex.extras.token_info();
lex.extras.line += 1;
lex.extras.line_start = lex.span().end;
info
}
fn token_info(&self) -> TokenInfo {
TokenInfo {
line: self.line,
column: (self.start - self.line_start)..(self.end - self.line_start),
index: self.start..self.end,
}
}
}
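/// Source location attached to every token: 0-based line number plus column
/// and byte-index ranges within the input.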
#[derive(Debug, PartialEq, Clone)]
pub struct TokenInfo {
pub line: usize,
pub column: core::ops::Range<usize>,
pub index: core::ops::Range<usize>,
}
#[derive(Logos, Debug, PartialEq, Clone)]
#[logos(skip r"[ \t\f]+")] // Ignore this regex pattern between tokens
#[logos(error = super::LexError)]
#[logos(extras = ExtraState)]
pub enum Token {
// Operands
// Set operations
#[token("u", priority = 99, callback = all_cb)]
Union(TokenInfo),
#[token("n", priority = 99, callback = all_cb)]
Intersection(TokenInfo),
// Arithmetic operations (also applicable to sets)
#[token("+", callback = all_cb)]
Plus(TokenInfo),
#[token("-", callback = all_cb)]
Minus(TokenInfo),
#[token("*", callback = all_cb)]
Multiply(TokenInfo),
#[token("/", callback = all_cb)]
Divide(TokenInfo),
// Logical operations
#[token("&&", callback = all_cb)]
And(TokenInfo),
#[token("||", callback = all_cb)]
Or(TokenInfo),
// Functional
#[token("=>", callback = all_cb)]
Map(TokenInfo),
#[token("x>", callback = all_cb)]
Filter(TokenInfo),
#[token("~>", callback = all_cb)]
Sort(TokenInfo),
#[token("<>", callback = all_cb)]
Generate(TokenInfo),
// Declarations
// Basics
#[token("=", callback = all_cb)]
Equal(TokenInfo),
#[token("::", callback = all_cb)]
PathSeparator(TokenInfo),
#[token(".", callback = all_cb)]
Dot(TokenInfo),
#[regex("[a-zA-Z_][a-zA-Z_0-9]*", priority = 1, callback = variable_cb)]
Variable((String, TokenInfo)),
#[token("(", callback = all_cb)]
OpenRoundBracket(TokenInfo),
#[token(")", callback = all_cb)]
CloseRoundBracket(TokenInfo),
#[token("{", callback = all_cb)]
OpenCurlyBracket(TokenInfo),
#[token("}", callback = all_cb)]
CloseCurlyBracket(TokenInfo),
#[token(":", callback = all_cb)]
Colon(TokenInfo),
#[token(";", callback = all_cb)]
Semicolon(TokenInfo),
// Literals
#[regex("-?[1-9][0-9]*", priority = 1, callback = integer_cb)]
Integer((i64, TokenInfo)),
#[regex("-?[1-9][0-9]*\\.[0-9]+", priority = 99, callback = float_cb)]
Float((f64, TokenInfo)),
#[regex(r#""([^"\\]|\\["\\bnfrt]|u[a-fA-F0-9]{4})*""#, priority = 1, callback = string_cb)]
String((String, TokenInfo)),
/// Comments
#[regex(r#"\/\*([^\*]+(\*[^\/])?)*\*\/"#, priority = 1, callback = multiline_comment_cb)]
LongComment((String, TokenInfo)),
#[regex("\\/\\/[^\n]*\n", priority = 1, callback = oneline_comment_cb)]
ShortComment((String, TokenInfo)),
/// Ignore
#[regex(r"\n", newline_cb)]
Newline(TokenInfo),
}
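// Every variant's callback funnels through `all_cb`, which syncs `ExtraState`
// with the current span and builds the `TokenInfo`; value-carrying variants
// additionally parse or slice the matched text, and the comment callbacks also
// advance the line counter for any newlines they swallow.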
fn all_cb(lex: &mut logos::Lexer<Token>) -> TokenInfo {
ExtraState::lexer_sync(lex);
lex.extras.token_info()
}
fn variable_cb(lex: &mut logos::Lexer<Token>) -> (String, TokenInfo) {
let slice = lex.slice();
(slice.to_owned(), all_cb(lex))
}
fn integer_cb(lex: &mut logos::Lexer<Token>) -> (i64, TokenInfo) {
let slice = lex.slice();
(slice.parse().unwrap(), all_cb(lex))
}
fn float_cb(lex: &mut logos::Lexer<Token>) -> (f64, TokenInfo) {
let slice = lex.slice();
(slice.parse().unwrap(), all_cb(lex))
}
fn string_cb(lex: &mut logos::Lexer<Token>) -> (String, TokenInfo) {
let slice = lex.slice();
// TODO handle escaped chars
(slice[1..slice.len()-1].to_owned(), all_cb(lex))
}
fn multiline_comment_cb(lex: &mut logos::Lexer<Token>) -> (String, TokenInfo) {
let slice = lex.slice();
let info = all_cb(lex);
for (i, c) in slice.chars().enumerate() {
if c == '\n' {
lex.extras.line += 1;
lex.extras.line_start = lex.span().start + i + 1;
}
}
(slice[2..slice.len()-2].to_owned(), info)
}
fn oneline_comment_cb(lex: &mut logos::Lexer<Token>) -> (String, TokenInfo) {
let slice = lex.slice();
let info = all_cb(lex);
lex.extras.line += 1;
lex.extras.line_start = lex.span().end;
(slice[2..slice.len()-1].to_owned(), info)
}
fn newline_cb(lex: &mut logos::Lexer<Token>) -> TokenInfo {
ExtraState::newline(lex)
}
impl Token {
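/// Creates a lexing iterator over `s`. A minimal round-trip sketch (hedged:
/// mirrors the `parse_reversibility` test; `stringify` emits a space after
/// every token, so the input below ends with one):
///
/// ```ignore
/// let source = "u n + - ";
/// let tokens = Token::tokenify(source).map(|r| r.unwrap());
/// assert_eq!(Token::stringify(tokens), source);
/// ```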
pub fn tokenify<'a>(s: &'a str) -> logos::Lexer<'a, Self> {
Token::lexer(s)
}
pub fn stringify<'a>(tokens: impl core::iter::Iterator<Item=Self> + 'a) -> String {
use core::fmt::Write;
let mut result = String::new();
tokens.for_each(|t| {
t.write_str(&mut result).unwrap();
write!(result, " ").unwrap();
});
result
}
pub fn stringify_ref<'a, 'b>(tokens: impl core::iter::Iterator<Item=&'b Self> + 'a) -> String {
use core::fmt::Write;
let mut result = String::new();
tokens.for_each(|t| {
t.write_str(&mut result).unwrap();
write!(result, " ").unwrap();
});
result
}
pub fn as_str(&self) -> String {
let mut s = String::new();
self.write_str(&mut s).unwrap();
s
}
pub fn write_str(&self, result: &mut String) -> std::fmt::Result {
use core::fmt::Write;
match self {
Self::Union(_) => write!(result, "u"),
Self::Intersection(_) => write!(result, "n"),
Self::Plus(_) => write!(result, "+"),
Self::Minus(_) => write!(result, "-"),
Self::Multiply(_) => write!(result, "*"),
Self::Divide(_) => write!(result, "/"),
Self::And(_) => write!(result, "&&"),
Self::Or(_) => write!(result, "||"),
Self::Map(_) => write!(result, "=>"),
Self::Filter(_) => write!(result, "x>"),
Self::Sort(_) => write!(result, "~>"),
Self::Generate(_) => write!(result, "<>"),
Self::Equal(_) => write!(result, "="),
Self::PathSeparator(_) => write!(result, "::"),
Self::Dot(_) => write!(result, "."),
Self::Variable((name, _)) => write!(result, "{}", name),
Self::OpenRoundBracket(_) => write!(result, "("),
Self::CloseRoundBracket(_) => write!(result, ")"),
Self::OpenCurlyBracket(_) => write!(result, "{{"),
Self::CloseCurlyBracket(_) => write!(result, "}}"),
Self::Colon(_) => write!(result, ":"),
Self::Semicolon(_) => write!(result, ";"),
Self::Integer((int, _)) => write!(result, "{}", int),
Self::Float((float, _)) => write!(result, "{}", float),
Self::String((s, _)) => write!(result, "\"{}\"", s),
Self::LongComment((c, _)) => write!(result, "/*{}*/", c),
Self::ShortComment((c, _)) => write!(result, "//{}\n", c),
Self::Newline(_) => write!(result, "\n"),
}
}
pub fn is_ignore(&self) -> bool {
matches!(self, Self::Newline(_))
}
}

crates/lang/src/lib.rs (new file, 7 lines)

@@ -0,0 +1,7 @@
//! Language specification
//!
//! Parsing order: lexer -> syntax -> statement
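//!
//! A minimal end-to-end sketch (hedged: mirrors the tests in `statement`,
//! where `LanguageParser::lex` drives all three stages):
//!
//! ```ignore
//! use muss2_lang::statement::LanguageParser;
//!
//! for result in LanguageParser::lex("my_module {}") {
//!     println!("{:?}", result.expect("parse error"));
//! }
//! ```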
pub mod lexer;
pub mod statement;
pub mod syntax;

crates/lang/src/statement/errors.rs (new file, 32 lines)

@@ -0,0 +1,32 @@
#[derive(Debug, PartialEq, Clone)]
pub enum LanguageError {
InvalidSequence(Vec<crate::lexer::Token>),
InvalidSyntax(Vec<crate::syntax::SyntaxToken>),
UnexpectedEnd(Vec<crate::syntax::SyntaxToken>),
UnexpectedToken(crate::syntax::SyntaxToken),
UnrecognizedToken,
}
impl core::fmt::Display for LanguageError {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
//use core::fmt::Write;
match self {
Self::InvalidSequence(seq) => write!(f, "Invalid sequence {:?}", seq.as_slice()),
Self::InvalidSyntax(seq) => write!(f, "Invalid syntax {}", crate::syntax::SyntaxToken::stringify_ref(seq.iter())),
Self::UnexpectedEnd(seq) => write!(f, "Unexpected end of file {} <EOF>", crate::syntax::SyntaxToken::stringify_ref(seq.iter())),
Self::UnexpectedToken(token) => write!(f, "Unexpected token {}", token.as_str()),
Self::UnrecognizedToken => write!(f, "Unrecognized token"),
}
}
}
impl std::error::Error for LanguageError {}
impl From<crate::syntax::SyntaxError> for LanguageError {
fn from(value: crate::syntax::SyntaxError) -> Self {
match value {
crate::syntax::SyntaxError::UnrecognizedToken => Self::UnrecognizedToken,
crate::syntax::SyntaxError::InvalidSequence(seq) => Self::InvalidSequence(seq),
}
}
}

crates/lang/src/statement/mod.rs (new file, 133 lines)

@@ -0,0 +1,133 @@
//! High-level language
mod errors;
pub use errors::LanguageError;
mod parser;
pub use parser::LanguageParser;
mod tree;
pub use tree::{Statement, Notification, Param, Declare, DeclareFun, DeclareType, Module, Op, DeclareAssignVar, DeclareVar, AssignVar, Dyadic, CallFun};
// Example of the target syntax: my_namespace { <>generate_fn()=>map_fn()x>filter_fn(_)~>sort_fn(_) }
#[cfg(test)]
mod test {
use super::*;
use pretty_assertions::assert_eq;
fn assert_no_errors(iter: impl Iterator<Item=Result<Statement, LanguageError>>) -> Vec<Statement> {
let mut statements = Vec::new();
for (i, res) in iter.enumerate() {
match res {
Ok(statement) => if !statement.is_ignore() { statements.push(statement); },
Err(e) => {
let e_display = e.to_string();
match e {
LanguageError::InvalidSequence(_seq) => {},
LanguageError::InvalidSyntax(seq) => {
let bad_syntax = seq.last().expect("Empty invalid syntax token sequence");
let bad_lex = bad_syntax.info.last().expect("Empty token info on syntax token");
eprintln!(
"{} @ line {}, column {} to {} (index {} to {}), token #{}",
e_display, bad_lex.line,
bad_lex.column.start, bad_lex.column.end,
bad_lex.index.start, bad_lex.index.end, i
);
},
LanguageError::UnexpectedEnd(seq) => {
let bad_syntax = seq.last().expect("Empty unexpected end token sequence");
let bad_lex = bad_syntax.info.last().expect("Empty token info on syntax token");
eprintln!(
"{} @ line {}, column {} to {} (index {} to {}), token #{}",
e_display, bad_lex.line,
bad_lex.column.start, bad_lex.column.end,
bad_lex.index.start, bad_lex.index.end, i
);
},
LanguageError::UnexpectedToken(bad_syntax) => {
let bad_lex = bad_syntax.info.last().expect("Empty token info on syntax token");
eprintln!(
"{} @ line {}, column {} to {} (index {} to {}), token #{}",
e_display, bad_lex.line,
bad_lex.column.start, bad_lex.column.end,
bad_lex.index.start, bad_lex.index.end, i
);
},
LanguageError::UnrecognizedToken => {
eprintln!("Unrecognized token #{} ?!?!", i);
}
}
panic!("{} for token #{}", e_display, i);
}
}
}
statements
}
#[test]
fn parse_minimum_module() {
let parser = LanguageParser::lex("my_module {}");
let parsed = assert_no_errors(parser);
assert_eq!(vec![
Statement::Module(Module {
name: crate::syntax::Path(vec![
"my_module".into(),
]),
inner: Vec::new(),
}),
], parsed);
}
#[test]
fn parse_minimum_function_declaration() {
let parser = LanguageParser::lex("my_generator () <> {}");
let parsed = assert_no_errors(parser);
assert_eq!(vec![
Statement::Declare(Declare::Function(DeclareFun {
name: crate::syntax::Path(vec![
"my_generator".into(),
]),
params: Vec::new(),
type_: crate::syntax::Functional::Generate,
ops: Vec::new(),
}))
], parsed);
}
#[test]
fn parse_minimum_type_declaration() {
let parser = LanguageParser::lex("my_type = {}");
let parsed = assert_no_errors(parser);
assert_eq!(vec![
Statement::Declare(Declare::Type(DeclareType {
name: crate::syntax::Path(vec![
"my_type".into(),
]),
params: Vec::new(),
}))
], parsed);
}
#[test]
fn parse_minimum_entrypoint() {
let parser = LanguageParser::lex("<> my_generator ()");
let parsed = assert_no_errors(parser);
assert_eq!(vec![
Statement::Entrypoint(CallFun {
type_: crate::syntax::Functional::Generate,
var: crate::syntax::Path(vec![
"my_generator".into(),
]),
params: Vec::new(),
})
], parsed);
}
}

crates/lang/src/statement/parser.rs (new file, 769 lines)

@@ -0,0 +1,769 @@
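/// Recursive-descent parser from syntax tokens to statements. `lookahead`
/// buffers at most one token, letting sub-parsers peek and push back a token
/// without a multi-token peekable wrapper.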
pub struct LanguageParser<'a, I: core::iter::Iterator<Item=Result<crate::syntax::SyntaxToken, crate::syntax::SyntaxError>> + 'a> {
_idc: core::marker::PhantomData<&'a ()>,
iter: I,
lookahead: Option<crate::syntax::SyntaxToken>,
incomplete_modules: Vec<super::Module>,
}
impl <'a, I: core::iter::Iterator<Item=Result<crate::syntax::SyntaxToken, crate::syntax::SyntaxError>> + 'a> LanguageParser<'a, I> {
pub fn new(tokens_in: I) -> Self {
Self {
_idc: Default::default(),
iter: tokens_in,
lookahead: None,
incomplete_modules: Vec::new(),
}
}
fn parse_incomplete_module(&mut self, name: crate::syntax::Path) -> Result<super::Module, super::LanguageError> {
Ok(super::Module { name, inner: Vec::new() })
}
fn parse_function_decl(&mut self, name: crate::syntax::Path) -> Result<super::DeclareFun, super::LanguageError> {
// `Name (` (first 2 tokens) are already consumed
let mut lookahead = if let Some(lookahead) = self.lookahead.take() {
lookahead
} else {
match self.iter.next() {
Some(Err(e)) => return Err(e.into()),
Some(Ok(t)) => t,
None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
}
};
// function params
let mut params = Vec::new();
while !matches!(lookahead.token, crate::syntax::Token::CloseRoundBracket) {
self.lookahead = Some(lookahead);
params.push(self.parse_param()?);
lookahead = match self.iter.next() {
Some(Err(e)) => return Err(e.into()),
Some(Ok(t)) => t,
None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
};
}
// function type
let token1 = match self.iter.next() {
Some(Err(e)) => return Err(e.into()),
Some(Ok(t)) => t,
None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
};
let fn_type = if let crate::syntax::Token::Functional(fn_type) = token1.token {
fn_type
} else {
return Err(super::LanguageError::InvalidSyntax(vec![token1]));
};
// operations
let token2 = match self.iter.next() {
Some(Err(e)) => return Err(e.into()),
Some(Ok(t)) => t,
None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
};
if !matches!(token2.token, crate::syntax::Token::OpenCurlyBracket) {
return Err(super::LanguageError::InvalidSyntax(vec![
crate::syntax::SyntaxToken {
token: crate::syntax::Token::Functional(fn_type),
info: token1.info,
},
token2
]));
}
lookahead = match self.iter.next() {
Some(Err(e)) => return Err(e.into()),
Some(Ok(t)) => t,
None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
};
let mut ops = Vec::new();
while !matches!(lookahead.token, crate::syntax::Token::CloseCurlyBracket) {
self.lookahead = Some(lookahead);
ops.push(self.parse_op()?);
lookahead = match self.iter.next() {
Some(Err(e)) => return Err(e.into()),
Some(Ok(t)) => t,
None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
};
}
Ok(super::DeclareFun {
name,
params,
type_: fn_type,
ops,
})
}
fn parse_type_decl(&mut self, name: crate::syntax::Path) -> Result<super::DeclareType, super::LanguageError> {
// `Name =` (first 2 tokens) are already consumed
let token0 = if let Some(lookahead) = self.lookahead.take() {
lookahead
} else {
match self.iter.next() {
Some(Err(e)) => return Err(e.into()),
Some(Ok(t)) => t,
None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
}
};
match token0.token {
crate::syntax::Token::OpenCurlyBracket => {
let mut lookahead = match self.iter.next() {
Some(Err(e)) => return Err(e.into()),
Some(Ok(t)) => t,
None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
};
let mut params = Vec::new();
while !matches!(lookahead.token, crate::syntax::Token::CloseCurlyBracket) {
self.lookahead = Some(lookahead);
params.push(self.parse_param()?);
lookahead = match self.iter.next() {
Some(Err(e)) => return Err(e.into()),
Some(Ok(t)) => t,
None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
};
}
Ok(super::DeclareType {
name,
params,
})
}
t => Err(super::LanguageError::InvalidSyntax(vec![
crate::syntax::SyntaxToken {
token: t,
info: token0.info,
}
]))
}
}
fn parse_function_call(&mut self, fun: crate::syntax::Functional) -> Result<super::CallFun, super::LanguageError> {
// `Func` (first token) is already consumed
let token0 = if let Some(lookahead) = self.lookahead.take() {
lookahead
} else {
match self.iter.next() {
Some(Err(e)) => return Err(e.into()),
Some(Ok(t)) => t,
None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
}
};
let name = if let crate::syntax::Token::Path(name) = token0.token {
name
} else {
return Err(super::LanguageError::UnexpectedToken(token0));
};
let token1 = match self.iter.next() {
Some(Err(e)) => return Err(e.into()),
Some(Ok(t)) => t,
None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
};
if !matches!(token1.token, crate::syntax::Token::OpenRoundBracket) {
return Err(super::LanguageError::UnexpectedToken(token1));
}
let op_params = match self.parse_op_params() {
Ok(ops) => ops,
Err(e) => {
return Err(Self::extend_err_tokens(vec![
crate::syntax::SyntaxToken {
token: crate::syntax::Token::Path(name),
info: token0.info,
},
crate::syntax::SyntaxToken {
token: crate::syntax::Token::OpenRoundBracket,
info: token1.info,
},
], e));
}
};
let token_last = if let Some(lookahead) = self.lookahead.take() {
lookahead
} else {
match self.iter.next() {
Some(Err(e)) => return Err(e.into()),
Some(Ok(t)) => t,
None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
}
};
if !matches!(token_last.token, crate::syntax::Token::CloseRoundBracket) {
return Err(super::LanguageError::UnexpectedToken(token_last));
}
Ok(super::CallFun {
type_: fun,
var: name,
params: op_params,
})
}
fn parse_op(&mut self) -> Result<super::Op, super::LanguageError> {
let op = self.parse_inner_op(0)?;
let token_last = if let Some(lookahead) = self.lookahead.take() {
lookahead
} else {
match self.iter.next() {
Some(Err(e)) => return Err(e.into()),
Some(Ok(t)) => t,
None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
}
};
if let crate::syntax::Token::Semicolon = token_last.token {
Ok(op)
} else {
Err(super::LanguageError::UnexpectedToken(token_last))
}
}
fn extend_err_tokens(mut tokens: Vec<crate::syntax::SyntaxToken>, err: super::LanguageError) -> super::LanguageError {
match err {
super::LanguageError::InvalidSyntax(mut seq) => {
tokens.append(&mut seq);
super::LanguageError::InvalidSyntax(tokens)
},
super::LanguageError::UnexpectedEnd(mut seq) => {
tokens.append(&mut seq);
super::LanguageError::UnexpectedEnd(tokens)
}
e => e
}
}
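// Parses a single operation expression. After a complete operand it peeks one
// token: an operator there makes the expression dyadic, and the right-hand
// side is parsed recursively, so chains nest to the right with no operator
// precedence yet.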
fn parse_inner_op(&mut self, recursion_level: usize) -> Result<super::Op, super::LanguageError> {
let token0 = if let Some(lookahead) = self.lookahead.take() {
lookahead
} else {
match self.iter.next() {
Some(Err(e)) => return Err(e.into()),
Some(Ok(t)) => t,
None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
}
};
let op0 = match token0.token {
crate::syntax::Token::Path(var_name) => {
// variable-oriented operations
let token1 = match self.iter.next() {
Some(Err(e)) => return Err(e.into()),
Some(Ok(t)) => t,
None => return Err(super::LanguageError::UnexpectedEnd(vec![
crate::syntax::SyntaxToken {
token: crate::syntax::Token::Path(var_name),
info: token0.info,
}
])),
};
match token1.token {
crate::syntax::Token::Colon => {
// Declare-assign or declare
let mut lookahead = match self.iter.next() {
Some(Err(e)) => return Err(e.into()),
Some(Ok(t)) => t,
None => return Err(super::LanguageError::UnexpectedEnd(vec![
crate::syntax::SyntaxToken {
token: crate::syntax::Token::Path(var_name),
info: token0.info,
},
crate::syntax::SyntaxToken {
token: crate::syntax::Token::Colon,
info: token1.info,
},
])),
};
let type_name = if let crate::syntax::Token::Path(type_name) = lookahead.token {
let type_token_info = lookahead.info;
lookahead = match self.iter.next() {
Some(Err(e)) => return Err(e.into()),
Some(Ok(t)) => t,
None => return Err(super::LanguageError::UnexpectedEnd(vec![
crate::syntax::SyntaxToken {
token: crate::syntax::Token::Path(var_name),
info: token0.info,
},
crate::syntax::SyntaxToken {
token: crate::syntax::Token::Colon,
info: token1.info,
},
crate::syntax::SyntaxToken {
token: crate::syntax::Token::Path(type_name),
info: type_token_info,
},
])),
};
Some((type_name, type_token_info))
} else {
None
};
if let crate::syntax::Token::Equal = lookahead.token {
// Declare-Assign
let inner_op = match self.parse_inner_op(recursion_level + 1) {
Ok(op) => op,
Err(e) => {
// roughly equivalent to self.parse_inner_op(...).map_err(|e| { ... })
// (the closure captures variables which the compiler can't prove aren't used in this fn after)
let tokens = if let Some((type_name, type_token_info)) = type_name {
vec![
crate::syntax::SyntaxToken {
token: crate::syntax::Token::Path(var_name),
info: token0.info,
},
crate::syntax::SyntaxToken {
token: crate::syntax::Token::Colon,
info: token1.info,
},
crate::syntax::SyntaxToken {
token: crate::syntax::Token::Path(type_name),
info: type_token_info,
},
crate::syntax::SyntaxToken {
token: crate::syntax::Token::Equal,
info: lookahead.info,
},
]
} else {
vec![
crate::syntax::SyntaxToken {
token: crate::syntax::Token::Path(var_name),
info: token0.info,
},
crate::syntax::SyntaxToken {
token: crate::syntax::Token::Colon,
info: token1.info,
},
crate::syntax::SyntaxToken {
token: crate::syntax::Token::Equal,
info: lookahead.info,
},
]
};
return Err(Self::extend_err_tokens(tokens, e));
}
};
super::Op::DeclareAssign(super::tree::DeclareAssignVar {
var: var_name,
type_: type_name.map(|x| x.0),
op: Box::new(inner_op),
})
} else {
// declare
self.lookahead = Some(lookahead);
super::Op::Declare(super::tree::DeclareVar {
var: var_name,
type_: type_name.map(|x| x.0),
})
}
},
crate::syntax::Token::Equal => {
// Assign
let token2 = match self.iter.next() {
Some(Err(e)) => return Err(e.into()),
Some(Ok(t)) => t,
None => return Err(super::LanguageError::UnexpectedEnd(vec![
crate::syntax::SyntaxToken {
token: crate::syntax::Token::Path(var_name),
info: token0.info,
},
crate::syntax::SyntaxToken {
token: crate::syntax::Token::Equal,
info: token1.info,
}
])),
};
if let crate::syntax::Token::Field(f) = token2.token {
let inner_op = self.parse_inner_op(recursion_level + 1)?;
super::Op::Assign(crate::statement::tree::AssignVar {
var: var_name,
field: Some(f),
op: Box::new(inner_op),
})
} else {
self.lookahead = Some(token2);
let inner_op = self.parse_inner_op(recursion_level + 1)?;
super::Op::Assign(crate::statement::tree::AssignVar {
var: var_name,
field: None,
op: Box::new(inner_op),
})
}
},
/*crate::syntax::Token::OpenRoundBracket => {
// Call
let op_params = match self.parse_op_params() {
Ok(ops) => ops,
Err(e) => {
return Err(Self::extend_err_tokens(vec![
crate::syntax::SyntaxToken {
token: crate::syntax::Token::Path(var_name),
info: token0.info,
},
crate::syntax::SyntaxToken {
token: crate::syntax::Token::OpenRoundBracket,
info: token1.info,
},
], e));
}
};
self.lookahead.take().unwrap(); // always a closing round bracket; no need to verify
let token_filter = match self.iter.next() {
Some(Err(e)) => return Err(e.into()),
Some(Ok(t)) => t,
None => return Err(super::LanguageError::UnexpectedEnd(vec![
crate::syntax::SyntaxToken {
token: crate::syntax::Token::Path(var_name),
info: token0.info,
},
crate::syntax::SyntaxToken {
token: crate::syntax::Token::Equal,
info: token1.info,
},
// TODO include tokens from op_params
])),
};
if let crate::syntax::Token::Functional(crate::syntax::Functional::Filter) = token_filter.token {
let filter_op = match self.parse_inner_op(recursion_level + 1) {
Ok(x) => x,
Err(e) => {
return Err(Self::extend_err_tokens(vec![
crate::syntax::SyntaxToken {
token: crate::syntax::Token::Path(var_name),
info: token0.info,
},
crate::syntax::SyntaxToken {
token: crate::syntax::Token::Equal,
info: token1.info,
},
// TODO include tokens from op_params
], e))
}
};
super::Op::Call(super::CallVar {
var: var_name,
params: op_params,
})
} else {
return Err(super::LanguageError::UnexpectedToken(token_filter));
}
}*/
t => {
// Retrieve
self.lookahead = Some(crate::syntax::SyntaxToken {
token: t,
info: token1.info,
});
super::Op::Retrieve(var_name)
}
}
},
crate::syntax::Token::Functional(fun) => {
// Call
match self.parse_function_call(fun.clone()) {
Ok(x) => super::Op::Call(x),
Err(e) => {
return Err(Self::extend_err_tokens(vec![
crate::syntax::SyntaxToken {
token: crate::syntax::Token::Functional(fun),
info: token0.info,
}
], e));
}
}
}
crate::syntax::Token::Operation(unary_op) => {
// Unary operation
let inner_op = match self.parse_inner_op(recursion_level + 1) {
Ok(op) => op,
Err(e) => {
return Err(Self::extend_err_tokens(vec![
crate::syntax::SyntaxToken {
token: crate::syntax::Token::Operation(unary_op),
info: token0.info,
}
], e));
}
};
super::Op::Unary(super::tree::Unary {
first: Box::new(inner_op),
op: unary_op,
})
},
crate::syntax::Token::Literal(literal) => {
super::Op::Literal(literal)
},
crate::syntax::Token::OpenRoundBracket => {
// Operation surrounded by brackets
let inner_op = match self.parse_inner_op(recursion_level + 1) {
Ok(op) => op,
Err(e) => {
return Err(Self::extend_err_tokens(vec![
crate::syntax::SyntaxToken {
token: crate::syntax::Token::OpenRoundBracket,
info: token0.info,
}
], e));
}
};
let token_last = if let Some(lookahead) = self.lookahead.take() {
lookahead
} else {
match self.iter.next() {
Some(Err(e)) => return Err(e.into()),
Some(Ok(t)) => t,
// TODO include all tokens from inner_op
None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
}
};
if let crate::syntax::Token::CloseRoundBracket = token_last.token {
super::Op::Bracketed(Box::new(inner_op))
} else {
// TODO maybe? include all tokens from inner_op
return Err(super::LanguageError::UnexpectedToken(token_last));
}
}
t => {
return Err(super::LanguageError::InvalidSyntax(vec![
crate::syntax::SyntaxToken {
token: t,
info: token0.info,
}
]));
}
};
// check if operation continues (i.e. is dyadic)
let lookahead = if let Some(lookahead) = self.lookahead.take() {
lookahead
} else {
match self.iter.next() {
Some(Err(e)) => return Err(e.into()),
Some(Ok(t)) => t,
None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
}
};
if let crate::syntax::Token::Operation(dyadic_op) = lookahead.token {
let op1 = match self.parse_inner_op(recursion_level + 1) {
Ok(op) => op,
Err(e) => {
return Err(Self::extend_err_tokens(vec![
// TODO add tokens of op0 too
crate::syntax::SyntaxToken {
token: crate::syntax::Token::Operation(dyadic_op),
info: lookahead.info,
}
], e));
}
};
Ok(super::Op::Dyadic(super::Dyadic {
first: Box::new(op0),
op: dyadic_op,
second: Box::new(op1),
}))
} else {
self.lookahead = Some(lookahead);
Ok(op0)
}
}
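// Parses `;`-terminated operations up to the closing round bracket, which is
// pushed back into `lookahead` for the caller to consume.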
fn parse_op_params(&mut self) -> Result<Vec<super::Op>, super::LanguageError> {
let mut lookahead = if let Some(lookahead) = self.lookahead.take() {
lookahead
} else {
match self.iter.next() {
Some(Err(e)) => return Err(e.into()),
Some(Ok(t)) => t,
None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
}
};
let mut ops = Vec::new();
while !matches!(lookahead.token, crate::syntax::Token::CloseRoundBracket) {
self.lookahead = Some(lookahead);
ops.push(self.parse_op()?);
lookahead = if let Some(lookahead) = self.lookahead.take() {
lookahead
} else {
match self.iter.next() {
Some(Err(e)) => return Err(e.into()),
Some(Ok(t)) => t,
// TODO add tokens of previous op(s)
None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
}
};
}
self.lookahead = Some(lookahead);
Ok(ops)
}
// [!] parse_param consumes exactly the tokens it needs and never leaves an unhandled lookahead behind
fn parse_param(&mut self) -> Result<super::Param, super::LanguageError> {
let token0 = if let Some(lookahead) = self.lookahead.take() {
lookahead
} else {
match self.iter.next() {
Some(Err(e)) => return Err(e.into()),
Some(Ok(t)) => t,
None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
}
};
if let crate::syntax::Token::Path(var_name) = token0.token {
let token1 = match self.iter.next() {
Some(Err(e)) => return Err(e.into()),
Some(Ok(t)) => t,
None => return Err(super::LanguageError::UnexpectedEnd(vec![
crate::syntax::SyntaxToken {
token: crate::syntax::Token::Path(var_name),
info: token0.info,
}
])),
};
match token1.token {
crate::syntax::Token::Colon => {
// with type declaration
let token2 = match self.iter.next() {
Some(Err(e)) => return Err(e.into()),
Some(Ok(t)) => t,
None => return Err(super::LanguageError::UnexpectedEnd(vec![
crate::syntax::SyntaxToken {
token: crate::syntax::Token::Path(var_name),
info: token0.info,
},
crate::syntax::SyntaxToken {
token: crate::syntax::Token::Colon,
info: token1.info,
}
])),
};
if let crate::syntax::Token::Path(ty_name) = token2.token {
Ok(super::Param {
name: var_name,
type_: Some(ty_name),
})
} else {
Err(super::LanguageError::InvalidSyntax(vec![
crate::syntax::SyntaxToken {
token: crate::syntax::Token::Path(var_name),
info: token0.info,
},
crate::syntax::SyntaxToken {
token: crate::syntax::Token::Colon,
info: token1.info,
},
token2,
]))
}
},
crate::syntax::Token::Semicolon => {
// without type declaration
Ok(super::Param {
name: var_name,
type_: None,
})
},
t => Err(super::LanguageError::InvalidSyntax(vec![
crate::syntax::SyntaxToken {
token: crate::syntax::Token::Path(var_name),
info: token0.info,
},
crate::syntax::SyntaxToken {
token: t,
info: token1.info,
}
]))
}
} else {
Err(super::LanguageError::InvalidSyntax(vec![token0]))
}
}
}
impl <'a> LanguageParser<'a, crate::syntax::TokenParser<'a, logos::Lexer<'a, crate::lexer::Token>>> {
pub fn lex(s: &'a str) -> Self {
Self::new(crate::syntax::TokenParser::new(crate::lexer::Token::tokenify(s)))
}