Create initial language parser
This commit is contained in:
commit
453c48b686
17 changed files with 2062 additions and 0 deletions
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
|
@ -0,0 +1 @@
|
|||
/target
|
129
Cargo.lock
generated
Normal file
129
Cargo.lock
generated
Normal file
|
@ -0,0 +1,129 @@
|
|||
# This file is automatically @generated by Cargo.
|
||||
# It is not intended for manual editing.
|
||||
version = 3
|
||||
|
||||
[[package]]
|
||||
name = "beef"
|
||||
version = "0.5.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3a8241f3ebb85c056b509d4327ad0358fbbba6ffb340bf388f26350aeda225b1"
|
||||
|
||||
[[package]]
|
||||
name = "diff"
|
||||
version = "0.1.13"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8"
|
||||
|
||||
[[package]]
|
||||
name = "fnv"
|
||||
version = "1.0.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
|
||||
|
||||
[[package]]
|
||||
name = "lazy_static"
|
||||
version = "1.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
|
||||
|
||||
[[package]]
|
||||
name = "logos"
|
||||
version = "0.14.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "161971eb88a0da7ae0c333e1063467c5b5727e7fb6b710b8db4814eade3a42e8"
|
||||
dependencies = [
|
||||
"logos-derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "logos-codegen"
|
||||
version = "0.14.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8e31badd9de5131fdf4921f6473d457e3dd85b11b7f091ceb50e4df7c3eeb12a"
|
||||
dependencies = [
|
||||
"beef",
|
||||
"fnv",
|
||||
"lazy_static",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"regex-syntax",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "logos-derive"
|
||||
version = "0.14.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1c2a69b3eb68d5bd595107c9ee58d7e07fe2bb5e360cc85b0f084dedac80de0a"
|
||||
dependencies = [
|
||||
"logos-codegen",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "muss2"
|
||||
version = "0.1.0"
|
||||
|
||||
[[package]]
|
||||
name = "muss2-lang"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"logos",
|
||||
"pretty_assertions",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pretty_assertions"
|
||||
version = "1.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "af7cee1a6c8a5b9208b3cb1061f10c0cb689087b3d8ce85fb9d2dd7a29b6ba66"
|
||||
dependencies = [
|
||||
"diff",
|
||||
"yansi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.79"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e835ff2298f5721608eb1a980ecaee1aef2c132bf95ecc026a11b7bf3c01c02e"
|
||||
dependencies = [
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quote"
|
||||
version = "1.0.35"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-syntax"
|
||||
version = "0.8.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56"
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "2.0.55"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "002a1b3dbf967edfafc32655d0f377ab0bb7b994aa1d32c8cc7e9b8bf3ebb8f0"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-ident"
|
||||
version = "1.0.12"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
|
||||
|
||||
[[package]]
|
||||
name = "yansi"
|
||||
version = "0.5.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec"
|
13
Cargo.toml
Normal file
13
Cargo.toml
Normal file
|
@ -0,0 +1,13 @@
|
|||
[package]
|
||||
name = "muss2"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
|
||||
[workspace]
|
||||
members = [
|
||||
"crates/lang"
|
||||
]
|
12
crates/lang/Cargo.toml
Normal file
12
crates/lang/Cargo.toml
Normal file
|
@ -0,0 +1,12 @@
|
|||
[package]
|
||||
name = "muss2-lang"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
logos = { version = "0.14" }
|
||||
|
||||
[dev-dependencies]
|
||||
pretty_assertions = "1.3.0"
|
16
crates/lang/src/lexer/errors.rs
Normal file
16
crates/lang/src/lexer/errors.rs
Normal file
|
@ -0,0 +1,16 @@
|
|||
/// Error type reported by the lexer.
#[derive(Debug, Default, PartialEq, Eq, Clone, Copy)]
pub enum LexError {
    /// The input could not be matched to any token; this is also the default value.
    #[default]
    UnrecognizedToken,
}

impl core::fmt::Display for LexError {
    /// Writes a short human-readable description of the error.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let message = match self {
            Self::UnrecognizedToken => "Unrecognized token",
        };
        f.write_str(message)
    }
}

impl std::error::Error for LexError {}
|
68
crates/lang/src/lexer/mod.rs
Normal file
68
crates/lang/src/lexer/mod.rs
Normal file
|
@ -0,0 +1,68 @@
|
|||
mod errors;
|
||||
pub use errors::LexError;
|
||||
|
||||
mod tokens;
|
||||
pub use tokens::{Token, TokenInfo};
|
||||
|
||||
#[cfg(test)]
mod test {
    use super::*;

    use pretty_assertions::assert_eq;

    /// Sample input that contains one instance of every token kind.
    const ALL_TOKENS_STR: &str = "u n + - * / && || => x> ~> = . n_u_ :: is_a_ :: VaR1AbLe ( ) { } : ; -12345 12345.6789 \"char[]\" /* long\ncomment */ // short comment \n \n <>";

    /// Position of a token on the first line, where column and index coincide.
    fn first_line(span: core::ops::Range<usize>) -> TokenInfo {
        TokenInfo { line: 0, column: span.clone(), index: span }
    }

    /// Lexes `ALL_TOKENS_STR` and checks every token and its position.
    #[test]
    fn parse_everything() {
        let expected = vec![
            Token::Union(first_line(0..1)),
            Token::Intersection(first_line(2..3)),
            Token::Plus(first_line(4..5)),
            Token::Minus(first_line(6..7)),
            Token::Multiply(first_line(8..9)),
            Token::Divide(first_line(10..11)),
            Token::And(first_line(12..14)),
            Token::Or(first_line(15..17)),
            Token::Map(first_line(18..20)),
            Token::Filter(first_line(21..23)),
            Token::Sort(first_line(24..26)),
            Token::Equal(first_line(27..28)),
            Token::Dot(first_line(29..30)),
            Token::Variable(("n_u_".into(), first_line(31..35))),
            Token::PathSeparator(first_line(36..38)),
            Token::Variable(("is_a_".into(), first_line(39..44))),
            Token::PathSeparator(first_line(45..47)),
            Token::Variable(("VaR1AbLe".into(), first_line(48..56))),
            Token::OpenRoundBracket(first_line(57..58)),
            Token::CloseRoundBracket(first_line(59..60)),
            Token::OpenCurlyBracket(first_line(61..62)),
            Token::CloseCurlyBracket(first_line(63..64)),
            Token::Colon(first_line(65..66)),
            Token::Semicolon(first_line(67..68)),
            Token::Integer((-12345, first_line(69..75))),
            Token::Float((12345.6789, first_line(76..86))),
            Token::String(("char[]".into(), first_line(87..95))),
            Token::LongComment((" long\ncomment ".into(), first_line(96..114))),
            Token::ShortComment((" short comment ".into(), TokenInfo { line: 1, column: 11..29, index: 115..133 })),
            Token::Newline(TokenInfo { line: 2, column: 1..2, index: 134..135 }),
            Token::Generate(TokenInfo { line: 3, column: 1..3, index: 136..138 }),
        ];

        let mut actual = Vec::new();
        for (index, token_result) in Token::tokenify(ALL_TOKENS_STR).enumerate() {
            match token_result {
                Ok(token) => actual.push(token),
                // `get` instead of indexing: if the lexer yields more items than
                // expected, an out-of-bounds panic would hide the real lex error.
                Err(e) => panic!(
                    "Token #{} (expected: {:?}) failed to parse: {:?}",
                    index,
                    expected.get(index),
                    e
                ),
            }
        }

        assert_eq!(actual, expected)
    }

    /// Lexing and then stringifying must reproduce the input text.
    #[test]
    fn parse_reversability() {
        // `stringify` writes a space after every token, including the last one.
        let expected = format!("{} ", ALL_TOKENS_STR);

        let actual = Token::stringify(Token::tokenify(&expected).map(|token_result| token_result.unwrap()));

        assert_eq!(actual, expected)
    }
}
|
240
crates/lang/src/lexer/tokens.rs
Normal file
240
crates/lang/src/lexer/tokens.rs
Normal file
|
@ -0,0 +1,240 @@
|
|||
use logos::Logos;
|
||||
|
||||
/// Position bookkeeping carried by the lexer between tokens.
///
/// All offsets are taken from `Lexer::span`, i.e. they index into the source text.
#[derive(Debug, Default, PartialEq, Eq, Clone)]
pub struct ExtraState {
    /// Number of line breaks seen so far (zero-based line of the current token).
    line: usize,
    /// Offset at which the current line starts.
    line_start: usize,
    /// Start offset of the most recently synced token.
    start: usize,
    /// End offset (exclusive) of the most recently synced token.
    end: usize,
}
|
||||
|
||||
impl ExtraState {
|
||||
fn lexer_sync(lex: &mut logos::Lexer<Token>) {
|
||||
let span = lex.span();
|
||||
lex.extras.start = span.start;
|
||||
lex.extras.end = span.end;
|
||||
}
|
||||
|
||||
fn newline(lex: &mut logos::Lexer<Token>) -> TokenInfo {
|
||||
Self::lexer_sync(lex);
|
||||
let info = lex.extras.token_info();
|
||||
lex.extras.line += 1;
|
||||
lex.extras.line_start = lex.span().end;
|
||||
info
|
||||
}
|
||||
|
||||
fn token_info(&self) -> TokenInfo {
|
||||
TokenInfo {
|
||||
line: self.line,
|
||||
column: (self.start - self.line_start)..(self.end - self.line_start),
|
||||
index: self.start..self.end,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Location of a token in the source text.
#[derive(Debug, PartialEq, Clone)]
pub struct TokenInfo {
    /// Zero-based line on which the token starts.
    pub line: usize,
    /// Span of the token relative to the start of its line.
    pub column: core::ops::Range<usize>,
    /// Span of the token relative to the start of the whole input.
    pub index: core::ops::Range<usize>,
}
|
||||
|
||||
#[derive(Logos, Debug, PartialEq, Clone)]
|
||||
#[logos(skip r"[ \t\f]+")] // Ignore this regex pattern between tokens
|
||||
#[logos(error = super::LexError)]
|
||||
#[logos(extras = ExtraState)]
|
||||
pub enum Token {
|
||||
// Operands
|
||||
// Set operations
|
||||
#[token("u", priority = 99, callback = all_cb)]
|
||||
Union(TokenInfo),
|
||||
#[token("n", priority = 99, callback = all_cb)]
|
||||
Intersection(TokenInfo),
|
||||
// Arithmetic operations (also applicable to sets)
|
||||
#[token("+", callback = all_cb)]
|
||||
Plus(TokenInfo),
|
||||
#[token("-", callback = all_cb)]
|
||||
Minus(TokenInfo),
|
||||
#[token("*", callback = all_cb)]
|
||||
Multiply(TokenInfo),
|
||||
#[token("/", callback = all_cb)]
|
||||
Divide(TokenInfo),
|
||||
// Logical operations
|
||||
#[token("&&", callback = all_cb)]
|
||||
And(TokenInfo),
|
||||
#[token("||", callback = all_cb)]
|
||||
Or(TokenInfo),
|
||||
|
||||
// Functional
|
||||
#[token("=>", callback = all_cb)]
|
||||
Map(TokenInfo),
|
||||
#[token("x>", callback = all_cb)]
|
||||
Filter(TokenInfo),
|
||||
#[token("~>", callback = all_cb)]
|
||||
Sort(TokenInfo),
|
||||
#[token("<>", callback = all_cb)]
|
||||
Generate(TokenInfo),
|
||||
|
||||
// Declarations
|
||||
|
||||
|
||||
// Basics
|
||||
#[token("=", callback = all_cb)]
|
||||
Equal(TokenInfo),
|
||||
#[token("::", callback = all_cb)]
|
||||
PathSeparator(TokenInfo),
|
||||
#[token(".", callback = all_cb)]
|
||||
Dot(TokenInfo),
|
||||
#[regex("[a-zA-Z_][a-zA-Z_0-9]*", priority = 1, callback = variable_cb)]
|
||||
Variable((String, TokenInfo)),
|
||||
#[token("(", callback = all_cb)]
|
||||
OpenRoundBracket(TokenInfo),
|
||||
#[token(")", callback = all_cb)]
|
||||
CloseRoundBracket(TokenInfo),
|
||||
#[token("{", callback = all_cb)]
|
||||
OpenCurlyBracket(TokenInfo),
|
||||
#[token("}", callback = all_cb)]
|
||||
CloseCurlyBracket(TokenInfo),
|
||||
#[token(":", callback = all_cb)]
|
||||
Colon(TokenInfo),
|
||||
#[token(";", callback = all_cb)]
|
||||
Semicolon(TokenInfo),
|
||||
|
||||
// Literals
|
||||
#[regex("-?[1-9][0-9]*", priority = 1, callback = integer_cb)]
|
||||
Integer((i64, TokenInfo)),
|
||||
#[regex("-?[1-9][0-9]*\\.[0-9]+", priority = 99, callback = float_cb)]
|
||||
Float((f64, TokenInfo)),
|
||||
#[regex(r#""([^"\\]|\\["\\bnfrt]|u[a-fA-F0-9]{4})*""#, priority = 1, callback = string_cb)]
|
||||
String((String, TokenInfo)),
|
||||
|
||||
/// Comments
|
||||
#[regex(r#"\/\*([^\*]+(\*[^\/])?)*\*\/"#, priority = 1, callback = multiline_comment_cb)]
|
||||
LongComment((String, TokenInfo)),
|
||||
#[regex("\\/\\/[^\n]*\n", priority = 1, callback = oneline_comment_cb)]
|
||||
ShortComment((String, TokenInfo)),
|
||||
|
||||
/// Ignore
|
||||
#[regex(r"\n", newline_cb)]
|
||||
Newline(TokenInfo),
|
||||
}
|
||||
|
||||
/// Lexer callback used by every token: records the current span in the
/// lexer extras and returns the resulting position.
fn all_cb(lex: &mut logos::Lexer<Token>) -> TokenInfo {
    let span = lex.span();
    let extras = &mut lex.extras;
    extras.start = span.start;
    extras.end = span.end;
    extras.token_info()
}
|
||||
|
||||
fn variable_cb(lex: &mut logos::Lexer<Token>) -> (String, TokenInfo) {
|
||||
let slice = lex.slice();
|
||||
(slice.to_owned(), all_cb(lex))
|
||||
}
|
||||
|
||||
fn integer_cb(lex: &mut logos::Lexer<Token>) -> (i64, TokenInfo) {
|
||||
let slice = lex.slice();
|
||||
(slice.parse().unwrap(), all_cb(lex))
|
||||
}
|
||||
|
||||
fn float_cb(lex: &mut logos::Lexer<Token>) -> (f64, TokenInfo) {
|
||||
let slice = lex.slice();
|
||||
(slice.parse().unwrap(), all_cb(lex))
|
||||
}
|
||||
|
||||
fn string_cb(lex: &mut logos::Lexer<Token>) -> (String, TokenInfo) {
|
||||
let slice = lex.slice();
|
||||
// TODO handle escaped chars
|
||||
(slice[1..slice.len()-1].to_owned(), all_cb(lex))
|
||||
}
|
||||
|
||||
fn multiline_comment_cb(lex: &mut logos::Lexer<Token>) -> (String, TokenInfo) {
|
||||
let slice = lex.slice();
|
||||
let info = all_cb(lex);
|
||||
for (i, c) in slice.chars().enumerate() {
|
||||
if c == '\n' {
|
||||
lex.extras.line += 1;
|
||||
lex.extras.line_start = lex.span().start + i + 1;
|
||||
}
|
||||
}
|
||||
(slice[2..slice.len()-2].to_owned(), info)
|
||||
}
|
||||
|
||||
fn oneline_comment_cb(lex: &mut logos::Lexer<Token>) -> (String, TokenInfo) {
|
||||
let slice = lex.slice();
|
||||
let info = all_cb(lex);
|
||||
lex.extras.line += 1;
|
||||
lex.extras.line_start = lex.span().end;
|
||||
(slice[2..slice.len()-1].to_owned(), info)
|
||||
}
|
||||
|
||||
fn newline_cb(lex: &mut logos::Lexer<Token>) -> TokenInfo {
|
||||
ExtraState::newline(lex)
|
||||
}
|
||||
|
||||
impl Token {
|
||||
pub fn tokenify<'a>(s: &'a str) -> logos::Lexer<'a, Self> {
|
||||
Token::lexer(s)
|
||||
}
|
||||
|
||||
pub fn stringify<'a>(tokens: impl core::iter::Iterator<Item=Self> + 'a) -> String {
|
||||
use core::fmt::Write;
|
||||
let mut result = String::new();
|
||||
tokens.for_each(|t| {
|
||||
t.write_str(&mut result).unwrap();
|
||||
write!(result, " ").unwrap();
|
||||
});
|
||||
result
|
||||
}
|
||||
|
||||
pub fn stringify_ref<'a, 'b>(tokens: impl core::iter::Iterator<Item=&'b Self> + 'a) -> String {
|
||||
use core::fmt::Write;
|
||||
let mut result = String::new();
|
||||
tokens.for_each(|t| {
|
||||
t.write_str(&mut result).unwrap();
|
||||
write!(result, " ").unwrap();
|
||||
});
|
||||
result
|
||||
}
|
||||
|
||||
pub fn as_str(&self) -> String {
|
||||
let mut s = String::new();
|
||||
self.write_str(&mut s).unwrap();
|
||||
s
|
||||
}
|
||||
|
||||
pub fn write_str(&self, result: &mut String) -> std::fmt::Result {
|
||||
use core::fmt::Write;
|
||||
match self {
|
||||
Self::Union(_) => write!(result, "u"),
|
||||
Self::Intersection(_) => write!(result, "n"),
|
||||
Self::Plus(_) => write!(result, "+"),
|
||||
Self::Minus(_) => write!(result, "-"),
|
||||
Self::Multiply(_) => write!(result, "*"),
|
||||
Self::Divide(_) => write!(result, "/"),
|
||||
Self::And(_) => write!(result, "&&"),
|
||||
Self::Or(_) => write!(result, "||"),
|
||||
Self::Map(_) => write!(result, "=>"),
|
||||
Self::Filter(_) => write!(result, "x>"),
|
||||
Self::Sort(_) => write!(result, "~>"),
|
||||
Self::Generate(_) => write!(result, "<>"),
|
||||
Self::Equal(_) => write!(result, "="),
|
||||
Self::PathSeparator(_) => write!(result, "::"),
|
||||
Self::Dot(_) => write!(result, "."),
|
||||
Self::Variable((name, _)) => write!(result, "{}", name),
|
||||
Self::OpenRoundBracket(_) => write!(result, "("),
|
||||
Self::CloseRoundBracket(_) => write!(result, ")"),
|
||||
Self::OpenCurlyBracket(_) => write!(result, "{{"),
|
||||
Self::CloseCurlyBracket(_) => write!(result, "}}"),
|
||||
Self::Colon(_) => write!(result, ":"),
|
||||
Self::Semicolon(_) => write!(result, ";"),
|
||||
Self::Integer((int, _)) => write!(result, "{}", int),
|
||||
Self::Float((float, _)) => write!(result, "{}", float),
|
||||
Self::String((s, _)) => write!(result, "\"{}\"", s),
|
||||
Self::LongComment((c, _)) => write!(result, "/*{}*/", c),
|
||||
Self::ShortComment((c, _)) => write!(result, "//{}\n", c),
|
||||
Self::Newline(_) => write!(result, "\n"),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_ignore(&self) -> bool {
|
||||
matches!(self, Self::Newline(_))
|
||||
}
|
||||
}
|
7
crates/lang/src/lib.rs
Normal file
7
crates/lang/src/lib.rs
Normal file
|
@ -0,0 +1,7 @@
|
|||
//! Language specification
|
||||
//!
|
||||
//! Parsing order: lexer -> syntax -> statement
|
||||
|
||||
pub mod lexer;
|
||||
pub mod statement;
|
||||
pub mod syntax;
|
32
crates/lang/src/statement/errors.rs
Normal file
32
crates/lang/src/statement/errors.rs
Normal file
|
@ -0,0 +1,32 @@
|
|||
#[derive(Debug, PartialEq, Clone)]
|
||||
pub enum LanguageError {
|
||||
InvalidSequence(Vec<crate::lexer::Token>),
|
||||
InvalidSyntax(Vec<crate::syntax::SyntaxToken>),
|
||||
UnexpectedEnd(Vec<crate::syntax::SyntaxToken>),
|
||||
UnexpectedToken(crate::syntax::SyntaxToken),
|
||||
UnrecognizedToken,
|
||||
}
|
||||
|
||||
impl core::fmt::Display for LanguageError {
|
||||
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
|
||||
//use core::fmt::Write;
|
||||
match self {
|
||||
Self::InvalidSequence(seq) => write!(f, "Invalid sequence {:?}", seq.as_slice()),
|
||||
Self::InvalidSyntax(seq) => write!(f, "Invalid syntax {}", crate::syntax::SyntaxToken::stringify_ref(seq.iter())),
|
||||
Self::UnexpectedEnd(seq) => write!(f, "Unexpected end of file {} <EOF>", crate::syntax::SyntaxToken::stringify_ref(seq.iter())),
|
||||
Self::UnexpectedToken(token) => write!(f, "Unexpected token {}", token.as_str()),
|
||||
Self::UnrecognizedToken => write!(f, "Unrecognized token"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::error::Error for LanguageError {}
|
||||
|
||||
impl From<crate::syntax::SyntaxError> for LanguageError {
|
||||
fn from(value: crate::syntax::SyntaxError) -> Self {
|
||||
match value {
|
||||
crate::syntax::SyntaxError::UnrecognizedToken => Self::UnrecognizedToken,
|
||||
crate::syntax::SyntaxError::InvalidSequence(seq) => Self::InvalidSequence(seq),
|
||||
}
|
||||
}
|
||||
}
|
133
crates/lang/src/statement/mod.rs
Normal file
133
crates/lang/src/statement/mod.rs
Normal file
|
@ -0,0 +1,133 @@
|
|||
//! High-level language
|
||||
mod errors;
|
||||
pub use errors::LanguageError;
|
||||
|
||||
mod parser;
|
||||
pub use parser::LanguageParser;
|
||||
|
||||
mod tree;
|
||||
pub use tree::{Statement, Notification, Param, Declare, DeclareFun, DeclareType, Module, Op, DeclareAssignVar, DeclareVar, AssignVar, Dyadic, CallFun};
|
||||
|
||||
// my_namespace { <>generate_fn()=>map_fn()x>filter_fn(_)~>sort_fn(_) }
|
||||
|
||||
#[cfg(test)]
mod test {
    use super::*;

    use pretty_assertions::assert_eq;

    /// Prints `e_display` together with the source position of `bad_syntax`.
    ///
    /// The parser frequently reports errors with an empty token list (for
    /// example `UnexpectedEnd(Vec::new())`), so a missing token or missing
    /// position is reported as such instead of panicking over it.
    fn report_location(e_display: &str, bad_syntax: Option<&crate::syntax::SyntaxToken>, i: usize) {
        match bad_syntax.and_then(|syntax| syntax.info.last()) {
            Some(bad_lex) => eprintln!(
                "{} @ line {}, column {} to {} (index {} to {}), token #{}",
                e_display, bad_lex.line,
                bad_lex.column.start, bad_lex.column.end,
                bad_lex.index.start, bad_lex.index.end, i
            ),
            None => eprintln!("{} @ unknown location, token #{}", e_display, i),
        }
    }

    /// Collects all non-ignorable statements, panicking on the first error
    /// after printing where it occurred.
    fn assert_no_errors(iter: impl Iterator<Item=Result<Statement, LanguageError>>) -> Vec<Statement> {
        let mut statements = Vec::new();
        for (i, res) in iter.enumerate() {
            match res {
                Ok(statement) => if !statement.is_ignore() { statements.push(statement); },
                Err(e) => {
                    let e_display = e.to_string();
                    match &e {
                        LanguageError::InvalidSequence(_seq) => {},
                        LanguageError::InvalidSyntax(seq) | LanguageError::UnexpectedEnd(seq) => {
                            report_location(&e_display, seq.last(), i);
                        },
                        LanguageError::UnexpectedToken(bad_syntax) => {
                            report_location(&e_display, Some(bad_syntax), i);
                        },
                        LanguageError::UnrecognizedToken => {
                            eprintln!("Unrecognized token #{} ?!?!", i);
                        }
                    }
                    panic!("{} for token #{}", e_display, i);
                }
            }
        }
        statements
    }

    #[test]
    fn parse_minimum_module() {
        let parser = LanguageParser::lex("my_module {}");

        let parsed = assert_no_errors(parser);
        assert_eq!(vec![
            Statement::Module(Module {
                name: crate::syntax::Path(vec![
                    "my_module".into(),
                ]),
                inner: Vec::new(),
            }),
        ], parsed);
    }

    #[test]
    fn parse_minimum_function_declaration() {
        let parser = LanguageParser::lex("my_generator () <> {}");

        let parsed = assert_no_errors(parser);
        assert_eq!(vec![
            Statement::Declare(Declare::Function(DeclareFun {
                name: crate::syntax::Path(vec![
                    "my_generator".into(),
                ]),
                params: Vec::new(),
                type_: crate::syntax::Functional::Generate,
                ops: Vec::new(),
            }))
        ], parsed);
    }

    #[test]
    fn parse_minimum_type_declaration() {
        let parser = LanguageParser::lex("my_type = {}");

        let parsed = assert_no_errors(parser);
        assert_eq!(vec![
            Statement::Declare(Declare::Type(DeclareType {
                name: crate::syntax::Path(vec![
                    "my_type".into(),
                ]),
                params: Vec::new(),
            }))
        ], parsed);
    }

    #[test]
    fn parse_minimum_entrypoint() {
        let parser = LanguageParser::lex("<> my_generator ()");

        let parsed = assert_no_errors(parser);
        assert_eq!(vec![
            Statement::Entrypoint(CallFun {
                type_: crate::syntax::Functional::Generate,
                var: crate::syntax::Path(vec![
                    "my_generator".into(),
                ]),
                params: Vec::new(),
            })
        ], parsed);
    }
}
|
769
crates/lang/src/statement/parser.rs
Normal file
769
crates/lang/src/statement/parser.rs
Normal file
|
@ -0,0 +1,769 @@
|
|||
pub struct LanguageParser<'a, I: core::iter::Iterator<Item=Result<crate::syntax::SyntaxToken, crate::syntax::SyntaxError>> + 'a> {
|
||||
_idc: core::marker::PhantomData<&'a ()>,
|
||||
iter: I,
|
||||
lookahead: Option<crate::syntax::SyntaxToken>,
|
||||
incomplete_modules: Vec<super::Module>,
|
||||
}
|
||||
|
||||
impl <'a, I: core::iter::Iterator<Item=Result<crate::syntax::SyntaxToken, crate::syntax::SyntaxError>> + 'a> LanguageParser<'a, I> {
|
||||
pub fn new(tokens_in: I) -> Self {
|
||||
Self {
|
||||
_idc: Default::default(),
|
||||
iter: tokens_in,
|
||||
lookahead: None,
|
||||
incomplete_modules: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds a module called `name` with an empty body; never fails and consumes no tokens.
fn parse_incomplete_module(&mut self, name: crate::syntax::Path) -> Result<super::Module, super::LanguageError> {
    let inner = Vec::new();
    Ok(super::Module { name, inner })
}
|
||||
|
||||
fn parse_function_decl(&mut self, name: crate::syntax::Path) -> Result<super::DeclareFun, super::LanguageError> {
|
||||
// `Name (` (first 2 tokens) are already consumed
|
||||
let mut lookahead = if let Some(lookahead) = self.lookahead.take() {
|
||||
lookahead
|
||||
} else {
|
||||
match self.iter.next() {
|
||||
Some(Err(e)) => return Err(e.into()),
|
||||
Some(Ok(t)) => t,
|
||||
None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
|
||||
}
|
||||
};
|
||||
// function params
|
||||
let mut params = Vec::new();
|
||||
while !matches!(lookahead.token, crate::syntax::Token::CloseRoundBracket) {
|
||||
self.lookahead = Some(lookahead);
|
||||
params.push(self.parse_param()?);
|
||||
lookahead = match self.iter.next() {
|
||||
Some(Err(e)) => return Err(e.into()),
|
||||
Some(Ok(t)) => t,
|
||||
None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
|
||||
};
|
||||
}
|
||||
// function type
|
||||
let token1 = match self.iter.next() {
|
||||
Some(Err(e)) => return Err(e.into()),
|
||||
Some(Ok(t)) => t,
|
||||
None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
|
||||
};
|
||||
let fn_type = if let crate::syntax::Token::Functional(fn_type) = token1.token {
|
||||
fn_type
|
||||
} else {
|
||||
return Err(super::LanguageError::InvalidSyntax(vec![token1]));
|
||||
};
|
||||
// operations
|
||||
let token2 = match self.iter.next() {
|
||||
Some(Err(e)) => return Err(e.into()),
|
||||
Some(Ok(t)) => t,
|
||||
None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
|
||||
};
|
||||
if !matches!(token2.token, crate::syntax::Token::OpenCurlyBracket) {
|
||||
return Err(super::LanguageError::InvalidSyntax(vec![
|
||||
crate::syntax::SyntaxToken {
|
||||
token: crate::syntax::Token::Functional(fn_type),
|
||||
info: token1.info,
|
||||
},
|
||||
token2
|
||||
]));
|
||||
}
|
||||
lookahead = match self.iter.next() {
|
||||
Some(Err(e)) => return Err(e.into()),
|
||||
Some(Ok(t)) => t,
|
||||
None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
|
||||
};
|
||||
let mut ops = Vec::new();
|
||||
while !matches!(lookahead.token, crate::syntax::Token::CloseCurlyBracket) {
|
||||
self.lookahead = Some(lookahead);
|
||||
ops.push(self.parse_op()?);
|
||||
lookahead = match self.iter.next() {
|
||||
Some(Err(e)) => return Err(e.into()),
|
||||
Some(Ok(t)) => t,
|
||||
None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
|
||||
};
|
||||
}
|
||||
Ok(super::DeclareFun {
|
||||
name,
|
||||
params,
|
||||
type_: fn_type,
|
||||
ops,
|
||||
})
|
||||
}
|
||||
|
||||
fn parse_type_decl(&mut self, name: crate::syntax::Path) -> Result<super::DeclareType, super::LanguageError> {
|
||||
// `Name =` (first 2 tokens) are already consumed
|
||||
let token0 = if let Some(lookahead) = self.lookahead.take() {
|
||||
lookahead
|
||||
} else {
|
||||
match self.iter.next() {
|
||||
Some(Err(e)) => return Err(e.into()),
|
||||
Some(Ok(t)) => t,
|
||||
None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
|
||||
}
|
||||
};
|
||||
match token0.token {
|
||||
crate::syntax::Token::OpenCurlyBracket => {
|
||||
let mut lookahead = match self.iter.next() {
|
||||
Some(Err(e)) => return Err(e.into()),
|
||||
Some(Ok(t)) => t,
|
||||
None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
|
||||
};
|
||||
let mut params = Vec::new();
|
||||
while !matches!(lookahead.token, crate::syntax::Token::CloseCurlyBracket) {
|
||||
self.lookahead = Some(lookahead);
|
||||
params.push(self.parse_param()?);
|
||||
lookahead = match self.iter.next() {
|
||||
Some(Err(e)) => return Err(e.into()),
|
||||
Some(Ok(t)) => t,
|
||||
None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
|
||||
};
|
||||
}
|
||||
Ok(super::DeclareType {
|
||||
name,
|
||||
params,
|
||||
})
|
||||
}
|
||||
t => Err(super::LanguageError::InvalidSyntax(vec![
|
||||
crate::syntax::SyntaxToken {
|
||||
token: t,
|
||||
info: token0.info,
|
||||
}
|
||||
]))
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_function_call(&mut self, fun: crate::syntax::Functional) -> Result<super::CallFun, super::LanguageError> {
|
||||
// `Func` (first token) is already consumed
|
||||
let token0 = if let Some(lookahead) = self.lookahead.take() {
|
||||
lookahead
|
||||
} else {
|
||||
match self.iter.next() {
|
||||
Some(Err(e)) => return Err(e.into()),
|
||||
Some(Ok(t)) => t,
|
||||
None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
|
||||
}
|
||||
};
|
||||
let name = if let crate::syntax::Token::Path(name) = token0.token {
|
||||
name
|
||||
} else {
|
||||
return Err(super::LanguageError::UnexpectedToken(token0));
|
||||
};
|
||||
let token1 = match self.iter.next() {
|
||||
Some(Err(e)) => return Err(e.into()),
|
||||
Some(Ok(t)) => t,
|
||||
None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
|
||||
};
|
||||
if !matches!(token1.token, crate::syntax::Token::OpenRoundBracket) {
|
||||
return Err(super::LanguageError::UnexpectedToken(token1));
|
||||
}
|
||||
let op_params = match self.parse_op_params() {
|
||||
Ok(ops) => ops,
|
||||
Err(e) => {
|
||||
return Err(Self::extend_err_tokens(vec![
|
||||
crate::syntax::SyntaxToken {
|
||||
token: crate::syntax::Token::Path(name),
|
||||
info: token0.info,
|
||||
},
|
||||
crate::syntax::SyntaxToken {
|
||||
token: crate::syntax::Token::OpenRoundBracket,
|
||||
info: token1.info,
|
||||
},
|
||||
], e));
|
||||
}
|
||||
};
|
||||
let token_last = if let Some(lookahead) = self.lookahead.take() {
|
||||
lookahead
|
||||
} else {
|
||||
match self.iter.next() {
|
||||
Some(Err(e)) => return Err(e.into()),
|
||||
Some(Ok(t)) => t,
|
||||
None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
|
||||
}
|
||||
};
|
||||
if !matches!(token_last.token, crate::syntax::Token::CloseRoundBracket) {
|
||||
return Err(super::LanguageError::UnexpectedToken(token1));
|
||||
}
|
||||
Ok(super::CallFun {
|
||||
type_: fun,
|
||||
var: name,
|
||||
params: op_params,
|
||||
})
|
||||
}
|
||||
|
||||
fn parse_op(&mut self) -> Result<super::Op, super::LanguageError> {
|
||||
let op = self.parse_inner_op(0)?;
|
||||
let token_last = if let Some(lookahead) = self.lookahead.take() {
|
||||
lookahead
|
||||
} else {
|
||||
match self.iter.next() {
|
||||
Some(Err(e)) => return Err(e.into()),
|
||||
Some(Ok(t)) => t,
|
||||
None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
|
||||
}
|
||||
};
|
||||
if let crate::syntax::Token::Semicolon = token_last.token {
|
||||
Ok(op)
|
||||
} else {
|
||||
Err(super::LanguageError::UnexpectedToken(token_last))
|
||||
}
|
||||
}
|
||||
|
||||
/// Prefixes the token sequence carried by an error with `tokens`.
///
/// For `InvalidSyntax` and `UnexpectedEnd`, the returned error holds `tokens`
/// followed by the tokens the error already carried. Every other error variant
/// is returned unchanged and `tokens` is discarded.
fn extend_err_tokens(mut tokens: Vec<crate::syntax::SyntaxToken>, err: super::LanguageError) -> super::LanguageError {
    match err {
        super::LanguageError::InvalidSyntax(trailing) => {
            tokens.extend(trailing);
            super::LanguageError::InvalidSyntax(tokens)
        }
        super::LanguageError::UnexpectedEnd(trailing) => {
            tokens.extend(trailing);
            super::LanguageError::UnexpectedEnd(tokens)
        }
        other => other,
    }
}
|
||||
|
||||
fn parse_inner_op(&mut self, recursion_level: usize) -> Result<super::Op, super::LanguageError> {
|
||||
let token0 = if let Some(lookahead) = self.lookahead.take() {
|
||||
lookahead
|
||||
} else {
|
||||
match self.iter.next() {
|
||||
Some(Err(e)) => return Err(e.into()),
|
||||
Some(Ok(t)) => t,
|
||||
None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
|
||||
}
|
||||
};
|
||||
let op0 = match token0.token {
|
||||
crate::syntax::Token::Path(var_name) => {
|
||||
// variable-oriented operations
|
||||
let token1 = match self.iter.next() {
|
||||
Some(Err(e)) => return Err(e.into()),
|
||||
Some(Ok(t)) => t,
|
||||
None => return Err(super::LanguageError::UnexpectedEnd(vec![
|
||||
crate::syntax::SyntaxToken {
|
||||
token: crate::syntax::Token::Path(var_name),
|
||||
info: token0.info,
|
||||
}
|
||||
])),
|
||||
};
|
||||
match token1.token {
|
||||
crate::syntax::Token::Colon => {
|
||||
// Declare-assign or declare
|
||||
let mut lookahead = match self.iter.next() {
|
||||
Some(Err(e)) => return Err(e.into()),
|
||||
Some(Ok(t)) => t,
|
||||
None => return Err(super::LanguageError::UnexpectedEnd(vec![
|
||||
crate::syntax::SyntaxToken {
|
||||
token: crate::syntax::Token::Path(var_name),
|
||||
info: token0.info,
|
||||
},
|
||||
crate::syntax::SyntaxToken {
|
||||
token: crate::syntax::Token::Colon,
|
||||
info: token1.info,
|
||||
},
|
||||
])),
|
||||
};
|
||||
let type_name = if let crate::syntax::Token::Path(type_name) = lookahead.token {
|
||||
let type_token_info = lookahead.info;
|
||||
lookahead = match self.iter.next() {
|
||||
Some(Err(e)) => return Err(e.into()),
|
||||
Some(Ok(t)) => t,
|
||||
None => return Err(super::LanguageError::UnexpectedEnd(vec![
|
||||
crate::syntax::SyntaxToken {
|
||||
token: crate::syntax::Token::Path(var_name),
|
||||
info: token0.info,
|
||||
},
|
||||
crate::syntax::SyntaxToken {
|
||||
token: crate::syntax::Token::Colon,
|
||||
info: token1.info,
|
||||
},
|
||||
crate::syntax::SyntaxToken {
|
||||
token: crate::syntax::Token::Path(type_name),
|
||||
info: type_token_info,
|
||||
},
|
||||
])),
|
||||
};
|
||||
Some((type_name, type_token_info))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
if let crate::syntax::Token::Equal = lookahead.token {
|
||||
// Declare-Assign
|
||||
let inner_op = match self.parse_inner_op(recursion_level + 1) {
|
||||
Ok(op) => op,
|
||||
Err(e) => {
|
||||
// roughly equivalent to self.parse_inner_op(...).map_err(|e| { ... })
|
||||
// (the closure captures variables which the compiler can't prove aren't used in this fn after)
|
||||
let tokens = if let Some((type_name, type_token_info)) = type_name {
|
||||
vec![
|
||||
crate::syntax::SyntaxToken {
|
||||
token: crate::syntax::Token::Path(var_name),
|
||||
info: token0.info,
|
||||
},
|
||||
crate::syntax::SyntaxToken {
|
||||
token: crate::syntax::Token::Colon,
|
||||
info: token1.info,
|
||||
},
|
||||
crate::syntax::SyntaxToken {
|
||||
token: crate::syntax::Token::Path(type_name),
|
||||
info: type_token_info,
|
||||
},
|
||||
crate::syntax::SyntaxToken {
|
||||
token: crate::syntax::Token::Equal,
|
||||
info: lookahead.info,
|
||||
},
|
||||
]
|
||||
} else {
|
||||
vec![
|
||||
crate::syntax::SyntaxToken {
|
||||
token: crate::syntax::Token::Path(var_name),
|
||||
info: token0.info,
|
||||
},
|
||||
crate::syntax::SyntaxToken {
|
||||
token: crate::syntax::Token::Colon,
|
||||
info: token1.info,
|
||||
},
|
||||
crate::syntax::SyntaxToken {
|
||||
token: crate::syntax::Token::Equal,
|
||||
info: lookahead.info,
|
||||
},
|
||||
]
|
||||
};
|
||||
return Err(Self::extend_err_tokens(tokens, e));
|
||||
}
|
||||
};
|
||||
super::Op::DeclareAssign(super::tree::DeclareAssignVar {
|
||||
var: var_name,
|
||||
type_: type_name.map(|x| x.0),
|
||||
op: Box::new(inner_op),
|
||||
})
|
||||
} else {
|
||||
// declare
|
||||
self.lookahead = Some(lookahead);
|
||||
super::Op::Declare(super::tree::DeclareVar {
|
||||
var: var_name,
|
||||
type_: type_name.map(|x| x.0),
|
||||
})
|
||||
}
|
||||
},
|
||||
crate::syntax::Token::Equal => {
|
||||
// Assign
|
||||
let token2 = match self.iter.next() {
|
||||
Some(Err(e)) => return Err(e.into()),
|
||||
Some(Ok(t)) => t,
|
||||
None => return Err(super::LanguageError::UnexpectedEnd(vec![
|
||||
crate::syntax::SyntaxToken {
|
||||
token: crate::syntax::Token::Path(var_name),
|
||||
info: token0.info,
|
||||
},
|
||||
crate::syntax::SyntaxToken {
|
||||
token: crate::syntax::Token::Equal,
|
||||
info: token1.info,
|
||||
}
|
||||
])),
|
||||
};
|
||||
if let crate::syntax::Token::Field(f) = token2.token {
|
||||
let inner_op = self.parse_inner_op(recursion_level + 1)?;
|
||||
super::Op::Assign(crate::statement::tree::AssignVar {
|
||||
var: var_name,
|
||||
field: Some(f),
|
||||
op: Box::new(inner_op),
|
||||
})
|
||||
} else {
|
||||
self.lookahead = Some(token2);
|
||||
let inner_op = self.parse_inner_op(recursion_level + 1)?;
|
||||
super::Op::Assign(crate::statement::tree::AssignVar {
|
||||
var: var_name,
|
||||
field: None,
|
||||
op: Box::new(inner_op),
|
||||
})
|
||||
}
|
||||
},
|
||||
/*crate::syntax::Token::OpenRoundBracket => {
|
||||
// Call
|
||||
let op_params = match self.parse_op_params() {
|
||||
Ok(ops) => ops,
|
||||
Err(e) => {
|
||||
return Err(Self::extend_err_tokens(vec![
|
||||
crate::syntax::SyntaxToken {
|
||||
token: crate::syntax::Token::Path(var_name),
|
||||
info: token0.info,
|
||||
},
|
||||
crate::syntax::SyntaxToken {
|
||||
token: crate::syntax::Token::OpenRoundBracket,
|
||||
info: token1.info,
|
||||
},
|
||||
], e));
|
||||
}
|
||||
};
|
||||
self.lookahead.take().unwrap(); // always a closing round bracket; no need to verify
|
||||
let token_filter = match self.iter.next() {
|
||||
Some(Err(e)) => return Err(e.into()),
|
||||
Some(Ok(t)) => t,
|
||||
None => return Err(super::LanguageError::UnexpectedEnd(vec![
|
||||
crate::syntax::SyntaxToken {
|
||||
token: crate::syntax::Token::Path(var_name),
|
||||
info: token0.info,
|
||||
},
|
||||
crate::syntax::SyntaxToken {
|
||||
token: crate::syntax::Token::Equal,
|
||||
info: token1.info,
|
||||
},
|
||||
// TODO include tokens from op_params
|
||||
])),
|
||||
};
|
||||
if let crate::syntax::Token::Functional(crate::syntax::Functional::Filter) = token_filter.token {
|
||||
let filter_op = match self.parse_inner_op(recursion_level + 1) {
|
||||
Ok(x) => x,
|
||||
Err(e) => {
|
||||
return Err(Self::extend_err_tokens(vec![
|
||||
crate::syntax::SyntaxToken {
|
||||
token: crate::syntax::Token::Path(var_name),
|
||||
info: token0.info,
|
||||
},
|
||||
crate::syntax::SyntaxToken {
|
||||
token: crate::syntax::Token::Equal,
|
||||
info: token1.info,
|
||||
},
|
||||
// TODO include tokens from op_params
|
||||
], e))
|
||||
}
|
||||
};
|
||||
super::Op::Call(super::CallVar {
|
||||
var: var_name,
|
||||
params: op_params,
|
||||
})
|
||||
} else {
|
||||
return Err(super::LanguageError::UnexpectedToken(token_filter));
|
||||
}
|
||||
}*/
|
||||
t => {
|
||||
// Retrieve
|
||||
self.lookahead = Some(crate::syntax::SyntaxToken {
|
||||
token: t,
|
||||
info: token1.info,
|
||||
});
|
||||
super::Op::Retrieve(var_name)
|
||||
}
|
||||
}
|
||||
},
|
||||
crate::syntax::Token::Functional(fun) => {
|
||||
// Call
|
||||
match self.parse_function_call(fun.clone()) {
|
||||
Ok(x) => super::Op::Call(x),
|
||||
Err(e) => {
|
||||
return Err(Self::extend_err_tokens(vec![
|
||||
crate::syntax::SyntaxToken {
|
||||
token: crate::syntax::Token::Functional(fun),
|
||||
info: token0.info,
|
||||
}
|
||||
], e));
|
||||
}
|
||||
}
|
||||
}
|
||||
crate::syntax::Token::Operation(unary_op) => {
|
||||
// Unary operation
|
||||
let inner_op = match self.parse_inner_op(recursion_level + 1) {
|
||||
Ok(op) => op,
|
||||
Err(e) => {
|
||||
return Err(Self::extend_err_tokens(vec![
|
||||
crate::syntax::SyntaxToken {
|
||||
token: crate::syntax::Token::Operation(unary_op),
|
||||
info: token0.info,
|
||||
}
|
||||
], e));
|
||||
}
|
||||
};
|
||||
super::Op::Unary(super::tree::Unary {
|
||||
first: Box::new(inner_op),
|
||||
op: unary_op,
|
||||
})
|
||||
},
|
||||
crate::syntax::Token::Literal(literal) => {
|
||||
super::Op::Literal(literal)
|
||||
},
|
||||
crate::syntax::Token::OpenRoundBracket => {
|
||||
// Operation surrounded by brackets
|
||||
let inner_op = match self.parse_inner_op(recursion_level + 1) {
|
||||
Ok(op) => op,
|
||||
Err(e) => {
|
||||
return Err(Self::extend_err_tokens(vec![
|
||||
crate::syntax::SyntaxToken {
|
||||
token: crate::syntax::Token::OpenRoundBracket,
|
||||
info: token0.info,
|
||||
}
|
||||
], e));
|
||||
}
|
||||
};
|
||||
let token_last = if let Some(lookahead) = self.lookahead.take() {
|
||||
lookahead
|
||||
} else {
|
||||
match self.iter.next() {
|
||||
Some(Err(e)) => return Err(e.into()),
|
||||
Some(Ok(t)) => t,
|
||||
// TODO include all tokens from inner_op
|
||||
None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
|
||||
}
|
||||
};
|
||||
if let crate::syntax::Token::CloseRoundBracket = token_last.token {
|
||||
super::Op::Bracketed(Box::new(inner_op))
|
||||
} else {
|
||||
// TODO maybe? include all tokens from inner_op
|
||||
return Err(super::LanguageError::UnexpectedToken(token_last));
|
||||
}
|
||||
}
|
||||
t => {
|
||||
return Err(super::LanguageError::InvalidSyntax(vec![
|
||||
crate::syntax::SyntaxToken {
|
||||
token: t,
|
||||
info: token0.info,
|
||||
}
|
||||
]));
|
||||
}
|
||||
};
|
||||
|
||||
// check if operation continues (i.e. is dyadic)
|
||||
let lookahead = if let Some(lookahead) = self.lookahead.take() {
|
||||
lookahead
|
||||
} else {
|
||||
match self.iter.next() {
|
||||
Some(Err(e)) => return Err(e.into()),
|
||||
Some(Ok(t)) => t,
|
||||
None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
|
||||
}
|
||||
};
|
||||
if let crate::syntax::Token::Operation(dyadic_op) = lookahead.token {
|
||||
let op1 = match self.parse_inner_op(recursion_level + 1) {
|
||||
Ok(op) => op,
|
||||
Err(e) => {
|
||||
return Err(Self::extend_err_tokens(vec![
|
||||
// TODO add tokens of op0 too
|
||||
crate::syntax::SyntaxToken {
|
||||
token: crate::syntax::Token::Operation(dyadic_op),
|
||||
info: token0.info,
|
||||
}
|
||||
], e));
|
||||
}
|
||||
};
|
||||
Ok(super::Op::Dyadic(super::Dyadic {
|
||||
first: Box::new(op0),
|
||||
op: dyadic_op,
|
||||
second: Box::new(op1),
|
||||
}))
|
||||
} else {
|
||||
self.lookahead = Some(lookahead);
|
||||
Ok(op0)
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_op_params(&mut self) -> Result<Vec<super::Op>, super::LanguageError> {
|
||||
let mut lookahead = if let Some(lookahead) = self.lookahead.take() {
|
||||
lookahead
|
||||
} else {
|
||||
match self.iter.next() {
|
||||
Some(Err(e)) => return Err(e.into()),
|
||||
Some(Ok(t)) => t,
|
||||
None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
|
||||
}
|
||||
};
|
||||
let mut ops = Vec::new();
|
||||
while !matches!(lookahead.token, crate::syntax::Token::CloseRoundBracket) {
|
||||
self.lookahead = Some(lookahead);
|
||||
ops.push(self.parse_op()?);
|
||||
lookahead = if let Some(lookahead) = self.lookahead.take() {
|
||||
lookahead
|
||||
} else {
|
||||
match self.iter.next() {
|
||||
Some(Err(e)) => return Err(e.into()),
|
||||
Some(Ok(t)) => t,
|
||||
// TODO add tokens of previous op(s)
|
||||
None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
|
||||
}
|
||||
};
|
||||
}
|
||||
self.lookahead = Some(lookahead);
|
||||
Ok(ops)
|
||||
}
|
||||
|
||||
// [!] no unhandled lookaheads
|
||||
fn parse_param(&mut self) -> Result<super::Param, super::LanguageError> {
|
||||
let token0 = if let Some(lookahead) = self.lookahead.take() {
|
||||
lookahead
|
||||
} else {
|
||||
match self.iter.next() {
|
||||
Some(Err(e)) => return Err(e.into()),
|
||||
Some(Ok(t)) => t,
|
||||
None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
|
||||
}
|
||||
};
|
||||
if let crate::syntax::Token::Path(var_name) = token0.token {
|
||||
let token1 = match self.iter.next() {
|
||||
Some(Err(e)) => return Err(e.into()),
|
||||
Some(Ok(t)) => t,
|
||||
None => return Err(super::LanguageError::UnexpectedEnd(vec![
|
||||
crate::syntax::SyntaxToken {
|
||||
token: crate::syntax::Token::Path(var_name),
|
||||
info: token0.info,
|
||||
}
|
||||
])),
|
||||
};
|
||||
match token1.token {
|
||||
crate::syntax::Token::Colon => {
|
||||
// with type declaration
|
||||
let token2 = match self.iter.next() {
|
||||
Some(Err(e)) => return Err(e.into()),
|
||||
Some(Ok(t)) => t,
|
||||
None => return Err(super::LanguageError::UnexpectedEnd(vec![
|
||||
crate::syntax::SyntaxToken {
|
||||
token: crate::syntax::Token::Path(var_name),
|
||||
info: token0.info,
|
||||
},
|
||||
crate::syntax::SyntaxToken {
|
||||
token: crate::syntax::Token::Colon,
|
||||
info: token1.info,
|
||||
}
|
||||
])),
|
||||
};
|
||||
if let crate::syntax::Token::Path(ty_name) = token2.token {
|
||||
Ok(super::Param {
|
||||
name: var_name,
|
||||
type_: Some(ty_name),
|
||||
})
|
||||
} else {
|
||||
Err(super::LanguageError::InvalidSyntax(vec![
|
||||
crate::syntax::SyntaxToken {
|
||||
token: crate::syntax::Token::Path(var_name),
|
||||
info: token0.info,
|
||||
},
|
||||
crate::syntax::SyntaxToken {
|
||||
token: crate::syntax::Token::Colon,
|
||||
info: token1.info,
|
||||
},
|
||||
token2,
|
||||
]))
|
||||
}
|
||||
},
|
||||
crate::syntax::Token::Semicolon => {
|
||||
// without type declaration
|
||||
Ok(super::Param {
|
||||
name: var_name,
|
||||
type_: None,
|
||||
})
|
||||
},
|
||||
t => Err(super::LanguageError::InvalidSyntax(vec![
|
||||
crate::syntax::SyntaxToken {
|
||||
token: crate::syntax::Token::Path(var_name),
|
||||
info: token0.info,
|
||||
},
|
||||
crate::syntax::SyntaxToken {
|
||||
token: t,
|
||||
info: token1.info,
|
||||
}
|
||||
]))
|
||||
}
|
||||
} else {
|
||||
Err(super::LanguageError::InvalidSyntax(vec![token0]))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl <'a> LanguageParser<'a, crate::syntax::TokenParser<'a, logos::Lexer<'a, crate::lexer::Token>>> {
|
||||
pub fn lex(s: &'a str) -> Self {
|
||||
Self::new(crate::syntax::TokenParser::new(crate::lexer::Token::tokenify(s)))
|
||||
}
|
||||