Create initial language parser
This commit is contained in:
commit
453c48b686
17 changed files with 2062 additions and 0 deletions
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
|
@ -0,0 +1 @@
|
|||
/target
|
129
Cargo.lock
generated
Normal file
129
Cargo.lock
generated
Normal file
|
@ -0,0 +1,129 @@
|
|||
# This file is automatically @generated by Cargo.
|
||||
# It is not intended for manual editing.
|
||||
version = 3
|
||||
|
||||
[[package]]
|
||||
name = "beef"
|
||||
version = "0.5.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3a8241f3ebb85c056b509d4327ad0358fbbba6ffb340bf388f26350aeda225b1"
|
||||
|
||||
[[package]]
|
||||
name = "diff"
|
||||
version = "0.1.13"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8"
|
||||
|
||||
[[package]]
|
||||
name = "fnv"
|
||||
version = "1.0.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
|
||||
|
||||
[[package]]
|
||||
name = "lazy_static"
|
||||
version = "1.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
|
||||
|
||||
[[package]]
|
||||
name = "logos"
|
||||
version = "0.14.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "161971eb88a0da7ae0c333e1063467c5b5727e7fb6b710b8db4814eade3a42e8"
|
||||
dependencies = [
|
||||
"logos-derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "logos-codegen"
|
||||
version = "0.14.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8e31badd9de5131fdf4921f6473d457e3dd85b11b7f091ceb50e4df7c3eeb12a"
|
||||
dependencies = [
|
||||
"beef",
|
||||
"fnv",
|
||||
"lazy_static",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"regex-syntax",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "logos-derive"
|
||||
version = "0.14.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1c2a69b3eb68d5bd595107c9ee58d7e07fe2bb5e360cc85b0f084dedac80de0a"
|
||||
dependencies = [
|
||||
"logos-codegen",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "muss2"
|
||||
version = "0.1.0"
|
||||
|
||||
[[package]]
|
||||
name = "muss2-lang"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"logos",
|
||||
"pretty_assertions",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pretty_assertions"
|
||||
version = "1.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "af7cee1a6c8a5b9208b3cb1061f10c0cb689087b3d8ce85fb9d2dd7a29b6ba66"
|
||||
dependencies = [
|
||||
"diff",
|
||||
"yansi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.79"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e835ff2298f5721608eb1a980ecaee1aef2c132bf95ecc026a11b7bf3c01c02e"
|
||||
dependencies = [
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quote"
|
||||
version = "1.0.35"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-syntax"
|
||||
version = "0.8.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56"
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "2.0.55"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "002a1b3dbf967edfafc32655d0f377ab0bb7b994aa1d32c8cc7e9b8bf3ebb8f0"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-ident"
|
||||
version = "1.0.12"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
|
||||
|
||||
[[package]]
|
||||
name = "yansi"
|
||||
version = "0.5.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec"
|
13
Cargo.toml
Normal file
13
Cargo.toml
Normal file
|
@ -0,0 +1,13 @@
|
|||
[package]
|
||||
name = "muss2"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
|
||||
[workspace]
|
||||
members = [
|
||||
"crates/lang"
|
||||
]
|
12
crates/lang/Cargo.toml
Normal file
12
crates/lang/Cargo.toml
Normal file
|
@ -0,0 +1,12 @@
|
|||
[package]
|
||||
name = "muss2-lang"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
logos = { version = "0.14" }
|
||||
|
||||
[dev-dependencies]
|
||||
pretty_assertions = "1.3.0"
|
16
crates/lang/src/lexer/errors.rs
Normal file
16
crates/lang/src/lexer/errors.rs
Normal file
|
@ -0,0 +1,16 @@
|
|||
/// Errors that can occur while lexing source text.
///
/// `Default` is derived so the lexer can produce this error automatically
/// whenever no token pattern matches.
#[derive(Debug, Default, PartialEq, Eq, Clone, Copy)]
pub enum LexError {
    /// The input contained a character sequence that matches no token rule.
    #[default]
    UnrecognizedToken,
}

impl core::fmt::Display for LexError {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let message = match self {
            Self::UnrecognizedToken => "Unrecognized token",
        };
        f.write_str(message)
    }
}

impl std::error::Error for LexError {}
|
68
crates/lang/src/lexer/mod.rs
Normal file
68
crates/lang/src/lexer/mod.rs
Normal file
|
@ -0,0 +1,68 @@
|
|||
mod errors;
|
||||
pub use errors::LexError;
|
||||
|
||||
mod tokens;
|
||||
pub use tokens::{Token, TokenInfo};
|
||||
|
||||
#[cfg(test)]
mod test {
    use super::*;

    use pretty_assertions::assert_eq;

    // One sample of every token kind, in enum-declaration order. Note the
    // embedded newline inside the long comment, the newline consumed by the
    // short comment, and the two standalone newlines before the final `<>`.
    const ALL_TOKENS_STR: &str = "u n + - * / && || => x> ~> = . n_u_ :: is_a_ :: VaR1AbLe ( ) { } : ; -12345 12345.6789 \"char[]\" /* long\ncomment */ // short comment \n \n <>";

    #[test]
    fn parse_everything() {
        // Expected tokens with exact positions: `line` is zero-based,
        // `column` is relative to the current line start, and `index` is
        // the absolute byte range inside ALL_TOKENS_STR.
        let expected = vec![
            Token::Union(TokenInfo { line: 0, column: 0..1, index: 0..1 }),
            Token::Intersection(TokenInfo { line: 0, column: 2..3, index: 2..3 }),
            Token::Plus(TokenInfo { line: 0, column: 4..5, index: 4..5 }),
            Token::Minus(TokenInfo { line: 0, column: 6..7, index: 6..7 }),
            Token::Multiply(TokenInfo { line: 0, column: 8..9, index: 8..9 }),
            Token::Divide(TokenInfo { line: 0, column: 10..11, index: 10..11 }),
            Token::And(TokenInfo { line: 0, column: 12..14, index: 12..14 }),
            Token::Or(TokenInfo { line: 0, column: 15..17, index: 15..17 }),
            Token::Map(TokenInfo { line: 0, column: 18..20, index: 18..20 }),
            Token::Filter(TokenInfo { line: 0, column: 21..23, index: 21..23 }),
            Token::Sort(TokenInfo { line: 0, column: 24..26, index: 24..26 }),
            Token::Equal(TokenInfo { line: 0, column: 27..28, index: 27..28 }),
            Token::Dot(TokenInfo { line: 0, column: 29..30, index: 29..30 }),
            Token::Variable(("n_u_".into(), TokenInfo { line: 0, column: 31..35, index: 31..35 })),
            Token::PathSeparator(TokenInfo { line: 0, column: 36..38, index: 36..38 }),
            Token::Variable(("is_a_".into(), TokenInfo { line: 0, column: 39..44, index: 39..44 })),
            Token::PathSeparator(TokenInfo { line: 0, column: 45..47, index: 45..47 }),
            Token::Variable(("VaR1AbLe".into(), TokenInfo { line: 0, column: 48..56, index: 48..56 })),
            Token::OpenRoundBracket(TokenInfo { line: 0, column: 57..58, index: 57..58 }),
            Token::CloseRoundBracket(TokenInfo { line: 0, column: 59..60, index: 59..60 }),
            Token::OpenCurlyBracket(TokenInfo { line: 0, column: 61..62, index: 61..62 }),
            Token::CloseCurlyBracket(TokenInfo { line: 0, column: 63..64, index: 63..64 }),
            Token::Colon(TokenInfo { line: 0, column: 65..66, index: 65..66 }),
            Token::Semicolon(TokenInfo { line: 0, column: 67..68, index: 67..68 }),
            Token::Integer((-12345, TokenInfo { line: 0, column: 69..75, index: 69..75 })),
            Token::Float((12345.6789, TokenInfo { line: 0, column: 76..86, index: 76..86 })),
            Token::String(("char[]".into(), TokenInfo { line: 0, column: 87..95, index: 87..95 })),
            // Long comment payload keeps its inner newline; positions are
            // still reported from the line where the comment *starts*.
            Token::LongComment((" long\ncomment ".into(), TokenInfo { line: 0, column: 96..114, index: 96..114 })),
            // Short comment is on line 1 (the long comment above consumed
            // one newline), and its pattern swallows the trailing newline.
            Token::ShortComment((" short comment ".into(), TokenInfo { line: 1, column: 11..29, index: 115..133 })),
            Token::Newline(TokenInfo { line: 2, column: 1..2, index: 134..135 }),
            Token::Generate(TokenInfo { line: 3, column: 1..3, index: 136..138 }),
        ];

        let mut actual = Vec::new();
        for (index, token_result) in Token::tokenify(ALL_TOKENS_STR).enumerate() {
            // Fail eagerly with the expected token so a lex error is easy
            // to pinpoint before the final bulk comparison.
            assert!(token_result.is_ok(), "Token #{} (expected: {:?}) failed to parse: {:?}", index, expected[index], token_result.err());
            actual.push(token_result.unwrap());
        }

        assert_eq!(actual, expected)
    }

    #[test]
    fn parse_reversability() {
        // `stringify` appends one space after every token, so the
        // round-trip target is the source text plus a trailing space.
        let expected = format!("{} ", ALL_TOKENS_STR);

        let actual = Token::stringify(Token::tokenify(&expected).map(|token_result| token_result.unwrap()));

        assert_eq!(actual, expected)
    }
}
|
240
crates/lang/src/lexer/tokens.rs
Normal file
240
crates/lang/src/lexer/tokens.rs
Normal file
|
@ -0,0 +1,240 @@
|
|||
use logos::Logos;
|
||||
|
||||
#[derive(Default, PartialEq, Clone)]
|
||||
pub struct ExtraState {
|
||||
line: usize,
|
||||
line_start: usize,
|
||||
start: usize,
|
||||
end: usize,
|
||||
}
|
||||
|
||||
impl ExtraState {
|
||||
fn lexer_sync(lex: &mut logos::Lexer<Token>) {
|
||||
let span = lex.span();
|
||||
lex.extras.start = span.start;
|
||||
lex.extras.end = span.end;
|
||||
}
|
||||
|
||||
fn newline(lex: &mut logos::Lexer<Token>) -> TokenInfo {
|
||||
Self::lexer_sync(lex);
|
||||
let info = lex.extras.token_info();
|
||||
lex.extras.line += 1;
|
||||
lex.extras.line_start = lex.span().end;
|
||||
info
|
||||
}
|
||||
|
||||
fn token_info(&self) -> TokenInfo {
|
||||
TokenInfo {
|
||||
line: self.line,
|
||||
column: (self.start - self.line_start)..(self.end - self.line_start),
|
||||
index: self.start..self.end,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Source position of a single lexed token.
#[derive(Debug, PartialEq, Clone)]
pub struct TokenInfo {
    /// Zero-based line number the token starts on.
    pub line: usize,
    /// Offsets of the token relative to the start of its line.
    /// NOTE(review): these are byte offsets derived from the lexer span,
    /// not character counts — confirm intended semantics for multi-byte
    /// characters.
    pub column: core::ops::Range<usize>,
    /// Absolute byte range of the token within the whole input.
    pub index: core::ops::Range<usize>,
}
|
||||
|
||||
#[derive(Logos, Debug, PartialEq, Clone)]
|
||||
#[logos(skip r"[ \t\f]+")] // Ignore this regex pattern between tokens
|
||||
#[logos(error = super::LexError)]
|
||||
#[logos(extras = ExtraState)]
|
||||
pub enum Token {
|
||||
// Operands
|
||||
// Set operations
|
||||
#[token("u", priority = 99, callback = all_cb)]
|
||||
Union(TokenInfo),
|
||||
#[token("n", priority = 99, callback = all_cb)]
|
||||
Intersection(TokenInfo),
|
||||
// Arithmetic operations (also applicable to sets)
|
||||
#[token("+", callback = all_cb)]
|
||||
Plus(TokenInfo),
|
||||
#[token("-", callback = all_cb)]
|
||||
Minus(TokenInfo),
|
||||
#[token("*", callback = all_cb)]
|
||||
Multiply(TokenInfo),
|
||||
#[token("/", callback = all_cb)]
|
||||
Divide(TokenInfo),
|
||||
// Logical operations
|
||||
#[token("&&", callback = all_cb)]
|
||||
And(TokenInfo),
|
||||
#[token("||", callback = all_cb)]
|
||||
Or(TokenInfo),
|
||||
|
||||
// Functional
|
||||
#[token("=>", callback = all_cb)]
|
||||
Map(TokenInfo),
|
||||
#[token("x>", callback = all_cb)]
|
||||
Filter(TokenInfo),
|
||||
#[token("~>", callback = all_cb)]
|
||||
Sort(TokenInfo),
|
||||
#[token("<>", callback = all_cb)]
|
||||
Generate(TokenInfo),
|
||||
|
||||
// Declarations
|
||||
|
||||
|
||||
// Basics
|
||||
#[token("=", callback = all_cb)]
|
||||
Equal(TokenInfo),
|
||||
#[token("::", callback = all_cb)]
|
||||
PathSeparator(TokenInfo),
|
||||
#[token(".", callback = all_cb)]
|
||||
Dot(TokenInfo),
|
||||
#[regex("[a-zA-Z_][a-zA-Z_0-9]*", priority = 1, callback = variable_cb)]
|
||||
Variable((String, TokenInfo)),
|
||||
#[token("(", callback = all_cb)]
|
||||
OpenRoundBracket(TokenInfo),
|
||||
#[token(")", callback = all_cb)]
|
||||
CloseRoundBracket(TokenInfo),
|
||||
#[token("{", callback = all_cb)]
|
||||
OpenCurlyBracket(TokenInfo),
|
||||
#[token("}", callback = all_cb)]
|
||||
CloseCurlyBracket(TokenInfo),
|
||||
#[token(":", callback = all_cb)]
|
||||
Colon(TokenInfo),
|
||||
#[token(";", callback = all_cb)]
|
||||
Semicolon(TokenInfo),
|
||||
|
||||
// Literals
|
||||
#[regex("-?[1-9][0-9]*", priority = 1, callback = integer_cb)]
|
||||
Integer((i64, TokenInfo)),
|
||||
#[regex("-?[1-9][0-9]*\\.[0-9]+", priority = 99, callback = float_cb)]
|
||||
Float((f64, TokenInfo)),
|
||||
#[regex(r#""([^"\\]|\\["\\bnfrt]|u[a-fA-F0-9]{4})*""#, priority = 1, callback = string_cb)]
|
||||
String((String, TokenInfo)),
|
||||
|
||||
/// Comments
|
||||
#[regex(r#"\/\*([^\*]+(\*[^\/])?)*\*\/"#, priority = 1, callback = multiline_comment_cb)]
|
||||
LongComment((String, TokenInfo)),
|
||||
#[regex("\\/\\/[^\n]*\n", priority = 1, callback = oneline_comment_cb)]
|
||||
ShortComment((String, TokenInfo)),
|
||||
|
||||
/// Ignore
|
||||
#[regex(r"\n", newline_cb)]
|
||||
Newline(TokenInfo),
|
||||
}
|
||||
|
||||
fn all_cb(lex: &mut logos::Lexer<Token>) -> TokenInfo {
|
||||
ExtraState::lexer_sync(lex);
|
||||
lex.extras.token_info()
|
||||
}
|
||||
|
||||
fn variable_cb(lex: &mut logos::Lexer<Token>) -> (String, TokenInfo) {
|
||||
let slice = lex.slice();
|
||||
(slice.to_owned(), all_cb(lex))
|
||||
}
|
||||
|
||||
fn integer_cb(lex: &mut logos::Lexer<Token>) -> (i64, TokenInfo) {
|
||||
let slice = lex.slice();
|
||||
(slice.parse().unwrap(), all_cb(lex))
|
||||
}
|
||||
|
||||
fn float_cb(lex: &mut logos::Lexer<Token>) -> (f64, TokenInfo) {
|
||||
let slice = lex.slice();
|
||||
(slice.parse().unwrap(), all_cb(lex))
|
||||
}
|
||||
|
||||
fn string_cb(lex: &mut logos::Lexer<Token>) -> (String, TokenInfo) {
|
||||
let slice = lex.slice();
|
||||
// TODO handle escaped chars
|
||||
(slice[1..slice.len()-1].to_owned(), all_cb(lex))
|
||||
}
|
||||
|
||||
fn multiline_comment_cb(lex: &mut logos::Lexer<Token>) -> (String, TokenInfo) {
|
||||
let slice = lex.slice();
|
||||
let info = all_cb(lex);
|
||||
for (i, c) in slice.chars().enumerate() {
|
||||
if c == '\n' {
|
||||
lex.extras.line += 1;
|
||||
lex.extras.line_start = lex.span().start + i + 1;
|
||||
}
|
||||
}
|
||||
(slice[2..slice.len()-2].to_owned(), info)
|
||||
}
|
||||
|
||||
fn oneline_comment_cb(lex: &mut logos::Lexer<Token>) -> (String, TokenInfo) {
|
||||
let slice = lex.slice();
|
||||
let info = all_cb(lex);
|
||||
lex.extras.line += 1;
|
||||
lex.extras.line_start = lex.span().end;
|
||||
(slice[2..slice.len()-1].to_owned(), info)
|
||||
}
|
||||
|
||||
fn newline_cb(lex: &mut logos::Lexer<Token>) -> TokenInfo {
|
||||
ExtraState::newline(lex)
|
||||
}
|
||||
|
||||
impl Token {
|
||||
pub fn tokenify<'a>(s: &'a str) -> logos::Lexer<'a, Self> {
|
||||
Token::lexer(s)
|
||||
}
|
||||
|
||||
pub fn stringify<'a>(tokens: impl core::iter::Iterator<Item=Self> + 'a) -> String {
|
||||
use core::fmt::Write;
|
||||
let mut result = String::new();
|
||||
tokens.for_each(|t| {
|
||||
t.write_str(&mut result).unwrap();
|
||||
write!(result, " ").unwrap();
|
||||
});
|
||||
result
|
||||
}
|
||||
|
||||
pub fn stringify_ref<'a, 'b>(tokens: impl core::iter::Iterator<Item=&'b Self> + 'a) -> String {
|
||||
use core::fmt::Write;
|
||||
let mut result = String::new();
|
||||
tokens.for_each(|t| {
|
||||
t.write_str(&mut result).unwrap();
|
||||
write!(result, " ").unwrap();
|
||||
});
|
||||
result
|
||||
}
|
||||
|
||||
pub fn as_str(&self) -> String {
|
||||
let mut s = String::new();
|
||||
self.write_str(&mut s).unwrap();
|
||||
s
|
||||
}
|
||||
|
||||
pub fn write_str(&self, result: &mut String) -> std::fmt::Result {
|
||||
use core::fmt::Write;
|
||||
match self {
|
||||
Self::Union(_) => write!(result, "u"),
|
||||
Self::Intersection(_) => write!(result, "n"),
|
||||
Self::Plus(_) => write!(result, "+"),
|
||||
Self::Minus(_) => write!(result, "-"),
|
||||
Self::Multiply(_) => write!(result, "*"),
|
||||
Self::Divide(_) => write!(result, "/"),
|
||||
Self::And(_) => write!(result, "&&"),
|
||||
Self::Or(_) => write!(result, "||"),
|
||||
Self::Map(_) => write!(result, "=>"),
|
||||
Self::Filter(_) => write!(result, "x>"),
|
||||
Self::Sort(_) => write!(result, "~>"),
|
||||
Self::Generate(_) => write!(result, "<>"),
|
||||
Self::Equal(_) => write!(result, "="),
|
||||
Self::PathSeparator(_) => write!(result, "::"),
|
||||
Self::Dot(_) => write!(result, "."),
|
||||
Self::Variable((name, _)) => write!(result, "{}", name),
|
||||
Self::OpenRoundBracket(_) => write!(result, "("),
|
||||
Self::CloseRoundBracket(_) => write!(result, ")"),
|
||||
Self::OpenCurlyBracket(_) => write!(result, "{{"),
|
||||
Self::CloseCurlyBracket(_) => write!(result, "}}"),
|
||||
Self::Colon(_) => write!(result, ":"),
|
||||
Self::Semicolon(_) => write!(result, ";"),
|
||||
Self::Integer((int, _)) => write!(result, "{}", int),
|
||||
Self::Float((float, _)) => write!(result, "{}", float),
|
||||
Self::String((s, _)) => write!(result, "\"{}\"", s),
|
||||
Self::LongComment((c, _)) => write!(result, "/*{}*/", c),
|
||||
Self::ShortComment((c, _)) => write!(result, "//{}\n", c),
|
||||
Self::Newline(_) => write!(result, "\n"),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_ignore(&self) -> bool {
|
||||
matches!(self, Self::Newline(_))
|
||||
}
|
||||
}
|
7
crates/lang/src/lib.rs
Normal file
7
crates/lang/src/lib.rs
Normal file
|
@ -0,0 +1,7 @@
|
|||
//! Language specification
|
||||
//!
|
||||
//! Parsing order: lexer -> syntax -> statement
|
||||
|
||||
pub mod lexer;
|
||||
pub mod statement;
|
||||
pub mod syntax;
|
32
crates/lang/src/statement/errors.rs
Normal file
32
crates/lang/src/statement/errors.rs
Normal file
|
@ -0,0 +1,32 @@
|
|||
#[derive(Debug, PartialEq, Clone)]
|
||||
pub enum LanguageError {
|
||||
InvalidSequence(Vec<crate::lexer::Token>),
|
||||
InvalidSyntax(Vec<crate::syntax::SyntaxToken>),
|
||||
UnexpectedEnd(Vec<crate::syntax::SyntaxToken>),
|
||||
UnexpectedToken(crate::syntax::SyntaxToken),
|
||||
UnrecognizedToken,
|
||||
}
|
||||
|
||||
impl core::fmt::Display for LanguageError {
|
||||
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
|
||||
//use core::fmt::Write;
|
||||
match self {
|
||||
Self::InvalidSequence(seq) => write!(f, "Invalid sequence {:?}", seq.as_slice()),
|
||||
Self::InvalidSyntax(seq) => write!(f, "Invalid syntax {}", crate::syntax::SyntaxToken::stringify_ref(seq.iter())),
|
||||
Self::UnexpectedEnd(seq) => write!(f, "Unexpected end of file {} <EOF>", crate::syntax::SyntaxToken::stringify_ref(seq.iter())),
|
||||
Self::UnexpectedToken(token) => write!(f, "Unexpected token {}", token.as_str()),
|
||||
Self::UnrecognizedToken => write!(f, "Unrecognized token"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::error::Error for LanguageError {}
|
||||
|
||||
impl From<crate::syntax::SyntaxError> for LanguageError {
|
||||
fn from(value: crate::syntax::SyntaxError) -> Self {
|
||||
match value {
|
||||
crate::syntax::SyntaxError::UnrecognizedToken => Self::UnrecognizedToken,
|
||||
crate::syntax::SyntaxError::InvalidSequence(seq) => Self::InvalidSequence(seq),
|
||||
}
|
||||
}
|
||||
}
|
133
crates/lang/src/statement/mod.rs
Normal file
133
crates/lang/src/statement/mod.rs
Normal file
|
@ -0,0 +1,133 @@
|
|||
//! High-level language
|
||||
mod errors;
|
||||
pub use errors::LanguageError;
|
||||
|
||||
mod parser;
|
||||
pub use parser::LanguageParser;
|
||||
|
||||
mod tree;
|
||||
pub use tree::{Statement, Notification, Param, Declare, DeclareFun, DeclareType, Module, Op, DeclareAssignVar, DeclareVar, AssignVar, Dyadic, CallFun};
|
||||
|
||||
// my_namespace { <>generate_fn()=>map_fn()x>filter_fn(_)~>sort_fn(_) }
|
||||
|
||||
#[cfg(test)]
mod test {
    use super::*;

    use pretty_assertions::assert_eq;

    /// Print a uniform "where did it go wrong" diagnostic for an error
    /// that carries token position info. (Previously this eprintln! was
    /// triplicated verbatim across the three positional error arms.)
    fn report_bad_token(e_display: &str, bad_lex: &crate::lexer::TokenInfo, i: usize) {
        eprintln!(
            "{} @ line {}, column {} to {} (index {} to {}), token #{}",
            e_display, bad_lex.line,
            bad_lex.column.start, bad_lex.column.end,
            bad_lex.index.start, bad_lex.index.end, i
        );
    }

    /// Drain the parser, panicking (with location diagnostics on stderr)
    /// on the first error; returns all non-ignorable statements.
    fn assert_no_errors(iter: impl Iterator<Item=Result<Statement, LanguageError>>) -> Vec<Statement> {
        let mut statements = Vec::new();
        for (i, res) in iter.enumerate() {
            match res {
                Ok(statement) => if !statement.is_ignore() { statements.push(statement); },
                Err(e) => {
                    let e_display = e.to_string();
                    match e {
                        LanguageError::InvalidSequence(_seq) => {},
                        LanguageError::InvalidSyntax(seq) => {
                            let bad_syntax = seq.last().expect("Empty invalid syntax token sequence");
                            let bad_lex = bad_syntax.info.last().expect("Empty token info on syntax token");
                            report_bad_token(&e_display, bad_lex, i);
                        },
                        LanguageError::UnexpectedEnd(seq) => {
                            let bad_syntax = seq.last().expect("Empty unexpected end token sequence");
                            let bad_lex = bad_syntax.info.last().expect("Empty token info on syntax token");
                            report_bad_token(&e_display, bad_lex, i);
                        },
                        LanguageError::UnexpectedToken(bad_syntax) => {
                            let bad_lex = bad_syntax.info.last().expect("Empty token info on syntax token");
                            report_bad_token(&e_display, bad_lex, i);
                        },
                        LanguageError::UnrecognizedToken => {
                            eprintln!("Unrecognized token #{} ?!?!", i);
                        }
                    }
                    panic!("{} for token #{}", e_display, i);
                }
            }
        }
        statements
    }

    /// Smallest possible module declaration: a name and an empty body.
    #[test]
    fn parse_minimum_module() {
        let parser = LanguageParser::lex("my_module {}");

        let parsed = assert_no_errors(parser);
        assert_eq!(vec![
            Statement::Module(Module {
                name: crate::syntax::Path(vec![
                    "my_module".into(),
                ]),
                inner: Vec::new(),
            }),
        ], parsed);
    }

    /// Smallest possible function declaration: no params, no ops.
    #[test]
    fn parse_minimum_function_declaration() {
        let parser = LanguageParser::lex("my_generator () <> {}");

        let parsed = assert_no_errors(parser);
        assert_eq!(vec![
            Statement::Declare(Declare::Function(DeclareFun {
                name: crate::syntax::Path(vec![
                    "my_generator".into(),
                ]),
                params: Vec::new(),
                type_: crate::syntax::Functional::Generate,
                ops: Vec::new(),
            }))
        ], parsed);
    }

    /// Smallest possible type declaration: a name bound to an empty body.
    #[test]
    fn parse_minimum_type_declaration() {
        let parser = LanguageParser::lex("my_type = {}");

        let parsed = assert_no_errors(parser);
        assert_eq!(vec![
            Statement::Declare(Declare::Type(DeclareType {
                name: crate::syntax::Path(vec![
                    "my_type".into(),
                ]),
                params: Vec::new(),
            }))
        ], parsed);
    }

    /// Smallest possible entrypoint: a generate call with no params.
    #[test]
    fn parse_minimum_entrypoint() {
        let parser = LanguageParser::lex("<> my_generator ()");

        let parsed = assert_no_errors(parser);
        assert_eq!(vec![
            Statement::Entrypoint(CallFun {
                type_: crate::syntax::Functional::Generate,
                var: crate::syntax::Path(vec![
                    "my_generator".into(),
                ]),
                params: Vec::new(),
            })
        ], parsed);
    }
}
|
769
crates/lang/src/statement/parser.rs
Normal file
769
crates/lang/src/statement/parser.rs
Normal file
|
@ -0,0 +1,769 @@
|
|||
pub struct LanguageParser<'a, I: core::iter::Iterator<Item=Result<crate::syntax::SyntaxToken, crate::syntax::SyntaxError>> + 'a> {
|
||||
_idc: core::marker::PhantomData<&'a ()>,
|
||||
iter: I,
|
||||
lookahead: Option<crate::syntax::SyntaxToken>,
|
||||
incomplete_modules: Vec<super::Module>,
|
||||
}
|
||||
|
||||
impl <'a, I: core::iter::Iterator<Item=Result<crate::syntax::SyntaxToken, crate::syntax::SyntaxError>> + 'a> LanguageParser<'a, I> {
|
||||
pub fn new(tokens_in: I) -> Self {
|
||||
Self {
|
||||
_idc: Default::default(),
|
||||
iter: tokens_in,
|
||||
lookahead: None,
|
||||
incomplete_modules: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_incomplete_module(&mut self, name: crate::syntax::Path) -> Result<super::Module, super::LanguageError> {
|
||||
Ok(super::Module { name, inner: Vec::new() })
|
||||
}
|
||||
|
||||
fn parse_function_decl(&mut self, name: crate::syntax::Path) -> Result<super::DeclareFun, super::LanguageError> {
|
||||
// `Name (` (first 2 tokens) are already consumed
|
||||
let mut lookahead = if let Some(lookahead) = self.lookahead.take() {
|
||||
lookahead
|
||||
} else {
|
||||
match self.iter.next() {
|
||||
Some(Err(e)) => return Err(e.into()),
|
||||
Some(Ok(t)) => t,
|
||||
None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
|
||||
}
|
||||
};
|
||||
// function params
|
||||
let mut params = Vec::new();
|
||||
while !matches!(lookahead.token, crate::syntax::Token::CloseRoundBracket) {
|
||||
self.lookahead = Some(lookahead);
|
||||
params.push(self.parse_param()?);
|
||||
lookahead = match self.iter.next() {
|
||||
Some(Err(e)) => return Err(e.into()),
|
||||
Some(Ok(t)) => t,
|
||||
None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
|
||||
};
|
||||
}
|
||||
// function type
|
||||
let token1 = match self.iter.next() {
|
||||
Some(Err(e)) => return Err(e.into()),
|
||||
Some(Ok(t)) => t,
|
||||
None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
|
||||
};
|
||||
let fn_type = if let crate::syntax::Token::Functional(fn_type) = token1.token {
|
||||
fn_type
|
||||
} else {
|
||||
return Err(super::LanguageError::InvalidSyntax(vec![token1]));
|
||||
};
|
||||
// operations
|
||||
let token2 = match self.iter.next() {
|
||||
Some(Err(e)) => return Err(e.into()),
|
||||
Some(Ok(t)) => t,
|
||||
None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
|
||||
};
|
||||
if !matches!(token2.token, crate::syntax::Token::OpenCurlyBracket) {
|
||||
return Err(super::LanguageError::InvalidSyntax(vec![
|
||||
crate::syntax::SyntaxToken {
|
||||
token: crate::syntax::Token::Functional(fn_type),
|
||||
info: token1.info,
|
||||
},
|
||||
token2
|
||||
]));
|
||||
}
|
||||
lookahead = match self.iter.next() {
|
||||
Some(Err(e)) => return Err(e.into()),
|
||||
Some(Ok(t)) => t,
|
||||
None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
|
||||
};
|
||||
let mut ops = Vec::new();
|
||||
while !matches!(lookahead.token, crate::syntax::Token::CloseCurlyBracket) {
|
||||
self.lookahead = Some(lookahead);
|
||||
ops.push(self.parse_op()?);
|
||||
lookahead = match self.iter.next() {
|
||||
Some(Err(e)) => return Err(e.into()),
|
||||
Some(Ok(t)) => t,
|
||||
None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
|
||||
};
|
||||
}
|
||||
Ok(super::DeclareFun {
|
||||
name,
|
||||
params,
|
||||
type_: fn_type,
|
||||
ops,
|
||||
})
|
||||
}
|
||||
|
||||
fn parse_type_decl(&mut self, name: crate::syntax::Path) -> Result<super::DeclareType, super::LanguageError> {
|
||||
// `Name =` (first 2 tokens) are already consumed
|
||||
let token0 = if let Some(lookahead) = self.lookahead.take() {
|
||||
lookahead
|
||||
} else {
|
||||
match self.iter.next() {
|
||||
Some(Err(e)) => return Err(e.into()),
|
||||
Some(Ok(t)) => t,
|
||||
None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
|
||||
}
|
||||
};
|
||||
match token0.token {
|
||||
crate::syntax::Token::OpenCurlyBracket => {
|
||||
let mut lookahead = match self.iter.next() {
|
||||
Some(Err(e)) => return Err(e.into()),
|
||||
Some(Ok(t)) => t,
|
||||
None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
|
||||
};
|
||||
let mut params = Vec::new();
|
||||
while !matches!(lookahead.token, crate::syntax::Token::CloseCurlyBracket) {
|
||||
self.lookahead = Some(lookahead);
|
||||
params.push(self.parse_param()?);
|
||||
lookahead = match self.iter.next() {
|
||||
Some(Err(e)) => return Err(e.into()),
|
||||
Some(Ok(t)) => t,
|
||||
None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
|
||||
};
|
||||
}
|
||||
Ok(super::DeclareType {
|
||||
name,
|
||||
params,
|
||||
})
|
||||
}
|
||||
t => Err(super::LanguageError::InvalidSyntax(vec![
|
||||
crate::syntax::SyntaxToken {
|
||||
token: t,
|
||||
info: token0.info,
|
||||
}
|
||||
]))
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_function_call(&mut self, fun: crate::syntax::Functional) -> Result<super::CallFun, super::LanguageError> {
|
||||
// `Func` (first token) is already consumed
|
||||
let token0 = if let Some(lookahead) = self.lookahead.take() {
|
||||
lookahead
|
||||
} else {
|
||||
match self.iter.next() {
|
||||
Some(Err(e)) => return Err(e.into()),
|
||||
Some(Ok(t)) => t,
|
||||
None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
|
||||
}
|
||||
};
|
||||
let name = if let crate::syntax::Token::Path(name) = token0.token {
|
||||
name
|
||||
} else {
|
||||
return Err(super::LanguageError::UnexpectedToken(token0));
|
||||
};
|
||||
let token1 = match self.iter.next() {
|
||||
Some(Err(e)) => return Err(e.into()),
|
||||
Some(Ok(t)) => t,
|
||||
None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
|
||||
};
|
||||
if !matches!(token1.token, crate::syntax::Token::OpenRoundBracket) {
|
||||
return Err(super::LanguageError::UnexpectedToken(token1));
|
||||
}
|
||||
let op_params = match self.parse_op_params() {
|
||||
Ok(ops) => ops,
|
||||
Err(e) => {
|
||||
return Err(Self::extend_err_tokens(vec![
|
||||
crate::syntax::SyntaxToken {
|
||||
token: crate::syntax::Token::Path(name),
|
||||
info: token0.info,
|
||||
},
|
||||
crate::syntax::SyntaxToken {
|
||||
token: crate::syntax::Token::OpenRoundBracket,
|
||||
info: token1.info,
|
||||
},
|
||||
], e));
|
||||
}
|
||||
};
|
||||
let token_last = if let Some(lookahead) = self.lookahead.take() {
|
||||
lookahead
|
||||
} else {
|
||||
match self.iter.next() {
|
||||
Some(Err(e)) => return Err(e.into()),
|
||||
Some(Ok(t)) => t,
|
||||
None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
|
||||
}
|
||||
};
|
||||
if !matches!(token_last.token, crate::syntax::Token::CloseRoundBracket) {
|
||||
return Err(super::LanguageError::UnexpectedToken(token1));
|
||||
}
|
||||
Ok(super::CallFun {
|
||||
type_: fun,
|
||||
var: name,
|
||||
params: op_params,
|
||||
})
|
||||
}
|
||||
|
||||
fn parse_op(&mut self) -> Result<super::Op, super::LanguageError> {
|
||||
let op = self.parse_inner_op(0)?;
|
||||
let token_last = if let Some(lookahead) = self.lookahead.take() {
|
||||
lookahead
|
||||
} else {
|
||||
match self.iter.next() {
|
||||
Some(Err(e)) => return Err(e.into()),
|
||||
Some(Ok(t)) => t,
|
||||
None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
|
||||
}
|
||||
};
|
||||
if let crate::syntax::Token::Semicolon = token_last.token {
|
||||
Ok(op)
|
||||
} else {
|
||||
Err(super::LanguageError::UnexpectedToken(token_last))
|
||||
}
|
||||
}
|
||||
|
||||
/// Prefixes `tokens` onto the token sequence carried inside `err`.
///
/// `InvalidSyntax` and `UnexpectedEnd` both carry the tokens consumed up
/// to the failure point; callers use this helper to prepend the tokens
/// they had already accepted before a nested parse failed, so the error
/// reports the full sequence. Any other error kind passes through as-is.
fn extend_err_tokens(mut tokens: Vec<crate::syntax::SyntaxToken>, err: super::LanguageError) -> super::LanguageError {
    match err {
        super::LanguageError::InvalidSyntax(mut inner) => {
            // `tokens` come first: they precede `inner` in the input.
            tokens.append(&mut inner);
            super::LanguageError::InvalidSyntax(tokens)
        }
        super::LanguageError::UnexpectedEnd(mut inner) => {
            tokens.append(&mut inner);
            super::LanguageError::UnexpectedEnd(tokens)
        }
        other => other,
    }
}
|
||||
|
||||
fn parse_inner_op(&mut self, recursion_level: usize) -> Result<super::Op, super::LanguageError> {
|
||||
let token0 = if let Some(lookahead) = self.lookahead.take() {
|
||||
lookahead
|
||||
} else {
|
||||
match self.iter.next() {
|
||||
Some(Err(e)) => return Err(e.into()),
|
||||
Some(Ok(t)) => t,
|
||||
None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
|
||||
}
|
||||
};
|
||||
let op0 = match token0.token {
|
||||
crate::syntax::Token::Path(var_name) => {
|
||||
// variable-oriented operations
|
||||
let token1 = match self.iter.next() {
|
||||
Some(Err(e)) => return Err(e.into()),
|
||||
Some(Ok(t)) => t,
|
||||
None => return Err(super::LanguageError::UnexpectedEnd(vec![
|
||||
crate::syntax::SyntaxToken {
|
||||
token: crate::syntax::Token::Path(var_name),
|
||||
info: token0.info,
|
||||
}
|
||||
])),
|
||||
};
|
||||
match token1.token {
|
||||
crate::syntax::Token::Colon => {
|
||||
// Declare-assign or declare
|
||||
let mut lookahead = match self.iter.next() {
|
||||
Some(Err(e)) => return Err(e.into()),
|
||||
Some(Ok(t)) => t,
|
||||
None => return Err(super::LanguageError::UnexpectedEnd(vec![
|
||||
crate::syntax::SyntaxToken {
|
||||
token: crate::syntax::Token::Path(var_name),
|
||||
info: token0.info,
|
||||
},
|
||||
crate::syntax::SyntaxToken {
|
||||
token: crate::syntax::Token::Colon,
|
||||
info: token1.info,
|
||||
},
|
||||
])),
|
||||
};
|
||||
let type_name = if let crate::syntax::Token::Path(type_name) = lookahead.token {
|
||||
let type_token_info = lookahead.info;
|
||||
lookahead = match self.iter.next() {
|
||||
Some(Err(e)) => return Err(e.into()),
|
||||
Some(Ok(t)) => t,
|
||||
None => return Err(super::LanguageError::UnexpectedEnd(vec![
|
||||
crate::syntax::SyntaxToken {
|
||||
token: crate::syntax::Token::Path(var_name),
|
||||
info: token0.info,
|
||||
},
|
||||
crate::syntax::SyntaxToken {
|
||||
token: crate::syntax::Token::Colon,
|
||||
info: token1.info,
|
||||
},
|
||||
crate::syntax::SyntaxToken {
|
||||
token: crate::syntax::Token::Path(type_name),
|
||||
info: type_token_info,
|
||||
},
|
||||
])),
|
||||
};
|
||||
Some((type_name, type_token_info))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
if let crate::syntax::Token::Equal = lookahead.token {
|
||||
// Declare-Assign
|
||||
let inner_op = match self.parse_inner_op(recursion_level + 1) {
|
||||
Ok(op) => op,
|
||||
Err(e) => {
|
||||
// roughly equivalent to self.parse_inner_op(...).map_err(|e| { ... })
|
||||
// (the closure captures variables which the compiler can't prove aren't used in this fn after)
|
||||
let tokens = if let Some((type_name, type_token_info)) = type_name {
|
||||
vec![
|
||||
crate::syntax::SyntaxToken {
|
||||
token: crate::syntax::Token::Path(var_name),
|
||||
info: token0.info,
|
||||
},
|
||||
crate::syntax::SyntaxToken {
|
||||
token: crate::syntax::Token::Colon,
|
||||
info: token1.info,
|
||||
},
|
||||
crate::syntax::SyntaxToken {
|
||||
token: crate::syntax::Token::Path(type_name),
|
||||
info: type_token_info,
|
||||
},
|
||||
crate::syntax::SyntaxToken {
|
||||
token: crate::syntax::Token::Equal,
|
||||
info: lookahead.info,
|
||||
},
|
||||
]
|
||||
} else {
|
||||
vec![
|
||||
crate::syntax::SyntaxToken {
|
||||
token: crate::syntax::Token::Path(var_name),
|
||||
info: token0.info,
|
||||
},
|
||||
crate::syntax::SyntaxToken {
|
||||
token: crate::syntax::Token::Colon,
|
||||
info: token1.info,
|
||||
},
|
||||
crate::syntax::SyntaxToken {
|
||||
token: crate::syntax::Token::Equal,
|
||||
info: lookahead.info,
|
||||
},
|
||||
]
|
||||
};
|
||||
return Err(Self::extend_err_tokens(tokens, e));
|
||||
}
|
||||
};
|
||||
super::Op::DeclareAssign(super::tree::DeclareAssignVar {
|
||||
var: var_name,
|
||||
type_: type_name.map(|x| x.0),
|
||||
op: Box::new(inner_op),
|
||||
})
|
||||
} else {
|
||||
// declare
|
||||
self.lookahead = Some(lookahead);
|
||||
super::Op::Declare(super::tree::DeclareVar {
|
||||
var: var_name,
|
||||
type_: type_name.map(|x| x.0),
|
||||
})
|
||||
}
|
||||
},
|
||||
crate::syntax::Token::Equal => {
|
||||
// Assign
|
||||
let token2 = match self.iter.next() {
|
||||
Some(Err(e)) => return Err(e.into()),
|
||||
Some(Ok(t)) => t,
|
||||
None => return Err(super::LanguageError::UnexpectedEnd(vec![
|
||||
crate::syntax::SyntaxToken {
|
||||
token: crate::syntax::Token::Path(var_name),
|
||||
info: token0.info,
|
||||
},
|
||||
crate::syntax::SyntaxToken {
|
||||
token: crate::syntax::Token::Equal,
|
||||
info: token1.info,
|
||||
}
|
||||
])),
|
||||
};
|
||||
if let crate::syntax::Token::Field(f) = token2.token {
|
||||
let inner_op = self.parse_inner_op(recursion_level + 1)?;
|
||||
super::Op::Assign(crate::statement::tree::AssignVar {
|
||||
var: var_name,
|
||||
field: Some(f),
|
||||
op: Box::new(inner_op),
|
||||
})
|
||||
} else {
|
||||
self.lookahead = Some(token2);
|
||||
let inner_op = self.parse_inner_op(recursion_level + 1)?;
|
||||
super::Op::Assign(crate::statement::tree::AssignVar {
|
||||
var: var_name,
|
||||
field: None,
|
||||
op: Box::new(inner_op),
|
||||
})
|
||||
}
|
||||
},
|
||||
/*crate::syntax::Token::OpenRoundBracket => {
|
||||
// Call
|
||||
let op_params = match self.parse_op_params() {
|
||||
Ok(ops) => ops,
|
||||
Err(e) => {
|
||||
return Err(Self::extend_err_tokens(vec![
|
||||
crate::syntax::SyntaxToken {
|
||||
token: crate::syntax::Token::Path(var_name),
|
||||
info: token0.info,
|
||||
},
|
||||
crate::syntax::SyntaxToken {
|
||||
token: crate::syntax::Token::OpenRoundBracket,
|
||||
info: token1.info,
|
||||
},
|
||||
], e));
|
||||
}
|
||||
};
|
||||
self.lookahead.take().unwrap(); // always a closing round bracket; no need to verify
|
||||
let token_filter = match self.iter.next() {
|
||||
Some(Err(e)) => return Err(e.into()),
|
||||
Some(Ok(t)) => t,
|
||||
None => return Err(super::LanguageError::UnexpectedEnd(vec![
|
||||
crate::syntax::SyntaxToken {
|
||||
token: crate::syntax::Token::Path(var_name),
|
||||
info: token0.info,
|
||||
},
|
||||
crate::syntax::SyntaxToken {
|
||||
token: crate::syntax::Token::Equal,
|
||||
info: token1.info,
|
||||
},
|
||||
// TODO include tokens from op_params
|
||||
])),
|
||||
};
|
||||
if let crate::syntax::Token::Functional(crate::syntax::Functional::Filter) = token_filter.token {
|
||||
let filter_op = match self.parse_inner_op(recursion_level + 1) {
|
||||
Ok(x) => x,
|
||||
Err(e) => {
|
||||
return Err(Self::extend_err_tokens(vec![
|
||||
crate::syntax::SyntaxToken {
|
||||
token: crate::syntax::Token::Path(var_name),
|
||||
info: token0.info,
|
||||
},
|
||||
crate::syntax::SyntaxToken {
|
||||
token: crate::syntax::Token::Equal,
|
||||
info: token1.info,
|
||||
},
|
||||
// TODO include tokens from op_params
|
||||
], e))
|
||||
}
|
||||
};
|
||||
super::Op::Call(super::CallVar {
|
||||
var: var_name,
|
||||
params: op_params,
|
||||
})
|
||||
} else {
|
||||
return Err(super::LanguageError::UnexpectedToken(token_filter));
|
||||
}
|
||||
}*/
|
||||
t => {
|
||||
// Retrieve
|
||||
self.lookahead = Some(crate::syntax::SyntaxToken {
|
||||
token: t,
|
||||
info: token1.info,
|
||||
});
|
||||
super::Op::Retrieve(var_name)
|
||||
}
|
||||
}
|
||||
},
|
||||
crate::syntax::Token::Functional(fun) => {
|
||||
// Call
|
||||
match self.parse_function_call(fun.clone()) {
|
||||
Ok(x) => super::Op::Call(x),
|
||||
Err(e) => {
|
||||
return Err(Self::extend_err_tokens(vec![
|
||||
crate::syntax::SyntaxToken {
|
||||
token: crate::syntax::Token::Functional(fun),
|
||||
info: token0.info,
|
||||
}
|
||||
], e));
|
||||
}
|
||||
}
|
||||
}
|
||||
crate::syntax::Token::Operation(unary_op) => {
|
||||
// Unary operation
|
||||
let inner_op = match self.parse_inner_op(recursion_level + 1) {
|
||||
Ok(op) => op,
|
||||
Err(e) => {
|
||||
return Err(Self::extend_err_tokens(vec![
|
||||
crate::syntax::SyntaxToken {
|
||||
token: crate::syntax::Token::Operation(unary_op),
|
||||
info: token0.info,
|
||||
}
|
||||
], e));
|
||||
}
|
||||
};
|
||||
super::Op::Unary(super::tree::Unary {
|
||||
first: Box::new(inner_op),
|
||||
op: unary_op,
|
||||
})
|
||||
},
|
||||
crate::syntax::Token::Literal(literal) => {
|
||||
super::Op::Literal(literal)
|
||||
},
|
||||
crate::syntax::Token::OpenRoundBracket => {
|
||||
// Operation surrounded by brackets
|
||||
let inner_op = match self.parse_inner_op(recursion_level + 1) {
|
||||
Ok(op) => op,
|
||||
Err(e) => {
|
||||
return Err(Self::extend_err_tokens(vec![
|
||||
crate::syntax::SyntaxToken {
|
||||
token: crate::syntax::Token::OpenRoundBracket,
|
||||
info: token0.info,
|
||||
}
|
||||
], e));
|
||||
}
|
||||
};
|
||||
let token_last = if let Some(lookahead) = self.lookahead.take() {
|
||||
lookahead
|
||||
} else {
|
||||
match self.iter.next() {
|
||||
Some(Err(e)) => return Err(e.into()),
|
||||
Some(Ok(t)) => t,
|
||||
// TODO include all tokens from inner_op
|
||||
None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
|
||||
}
|
||||
};
|
||||
if let crate::syntax::Token::CloseRoundBracket = token_last.token {
|
||||
super::Op::Bracketed(Box::new(inner_op))
|
||||
} else {
|
||||
// TODO maybe? include all tokens from inner_op
|
||||
return Err(super::LanguageError::UnexpectedToken(token_last));
|
||||
}
|
||||
}
|
||||
t => {
|
||||
return Err(super::LanguageError::InvalidSyntax(vec![
|
||||
crate::syntax::SyntaxToken {
|
||||
token: t,
|
||||
info: token0.info,
|
||||
}
|
||||
]));
|
||||
}
|
||||
};
|
||||
|
||||
// check if operation continues (i.e. is dyadic)
|
||||
let lookahead = if let Some(lookahead) = self.lookahead.take() {
|
||||
lookahead
|
||||
} else {
|
||||
match self.iter.next() {
|
||||
Some(Err(e)) => return Err(e.into()),
|
||||
Some(Ok(t)) => t,
|
||||
None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
|
||||
}
|
||||
};
|
||||
if let crate::syntax::Token::Operation(dyadic_op) = lookahead.token {
|
||||
let op1 = match self.parse_inner_op(recursion_level + 1) {
|
||||
Ok(op) => op,
|
||||
Err(e) => {
|
||||
return Err(Self::extend_err_tokens(vec![
|
||||
// TODO add tokens of op0 too
|
||||
crate::syntax::SyntaxToken {
|
||||
token: crate::syntax::Token::Operation(dyadic_op),
|
||||
info: token0.info,
|
||||
}
|
||||
], e));
|
||||
}
|
||||
};
|
||||
Ok(super::Op::Dyadic(super::Dyadic {
|
||||
first: Box::new(op0),
|
||||
op: dyadic_op,
|
||||
second: Box::new(op1),
|
||||
}))
|
||||
} else {
|
||||
self.lookahead = Some(lookahead);
|
||||
Ok(op0)
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_op_params(&mut self) -> Result<Vec<super::Op>, super::LanguageError> {
|
||||
let mut lookahead = if let Some(lookahead) = self.lookahead.take() {
|
||||
lookahead
|
||||
} else {
|
||||
match self.iter.next() {
|
||||
Some(Err(e)) => return Err(e.into()),
|
||||
Some(Ok(t)) => t,
|
||||
None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
|
||||
}
|
||||
};
|
||||
let mut ops = Vec::new();
|
||||
while !matches!(lookahead.token, crate::syntax::Token::CloseRoundBracket) {
|
||||
self.lookahead = Some(lookahead);
|
||||
ops.push(self.parse_op()?);
|
||||
lookahead = if let Some(lookahead) = self.lookahead.take() {
|
||||
lookahead
|
||||
} else {
|
||||
match self.iter.next() {
|
||||
Some(Err(e)) => return Err(e.into()),
|
||||
Some(Ok(t)) => t,
|
||||
// TODO add tokens of previous op(s)
|
||||
None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
|
||||
}
|
||||
};
|
||||
}
|
||||
self.lookahead = Some(lookahead);
|
||||
Ok(ops)
|
||||
}
|
||||
|
||||
// [!] no unhandled lookaheads
|
||||
fn parse_param(&mut self) -> Result<super::Param, super::LanguageError> {
|
||||
let token0 = if let Some(lookahead) = self.lookahead.take() {
|
||||
lookahead
|
||||
} else {
|
||||
match self.iter.next() {
|
||||
Some(Err(e)) => return Err(e.into()),
|
||||
Some(Ok(t)) => t,
|
||||
None => return Err(super::LanguageError::UnexpectedEnd(Vec::new())),
|
||||
}
|
||||
};
|
||||
if let crate::syntax::Token::Path(var_name) = token0.token {
|
||||
let token1 = match self.iter.next() {
|
||||
Some(Err(e)) => return Err(e.into()),
|
||||
Some(Ok(t)) => t,
|
||||
None => return Err(super::LanguageError::UnexpectedEnd(vec![
|
||||
crate::syntax::SyntaxToken {
|
||||
token: crate::syntax::Token::Path(var_name),
|
||||
info: token0.info,
|
||||
}
|
||||
])),
|
||||
};
|
||||
match token1.token {
|
||||
crate::syntax::Token::Colon => {
|
||||
// with type declaration
|
||||
let token2 = match self.iter.next() {
|
||||
Some(Err(e)) => return Err(e.into()),
|
||||
Some(Ok(t)) => t,
|
||||
None => return Err(super::LanguageError::UnexpectedEnd(vec![
|
||||
crate::syntax::SyntaxToken {
|
||||
token: crate::syntax::Token::Path(var_name),
|
||||
info: token0.info,
|
||||
},
|
||||
crate::syntax::SyntaxToken {
|
||||
token: crate::syntax::Token::Colon,
|
||||
info: token1.info,
|
||||
}
|
||||
])),
|
||||
};
|
||||
if let crate::syntax::Token::Path(ty_name) = token2.token {
|
||||
Ok(super::Param {
|
||||
name: var_name,
|
||||
type_: Some(ty_name),
|
||||
})
|
||||
} else {
|
||||
Err(super::LanguageError::InvalidSyntax(vec![
|
||||
crate::syntax::SyntaxToken {
|
||||
token: crate::syntax::Token::Path(var_name),
|
||||
info: token0.info,
|
||||
},
|
||||
crate::syntax::SyntaxToken {
|
||||
token: crate::syntax::Token::Colon,
|
||||
info: token1.info,
|
||||
},
|
||||
token2,
|
||||
]))
|
||||
}
|
||||
},
|
||||
crate::syntax::Token::Semicolon => {
|
||||
// without type declaration
|
||||
Ok(super::Param {
|
||||
name: var_name,
|
||||
type_: None,
|
||||
})
|
||||
},
|
||||
t => Err(super::LanguageError::InvalidSyntax(vec![
|
||||
crate::syntax::SyntaxToken {
|
||||
token: crate::syntax::Token::Path(var_name),
|
||||
info: token0.info,
|
||||
},
|
||||
crate::syntax::SyntaxToken {
|
||||
token: t,
|
||||
info: token1.info,
|
||||
}
|
||||
]))
|
||||
}
|
||||
} else {
|
||||
Err(super::LanguageError::InvalidSyntax(vec![token0]))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl <'a> LanguageParser<'a, crate::syntax::TokenParser<'a, logos::Lexer<'a, crate::lexer::Token>>> {
    /// Convenience constructor wiring the whole pipeline for a source
    /// string: lexer -> syntax token parser -> language parser.
    pub fn lex(s: &'a str) -> Self {
        let lex_tokens = crate::lexer::Token::tokenify(s);
        let syntax_tokens = crate::syntax::TokenParser::new(lex_tokens);
        Self::new(syntax_tokens)
    }
}
|
||||
|
||||
impl <'a, I: core::iter::Iterator<Item=Result<crate::syntax::SyntaxToken, crate::syntax::SyntaxError>> + 'a> core::iter::Iterator for LanguageParser<'a, I> {
    type Item = Result<super::Statement, super::LanguageError>;

    /// Produces the next top-level statement, or `None` at end of input.
    ///
    /// Dispatches on the first token:
    /// - `Path` followed by `{` opens a module, by `(` a function
    ///   declaration, by `=` a type declaration;
    /// - `Functional` starts an entrypoint call;
    /// - `}` closes the innermost open module.
    ///
    /// While a module is open, each parsed statement is captured into it
    /// and a `Notification::CapturedByModule` is yielded instead.
    fn next(&mut self) -> Option<Self::Item> {
        // Honour a pending lookahead token before pulling from the iterator.
        let opt_next_token = if let Some(token) = self.lookahead.take() {
            Some(token)
        } else {
            match self.iter.next() {
                Some(Err(e)) => return Some(Err(e.into())),
                Some(Ok(t)) => Some(t),
                None => None,
            }
        };
        if let Some(token) = opt_next_token {
            let statement = match token.token {
                crate::syntax::Token::Path(p0) => {
                    // A leading path can begin a module, a function
                    // declaration or a type declaration; peek one more
                    // token to decide which.
                    let next_token = match self.iter.next() {
                        Some(Err(e)) => return Some(Err(e.into())),
                        Some(Ok(t)) => t,
                        None => return Some(Err(super::LanguageError::UnexpectedEnd(vec![
                            crate::syntax::SyntaxToken {
                                token: crate::syntax::Token::Path(p0),
                                info: token.info,
                            }
                        ]))),
                    };
                    match next_token.token {
                        crate::syntax::Token::OpenCurlyBracket => {
                            // module
                            match self.parse_incomplete_module(p0.clone()) {
                                Ok(module) => self.incomplete_modules.push(module),
                                Err(e) => return Some(Err(e)),
                            }
                            // skip capturing by module by immediately returning
                            return Some(Ok(super::Statement::Notification(super::Notification::EnteringModule(p0))));
                        },
                        crate::syntax::Token::OpenRoundBracket => {
                            // function declaration
                            match self.parse_function_decl(p0) {
                                Ok(fn_decl) => super::Statement::Declare(super::Declare::Function(fn_decl)),
                                Err(e) => return Some(Err(e)),
                            }
                        },
                        crate::syntax::Token::Equal => {
                            // type declaration
                            match self.parse_type_decl(p0) {
                                Ok(ty_decl) => super::Statement::Declare(super::Declare::Type(ty_decl)),
                                Err(e) => return Some(Err(e)),
                            }
                        },
                        unrecognized => return Some(Err(super::LanguageError::InvalidSyntax(vec![
                            crate::syntax::SyntaxToken {
                                token: crate::syntax::Token::Path(p0),
                                info: token.info,
                            },
                            crate::syntax::SyntaxToken {
                                token: unrecognized,
                                info: next_token.info,
                            }
                        ]))),
                    }
                },
                crate::syntax::Token::Functional(fun) => {
                    // entrypoint function call
                    match self.parse_function_call(fun) {
                        Ok(f) => super::Statement::Entrypoint(f),
                        Err(e) => return Some(Err(e))
                    }
                }
                crate::syntax::Token::CloseCurlyBracket => {
                    // `}` closes the innermost open module; with no open
                    // module it is a syntax error.
                    if let Some(module) = self.incomplete_modules.pop() {
                        super::Statement::Module(module)
                    } else {
                        return Some(Err(super::LanguageError::UnexpectedToken(crate::syntax::SyntaxToken {
                            token: crate::syntax::Token::CloseCurlyBracket,
                            info: token.info
                        })));
                    }
                }
                t => return Some(Err(super::LanguageError::UnexpectedToken(crate::syntax::SyntaxToken {
                    token: t,
                    info: token.info
                }))),
            };
            // Capture the statement into the innermost open module, if any,
            // yielding a notification in its place.
            if let Some(mut module) = self.incomplete_modules.pop() {
                module.inner.push(statement.clone());
                self.incomplete_modules.push(module);
                Some(Ok(super::Statement::Notification(super::Notification::CapturedByModule(Box::new(statement)))))
            } else {
                Some(Ok(statement))
            }
        } else {
            None
        }
    }
}
|
176
crates/lang/src/statement/tree.rs
Normal file
176
crates/lang/src/statement/tree.rs
Normal file
|
@ -0,0 +1,176 @@
|
|||
/// Statement declaration
///
/// Statement -> Declare
/// Statement -> Module
/// Statement -> CallFun [entrypoint]
///
/// Statements -> Statement Statements
/// Statements -> DONE
#[derive(Debug, PartialEq, Clone)]
pub enum Statement {
    /// Function or type declaration.
    Declare(Declare),
    /// A fully-parsed module together with its captured statements.
    Module(Module),
    /// Top-level function call acting as an entrypoint.
    Entrypoint(CallFun),
    /// Parser-internal pseudo-statement (see [`Notification`]).
    Notification(Notification),
}
|
||||
|
||||
impl Statement {
|
||||
pub fn is_ignore(&self) -> bool {
|
||||
matches!(self, Self::Notification(_))
|
||||
}
|
||||
}
|
||||
|
||||
/// Fake tokens emitted by parser to avoid excessive recursion
#[derive(Debug, PartialEq, Clone)]
pub enum Notification {
    /// A module header (`name {`) was just consumed; its body follows.
    EnteringModule(crate::syntax::Path),
    /// A statement was parsed inside an open module and captured into it.
    CapturedByModule(Box<Statement>),
}
|
||||
|
||||
/// Param declaration
///
/// Param -> Variable: Type
/// OR (depending on context)
/// Param -> Variable
///
/// Params -> Param; Params [semicolon-separated]
/// Params -> ;
/// Params -> Param
#[derive(Debug, PartialEq, Clone)]
pub struct Param {
    /// Parameter (or field) name.
    pub name: crate::syntax::Path,
    /// Declared type; `None` where the grammar allows omitting it.
    pub type_: Option<crate::syntax::Path>,
}
|
||||
|
||||
/// Function or Type declaration
///
/// Declare -> DeclareFun
/// Declare -> DeclareType
#[derive(Debug, PartialEq, Clone)]
pub enum Declare {
    /// `Name (Params) Func { Ops }` — see [`DeclareFun`].
    Function(DeclareFun),
    /// `Name = { Params }` — see [`DeclareType`].
    Type(DeclareType),
}
|
||||
|
||||
/// Function declaration
///
/// DeclareFun -> Name (Params) Func { Ops }
/// Func -> => [map]
/// Func -> x> [filter]
/// Func -> ~> [sort]
/// Func -> <> [generator]
#[derive(Debug, PartialEq, Clone)]
pub struct DeclareFun {
    /// Declared function name.
    pub name: crate::syntax::Path,
    /// Formal parameters (type annotations optional per [`Param`]).
    pub params: Vec<Param>,
    /// Functional kind: map / filter / sort / generator.
    pub type_: crate::syntax::Functional,
    /// Function body as a sequence of operations.
    pub ops: Vec<Op>,
}
|
||||
|
||||
/// Type declaration
///
/// DeclareType -> Name = { Params }
#[derive(Debug, PartialEq, Clone)]
pub struct DeclareType {
    /// Declared type name.
    pub name: crate::syntax::Path,
    /// Fields of the type, reusing the [`Param`] shape.
    pub params: Vec<Param>,
}
|
||||
|
||||
/// Module declaration
///
/// Module -> Variable { Statements }
#[derive(Debug, PartialEq, Clone)]
pub struct Module {
    /// Module name.
    pub name: crate::syntax::Path,
    /// Statements captured while the module was open.
    pub inner: Vec<Statement>,
}
|
||||
|
||||
/// Operation declaration
///
/// Op -> Variable := Op [declare-assign]
/// Op -> Variable: Type [declare]
/// Op -> Variable = Op [assign]
/// Op -> Variable Fields [retrieve]
/// Op -> CallFun [invoke]
/// Op -> Op DualOp Op [dyadic]
/// Op -> UnaryOp Op [unary]
/// Op -> Literal [literal]
/// Op -> Bracketed [bracketed]
/// Fields ->
/// Fields -> .Name Fields
/// DualOp -> SetOp
/// DualOp -> n
/// DualOp -> u
/// DualOp -> +
/// DualOp -> -
/// DualOp -> *
/// DualOp -> /
/// DualOp -> &&
/// DualOp -> ||
/// SetOp -> n
/// SetOp -> u
/// UnaryOp -> -
/// Literal -> "Name"
/// Literal -> Integer
/// Literal -> Float
/// Bracketed -> (Op)
///
///
/// Ops -> Op; Ops [semicolon-separated]
/// Ops -> ;
/// Ops -> DONE
#[derive(Debug, PartialEq, Clone)]
pub enum Op {
    /// `x: [T] = op` — declare and assign in one step.
    DeclareAssign(DeclareAssignVar),
    /// `x: T` — declaration without a value.
    Declare(DeclareVar),
    /// `x [.field] = op` — assignment.
    Assign(AssignVar),
    /// Bare variable read.
    Retrieve(crate::syntax::Path),
    /// Function invocation.
    Call(CallFun),
    /// Two operands joined by a dual operator.
    Dyadic(Dyadic),
    /// Single operand with a prefix operator.
    Unary(Unary),
    /// Literal value (string / integer / float).
    Literal(crate::syntax::Literal),
    /// Parenthesised sub-operation.
    Bracketed(Box<Op>),
}
|
||||
|
||||
/// Payload of [`Op::DeclareAssign`]: `var: [type_] = op`.
#[derive(Debug, PartialEq, Clone)]
pub struct DeclareAssignVar {
    /// Variable being declared.
    pub var: crate::syntax::Path,
    /// Optional explicit type.
    pub type_: Option<crate::syntax::Path>,
    /// Right-hand-side operation.
    pub op: Box<Op>,
}
|
||||
|
||||
/// Payload of [`Op::Declare`]: `var: type_` without a value.
#[derive(Debug, PartialEq, Clone)]
pub struct DeclareVar {
    /// Variable being declared.
    pub var: crate::syntax::Path,
    /// Optional explicit type.
    pub type_: Option<crate::syntax::Path>,
}
|
||||
|
||||
/// Payload of [`Op::Assign`]: `var [.field] = op`.
#[derive(Debug, PartialEq, Clone)]
pub struct AssignVar {
    /// Variable being assigned to.
    pub var: crate::syntax::Path,
    /// Optional field accessed on the variable.
    pub field: Option<crate::syntax::Field>,
    /// Right-hand-side operation.
    pub op: Box<Op>,
}
|
||||
|
||||
/// Payload of [`Op::Dyadic`]: `first op second`.
#[derive(Debug, PartialEq, Clone)]
pub struct Dyadic {
    /// Left operand.
    pub first: Box<Op>,
    /// Dual operator joining the operands.
    pub op: crate::syntax::Op,
    /// Right operand.
    pub second: Box<Op>,
}
|
||||
|
||||
/// Payload of [`Op::Unary`]: a prefix operator applied to one operand.
#[derive(Debug, PartialEq, Clone)]
pub struct Unary {
    /// The operand.
    pub first: Box<Op>,
    /// The prefix operator.
    pub op: crate::syntax::Op,
}
|
||||
|
||||
/// Function call declaration
///
/// CallFun -> Func Name (Params)
#[derive(Debug, PartialEq, Clone)]
pub struct CallFun {
    /// Functional kind of the call (map / filter / sort / generator).
    pub type_: crate::syntax::Functional,
    /// Name of the function being called.
    pub var: crate::syntax::Path,
    /// Argument operations.
    pub params: Vec<Op>,
}
|
25
crates/lang/src/syntax/errors.rs
Normal file
25
crates/lang/src/syntax/errors.rs
Normal file
|
@ -0,0 +1,25 @@
|
|||
/// Errors produced while assembling high-level syntax tokens from the
/// raw lexer stream.
#[derive(Debug, PartialEq, Clone)]
pub enum SyntaxError {
    /// A run of lexer tokens that cannot form a valid syntax token
    /// (e.g. a path separator not followed by a variable); carries the
    /// offending tokens as consumed.
    InvalidSequence(Vec<crate::lexer::Token>),
    /// The lexer itself could not recognize the input.
    UnrecognizedToken,
}
|
||||
|
||||
impl core::fmt::Display for SyntaxError {
|
||||
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
|
||||
//use core::fmt::Write;
|
||||
match self {
|
||||
Self::InvalidSequence(seq) => write!(f, "Invalid sequence {:?}", seq.as_slice()),
|
||||
Self::UnrecognizedToken => write!(f, "Unrecognized token"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Marker impl only: SyntaxError carries no underlying source error,
// so the trait's default methods suffice.
impl std::error::Error for SyntaxError {}
|
||||
|
||||
/// Lifts a lexer error into the syntax layer (enables `?` on lexer results).
impl From<crate::lexer::LexError> for SyntaxError {
    fn from(value: crate::lexer::LexError) -> Self {
        // Exhaustive match: adding a LexError variant forces a review here.
        match value {
            crate::lexer::LexError::UnrecognizedToken => Self::UnrecognizedToken,
        }
    }
}
|
66
crates/lang/src/syntax/mod.rs
Normal file
66
crates/lang/src/syntax/mod.rs
Normal file
|
@ -0,0 +1,66 @@
|
|||
//! High-level syntax
//!
//! Builds composite syntax tokens (paths, fields, operators, literals) on
//! top of the raw lexer stream.

// Error type produced while assembling syntax tokens.
mod errors;
pub use errors::SyntaxError;

// Lexer-token -> syntax-token adapter; crate-internal only.
mod parser;
pub(crate) use parser::TokenParser;

// The syntax-token vocabulary itself.
mod tokens;
pub use tokens::{SyntaxToken, Token, Literal, Op, Functional, Field, Path, Comment};
|
||||
|
||||
#[cfg(test)]
mod test {
    use super::*;

    use pretty_assertions::assert_eq;

    // One sample of every token kind the syntax layer produces, in the
    // exact order asserted by `parse_everything` below.
    const ALL_TOKENS_STR: &str = "u n + - * / && || => x> ~> <> = n_u_::is_a_::VaR1AbLe .th1s.is_A_.f13Ld ( ) { } ; -404 -1234.5 \"\" /* block comment */ // line comment \n";

    /// Every token kind in `ALL_TOKENS_STR` tokenifies to the expected
    /// `Token` value, in order.
    #[test]
    fn parse_everything() {
        let expected = vec![
            Token::Operation(Op::Union),
            Token::Operation(Op::Intersection),
            Token::Operation(Op::Plus),
            Token::Operation(Op::Minus),
            Token::Operation(Op::Multiply),
            Token::Operation(Op::Divide),
            Token::Operation(Op::And),
            Token::Operation(Op::Or),
            Token::Functional(Functional::Map),
            Token::Functional(Functional::Filter),
            Token::Functional(Functional::Sort),
            Token::Functional(Functional::Generate),
            Token::Equal,
            Token::Path(Path(vec!["n_u_".into(), "is_a_".into(), "VaR1AbLe".into()])),
            Token::Field(Field(vec!["th1s".into(), "is_A_".into(), "f13Ld".into()])),
            Token::OpenRoundBracket,
            Token::CloseRoundBracket,
            Token::OpenCurlyBracket,
            Token::CloseCurlyBracket,
            Token::Semicolon,
            Token::Literal(Literal::Integer(-404)),
            Token::Literal(Literal::Float(-1234.5)),
            Token::Literal(Literal::String("".into())),
            Token::Comment(Comment::Block(" block comment ".into())),
            Token::Comment(Comment::Line(" line comment ".into())),
        ];

        let mut actual = Vec::new();
        // Check each result as it streams so a failure names the exact
        // offending token index before the final sequence comparison.
        for (index, token_result) in SyntaxToken::tokenify(ALL_TOKENS_STR).enumerate() {
            assert!(token_result.is_ok(), "Token #{} (expected: {:?}) failed to parse: {:?}", index, expected[index], token_result.err());
            actual.push(token_result.unwrap().token);
        }

        assert_eq!(actual, expected)
    }

    /// tokenify followed by stringify reproduces the input text.
    /// The input gets a trailing space appended first — presumably to
    /// match stringify's token-separator output; TODO confirm.
    #[test]
    fn parse_reversability() {
        let expected = format!("{} ", ALL_TOKENS_STR);

        let actual = SyntaxToken::stringify(SyntaxToken::tokenify(&expected).map(|token_result| token_result.unwrap()));

        assert_eq!(actual, expected)
    }
}
|
184
crates/lang/src/syntax/parser.rs
Normal file
184
crates/lang/src/syntax/parser.rs
Normal file
|
@ -0,0 +1,184 @@
|
|||
/// Adapter turning a stream of raw lexer tokens into high-level syntax
/// tokens, merging multi-token runs (`::`-separated paths, `.`-separated
/// fields) into single tokens.
pub(crate) struct TokenParser<'a, I: core::iter::Iterator<Item=Result<crate::lexer::Token, crate::lexer::LexError>> + 'a> {
    // Ties the struct to lifetime 'a without storing a reference.
    _idc: core::marker::PhantomData<&'a ()>,
    // Source of raw lexer tokens.
    iter: I,
    // One-token pushback buffer used while assembling paths/fields.
    lookahead: Option<crate::lexer::Token>,
}
|
||||
|
||||
impl <'a, I: core::iter::Iterator<Item=Result<crate::lexer::Token, crate::lexer::LexError>> + 'a> TokenParser<'a, I> {
    /// Wraps a raw lexer-token iterator; no lookahead is pending initially.
    pub fn new(tokens_in: I) -> Self {
        Self {
            _idc: core::marker::PhantomData,
            iter: tokens_in,
            lookahead: None,
        }
    }

    /// Reconstructs the original `.a.b.c` lexer-token sequence from the
    /// collected field `parts`, pulling one stored `TokenInfo` per emitted
    /// token (in order) out of `infos`. Used to report errors faithfully.
    fn rebuild_field_token_sequence(parts: Vec<String>, infos: &mut Vec<Option<crate::lexer::TokenInfo>>) -> Vec<crate::lexer::Token> {
        let mut rebuilt = Vec::new();
        for part in parts {
            // Each segment was lexed as a Dot followed by a Variable;
            // consume the stored infos in that same order.
            rebuilt.push(crate::lexer::Token::Dot(Self::take_first_some(infos).unwrap()));
            rebuilt.push(crate::lexer::Token::Variable((part, Self::take_first_some(infos).unwrap())));
        }
        rebuilt
    }

    /// Reconstructs the original `a::b::c` lexer-token sequence from the
    /// collected path `parts`, again consuming `infos` in order.
    fn rebuild_path_token_sequence(parts: Vec<String>, infos: &mut Vec<Option<crate::lexer::TokenInfo>>) -> Vec<crate::lexer::Token> {
        let mut rebuilt = Vec::new();
        for part in parts {
            rebuilt.push(crate::lexer::Token::Variable((part, Self::take_first_some(infos).unwrap())));
            rebuilt.push(crate::lexer::Token::PathSeparator(Self::take_first_some(infos).unwrap()));
        }
        // Drop the separator appended after the final segment
        // (pop is a no-op on an empty sequence).
        rebuilt.pop();
        rebuilt
    }

    /// Removes and returns the first `Some` entry in `items`, leaving
    /// `None` in its place; `None` when every slot is already taken.
    fn take_first_some<T>(items: &mut Vec<Option<T>>) -> Option<T> {
        items.iter_mut().find_map(Option::take)
    }
}
|
||||
|
||||
impl <'a, I: core::iter::Iterator<Item=Result<crate::lexer::Token, crate::lexer::LexError>> + 'a> core::iter::Iterator for TokenParser<'a, I> {
|
||||
type Item = Result<super::SyntaxToken, super::SyntaxError>;
|
||||
|
||||
#[inline]
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
let opt_next_lex_token = if let Some(lex_token) = self.lookahead.take() {
|
||||
Some(lex_token)
|
||||
} else {
|
||||
match self.iter.next() {
|
||||
Some(Err(e)) => return Some(Err(e.into())),
|
||||
Some(Ok(t)) => Some(t),
|
||||
None => None,
|
||||
}
|
||||
};
|
||||
if let Some(lex_token) = opt_next_lex_token {
|
||||
let translated = match lex_token {
|
||||
crate::lexer::Token::Union(info) => super::Token::Operation(super::Op::Union).with(info),
|
||||
crate::lexer::Token::Intersection(info) => super::Token::Operation(super::Op::Intersection).with(info),
|
||||
crate::lexer::Token::Plus(info) => super::Token::Operation(super::Op::Plus).with(info),
|
||||
crate::lexer::Token::Minus(info) => super::Token::Operation(super::Op::Minus).with(info),
|
||||
crate::lexer::Token::Multiply(info) => super::Token::Operation(super::Op::Multiply).with(info),
|
||||
crate::lexer::Token::Divide(info) => super::Token::Operation(super::Op::Divide).with(info),
|
||||
crate::lexer::Token::And(info) => super::Token::Operation(super::Op::And).with(info),
|
||||
crate::lexer::Token::Or(info) => super::Token::Operation(super::Op::Or).with(info),
|
||||
crate::lexer::Token::Map(info) => super::Token::Functional(super::Functional::Map).with(info),
|
||||
crate::lexer::Token::Filter(info) => super::Token::Functional(super::Functional::Filter).with(info),
|
||||
crate::lexer::Token::Sort(info) => super::Token::Functional(super::Functional::Sort).with(info),
|
||||
crate::lexer::Token::Generate(info) => super::Token::Functional(super::Functional::Generate).with(info),
|
||||
crate::lexer::Token::Equal(info) => super::Token::Equal.with(info),
|
||||
// A path separator reaching translation without a preceding variable is
// always reported as an invalid sequence.
crate::lexer::Token::PathSeparator(info) => return Some(Err(super::SyntaxError::InvalidSequence(vec![crate::lexer::Token::PathSeparator(info)]))),
crate::lexer::Token::Dot(info) => {
    // read all incoming dots and variable combos into a single path token
    // e.g. [Dot, Variable("x"), Dot, Variable("y"), Dot, Variable("z")] becomes Field(["x", "y", "z"])
    let mut parts = Vec::new();
    // Infos are stored wrapped in Option so the error-recovery code below
    // can `pop()` the most recent one back out to rebuild lexer tokens.
    let mut infos = Vec::new();
    infos.push(Some(info));
    loop {
        // Fetch the token that must follow a dot (expected: a Variable).
        let next_lex_token = match self.iter.next() {
            Some(Err(e)) => return Some(Err(e.into())),
            Some(Ok(t)) => Some(t),
            None => None,
        };
        if let Some(next_lex_token) = next_lex_token {
            match next_lex_token {
                crate::lexer::Token::Variable((part, info)) => {
                    parts.push(part);
                    infos.push(Some(info));
                },
                invalid_token => {
                    // Dot followed by a non-variable: reconstruct the lexer
                    // tokens consumed so far and report them as invalid.
                    let last_dot = crate::lexer::Token::Dot(infos.pop().unwrap().unwrap());
                    let mut sequence = Self::rebuild_field_token_sequence(parts, &mut infos);
                    sequence.push(last_dot);
                    sequence.push(invalid_token);
                    return Some(Err(super::SyntaxError::InvalidSequence(sequence)));
                }
            }
        } else {
            // Input ended on a trailing dot: same recovery, minus the
            // offending follower token.
            let last_dot = crate::lexer::Token::Dot(infos.pop().unwrap().unwrap());
            let mut sequence = Self::rebuild_field_token_sequence(parts, &mut infos);
            sequence.push(last_dot);
            return Some(Err(super::SyntaxError::InvalidSequence(sequence)));
        }
        // Peek one token ahead: another Dot continues the field chain;
        // anything else stays stashed in `self.lookahead` for the next call.
        self.lookahead = match self.iter.next() {
            Some(Err(e)) => return Some(Err(e.into())),
            Some(Ok(t)) => Some(t),
            None => None,
        };
        if let Some(crate::lexer::Token::Dot(info)) = &self.lookahead {
            infos.push(Some(info.to_owned()));
            self.lookahead = None;
        } else {
            break;
        }
    }
    super::SyntaxToken {
        token: super::Token::Field(super::Field(parts)),
        // Every Option is still Some here: each pop() above returned early.
        info: infos.into_iter().map(|x| x.unwrap()).collect(),
    }
}
crate::lexer::Token::Variable((root, info)) => {
    // read all incoming path separators and variable combos into a single path token
    // e.g. [Variable("x"), PathSeparator, Variable("y"), PathSeparator, Variable("z")] becomes Path(["x", "y", "z"])
    self.lookahead = match self.iter.next() {
        Some(Err(e)) => return Some(Err(e.into())),
        Some(Ok(t)) => Some(t),
        None => None,
    };
    let mut parts = Vec::new();
    let mut infos = Vec::new();
    parts.push(root);
    infos.push(Some(info));
    while let Some(crate::lexer::Token::PathSeparator(path_info)) = &self.lookahead {
        infos.push(Some(path_info.to_owned()));
        // A separator must be followed by another variable segment.
        let next_lex_token = match self.iter.next() {
            Some(Err(e)) => return Some(Err(e.into())),
            Some(Ok(t)) => Some(t),
            None => None,
        };
        if let Some(crate::lexer::Token::Variable((part, info))) = next_lex_token {
            parts.push(part);
            infos.push(Some(info));
        } else {
            // Separator followed by a non-variable (or end of input):
            // rebuild and report the lexer tokens consumed so far.
            let last_sep = crate::lexer::Token::PathSeparator(infos.pop().unwrap().unwrap());
            let mut sequence = Self::rebuild_path_token_sequence(parts, &mut infos);
            sequence.push(last_sep);
            // NOTE(review): both branches return the same error; only the
            // push of the trailing token differs, so this if/else could be
            // collapsed to `sequence.extend(next_lex_token)`.
            if let Some(lex_token) = next_lex_token {
                sequence.push(lex_token);
                return Some(Err(super::SyntaxError::InvalidSequence(sequence)))
            } else {
                return Some(Err(super::SyntaxError::InvalidSequence(sequence)));
            }
        }
        self.lookahead = match self.iter.next() {
            Some(Err(e)) => return Some(Err(e.into())),
            Some(Ok(t)) => Some(t),
            None => None,
        };
    }
    super::SyntaxToken {
        token: super::Token::Path(super::Path(parts)),
        // Every Option is still Some here: each pop() above returned early.
        info: infos.into_iter().map(|x| x.unwrap()).collect(),
    }
},
// One-to-one translations: each remaining lexer token maps to a single
// syntax token carrying the same positional info.
crate::lexer::Token::OpenRoundBracket(info) => super::Token::OpenRoundBracket.with(info),
crate::lexer::Token::CloseRoundBracket(info) => super::Token::CloseRoundBracket.with(info),
crate::lexer::Token::OpenCurlyBracket(info) => super::Token::OpenCurlyBracket.with(info),
crate::lexer::Token::CloseCurlyBracket(info) => super::Token::CloseCurlyBracket.with(info),
crate::lexer::Token::Colon(info) => super::Token::Colon.with(info),
crate::lexer::Token::Semicolon(info) => super::Token::Semicolon.with(info),
crate::lexer::Token::Integer((int, info)) => super::Token::Literal(super::Literal::Integer(int)).with(info),
crate::lexer::Token::Float((float, info)) => super::Token::Literal(super::Literal::Float(float)).with(info),
crate::lexer::Token::String((s, info)) => super::Token::Literal(super::Literal::String(s)).with(info),
crate::lexer::Token::LongComment((c, info)) => super::Token::Comment(super::tokens::Comment::Block(c)).with(info),
crate::lexer::Token::ShortComment((c, info)) => super::Token::Comment(super::tokens::Comment::Line(c)).with(info),
// Newlines are expected to be filtered out before translation; hitting one
// here is a bug in the caller, hence the panic.
crate::lexer::Token::Newline(_) => panic!("Got non-ignored newline"),
};
Some(Ok(translated))
} else {
    None
}
}
}
|
188
crates/lang/src/syntax/tokens.rs
Normal file
188
crates/lang/src/syntax/tokens.rs
Normal file
|
@ -0,0 +1,188 @@
|
|||
/// A syntax-level token paired with the positional info of every lexer
/// token that was merged into it.
#[derive(Debug, PartialEq, Clone)]
pub struct SyntaxToken {
    pub token: Token,
    // One entry per underlying lexer token: multi-token constructs
    // (Field/Path) carry several infos, simple tokens carry exactly one.
    pub info: Vec<crate::lexer::TokenInfo>,
}
|
||||
|
||||
/// Tokens produced by the syntax layer, one step above raw lexer tokens.
#[derive(Debug, PartialEq, Clone)]
pub enum Token {
    /// A set/arithmetic/logical operator; see [`Op`].
    Operation(Op),
    /// A functional pipeline operator; see [`Functional`].
    Functional(Functional),

    // Basics
    Equal,
    /// Dotted field access, merged from `Dot`/`Variable` lexer tokens.
    Field(Field),
    /// `::`-separated path, merged from `Variable`/`PathSeparator` lexer tokens.
    Path(Path),
    OpenRoundBracket,
    CloseRoundBracket,
    OpenCurlyBracket,
    CloseCurlyBracket,
    Colon,
    Semicolon,

    Literal(Literal),
    Comment(Comment),
}
|
||||
|
||||
impl Token {
|
||||
pub(super) fn with(self, info: crate::lexer::TokenInfo) -> SyntaxToken {
|
||||
SyntaxToken { token: self, info: vec![info] }
|
||||
}
|
||||
}
|
||||
|
||||
/// Parts of a dotted field access; `.x.y.z` is stored as `Field(["x", "y", "z"])`.
#[derive(Debug, PartialEq, Clone)]
pub struct Field(pub Vec<String>);
|
||||
|
||||
/// Segments of a `::`-separated path; `x::y::z` is stored as `Path(["x", "y", "z"])`.
#[derive(Debug, PartialEq, Clone)]
pub struct Path(pub Vec<String>);
|
||||
|
||||
/// Binary operators; source spellings are given by [`Op::as_str`].
#[derive(Debug, PartialEq, Clone)]
pub enum Op {
    // Set operations
    Union,        // "u"
    Intersection, // "n"
    // Arithmetic operations (also applicable to sets)
    Plus,     // "+"
    Minus,    // "-"
    Multiply, // "*"
    Divide,   // "/"
    // Logical operations
    And, // "&&"
    Or,  // "||"
}
|
||||
|
||||
impl Op {
|
||||
pub fn as_str(&self) -> &'static str {
|
||||
match self {
|
||||
Self::Union => "u",
|
||||
Self::Intersection => "n",
|
||||
Self::Plus => "+",
|
||||
Self::Minus => "-",
|
||||
Self::Multiply => "*",
|
||||
Self::Divide => "/",
|
||||
Self::And => "&&",
|
||||
Self::Or => "||",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Functional pipeline operators; source spellings are given by
/// [`Functional::as_str`].
#[derive(Debug, PartialEq, Clone)]
pub enum Functional {
    Map,      // "=>"
    Filter,   // "x>"
    Sort,     // "~>"
    Generate, // "<>"
}
|
||||
|
||||
impl Functional {
|
||||
pub fn as_str(&self) -> &'static str {
|
||||
match self {
|
||||
Self::Map => "=>",
|
||||
Self::Filter => "x>",
|
||||
Self::Sort => "~>",
|
||||
Self::Generate => "<>",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Literal values carried by [`Token::Literal`].
#[derive(Debug, PartialEq, Clone)]
pub enum Literal {
    Integer(i64),
    Float(f64),
    // Stored without the surrounding quotes; as_str() re-adds them.
    String(String),
}
|
||||
|
||||
impl Literal {
|
||||
pub fn as_str(&self) -> String {
|
||||
match self {
|
||||
Self::Integer(int) => format!("{}", int),
|
||||
Self::Float(float) => format!("{}", float),
|
||||
Self::String(s) => format!("\"{}\"", s),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Comment text carried by [`Token::Comment`], without delimiters.
#[derive(Debug, PartialEq, Clone)]
pub enum Comment {
    // Rendered as "//...\n" by as_str().
    Line(String),
    // Rendered as "/*...*/" by as_str().
    Block(String),
}
|
||||
|
||||
impl Comment {
|
||||
pub fn as_str(&self) -> String {
|
||||
match self {
|
||||
Self::Line(comment) => format!("//{}\n", comment),
|
||||
Self::Block(comment) => format!("/*{}*/", comment),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Token {
|
||||
pub fn write_str(&self, result: &mut String) -> std::fmt::Result {
|
||||
use core::fmt::Write;
|
||||
match self {
|
||||
Self::Operation(op) => write!(result, "{}", op.as_str()),
|
||||
Self::Functional(fun) => write!(result, "{}", fun.as_str()),
|
||||
Self::Equal => write!(result, "="),
|
||||
Self::Field(parts) => {
|
||||
for p in parts.0.iter() {
|
||||
write!(result, ".{}", p).unwrap();
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
Self::Path(parts) => {
|
||||
for (i, p) in parts.0.iter().enumerate() {
|
||||
write!(result, "{}", p).unwrap();
|
||||
if i != parts.0.len() - 1 {
|
||||
write!(result, "::").unwrap();
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
Self::OpenRoundBracket => write!(result, "("),
|
||||
Self::CloseRoundBracket => write!(result, ")"),
|
||||
Self::OpenCurlyBracket => write!(result, "{{"),
|
||||
Self::CloseCurlyBracket => write!(result, "}}"),
|
||||
Self::Colon => write!(result, ":"),
|
||||
Self::Semicolon => write!(result, ";"),
|
||||
Self::Literal(l) => write!(result, "{}", l.as_str()),
|
||||
Self::Comment(c) => write!(result, "{}", c.as_str()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl SyntaxToken {
|
||||
pub fn tokenify<'a>(s: &'a str) -> impl core::iter::Iterator<Item=Result<Self, super::SyntaxError>> + 'a {
|
||||
super::TokenParser::new(crate::lexer::Token::tokenify(s))
|
||||
}
|
||||
|
||||
pub fn stringify<'a>(tokens: impl core::iter::Iterator<Item=Self> + 'a) -> String {
|
||||
use core::fmt::Write;
|
||||
let mut result = String::new();
|
||||
tokens.for_each(|t| {
|
||||
t.write_str(&mut result).unwrap();
|
||||
write!(result, " ").unwrap();
|
||||
});
|
||||
result
|
||||
}
|
||||
|
||||
pub fn stringify_ref<'a, 'b>(tokens: impl core::iter::Iterator<Item=&'b Self> + 'a) -> String {
|
||||
use core::fmt::Write;
|
||||
let mut result = String::new();
|
||||
tokens.for_each(|t| {
|
||||
t.write_str(&mut result).unwrap();
|
||||
write!(result, " ").unwrap();
|
||||
});
|
||||
result
|
||||
}
|
||||
|
||||
pub fn as_str(&self) -> String {
|
||||
let mut s = String::new();
|
||||
self.write_str(&mut s).unwrap();
|
||||
s
|
||||
}
|
||||
|
||||
pub fn write_str(&self, result: &mut String) -> std::fmt::Result {
|
||||
self.token.write_str(result)
|
||||
}
|
||||
}
|
3
src/main.rs
Normal file
3
src/main.rs
Normal file
|
@ -0,0 +1,3 @@
|
|||
/// Placeholder binary entry point.
fn main() {
    println!("Hello, world!");
}
|
Loading…
Reference in a new issue