Compare commits

..

9 commits

Author SHA1 Message Date
Erin b8a9cadc04 Multiple things 2023-10-04 19:02:02 +02:00
Erin b3be2a1358 New parser 2023-10-04 18:59:44 +02:00
Erin 47d44dcd04 Added some syntax 2023-10-04 02:38:27 +02:00
Erin 840c75f891 Macro 2023-10-03 23:28:57 +02:00
Erin 1190ac5bce AST goes brrrr 2023-10-03 02:20:25 +02:00
Erin b70865071a Add KW 2023-10-03 01:44:15 +02:00
Erin 3a26a02661 Lexer texts 2023-10-03 01:38:44 +02:00
Erin 399bd4f6a1 Added lexer 2023-10-03 01:21:47 +02:00
Erin a9a1e22760 Removed syntax 2023-10-03 00:30:31 +02:00
7 changed files with 284 additions and 319 deletions

28
Cargo.lock generated
View file

@ -80,6 +80,26 @@ version = "0.2.148"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9cdc71e17332e86d2e1d38c1f99edcb6288ee11b815fb1a4b049eaa2114d369b" checksum = "9cdc71e17332e86d2e1d38c1f99edcb6288ee11b815fb1a4b049eaa2114d369b"
[[package]]
name = "literify"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd552332051e9b3db140d34a371dcc0ed378b72a9227b5273070af58ea34abf4"
dependencies = [
"litrs",
"proc-macro2",
"quote",
]
[[package]]
name = "litrs"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4f17c3668f3cc1132437cdadc93dab05e52d592f06948d3f64828430c36e4a70"
dependencies = [
"proc-macro2",
]
[[package]] [[package]]
name = "logos" name = "logos"
version = "0.13.0" version = "0.13.0"
@ -118,6 +138,12 @@ version = "1.18.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d"
[[package]]
name = "paste"
version = "1.0.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c"
[[package]] [[package]]
name = "proc-macro2" name = "proc-macro2"
version = "1.0.67" version = "1.0.67"
@ -158,7 +184,9 @@ dependencies = [
"bumpalo", "bumpalo",
"chumsky", "chumsky",
"lasso", "lasso",
"literify",
"logos", "logos",
"paste",
] ]
[[package]] [[package]]

View file

@ -7,4 +7,6 @@ edition = "2021"
bumpalo = { version = "3", features = ["collections"] } bumpalo = { version = "3", features = ["collections"] }
chumsky = "1.0.0-alpha" chumsky = "1.0.0-alpha"
lasso = "0.7" lasso = "0.7"
literify = "0.2"
logos = "0.13" logos = "0.13"
paste = "1.0"

3
rustfmt.toml Normal file
View file

@ -0,0 +1,3 @@
enum_discrim_align_threshold = 16
struct_field_align_threshold = 16
imports_granularity = "one"

View file

@ -1,21 +1,22 @@
// Rhea // Rhea
use bumpalo::Bump; use {logos::Logos, syntax::token::Token};
use logos::Logos;
use std::io::{stdin, Read};
use utils::default;
mod syntax; mod syntax;
mod utils; mod utils;
use {
bumpalo::Bump,
std::io::{stdin, Read},
utils::default,
};
fn main() -> Result<(), Box<dyn std::error::Error>> { fn main() -> Result<(), Box<dyn std::error::Error>> {
let mut buf = default(); let mut buf = default();
stdin().read_to_string(&mut buf)?; stdin().read_to_string(&mut buf)?;
let lexer = syntax::token::Token::lexer_with_extras(&buf, default());
let arena = Bump::new(); let arena = Bump::new();
syntax::parser::parse_lexer(lexer, &arena); syntax::parser::parse_lexer(Token::lexer(&buf), &arena);
Ok(()) Ok(())
} }

View file

@ -1,4 +1,4 @@
use {super::token::IntLit, chumsky::span::SimpleSpan, lasso::Spur}; use {chumsky::span::SimpleSpan, lasso::Spur};
#[derive(Clone, Copy, Debug, PartialEq, Eq)] #[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct Spanned<T> { pub struct Spanned<T> {
@ -8,9 +8,16 @@ pub struct Spanned<T> {
impl<T> Spanned<T> { impl<T> Spanned<T> {
#[inline] #[inline]
pub fn new(item: T, span: SimpleSpan) -> Self { pub const fn new(item: T, span: SimpleSpan) -> Self {
Self { item, span } Self { item, span }
} }
pub fn map<U>(self, mut f: impl FnMut(T) -> U) -> Spanned<U> {
Spanned {
item: f(self.item),
span: self.span,
}
}
} }
pub type SpanExpr<'a> = Spanned<Expr<'a>>; pub type SpanExpr<'a> = Spanned<Expr<'a>>;
@ -24,60 +31,82 @@ pub enum Expr<'a> {
Path(ExprList<'a>), Path(ExprList<'a>),
Literal(Literal), Literal(Literal),
Call(ExprRef<'a>, ExprList<'a>), Call(ExprRef<'a>, ExprList<'a>),
Binary(Spanned<BinaryOperator>, ExprRef<'a>, ExprRef<'a>), Binary(Spanned<BinaryOp>, ExprRef<'a>, ExprRef<'a>),
Unary(Spanned<UnaryOperator>, ExprRef<'a>), Unary(Spanned<UnaryOp>, ExprRef<'a>),
BindLocal(Spanned<Pattern>, ExprRef<'a>, Option<ExprList<'a>>),
BindIn(
Spanned<Pattern>,
ExprRef<'a>,
ExprList<'a>,
Option<ExprList<'a>>,
),
Set(ExprRef<'a>, ExprRef<'a>), Set(ExprRef<'a>, ExprRef<'a>),
Match(ExprRef<'a>, &'a [(Spanned<Pattern>, SpanExpr<'a>)]), Loop(ExprList<'a>),
Func(&'a [(Spanned<Pattern>, Spanned<Type>)], Spanned<Type>, ExprRef<'a>),
Block(ExprList<'a>), Block(ExprList<'a>),
Unit, Definition(Definition<'a>),
Switch {
on: ExprRef<'a>,
branches: &'a [(Spanned<Switcher>, ExprRef<'a>)],
else_: ExprRef<'a>,
},
CondSwitch {
branches: &'a [(ExprRef<'a>, ExprRef<'a>)],
else_: ExprRef<'a>,
},
Break(Option<ExprRef<'a>>),
Return(Option<ExprRef<'a>>),
Uninit,
Continue,
}
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Definition<'a> {
Binding {
kind: DefKind,
ident: Spanned<Ident>,
ty: Option<Spanned<Type>>,
init: ExprRef<'a>,
},
Func {
name: Spanned<Ident>,
params: &'a [(Spanned<Ident>, Spanned<Type>)],
ret: Spanned<Type>,
body: ExprList<'a>,
},
} }
#[derive(Clone, Copy, Debug, PartialEq, Eq)] #[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Type { pub enum Type {
Ident(Ident), Ident(Ident),
Unit,
} }
#[derive(Clone, Copy, Debug, PartialEq, Eq)] #[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Pattern { pub enum Switcher {
Ident(Ident),
Literal(Literal), Literal(Literal),
None, }
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum DefKind {
Const,
Var,
} }
#[derive(Clone, Copy, Debug, PartialEq, Eq)] #[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Literal { pub enum Literal {
String(Spur), String(Spur),
Integer(IntLit), Int(u64),
} }
#[derive(Clone, Copy, Debug, PartialEq, Eq)] #[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum BinaryOperator { pub enum BinaryOp {
Plus, Plus,
Minus, Minus,
Star, Star,
Slash, Slash,
And, Equ,
VLine, Neq,
Lt, Lt,
Gt, Gt,
Equ, LtEq,
Nequ, GtEq,
LtEqu,
GtEqu,
} }
#[derive(Clone, Copy, Debug, PartialEq, Eq)] #[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum UnaryOperator { pub enum UnaryOp {
Tilde,
Minus, Minus,
Not,
Star, Star,
} }

View file

@ -1,9 +1,9 @@
use super::ast::Type; use super::ast::{DefKind, Expr, ExprList, Type};
use { use {
super::{ super::{
ast::{BinaryOperator, Expr, Literal, Pattern, SpanExpr, Spanned, UnaryOperator}, ast::{Definition, Ident, SpanExpr, Spanned},
token::Token, token::{Token, T},
}, },
crate::utils::Pipe, crate::utils::Pipe,
bumpalo::Bump, bumpalo::Bump,
@ -23,195 +23,70 @@ macro_rules! equivmap {
}; };
} }
fn expr<'a, I>() -> impl Parser<'a, I, SpanExpr<'a>, Extra<'a>> + Clone fn ident<'a, I>() -> impl Parser<'a, I, Spanned<Ident>, Extra<'a>> + Clone + Copy
where where
I: Input<'a, Token = Token, Span = SimpleSpan> + ValueInput<'a>, I: Input<'a, Token = Token, Span = SimpleSpan> + ValueInput<'a>,
{ {
recursive(|expr| { select!(Token::Ident(id) => id).map_with_span(Spanned::new)
let ident = select!(Token::Ident(id) => id);
let literal = select! {
Token::Int(a) => Literal::Integer(a),
Token::String(a) => Literal::String(a)
};
let pattern = select! {
Token::Ident(id) => Pattern::Ident(id),
Token::Underscore => Pattern::None,
} }
.or(literal.map(Pattern::Literal))
.map_with_span(Spanned::new);
let type_ = just([Token::LeftParen, Token::RightParen]) fn ty<'a, I>() -> impl Parser<'a, I, Spanned<Type>, Extra<'a>> + Clone + Copy
.to(Type::Unit) where
.or(ident.map(Type::Ident)) I: Input<'a, Token = Token, Span = SimpleSpan> + ValueInput<'a>,
.map_with_span(Spanned::new); {
ident().map(|i| i.map(Type::Ident))
}
let block = expr fn definition<'a, I>() -> impl Parser<'a, I, Spanned<Definition<'a>>, Extra<'a>> + Clone
.clone() where
.separated_by(just(Token::Semicolon)) I: Input<'a, Token = Token, Span = SimpleSpan> + ValueInput<'a>,
.allow_trailing() {
.pipe(arena_collect) let ident = ident();
.delimited_by(just(Token::LeftCurly), just(Token::RightCurly)); let ty = ty();
let func = just(Token::Func) let func = just(T!["func"])
.ignore_then( .ignore_then(ident)
pattern
.then_ignore(just(Token::Colon))
.then(type_)
.separated_by(just(Token::Comma))
.allow_trailing()
.pipe(arena_collect)
.delimited_by(just(Token::LeftParen), just(Token::RightParen)),
)
.then_ignore(just(Token::Colon))
.then(type_)
.then( .then(
just(Token::Equ) ident
.ignore_then(expr.clone()) .then_ignore(just(T![":"]))
.or(block.clone().map(Expr::Block).map_with_span(Spanned::new)), .then(ty)
) .separated_by(just(T![","]))
.map_with_state(|((params, ret), expr), _, state| {
Expr::Func(params, ret, state.arena.alloc(expr))
});
let atom = literal
.map(Expr::Literal)
.or(just([Token::LeftParen, Token::RightParen]).to(Expr::Unit))
.or(ident.map(Expr::Ident))
.or(func)
.map_with_span(Spanned::new)
.or(expr
.clone()
.delimited_by(just(Token::LeftParen), just(Token::RightParen)));
// <expr>(expr1, expr2, …)
let call = atom.clone().foldl_with_state(
expr.clone()
.separated_by(just(Token::Comma))
.allow_trailing() .allow_trailing()
.pipe(arena_collect) .pipe(arena_collect)
.delimited_by(just(Token::LeftParen), just(Token::RightParen)) .delimited_by(just(T!["("]), just(T![")"])),
.map_with_span(Spanned::new)
.repeated(),
|expr, paramlist, state: &mut State| {
Spanned::new(
Expr::Call(state.arena.alloc(expr), paramlist.item),
merge_spans(expr.span, paramlist.span),
) )
.then_ignore(just(T![""]))
.then(ty)
.then(just([T!["{"], T!["}"]]))
.map_with_state(
|(((name, params), ret), _body), _, state| Definition::Func {
name,
params,
ret,
body: state.arena.alloc_slice_copy(&[]),
}, },
); );
let path = call let binding = equivmap!(Token, DefKind, [Const, Var])
.clone() .then(ident)
.map_with_state(|item, _, state| bumpalo::vec![in state.arena; item]) .then(just(T![":"]).ignore_then(ty).or_not())
.foldl(
just(Token::Dot).ignore_then(call).repeated(),
|mut v, expr| {
v.push(expr);
v
},
)
.map(|v| Expr::Path(v.into_bump_slice()))
.map_with_span(Spanned::new);
/* let unary = equivmap!(Token, UnaryOperator, [Minus, Tilde])
.map_with_span(Spanned::new)
.repeated()
.foldr_with_state(call, |op, expr, state| {
Spanned::new(
Expr::Unary(op, state.arena.alloc(expr)),
merge_spans(op.span, expr.span),
)
});
*/
let unary = path.foldl_with_state(
just([Token::Dot, Token::Star])
.to(UnaryOperator::Star)
.or(just(Token::Tilde).to(UnaryOperator::Tilde))
.map_with_span(Spanned::new)
.repeated(),
|expr, op, state| {
Spanned::new(
Expr::Unary(op, state.arena.alloc(expr)),
merge_spans(expr.span, op.span),
)
},
);
// <exprL> OP <exprR>
let binary = unary.clone().foldl_with_state(
equivmap!(
Token,
BinaryOperator,
[Plus, Minus, Star, Slash, And, VLine, Lt, Gt, Equ, Nequ, LtEqu, GtEqu],
)
.map_with_span(Spanned::new)
.then(unary)
.repeated(),
|l, (op, r), state: &mut State| {
Spanned::new(
Expr::Binary(op, state.arena.alloc(l), state.arena.alloc(r)),
merge_spans(l.span, r.span),
)
},
);
let bind = {
let start = pattern.then_ignore(just(Token::Colon)).then(expr.clone()); // <pat> := <expr>
let else_ = just(Token::Else).ignore_then(block.clone()).or_not(); // else {…}
// <pat> := <expr> [else {…}]
let local = start.clone().then(else_.clone()).map_with_state(
|((pat, expr), else_), _, state| {
Expr::BindLocal(pat, &*state.arena.alloc(expr), else_)
},
);
// <pat> := <expr> {…} else {…}
let in_ = start.then(block.clone()).then(else_).map_with_state(
|(((pat, expr), block), else_), _, state| {
Expr::BindIn(pat, &*state.arena.alloc(expr), block, else_)
},
);
in_.or(local)
};
// <atom> ← <expr>
let set = atom
.clone()
.then_ignore(just(Token::LArrow))
.then(expr.clone())
.map_with_state(|(place, expr), _, state| {
Expr::Set(state.arena.alloc(place), state.arena.alloc(expr))
});
// <expr>.match { <pat> → <expr>, … }
let match_ = atom
.clone()
.then_ignore(just([Token::Dot, Token::Match]))
.then( .then(
pattern just(T!["="]).ignore_then(
.then_ignore(just(Token::RArrow)) just(T!["uninit"])
.then(expr) .to(Expr::Uninit)
.separated_by(just(Token::Comma)) .map_with_span(Spanned::new),
.allow_trailing() ),
.pipe(arena_collect)
.delimited_by(just(Token::LeftCurly), just(Token::RightCurly)),
) )
.map_with_state(|(expr, branches), _, state| { .map_with_state(
Expr::Match(state.arena.alloc(expr), branches) |(((kind, ident), ty), init), _, state| Definition::Binding {
}); kind,
ident,
ty,
init: state.arena.alloc(init),
},
);
bind.or(set) func.or(binding).map_with_span(Spanned::new)
.or(match_)
.or(block.map(Expr::Block))
.map_with_span(Spanned::new)
.or(binary)
.or(atom)
})
} }
pub struct State<'a> { pub struct State<'a> {
@ -227,8 +102,8 @@ pub fn parse_input<'a>(
) -> ParseResult { ) -> ParseResult {
println!( println!(
"{:?}", "{:?}",
expr() definition()
.separated_by(just(Token::Semicolon)) .separated_by(just(T![";"]))
.allow_trailing() .allow_trailing()
.pipe(arena_collect) .pipe(arena_collect)
.parse_with_state(input, &mut State { arena }) .parse_with_state(input, &mut State { arena })

View file

@ -1,112 +1,139 @@
use lasso::Spur; use lasso::{Rodeo, Spur};
use logos::Lexer; use logos::{Lexer, Logos};
use {lasso::Rodeo, logos::Logos};
#[derive(Default)] #[derive(Default)]
pub struct Lextras { pub struct Extras {
pub interner: Rodeo, pub interner: Rodeo,
} }
#[derive(Copy, Clone, Debug, PartialEq, Eq)] macro_rules! token_def {
pub enum IntLit { (
Signed(i64), unit { $($u_name:ident : $($u_tok:literal),* $(,)?;)* }
Unsigned(u64), keyword { $($kw:tt),* $(,)* }
else { $($e_tt:tt)* }
) => {
literify::literify!(paste::paste! {
#[derive(Clone, Copy, Debug, PartialEq, Eq, Logos)]
#[logos(extras = Extras)]
#[logos(skip r"[ \t\n\f]+")]
#[logos(skip r"\\.*")]
pub enum Token {
$(
$(#[token($u_tok)])*
$u_name,
)*
$(
#[token(~($kw))]
[<$kw:camel>],
)*
$($e_tt)*
} }
#[derive(Logos, Copy, Clone, Debug, PartialEq, Eq)] macro_rules! T {
#[logos(extras = Lextras)] $($(
#[logos(skip r"[ \t\n\f]+")] ($u_tok) => { $crate::syntax::token::Token::$u_name };
#[logos(skip r"-- .*")] )*)*
#[rustfmt::skip]
pub enum Token {
#[token("(")] LeftParen,
#[token(")")] RightParen,
#[token("{")] LeftCurly,
#[token("}")] RightCurly,
#[token(".")] Dot,
#[token(",")] Comma,
#[token(":")] Colon,
#[token(";")] Semicolon,
#[token("_")] Underscore,
#[token("")] //____ $((~($kw)) => { $crate::syntax::token::Token::[<$kw:camel>] };)*
#[token("<-")] LArrow, }
#[token("")] //____ });
#[token("->")] RArrow, };
}
#[token(":>")] Pipe, token_def!(
unit {
LeftParen : "(";
RightParen: ")";
LeftCurly : "{";
RightCurly: "}";
Dot : ".";
Comma : ",";
Colon : ":";
Semicolon : ";";
#[token("+")] Plus, LArrow: "", "<-";
#[token("-")] Minus, RArrow: "", "->";
#[token("*")] Star,
#[token("/")] Slash,
#[token("&")] And,
#[token("|")] VLine,
#[token("~")] Tilde,
#[token("<")] Lt, Plus : "+";
#[token(">")] Gt, Minus : "-";
#[token("=")] Equ, Star : "*";
#[token("") ] //__ Slash : "/";
#[token("/=")] Nequ, Precent: "%";
#[token("") ] //___
#[token("<=")] LtEqu,
#[token("") ] //___,
#[token(">=")] GtEqu,
#[token("match")] Match, Equ : "=";
#[token("else")] Else, Neq : "", "/=";
#[token("loop")] Loop, Lt : "<";
#[token("const")] Const, Gt : ">";
#[token("var")] Var, LtEq : "", "<=";
#[token("func")] Func, GtEq : "", ">=";
// Modules aren't real here ondra just variables with imported functions }
#[token("module")] Module,
keyword { func, var, const, include, switch, loop,
return, break, continue, uninit, asm }
else {
#[regex( #[regex(
r"\p{XID_Start}\p{XID_Continue}*", r"\p{XID_Start}\p{XID_Continue}*",
|l| l.extras.interner.get_or_intern(l.slice()) |l| intern(l, l.slice()),
)] Ident(Spur), )] Ident(Spur),
#[token("»", better_string)]
#[regex( #[regex(
"\"[^\"]*\"", "\"[^\"]*\"",
|l| { |l| {
let slice = l.slice(); let s = l.slice();
l.extras.interner.get_or_intern(&slice[1..slice.len() - 1]) intern(l, &s[1..s.len() - 1])
} },
)] String(Spur), )] String(Spur),
#[regex( #[regex(
"-?[0-9]+", "[0-9]+",
|l| { |l| l.slice().parse::<u64>().ok()
Some(if let Some(slice) = l.slice().strip_prefix('-') { )] Int(u64),
IntLit::Signed(slice.parse::<i64>().ok()?)
} else {
IntLit::Unsigned(l.slice().parse::<u64>().ok()?)
})
}
)] Int(IntLit),
Invalid, Invalid,
} }
);
// For Evy, with love. pub(crate) use T;
fn better_string(lexer: &mut Lexer<Token>) -> Option<Spur> {
let mut count = 1; fn intern(lexer: &mut Lexer<'_, Token>, s: &str) -> Spur {
for (ix, chr) in lexer.remainder().char_indices() { lexer.extras.interner.get_or_intern(s)
match chr {
'«' => count -= 1,
'»' => count += 1,
_ => (),
} }
if count == 0 { #[cfg(test)]
let slice = &lexer.remainder()[..ix]; mod tests {
lexer.bump(ix + '«'.len_utf8()); use super::*;
return Some(lexer.extras.interner.get_or_intern(slice));
#[test]
fn ident() {
let mut lexer = Token::lexer("いえぶる able");
assert_eq!(
lexer.next(),
Some(Ok(Token::Ident(intern(&mut lexer, "いえぶる")))),
);
assert_eq!(
lexer.next(),
Some(Ok(Token::Ident(intern(&mut lexer, "able")))),
);
}
#[test]
fn string() {
let mut lexer = Token::lexer("\"sussy\" \"baka");
assert_eq!(
lexer.next(),
Some(Ok(Token::String(intern(&mut lexer, "sussy")))),
);
assert_eq!(lexer.next(), Some(Err(())),);
}
#[test]
fn symbol_alt() {
let mut lexer = Token::lexer("-> →");
assert_eq!(lexer.next(), lexer.next());
} }
} }
None
}