mirror of https://github.com/azur1s/bobbylisp.git
459 lines
14 KiB
Rust
459 lines
14 KiB
Rust
use chumsky::prelude::*;
|
|
|
|
use super::{ expr::*, ty::Type };
|
|
|
|
pub fn lexer<'src>() -> impl Parser<'src, &'src str, Vec<(Token<'src>, Span)>, extra::Err<Rich<'src, char, Span>>> {
|
|
// let num = text::int(10)
|
|
// .then(just('.').then(text::digits(10)).or_not())
|
|
// .slice()
|
|
// .from_str()
|
|
// .unwrapped()
|
|
// .map(Token::Int);
|
|
let int = text::int(10)
|
|
.slice()
|
|
.from_str()
|
|
.unwrapped()
|
|
.map(Token::Int);
|
|
|
|
let strn = just('"')
|
|
.ignore_then(none_of('"').repeated())
|
|
.then_ignore(just('"'))
|
|
.map_slice(Token::Str);
|
|
|
|
fn id_filter<C>(c: &C) -> bool where C: text::Char {
|
|
c.to_char().is_ascii_alphabetic()
|
|
|| "_'".contains(c.to_char())
|
|
}
|
|
let id = any()
|
|
.filter(id_filter)
|
|
.then(any()
|
|
.filter(id_filter)
|
|
.repeated())
|
|
.slice();
|
|
|
|
let word = id.map(|s: &str| match s {
|
|
"true" => Token::Bool(true),
|
|
"false" => Token::Bool(false),
|
|
"let" => Token::Let,
|
|
"in" => Token::In,
|
|
"fun" => Token::Func,
|
|
"return" => Token::Return,
|
|
"if" => Token::If,
|
|
"then" => Token::Then,
|
|
"else" => Token::Else,
|
|
_ => Token::Ident(s),
|
|
});
|
|
|
|
let sym = choice((
|
|
just("()").to(Token::Unit),
|
|
just("\\").to(Token::Lambda),
|
|
just("->").to(Token::Arrow),
|
|
just("|>").to(Token::Pipe),
|
|
|
|
just('+').to(Token::Add),
|
|
just('-').to(Token::Sub),
|
|
just('*').to(Token::Mul),
|
|
just('/').to(Token::Div),
|
|
just('%').to(Token::Rem),
|
|
just("==").to(Token::Eq),
|
|
just("!=").to(Token::Ne),
|
|
just("<=").to(Token::Le),
|
|
just(">=").to(Token::Ge),
|
|
just('<').to(Token::Lt),
|
|
just('>').to(Token::Gt),
|
|
just("&&").to(Token::And),
|
|
just("||").to(Token::Or),
|
|
just('!').to(Token::Not),
|
|
|
|
just('=').to(Token::Assign),
|
|
just(',').to(Token::Comma),
|
|
just(':').to(Token::Colon),
|
|
just(';').to(Token::Semicolon),
|
|
));
|
|
|
|
let delim = choice((
|
|
just('(').to(Token::Open(Delim::Paren)),
|
|
just(')').to(Token::Close(Delim::Paren)),
|
|
just('[').to(Token::Open(Delim::Brack)),
|
|
just(']').to(Token::Close(Delim::Brack)),
|
|
just('{').to(Token::Open(Delim::Brace)),
|
|
just('}').to(Token::Close(Delim::Brace)),
|
|
));
|
|
|
|
let token = choice((
|
|
int,
|
|
strn,
|
|
word,
|
|
sym,
|
|
delim,
|
|
));
|
|
|
|
let comment = just("//")
|
|
.then(any().and_is(just('\n').not()).repeated())
|
|
.padded();
|
|
|
|
token
|
|
.map_with_span(move |tok, span| (tok, span))
|
|
.padded_by(comment.repeated())
|
|
.padded()
|
|
// If we get an error, skip to the next character and try again.
|
|
.recover_with(skip_then_retry_until(any().ignored(), end()))
|
|
.repeated()
|
|
.collect()
|
|
}
|
|
|
|
// (a, s) -> (Box::new(a), s)
|
|
fn boxspan<T>(a: Spanned<T>) -> Spanned<Box<T>> {
|
|
(Box::new(a.0), a.1)
|
|
}
|
|
|
|
// Lifetime 'tokens is the lifetime of the token buffer from the lexer.
|
|
type ParserInput<'tokens, 'src> =
|
|
chumsky::input::SpannedInput<
|
|
Token<'src>,
|
|
Span,
|
|
&'tokens [(Token<'src>, Span)]
|
|
>;
|
|
|
|
pub fn expr_parser<'tokens, 'src: 'tokens>() -> impl Parser<
|
|
'tokens,
|
|
ParserInput<'tokens, 'src>,
|
|
Spanned<Expr<'src>>,
|
|
extra::Err<Rich<'tokens, Token<'src>, Span>>,
|
|
> + Clone {
|
|
recursive(|expr| {
|
|
let lit = select! {
|
|
Token::Unit => Expr::Lit(Lit::Unit),
|
|
Token::Bool(b) => Expr::Lit(Lit::Bool(b)),
|
|
Token::Int(n) => Expr::Lit(Lit::Int(n)),
|
|
Token::Str(s) => Expr::Lit(Lit::Str(s)),
|
|
};
|
|
|
|
let symbol = select! {
|
|
Token::Ident(s) => s,
|
|
};
|
|
|
|
let ident = symbol
|
|
.map(Expr::Ident);
|
|
|
|
let paren_expr = expr.clone()
|
|
.delimited_by(
|
|
just(Token::Open(Delim::Paren)),
|
|
just(Token::Close(Delim::Paren)),
|
|
)
|
|
.map(|e: Spanned<Expr>| e.0);
|
|
|
|
let lambda = just(Token::Func)
|
|
.ignore_then(
|
|
(symbol
|
|
.then(type_parser().or_not())
|
|
.separated_by(just(Token::Comma))
|
|
.collect::<Vec<_>>()
|
|
.delimited_by(
|
|
just(Token::Open(Delim::Paren)),
|
|
just(Token::Close(Delim::Paren)),
|
|
))
|
|
.or(just(Token::Unit).to(Vec::new()))
|
|
)
|
|
.then(type_parser().or_not())
|
|
.then_ignore(just(Token::Arrow))
|
|
.then(expr.clone())
|
|
.map(|((args, ret), body)| Expr::Lambda(args, ret, boxspan(body)));
|
|
|
|
// ident (: type)?
|
|
let bind = symbol
|
|
.then(
|
|
just(Token::Colon)
|
|
.ignore_then(type_parser())
|
|
.or_not()
|
|
)
|
|
.then_ignore(just(Token::Assign))
|
|
.then(expr.clone())
|
|
.map(|((name, ty), expr)| (name, ty, boxspan(expr)));
|
|
|
|
let let_or_define = just(Token::Let)
|
|
.ignore_then(bind)
|
|
.then(
|
|
just(Token::In)
|
|
.ignore_then(expr.clone())
|
|
.or_not()
|
|
)
|
|
.map(|((name, ty, expr), body)| match body {
|
|
Some(body) => Expr::Let { name, ty, value: expr, body: boxspan(body) },
|
|
None => Expr::Define { name, ty, value: expr },
|
|
});
|
|
|
|
let if_ = just(Token::If)
|
|
.ignore_then(expr.clone())
|
|
.then_ignore(just(Token::Then))
|
|
.then(expr.clone())
|
|
.then_ignore(just(Token::Else))
|
|
.then(expr.clone())
|
|
.map(|((cond, t), f)| Expr::If {
|
|
cond: boxspan(cond),
|
|
t: boxspan(t),
|
|
f: boxspan(f)
|
|
});
|
|
|
|
let block = expr.clone()
|
|
.map(boxspan)
|
|
.then_ignore(just(Token::Semicolon))
|
|
.repeated()
|
|
.collect::<Vec<_>>()
|
|
.then(expr.clone()
|
|
.map(boxspan)
|
|
.or_not())
|
|
.delimited_by(
|
|
just(Token::Open(Delim::Brace)),
|
|
just(Token::Close(Delim::Brace)),
|
|
)
|
|
.map(|(mut exprs, end)| {
|
|
let void = end.is_none();
|
|
if let Some(end) = end {
|
|
exprs.push(end);
|
|
}
|
|
Expr::Block {
|
|
exprs,
|
|
void,
|
|
}
|
|
});
|
|
|
|
let atom = lit
|
|
.or(ident)
|
|
.or(paren_expr)
|
|
.or(lambda)
|
|
.or(let_or_define)
|
|
.or(if_)
|
|
.or(block)
|
|
.map_with_span(|e, s| (e, s))
|
|
.boxed()
|
|
.labelled("(atomic) expression");
|
|
|
|
let call = atom
|
|
.then(
|
|
expr.clone()
|
|
.separated_by(just(Token::Comma))
|
|
.allow_trailing()
|
|
.collect::<Vec<_>>()
|
|
.delimited_by(
|
|
just(Token::Open(Delim::Paren)),
|
|
just(Token::Close(Delim::Paren)),
|
|
)
|
|
.or_not()
|
|
)
|
|
.map_with_span(|(f, args), s| match args {
|
|
Some(args) => (Expr::Call(boxspan(f), args), s),
|
|
None => (f.0, f.1),
|
|
});
|
|
|
|
let op = choice((
|
|
just(Token::Sub).to(UnaryOp::Neg),
|
|
just(Token::Not).to(UnaryOp::Not),
|
|
));
|
|
let unary = op
|
|
.map_with_span(|op, s| (op, s))
|
|
.repeated()
|
|
.foldr(
|
|
call,
|
|
|op, expr| {
|
|
let span = op.1.start..expr.1.end;
|
|
(Expr::Unary(op.0, boxspan(expr)), span.into())
|
|
});
|
|
|
|
let op = choice((
|
|
just(Token::Mul).to(BinaryOp::Mul),
|
|
just(Token::Div).to(BinaryOp::Div),
|
|
just(Token::Rem).to(BinaryOp::Rem),
|
|
));
|
|
let product = unary.clone()
|
|
.foldl(
|
|
op.then(unary).repeated(),
|
|
|a, (op, b)| {
|
|
let span = a.1.start..b.1.end;
|
|
(Expr::Binary(op, boxspan(a), boxspan(b)), span.into())
|
|
}
|
|
);
|
|
|
|
let op = choice((
|
|
just(Token::Add).to(BinaryOp::Add),
|
|
just(Token::Sub).to(BinaryOp::Sub),
|
|
));
|
|
let sum = product.clone()
|
|
.foldl(
|
|
op.then(product).repeated(),
|
|
|a, (op, b)| {
|
|
let span = a.1.start..b.1.end;
|
|
(Expr::Binary(op, boxspan(a), boxspan(b)), span.into())
|
|
}
|
|
);
|
|
|
|
let op = choice((
|
|
just(Token::Eq).to(BinaryOp::Eq),
|
|
just(Token::Ne).to(BinaryOp::Ne),
|
|
just(Token::Lt).to(BinaryOp::Lt),
|
|
just(Token::Le).to(BinaryOp::Le),
|
|
just(Token::Gt).to(BinaryOp::Gt),
|
|
just(Token::Ge).to(BinaryOp::Ge),
|
|
));
|
|
let comparison = sum.clone()
|
|
.foldl(
|
|
op.then(sum).repeated(),
|
|
|a, (op, b)| {
|
|
let span = a.1.start..b.1.end;
|
|
(Expr::Binary(op, boxspan(a), boxspan(b)), span.into())
|
|
}
|
|
);
|
|
|
|
let op = choice((
|
|
just(Token::And).to(BinaryOp::And),
|
|
just(Token::Or).to(BinaryOp::Or),
|
|
));
|
|
let logical = comparison.clone()
|
|
.foldl(
|
|
op.then(comparison).repeated(),
|
|
|a, (op, b)| {
|
|
let span = a.1.start..b.1.end;
|
|
(Expr::Binary(op, boxspan(a), boxspan(b)), span.into())
|
|
}
|
|
);
|
|
|
|
let pipe = logical.clone()
|
|
.foldl(
|
|
just(Token::Pipe).to(BinaryOp::Pipe)
|
|
.then(logical).repeated(),
|
|
|a, (op, b)| {
|
|
let span = a.1.start..b.1.end;
|
|
(Expr::Binary(op, boxspan(a), boxspan(b)), span.into())
|
|
}
|
|
);
|
|
|
|
pipe
|
|
.labelled("expression")
|
|
})
|
|
}
|
|
|
|
pub fn type_parser<'tokens, 'src: 'tokens>() -> impl Parser<
|
|
'tokens,
|
|
ParserInput<'tokens, 'src>,
|
|
Type,
|
|
extra::Err<Rich<'tokens, Token<'src>, Span>>,
|
|
> + Clone {
|
|
recursive(|ty| {
|
|
let lit_ty = select! {
|
|
Token::Ident("Bool") => Type::Bool,
|
|
Token::Ident("Int") => Type::Int,
|
|
Token::Ident("Str") => Type::Str,
|
|
// TODO: Support type variables in both the parser and the type checker.
|
|
Token::Ident(_) => Type::Var(69),
|
|
Token::Unit => Type::Unit,
|
|
}.validate(|tys, span, emitter| {
|
|
if let Type::Var(_) = tys {
|
|
emitter.emit(Rich::custom(span,
|
|
"Type variables are not yet supported.".to_string()
|
|
));
|
|
}
|
|
tys
|
|
});
|
|
|
|
let tys_paren = ty.clone()
|
|
.separated_by(just(Token::Comma))
|
|
.allow_trailing()
|
|
.collect::<Vec<_>>()
|
|
.delimited_by(
|
|
just(Token::Open(Delim::Paren)),
|
|
just(Token::Close(Delim::Paren)),
|
|
);
|
|
|
|
let func = tys_paren.clone()
|
|
.then_ignore(just(Token::Arrow))
|
|
.then(ty.clone())
|
|
.map(|(ta, tr)| Type::Func(ta, Box::new(tr)));
|
|
|
|
let tuple = tys_paren
|
|
.validate(|tys, span, emitter| {
|
|
if tys.is_empty() {
|
|
emitter.emit(Rich::custom(span,
|
|
"Tuple must have at least one element. Use `()` for the unit type."
|
|
.to_string()
|
|
));
|
|
}
|
|
tys
|
|
})
|
|
.map(Type::Tuple);
|
|
|
|
let array = ty.clone()
|
|
.delimited_by(
|
|
just(Token::Open(Delim::Brack)),
|
|
just(Token::Close(Delim::Brack)),
|
|
)
|
|
.map(|t| Type::Array(Box::new(t)));
|
|
|
|
lit_ty
|
|
.or(array)
|
|
.or(func)
|
|
.or(tuple)
|
|
.boxed()
|
|
.labelled("type")
|
|
})
|
|
}
|
|
|
|
pub fn exprs_parser<'tokens, 'src: 'tokens>() -> impl Parser<
|
|
'tokens,
|
|
ParserInput<'tokens, 'src>,
|
|
Vec<Spanned<Expr<'src>>>,
|
|
extra::Err<Rich<'tokens, Token<'src>, Span>>,
|
|
> + Clone {
|
|
expr_parser()
|
|
.separated_by(just(Token::Semicolon))
|
|
.allow_trailing()
|
|
.collect::<Vec<_>>()
|
|
}
|
|
|
|
#[cfg(test)]
|
|
mod tests {
|
|
use super::*;
|
|
|
|
#[test]
|
|
fn test_type_parser() {
|
|
let input = "(() -> () -> () -> (num)) -> bool";
|
|
let (ts, errs) = lexer().parse(input).into_output_errors();
|
|
|
|
assert!(ts.is_some());
|
|
assert!(errs.is_empty());
|
|
|
|
if let Some(ts) = ts {
|
|
let (ast, parse_errs) = type_parser()
|
|
.map_with_span(|ty, span| (ty, span))
|
|
.parse(ts.as_slice().spanned((input.len()..input.len()).into()))
|
|
.into_output_errors();
|
|
|
|
println!("{:?}", ast);
|
|
println!("{:?}", parse_errs);
|
|
}
|
|
}
|
|
|
|
#[test]
|
|
fn test_expr_parser_atom() {
|
|
let input = "
|
|
let id : (A) -> A = (\\x -> x) in {
|
|
if false
|
|
then id(3.14)
|
|
else id(true);
|
|
}
|
|
";
|
|
let (ast, errs) = lexer().parse(input).into_output_errors();
|
|
|
|
assert!(ast.is_some());
|
|
assert!(errs.is_empty());
|
|
|
|
if let Some(ast) = ast {
|
|
let (ast, parse_errs) = expr_parser()
|
|
.map_with_span(|ty, span| (ty, span))
|
|
.parse(ast.as_slice().spanned((input.len()..input.len()).into()))
|
|
.into_output_errors();
|
|
|
|
println!("{:?}", ast);
|
|
println!("{:?}", parse_errs);
|
|
}
|
|
}
|
|
} |