azur 2023-03-28 20:32:30 +07:00
commit e87c6dccc1
16 changed files with 1734 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
/target

137
Cargo.lock generated Normal file
View File

@ -0,0 +1,137 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "ahash"
version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2c99f64d1e06488f620f932677e24bc6e2897582980441ae90a671415bd7ec2f"
dependencies = [
"cfg-if",
"once_cell",
"version_check",
]
[[package]]
name = "ariadne"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "367fd0ad87307588d087544707bc5fbf4805ded96c7db922b70d368fa1cb5702"
dependencies = [
"unicode-width",
"yansi",
]
[[package]]
name = "cc"
version = "1.0.79"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f"
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "chumsky"
version = "1.0.0-alpha.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "379cdc19530b72a1e76d94a350676eaea1455375533eb38f18dfa712f9996902"
dependencies = [
"hashbrown",
"stacker",
]
[[package]]
name = "hashbrown"
version = "0.13.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43a3c133739dddd0d2990f9a4bdf8eb4b21ef50e4851ca85ab661199821d510e"
dependencies = [
"ahash",
]
[[package]]
name = "holymer"
version = "0.1.0"
dependencies = [
"ariadne",
"chumsky",
]
[[package]]
name = "libc"
version = "0.2.140"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "99227334921fae1a979cf0bfdfcc6b3e5ce376ef57e16fb6fb3ea2ed6095f80c"
[[package]]
name = "once_cell"
version = "1.17.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3"
[[package]]
name = "psm"
version = "0.1.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5787f7cda34e3033a72192c018bc5883100330f362ef279a8cbccfce8bb4e874"
dependencies = [
"cc",
]
[[package]]
name = "stacker"
version = "0.1.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c886bd4480155fd3ef527d45e9ac8dd7118a898a46530b7b94c3e21866259fce"
dependencies = [
"cc",
"cfg-if",
"libc",
"psm",
"winapi",
]
[[package]]
name = "unicode-width"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b"
[[package]]
name = "version_check"
version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]]
name = "yansi"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec"

8
Cargo.toml Normal file
View File

@ -0,0 +1,8 @@
[package]
name = "holymer"
version = "0.1.0"
edition = "2021"
[dependencies]
ariadne = "0.2.0"
chumsky = "1.0.0-alpha.3"

7
hh.hlm Normal file
View File

@ -0,0 +1,7 @@
\x -> x // => x : t0
\x -> x + 1 // => x : Int
(\x -> x)(1) // => x : Int
let a : num = 1,
b : num = a + 1,
c : num = b + 1;

19
ret.hlm Normal file
View File

@ -0,0 +1,19 @@
let something: bool = true;
let a: num = 4 in
println(a);
fun foo(x: bool): num = {
fun id(a: T): T = id;
if !id(x) then
return 1;
42
};
fun main(): () = {
let r: num = foo(something);
println(r);
};

2
rust-toolchain.toml Normal file
View File

@ -0,0 +1,2 @@
[toolchain]
channel = "nightly"

229
spec.md Normal file
View File

@ -0,0 +1,229 @@
# Specification
## Syntax
---
### Expressions
- Literals
A literal is a value that is written directly
into the source code.
- Number
A number literal is of type `f64` and can
be expressed with or without a decimal point.
- Examples: `1`, `3.14`, `.5`
```ebnf
Number:
Digits + (maybe '.' + Digits).
(* Optional whole number, e.g. .5 *)
('.' + Digits).
Digits:
one or more of 0..9.
```
- String
A string literal consists of zero or more
characters enclosed in double quotes (`"`).
- Examples: `"Hello, World"`,
`"They said \"Hi\""`,
`"Foo\nBar"`
```ebnf
String:
'"' + (zero or more of Character) + '"'.
Character:
any character except '"' or '\'.
escape sequences.
```
- Boolean
A boolean literal can be either `true` or
`false`.
```ebnf
Boolean:
'true' or 'false'.
```
- Unit
A unit literal is a value that represents
the absence of a value.
```ebnf
Unit:
'()'.
```
- Identifiers
An identifier is a name that is used to refer
to a variable, function, or other entity.
- Examples: `foo`, `barBaz`, `add2`
```ebnf
Identifier:
(Letter + zero or more of LetterOrDigit) but
not any of Keywords.
Letter:
one of a..z or A..Z.
LetterOrDigit:
Letter or one of 0..9.
```
- Operators
An operator is a symbol that is used to
represent an operation.
```ebnf
Binary:
one of (
(* Arithmetic *)
+ - * / %
(* Comparison *)
== != < <= > >=
(* Logical *)
&& ||
).
Unary:
one of (- !).
```
- Application (Function Call)
An application is an expression that calls a
function with a list of arguments.
The callee does not have to be a named
function; it can be any expression that
evaluates to a function.
```ebnf
Arguments:
zero or more of Expression delimited by ','.
Application:
Expression + '(' + Arguments + ')'.
```
- Examples:
```rust
foo(1, 2, 3)
(\x -> x + 1)(2)
```
- If-Else
An if-else expression is an expression that
evaluates to one of two expressions depending
on the value of a condition.
```ebnf
IfElse:
'if' + Expression + 'then' + Expression + 'else' + Expression.
```
- Examples:
```rust
if true then 1 else 2
if 1 == 2 then "foo" else "bar"
```
- Let Binding(s)
There are 2 types of let bindings:
- "Imperative" let bindings, which are
similar to variable declarations in
imperative languages (Javascript, Rust, etc.).
```ebnf
Bindings:
one or more of Binding delimited by ','.
Let:
'let' + Bindings.
```
- Example:
```rust
let x = 1 // -> ()
x + 1 // -> 2
```
- "Functional" let bindings, which are
similar to variable declarations in
functional languages (ML-family, etc.).
```ebnf
LetIn:
'let' + Bindings + 'in' + Expression.
```
- Example:
```rust
let x = 1, y = 2 in
x + y // -> 3
```
- Block & Return
A block is a sequence of expressions that are
evaluated in order and the value of the last
expression is returned (if not ended with a
semicolon).
A return expression is an expression that
will exit the current block and return the
value of the expression. It is not necessary
to use a return expression in a block, but
it could be useful for early termination.
Any use of a return expression outside of a
block is not allowed.
```ebnf
Block:
'{' + (zero or more of Expression delimited by ';') + '}'.
Return:
'return' + Expression.
```
- Examples:
```rust
{
let x = 1;
let y = 2;
x + y
}
```
```rust
fun foo(): num = {
if true then
return 1;
let bar = 42;
bar
};
```
### Keywords
Keywords are reserved words that cannot be
used as identifiers. They are used to
represent constructs of the language.
```ebnf
Keywords:
if then else
let in fun return
```

82
src/main.rs Normal file
View File

@ -0,0 +1,82 @@
use ariadne::{sources, Color, Label, Report, ReportKind};
use chumsky::{Parser, prelude::Input};
use self::{parse::parser::{lexer, exprs_parser}, typing::check::check};
pub mod parse;
pub mod typing;
fn main() {
let src = "
(\\x : num, y : num, z : num -> x)()
".to_string();
let filename = "?".to_string();
let (ts, errs) = lexer().parse(&src).into_output_errors();
let parse_errs = if let Some(tokens) = &ts {
let (ast, parse_errs) = exprs_parser()
.map_with_span(|ast, span| (ast, span))
.parse(tokens.as_slice().spanned((src.len()..src.len()).into()))
.into_output_errors();
if let Some(ast) = ast.filter(|_| errs.len() + parse_errs.len() == 0) {
match check(ast.0) {
Ok(tast) => println!("{:?}", tast),
Err(ty_err) => {
let mut r = Report::build(ReportKind::Error, filename.clone(), ty_err.loc.start)
.with_message(ty_err.msg)
.with_label(Label::new((filename.clone(), ty_err.loc.into_range()))
.with_message(match ty_err.note {
Some(note) => note,
None => "while type checking this expression".to_string(),
})
.with_color(Color::Red)
);
if let Some((hint, loc)) = ty_err.hint {
r = r.with_label(Label::new((filename.clone(), loc.into_range()))
.with_message(hint)
.with_color(Color::Yellow),
);
}
r.finish()
.print(sources([(
filename.clone(),
src.clone(),
)]))
.unwrap();
}
}
}
parse_errs
} else {
Vec::new()
};
errs.into_iter()
.map(|e| e.map_token(|c| c.to_string()))
.chain(
parse_errs
.into_iter()
.map(|e| e.map_token(|tok| tok.to_string())),
)
.for_each(|e| {
Report::build(ReportKind::Error, filename.clone(), e.span().start)
.with_message(e.to_string())
.with_label(
Label::new((filename.clone(), e.span().into_range()))
.with_message(e.reason().to_string())
.with_color(Color::Red),
)
// .with_labels(e.contexts().map(|(label, span)| {
// Label::new((filename.clone(), span.into_range()))
// .with_message(format!("while parsing this {}", label))
// .with_color(Color::Yellow)
// }))
.finish()
.print(sources([(filename.clone(), src.clone())]))
.unwrap()
});
}

35
src/parse/mod.rs Normal file
View File

@ -0,0 +1,35 @@
pub mod parser;
#[cfg(test)]
mod tests {
    use chumsky::prelude::*;
    use super::parser::*;

    /// Lexes a small `let ... in` expression and checks every token and span.
    ///
    /// The lexer only produces `Token::Unit` for the keyword `unit`; the text
    /// `()` is lexed as an opening paren followed by a closing paren, so that
    /// is what this test expects at offsets 12..14.
    #[test]
    fn simple() {
        let src = "let x = 1 + (), y = foo in x + !(y)";
        let (ts, errs) = lexer().parse(src).into_output_errors();
        assert!(errs.is_empty());
        assert_eq!(ts, Some(vec![
            (Token::Let, Span::new(0, 3)),
            (Token::Ident("x"), Span::new(4, 5)),
            (Token::Assign, Span::new(6, 7)),
            (Token::Num(1.0), Span::new(8, 9)),
            (Token::Add, Span::new(10, 11)),
            (Token::Open(Delim::Paren), Span::new(12, 13)),
            (Token::Close(Delim::Paren), Span::new(13, 14)),
            (Token::Comma, Span::new(14, 15)),
            (Token::Ident("y"), Span::new(16, 17)),
            (Token::Assign, Span::new(18, 19)),
            (Token::Ident("foo"), Span::new(20, 23)),
            (Token::In, Span::new(24, 26)),
            (Token::Ident("x"), Span::new(27, 28)),
            (Token::Add, Span::new(29, 30)),
            (Token::Not, Span::new(31, 32)),
            (Token::Open(Delim::Paren), Span::new(32, 33)),
            (Token::Ident("y"), Span::new(33, 34)),
            (Token::Close(Delim::Paren), Span::new(34, 35)),
        ]));
    }
}

462
src/parse/parser.rs Normal file
View File

@ -0,0 +1,462 @@
use std::fmt::{
Display,
Formatter,
self,
};
use chumsky::prelude::*;
use crate::typing::ty::Type;
/// The three kinds of bracketing delimiter.
#[derive(Clone, Debug, PartialEq)]
pub enum Delim {
    Paren,
    Brack,
    Brace,
}

/// The tokens of the language.
///
/// `'src` is the lifetime of the source code string; string and identifier
/// tokens borrow their text from it.
#[derive(Clone, Debug, PartialEq)]
pub enum Token<'src> {
    // Literals.
    Unit,
    Bool(bool),
    Num(f64),
    Str(&'src str),
    Ident(&'src str),
    // Arithmetic operators.
    Add,
    Sub,
    Mul,
    Div,
    Rem,
    // Comparison operators.
    Eq,
    Ne,
    Lt,
    Gt,
    Le,
    Ge,
    // Logical operators.
    And,
    Or,
    Not,
    // Punctuation.
    Assign,
    Comma,
    Colon,
    Semicolon,
    Open(Delim),
    Close(Delim),
    Lambda,
    Arrow,
    // Keywords.
    Let,
    In,
    Func,
    Return,
    If,
    Then,
    Else,
}

impl<'src> Display for Token<'src> {
    /// Writes the token roughly as it would appear in source text; string
    /// tokens are wrapped in double quotes.
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        // Tokens carrying a payload are formatted directly; every other token
        // maps to a fixed piece of text written once at the end.
        let fixed = match self {
            Token::Bool(b) => return write!(f, "{}", b),
            Token::Num(n) => return write!(f, "{}", n),
            Token::Str(s) => return write!(f, "\"{}\"", s),
            Token::Ident(s) => return write!(f, "{}", s),

            Token::Unit => "()",

            Token::Add => "+",
            Token::Sub => "-",
            Token::Mul => "*",
            Token::Div => "/",
            Token::Rem => "%",

            Token::Eq => "==",
            Token::Ne => "!=",
            Token::Lt => "<",
            Token::Gt => ">",
            Token::Le => "<=",
            Token::Ge => ">=",

            Token::And => "&&",
            Token::Or => "||",
            Token::Not => "!",

            Token::Assign => "=",
            Token::Comma => ",",
            Token::Colon => ":",
            Token::Semicolon => ";",

            Token::Open(Delim::Paren) => "(",
            Token::Open(Delim::Brack) => "[",
            Token::Open(Delim::Brace) => "{",
            Token::Close(Delim::Paren) => ")",
            Token::Close(Delim::Brack) => "]",
            Token::Close(Delim::Brace) => "}",

            Token::Lambda => "\\",
            Token::Arrow => "->",

            Token::Let => "let",
            Token::In => "in",
            Token::Func => "func",
            Token::Return => "return",
            Token::If => "if",
            Token::Then => "then",
            Token::Else => "else",
        };
        write!(f, "{}", fixed)
    }
}
pub type Span = SimpleSpan<usize>;
/// Builds the lexer, which turns source text into a list of `(Token, Span)`
/// pairs.
///
/// Each token is tried in the order number, string, word (keyword, boolean,
/// `unit` or identifier), symbol, delimiter. Whitespace around tokens is
/// skipped. On an unrecognised character the lexer records an error, skips a
/// character and tries again until the end of input.
pub fn lexer<'src>() -> impl Parser<'src, &'src str, Vec<(Token<'src>, Span)>, extra::Err<Rich<'src, char, Span>>> {
    // An integer part with an optional `.digits` fraction, converted with
    // `from_str` into the `f64` that `Token::Num` carries.
    let num = text::int(10)
        .then(just('.').then(text::digits(10)).or_not())
        .slice()
        .from_str()
        .unwrapped()
        .map(Token::Num);

    // A double-quoted string with no escape handling. Only the characters
    // *between* the quotes are sliced, so `Token::Str` holds the contents
    // without the delimiters (its `Display` impl adds the quotes back).
    let strn = just('"')
        .ignore_then(none_of('"').repeated().slice())
        .then_ignore(just('"'))
        .map(Token::Str);

    // Words: keywords and literal words first, anything else is an identifier.
    let word = text::ident().map(|s: &str| match s {
        "true" => Token::Bool(true),
        "false" => Token::Bool(false),
        "unit" => Token::Unit,
        "let" => Token::Let,
        "in" => Token::In,
        "func" => Token::Func,
        "return" => Token::Return,
        "if" => Token::If,
        "then" => Token::Then,
        "else" => Token::Else,
        _ => Token::Ident(s),
    });

    // Multi-character symbols are listed before the single characters they
    // start with (`->` before `-`, `<=` before `<`, `==`/`!=` before `=`/`!`).
    let sym = choice((
        just("\\").to(Token::Lambda),
        just("->").to(Token::Arrow),
        just('+').to(Token::Add),
        just('-').to(Token::Sub),
        just('*').to(Token::Mul),
        just('/').to(Token::Div),
        just('%').to(Token::Rem),
        just("==").to(Token::Eq),
        just("!=").to(Token::Ne),
        just("<=").to(Token::Le),
        just(">=").to(Token::Ge),
        just('<').to(Token::Lt),
        just('>').to(Token::Gt),
        just("&&").to(Token::And),
        just("||").to(Token::Or),
        just('!').to(Token::Not),
        just('=').to(Token::Assign),
        just(',').to(Token::Comma),
        just(':').to(Token::Colon),
        just(';').to(Token::Semicolon),
    ));

    let delim = choice((
        just('(').to(Token::Open(Delim::Paren)),
        just(')').to(Token::Close(Delim::Paren)),
        just('[').to(Token::Open(Delim::Brack)),
        just(']').to(Token::Close(Delim::Brack)),
        just('{').to(Token::Open(Delim::Brace)),
        just('}').to(Token::Close(Delim::Brace)),
    ));

    let token = choice((
        num,
        strn,
        word,
        sym,
        delim,
    ));

    token
        .map_with_span(|tok, span| (tok, span))
        .padded()
        // If we get an error, skip to the next character and try again.
        .recover_with(skip_then_retry_until(any().ignored(), end()))
        .repeated()
        .collect()
}
/// A literal value as written in the source.
///
/// `'src` is the lifetime of the source text that string literals borrow.
#[derive(Clone, Debug, PartialEq)]
pub enum Lit<'src> {
    Unit,
    Bool(bool),
    Num(f64),
    Str(&'src str),
}

/// Prefix operators.
#[derive(Clone, Debug)]
pub enum UnaryOp {
    Neg,
    Not,
}

/// Infix operators, grouped as arithmetic, logical and comparison.
#[derive(Clone, Debug)]
pub enum BinaryOp {
    // Arithmetic.
    Add,
    Sub,
    Mul,
    Div,
    Rem,
    // Logical.
    And,
    Or,
    // Comparison.
    Eq,
    Ne,
    Lt,
    Le,
    Gt,
    Ge,
}
/// A value paired with the source span it came from.
pub type Spanned<T> = (T, Span);

/// One `name (: type)? = expr` binding of a `let`.
type Binding<'src> = (
    &'src str,
    Option<Type>,
    Spanned<Box<Expr<'src>>>,
);

/// The untyped expression tree produced by the parser.
///
/// Clone is needed for type checking since the type checking
/// algorithm is recursive and sometimes consume the AST.
#[derive(Clone, Debug)]
pub enum Expr<'src> {
    /// A literal value.
    Lit(Lit<'src>),
    /// A reference to a name.
    Ident(&'src str),
    /// A prefix operator applied to an operand.
    Unary(UnaryOp, Spanned<Box<Self>>),
    /// An infix operator applied to a left and a right operand.
    Binary(BinaryOp, Spanned<Box<Self>>, Spanned<Box<Self>>),
    /// A lambda: parameters with optional type annotations, and a body.
    Lambda(Vec<(&'src str, Option<Type>)>, Spanned<Box<Self>>),
    /// A call: the callee expression and its arguments.
    Call(Spanned<Box<Self>>, Vec<Spanned<Self>>),
    /// `if cond then t else f`.
    If {
        cond: Spanned<Box<Self>>,
        t: Spanned<Box<Self>>,
        f: Spanned<Box<Self>>,
    },
    /// `let bindings in body`.
    Let {
        bindings: Vec<Binding<'src>>,
        body: Spanned<Box<Self>>,
    },
    /// `let bindings` without an `in` body.
    Assign(Vec<Binding<'src>>),
    /// A brace-delimited sequence of expressions.
    Block {
        exprs: Vec<Spanned<Box<Self>>>,
    },
}
/// Boxes the value half of a spanned pair and keeps its span:
/// `(a, s) -> (Box::new(a), s)`.
fn boxspan<T>(a: Spanned<T>) -> Spanned<Box<T>> {
    let (value, span) = a;
    (Box::new(value), span)
}
// Input type shared by the token-level parsers: a slice of `(Token, Span)`
// pairs as produced by `lexer`.
// Lifetime 'tokens is the lifetime of the token buffer from the lexer.
type ParserInput<'tokens, 'src> =
chumsky::input::SpannedInput<
Token<'src>,
Span,
&'tokens [(Token<'src>, Span)]
>;
/// Builds the parser for a single expression.
///
/// An expression is an atom (literal, identifier, parenthesised expression,
/// lambda, `let`, `if`/`then`/`else` or block) optionally followed by one
/// parenthesised, comma-separated argument list, which turns it into a call.
/// Nothing in this parser produces `Expr::Unary` or `Expr::Binary`, and the
/// `return` token is not handled here.
pub fn expr_parser<'tokens, 'src: 'tokens>() -> impl Parser<
'tokens,
ParserInput<'tokens, 'src>,
Spanned<Expr<'src>>,
extra::Err<Rich<'tokens, Token<'src>, Span>>,
> + Clone {
recursive(|expr| {
// Literal tokens map one-to-one onto literal expressions.
let lit = select! {
Token::Unit => Expr::Lit(Lit::Unit),
Token::Bool(b) => Expr::Lit(Lit::Bool(b)),
Token::Num(n) => Expr::Lit(Lit::Num(n)),
Token::Str(s) => Expr::Lit(Lit::Str(s)),
};
// A bare name, reused for identifiers, lambda parameters and binding names.
let symbol = select! {
Token::Ident(s) => s,
};
let ident = symbol
.map(Expr::Ident);
// `( expr )`: the inner expression's own span is dropped here; `atom`
// below attaches a span to the result.
let paren_expr = expr.clone()
.delimited_by(
just(Token::Open(Delim::Paren)),
just(Token::Close(Delim::Paren)),
)
.map(|e: Spanned<Expr>| e.0);
// `\ name (: type)?, ... -> body`
let lambda = just(Token::Lambda)
.ignore_then(
(
symbol
.then(
just(Token::Colon)
.ignore_then(type_parser())
.or_not())
)
.separated_by(just(Token::Comma))
.allow_trailing()
.collect::<Vec<_>>()
)
.then_ignore(just(Token::Arrow))
.then(expr.clone())
.map(|(args, body)| Expr::Lambda(args, boxspan(body)));
// Comma-separated bindings, each `ident (: type)? = expr`.
let binds = symbol
.then(
just(Token::Colon)
.ignore_then(type_parser())
.or_not()
)
.then_ignore(just(Token::Assign))
.then(expr.clone())
.map(|((name, ty), expr)| (name, ty, boxspan(expr)))
.separated_by(just(Token::Comma))
.allow_trailing()
.collect::<Vec<_>>();
// `let bindings in body` becomes `Expr::Let`; without the `in` part it
// becomes `Expr::Assign`.
let let_or_assign = just(Token::Let)
.ignore_then(binds)
.then(
just(Token::In)
.ignore_then(expr.clone())
.or_not()
)
.map(|(bindings, body)| match body {
Some(body) => Expr::Let { bindings, body: boxspan(body) },
None => Expr::Assign(bindings),
});
// `if cond then t else f`; the `else` branch is mandatory.
let if_ = just(Token::If)
.ignore_then(expr.clone())
.then_ignore(just(Token::Then))
.then(expr.clone())
.then_ignore(just(Token::Else))
.then(expr.clone())
.map(|((cond, t), f)| Expr::If {
cond: boxspan(cond),
t: boxspan(t),
f: boxspan(f)
});
// `{ expr; expr; ... }` with an optional trailing semicolon.
let block = expr.clone()
.map(boxspan)
.separated_by(just(Token::Semicolon))
.allow_trailing()
.collect::<Vec<_>>()
.delimited_by(
just(Token::Open(Delim::Brace)),
just(Token::Close(Delim::Brace)),
)
.map(|exprs| Expr::Block { exprs });
// The alternatives are tried in this order; the winner is tagged with
// the span it covers.
let atom = lit
.or(ident)
.or(paren_expr)
.or(lambda)
.or(let_or_assign)
.or(if_)
.or(block)
.map_with_span(|e, s| (e, s))
.boxed();
// An atom followed by at most one `( args )` list. With an argument list
// the result is a call spanning callee and arguments; otherwise the atom
// is returned unchanged.
let call = atom
.then(
expr.clone()
.separated_by(just(Token::Comma))
.allow_trailing()
.collect::<Vec<_>>()
.delimited_by(
just(Token::Open(Delim::Paren)),
just(Token::Close(Delim::Paren)),
)
.or_not()
)
.map_with_span(|(f, args), s| match args {
Some(args) => (Expr::Call(boxspan(f), args), s),
None => (f.0, f.1),
});
call
})
}
/// Builds the parser for a type annotation.
///
/// Alternatives are tried in this order: a named type (`bool`, `num`, `str`,
/// the unit token, or any other identifier as a type variable), an array
/// `[T]`, a function `(A, B) -> R`, and finally a tuple `(A, B)`.
pub fn type_parser<'tokens, 'src: 'tokens>() -> impl Parser<
'tokens,
ParserInput<'tokens, 'src>,
Type,
extra::Err<Rich<'tokens, Token<'src>, Span>>,
> + Clone {
recursive(|ty| {
// The three built-in names are matched before the catch-all identifier
// arm, which turns any other name into a type variable.
let lit_ty = select! {
Token::Ident("bool") => Type::Bool,
Token::Ident("num") => Type::Num,
Token::Ident("str") => Type::Str,
Token::Unit => Type::Unit,
Token::Ident(s) => Type::Var(s.to_string()),
};
// A parenthesised, comma-separated list of types (possibly empty), shared
// by the function and tuple forms.
let tys_paren = ty.clone()
.separated_by(just(Token::Comma))
.allow_trailing()
.collect::<Vec<_>>()
.delimited_by(
just(Token::Open(Delim::Paren)),
just(Token::Close(Delim::Paren)),
);
// `(params) -> ret`
let func = tys_paren.clone()
.then_ignore(just(Token::Arrow))
.then(ty.clone())
.map(|(ta, tr)| Type::Func(ta, Box::new(tr)));
// A parenthesised list not followed by `->`. An empty list emits a custom
// error, but the (empty) list is still returned and wrapped in
// `Type::Tuple`.
let tuple = tys_paren
.validate(|tys, span, emitter| {
if tys.is_empty() {
emitter.emit(Rich::custom(span,
"Tuple must have at least one element. Use `()` for the unit type."
.to_string()
));
}
tys
})
.map(Type::Tuple);
// `[T]`
let array = just(Token::Open(Delim::Brack))
.ignore_then(ty.clone())
.then_ignore(just(Token::Close(Delim::Brack)))
.map(|t| Type::Array(Box::new(t)));
// `func` is tried before `tuple` because both start with the same
// parenthesised list.
lit_ty
.or(array)
.or(func)
.or(tuple)
})
}
/// Builds the parser for a sequence of expressions separated by semicolons,
/// with an optional trailing semicolon.
pub fn exprs_parser<'tokens, 'src: 'tokens>() -> impl Parser<
    'tokens,
    ParserInput<'tokens, 'src>,
    Vec<Spanned<Expr<'src>>>,
    extra::Err<Rich<'tokens, Token<'src>, Span>>,
> + Clone {
    let separator = just(Token::Semicolon);
    expr_parser()
        .separated_by(separator)
        .allow_trailing()
        .collect::<Vec<_>>()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Lexes a nested function type and prints what `type_parser` produces.
    /// Only the lexing step is asserted; the parse result is just printed.
    #[test]
    fn test_type_parser() {
        let input = "(() -> () -> () -> (num)) -> bool";
        let (ts, errs) = lexer().parse(input).into_output_errors();
        assert!(ts.is_some());
        assert!(errs.is_empty());

        let Some(tokens) = ts else { return };
        let (ast, parse_errs) = type_parser()
            .map_with_span(|ty, span| (ty, span))
            .parse(tokens.as_slice().spanned((input.len()..input.len()).into()))
            .into_output_errors();
        println!("{:?}", ast);
        println!("{:?}", parse_errs);
    }

    /// Lexes a `let ... in { ... }` expression and prints what `expr_parser`
    /// produces. Only the lexing step is asserted; the parse result is just
    /// printed.
    #[test]
    fn test_expr_parser_atom() {
        let input = "
let id : (A) -> A = (\\x -> x) in {
if false
then id(3.14)
else id(true);
}
";
        let (ts, errs) = lexer().parse(input).into_output_errors();
        assert!(ts.is_some());
        assert!(errs.is_empty());

        let Some(tokens) = ts else { return };
        let (ast, parse_errs) = expr_parser()
            .map_with_span(|ty, span| (ty, span))
            .parse(tokens.as_slice().spanned((input.len()..input.len()).into()))
            .into_output_errors();
        println!("{:?}", ast);
        println!("{:?}", parse_errs);
    }
}

195
src/typing/_infer.rs Normal file
View File

@ -0,0 +1,195 @@
use super::{
ty::Type,
typed::TExpr,
};
use crate::{parse::{
ptree::*,
span::*,
}, span};
/// Typing environment: names mapped to types, with later entries shadowing
/// earlier ones.
#[derive(Clone, Debug)]
struct TypeEnv {
    /// Variable bindings in insertion order.
    bindings: Vec<(String, Type)>,
    /// Function signatures `(name, parameter types, return type)` in
    /// insertion order.
    funcs: Vec<(String, Vec<Type>, Type)>,
}

impl TypeEnv {
    /// Creates an empty environment.
    fn new() -> Self {
        Self {
            bindings: vec![],
            funcs: vec![],
        }
    }

    /// Binds a type to a name.
    fn bind(&mut self, name: String, ty: Type) {
        let entry = (name, ty);
        self.bindings.push(entry);
    }

    /// Binds a function (parameters and return type) to a name.
    fn bind_func(&mut self, name: String, args: Vec<Type>, ret_ty: Type) {
        let entry = (name, args, ret_ty);
        self.funcs.push(entry);
    }

    /// Returns the type of the most recently bound variable called `name`.
    fn lookup(&self, name: &str) -> Option<Type> {
        for (bound, ty) in self.bindings.iter().rev() {
            if bound.as_str() == name {
                return Some(ty.clone());
            }
        }
        None
    }

    /// Returns the signature of the most recently bound function called `name`.
    fn lookup_func(&self, name: &str) -> Option<(Vec<Type>, Type)> {
        for (bound, args, ret_ty) in self.funcs.iter().rev() {
            if bound.as_str() == name {
                return Some((args.clone(), ret_ty.clone()));
            }
        }
        None
    }
}
/// A type checking failure.
struct TypeError {
    /// Human-readable description of the problem.
    msg: String,
    /// Source location the error is reported at.
    loc: Span,
}
/// Type checks one expression against `env` and returns its typed form.
///
/// Handles literals, identifiers, unary and binary operators, and calls whose
/// callee is either an identifier registered with `bind_func` or a lambda.
/// Every other expression form, and lambda parameters without a type
/// annotation, hit `todo!()`.
///
/// Returns a `TypeError` for an unknown identifier or function, a wrong number
/// of call arguments, or an argument whose type differs from the parameter's.
///
/// NOTE(review): going by the module list in `typing/mod.rs`, this file does
/// not appear to be compiled; `check.rs` looks like its successor. Confirm
/// before relying on it.
fn type_expr(env: &mut TypeEnv, expr: Spanned<Expr>) -> Result<Spanned<TExpr>, TypeError> {
    match expr.value {
        Expr::Lit(lit) => match lit {
            Lit::Unit => Ok(span!(TExpr::Lit(Lit::Unit), expr.span)),
            Lit::Bool(x) => Ok(span!(TExpr::Lit(Lit::Bool(x)), expr.span)),
            Lit::Num(x) => Ok(span!(TExpr::Lit(Lit::Num(x)), expr.span)),
            Lit::Str(x) => Ok(span!(TExpr::Lit(Lit::Str(x)), expr.span)),
        },
        Expr::Ident(name) => {
            let ty = env.lookup(&name)
                .ok_or(TypeError {
                    msg: format!("unknown identifier `{}`", name),
                    loc: expr.span.clone(),
                })?;
            Ok(span!(TExpr::Ident(name, ty), expr.span))
        },
        Expr::Unary(op, expr) => {
            let span = expr.span.clone();
            let texpr = type_expr(env, *expr)?;
            // The result type follows from the operator alone.
            let ret_ty = match op {
                UnaryOp::Neg => Type::Num,
                UnaryOp::Not => Type::Bool,
            };
            Ok(span!(
                TExpr::Unary { op, expr: Box::new(texpr), ret_ty },
                span
            ))
        },
        Expr::Binary(op, lhs, rhs) => {
            let span = lhs.span.clone();
            let tlhs = type_expr(env, *lhs)?;
            let trhs = type_expr(env, *rhs)?;
            // The result type follows from the operator alone.
            let ret_ty = match op {
                BinaryOp::Add
                | BinaryOp::Sub
                | BinaryOp::Mul
                | BinaryOp::Div
                | BinaryOp::Rem => Type::Num,
                BinaryOp::And
                | BinaryOp::Or => Type::Bool,
                BinaryOp::Eq
                | BinaryOp::Ne
                | BinaryOp::Lt
                | BinaryOp::Le
                | BinaryOp::Gt
                | BinaryOp::Ge => Type::Bool,
            };
            Ok(span!(
                TExpr::Binary { op, lhs: Box::new(tlhs), rhs: Box::new(trhs), ret_ty },
                span
            ))
        },
        Expr::Call(func, args) => {
            let span = func.span.clone();
            match func.value {
                Expr::Ident(name) => {
                    // Get the function's argument and return types
                    let (arg_tys, ret_ty) = env.lookup_func(&name)
                        .ok_or(TypeError {
                            msg: format!("unknown function `{}`", name),
                            loc: span.clone(),
                        })?;
                    // Create a typed identifier
                    let tfunc = TExpr::Ident(
                        name,
                        Type::Func(arg_tys.clone(), Box::new(ret_ty.clone()))
                    );
                    // Check that the number of arguments matches
                    if arg_tys.len() != args.len() {
                        return Err(TypeError {
                            msg: format!(
                                "expected {} arguments, got {}",
                                arg_tys.len(), args.len()
                            ),
                            loc: span,
                        });
                    }
                    // Type check the arguments
                    let mut targs = Vec::new();
                    for (arg, ty) in args.into_iter().zip(arg_tys) {
                        let targ = type_expr(env, arg)?;
                        if targ.value.ty() != &ty {
                            return Err(TypeError {
                                msg: format!(
                                    "expected argument of type `{}`, got `{}`",
                                    ty, targ.value.ty()
                                ),
                                loc: targ.span,
                            });
                        }
                        targs.push(targ);
                    }
                    Ok(span!(
                        TExpr::Call {
                            func: Box::new(span!(tfunc, span.clone())),
                            args: targs,
                            ret_ty
                        },
                        span
                    ))
                },
                Expr::Lambda(args, body) => {
                    // Create a new type environment
                    let mut new_env = env.clone();
                    // Bind the parameters in the *new* environment, so the
                    // body can see them and they do not leak into the
                    // caller's scope.
                    let mut arg_tys = Vec::new();
                    for (arg, maybe_ty) in args {
                        let ty = match maybe_ty {
                            Some(ty) => ty,
                            None => todo!(), // TODO: infer the type
                        };
                        arg_tys.push((arg.clone(), ty.clone()));
                        new_env.bind(arg, ty);
                    }
                    // Type check the body
                    let tbody = type_expr(&mut new_env, *body)?;
                    // Return the typed lambda expression
                    Ok(span!(
                        TExpr::Lambda {
                            params: arg_tys,
                            body: Box::new(tbody.clone()),
                            ret_ty: tbody.value.ty().clone(),
                        },
                        span
                    ))
                },
                _ => todo!(),
            }
        },
        _ => todo!(),
    }
}

376
src/typing/check.rs Normal file
View File

@ -0,0 +1,376 @@
use crate::parse::parser::{
Span, Spanned,
UnaryOp, BinaryOp, Lit, Expr,
};
use super::{ty::Type, typed::TExpr};
/// Typing environment: names mapped to types, with later entries shadowing
/// earlier ones. Names borrow from the source text (`'src`).
#[derive(Clone, Debug)]
struct TypeEnv<'src> {
    /// Variable bindings in insertion order.
    bindings: Vec<(&'src str, Type)>,
    /// Function signatures `(name, parameter types, return type)` in
    /// insertion order.
    funcs: Vec<(&'src str, Vec<Type>, Type)>,
}

impl<'src> TypeEnv<'src> {
    /// Creates an empty environment.
    fn new() -> Self {
        Self {
            bindings: vec![],
            funcs: vec![],
        }
    }

    /// Bind a type to a name.
    fn bind(&mut self, name: &'src str, ty: Type) {
        let entry = (name, ty);
        self.bindings.push(entry);
    }

    /// Bind a function (parameters and return type) to a name.
    fn bind_func(&mut self, name: &'src str, args: Vec<Type>, ret_ty: Type) {
        let entry = (name, args, ret_ty);
        self.funcs.push(entry);
    }

    /// Returns the type of the most recently bound variable called `name`.
    fn lookup(&self, name: &str) -> Option<Type> {
        for (bound, ty) in self.bindings.iter().rev() {
            if *bound == name {
                return Some(ty.clone());
            }
        }
        None
    }

    /// Returns the signature of the most recently bound function called `name`.
    fn lookup_func(&self, name: &str) -> Option<(Vec<Type>, Type)> {
        for (bound, args, ret_ty) in self.funcs.iter().rev() {
            if *bound == name {
                return Some((args.clone(), ret_ty.clone()));
            }
        }
        None
    }
}
/// A type checking failure, with optional extra information for the report.
#[derive(Debug)]
pub struct TypeError {
    /// Headline message of the error.
    pub msg: String,
    /// Optional text for the label placed at `loc`.
    pub note: Option<String>,
    /// Optional secondary message together with the location it refers to.
    pub hint: Option<(String, Span)>,
    /// Location the error is reported at.
    pub loc: Span,
}

impl TypeError {
    /// Creates an error with a message and location, and no note or hint.
    fn new(msg: String, loc: Span) -> Self {
        Self { msg, loc, note: None, hint: None }
    }

    /// Returns the error with its note set to `note`.
    fn with_note(self, note: String) -> Self {
        Self { note: Some(note), ..self }
    }

    /// Returns the error with its hint set to `hint`, located at `loc`.
    fn with_hint(self, hint: String, loc: Span) -> Self {
        Self { hint: Some((hint, loc)), ..self }
    }
}
fn type_expr<'src>(
env: &mut TypeEnv<'src>, expr: Spanned<Expr<'src>>
) -> Result<Spanned<TExpr<'src>>, TypeError> {
macro_rules! oks { // Spanned Ok macro.
($e:expr $(,)?) => {
Ok(($e, expr.1))
};
}
macro_rules! unbox { // Unbox a Spanned<Box<T>> into a Spanned<T>.
($e:expr) => {
(*$e.0, $e.1)
};
}
macro_rules! sbox { // Box the first value of a Spanned<T>.
($e:expr) => {
(Box::new($e.0), $e.1)
};
}
match expr.0 {
Expr::Lit(lit) => match lit {
Lit::Unit => oks!(TExpr::Lit(Lit::Unit)),
Lit::Bool(x) => oks!(TExpr::Lit(Lit::Bool(x))),
Lit::Num(x) => oks!(TExpr::Lit(Lit::Num(x))),
Lit::Str(x) => oks!(TExpr::Lit(Lit::Str(x))),
}
Expr::Ident(name) => {
let ty = env.lookup(name)
.ok_or(TypeError::new(format!("unknown identifier `{}`", name), expr.1))?;
oks!(TExpr::Ident(name, ty))
}
Expr::Unary(op, e) => {
let te = type_expr(env, unbox!(e))?;
let ret_ty = match op {
UnaryOp::Neg => Type::Num,
UnaryOp::Not => Type::Bool,
};
oks!(TExpr::Unary {
op,
expr: sbox!(te),
ret_ty,
})
}
Expr::Binary(op, lhs, rhs) => {
let tlhs = type_expr(env, unbox!(lhs))?;
let trhs = type_expr(env, unbox!(rhs))?;
let ret_ty = match op {
BinaryOp::Add
| BinaryOp::Sub
| BinaryOp::Mul
| BinaryOp::Div
| BinaryOp::Rem => Type::Num,
BinaryOp::And
| BinaryOp::Or => Type::Bool,
BinaryOp::Eq
| BinaryOp::Ne
| BinaryOp::Lt
| BinaryOp::Le
| BinaryOp::Gt
| BinaryOp::Ge => Type::Bool,
};
oks!(TExpr::Binary {
op,
lhs: sbox!(tlhs),
rhs: sbox!(trhs),
ret_ty,
})
}
Expr::Lambda(args, body) => {
// Create a new type environment.
let mut new_env = env.clone();
// Bind the arguments to the new environment.
let mut arg_tys = Vec::new();
for (arg, maybe_ty) in args {
let ty = match maybe_ty {
Some(ty) => ty,
None => todo!(), // TODO: infer the type of the argument after type checking the body.
};
arg_tys.push((arg, ty.clone()));
new_env.bind(arg, ty);
}
// Type check the body.
let tbody = type_expr(&mut new_env, unbox!(body))?;
// Return the typed lambda expression.
oks!(TExpr::Lambda {
params: arg_tys,
body: sbox!(tbody.clone()),
ret_ty: tbody.0.ty().clone(),
})
}
Expr::Call(func, cargs) => {
// Get span of the arguments.
let args_span = cargs.iter()
.map(|arg| arg.1.into_range())
.fold(None, |acc: Option<std::ops::Range<usize>>, range| match acc {
Some(acc) => Some(acc.start..range.end),
None => Some(range),
})
.unwrap_or(func.1.end..func.1.end+2);
// Type check the arguments.
let mut targs = Vec::new();
for arg in cargs {
let targ = type_expr(env, arg)?;
targs.push(targ);
}
// Type check the function (callee).
let tfunc = type_expr(env, unbox!(func))?;
// Get the function type of the callee. (if any).
if let Some((param_tys, ret_ty)) = tfunc.0.clone().as_fn() {
// Check if the number of arguments match the number of parameters.
if param_tys.len() != targs.len() {
return Err(TypeError::new(
format!(
"expected {} arguments, got {}",
param_tys.len(),
targs.len(),
),
args_span.into(),
).with_note(format!(
"expected {} arguments",
param_tys.len(),
)).with_hint(
format!(
"this expect arguments of type `{}`",
param_tys.iter().map(|ty| ty.to_string()).collect::<Vec<_>>().join(", ")
),
func.1,
));
}
// Check if the types of the arguments match the types of the parameters.
for (arg, param) in targs.iter().zip(param_tys.iter()) {
if arg.0.ty() != param {
return Err(TypeError::new(
format!(
"expected argument of type `{}`, got `{}`",
param,
arg.0.ty(),
),
arg.1,
).with_note(format!(
"expected argument of type `{}`",
param,
)));
}
}
// Return the typed call expression.
oks!(TExpr::Call {
func: sbox!(tfunc),
args: targs,
ret_ty,
})
} else {
Err(TypeError::new(
format!("expected function, got `{}`", tfunc.0.ty()),
tfunc.1,
))
}
}
Expr::If { cond, t, f } => {
let tcond = type_expr(env, unbox!(cond))?;
let tt = type_expr(env, unbox!(t))?;
let tf = type_expr(env, unbox!(f))?;
// Check if the condition is of type `bool`.
if tcond.0.ty() != &Type::Bool {
return Err(TypeError::new(
format!("expected condition of type `bool`, got `{}`", tcond.0.ty()),
tcond.1,
));
}
// Check if the true and false branches have the same type.
if tt.0.ty() != tf.0.ty() {
return Err(TypeError::new(
format!(
"expected the branches to have the same type, got `{}` and `{}`",
tt.0.ty(),
tf.0.ty(),
),
tf.1,
).with_note(format!(
"expected this branch to be type of `{}`",
tt.0.ty(),
)));
}
oks!(TExpr::If {
cond: sbox!(tcond),
br_ty: tt.0.ty().clone(),
t: sbox!(tt),
f: sbox!(tf),
})
}
Expr::Let { bindings, body } => {
// Create a new type environment.
let mut new_env = env.clone();
// Type check the bindings.
let mut tbindings = Vec::new();
for (name, maybe_ty, expr) in bindings {
let ty = match maybe_ty {
Some(ty) => ty,
None => todo!(), // TODO: infer.
};
let texpr = type_expr(&mut new_env, unbox!(expr))?;
// Check if the type of the binding matches the type of the expression.
if texpr.0.ty() != &ty {
return Err(TypeError::new(
format!(
"expected the binding to be of type `{}`, got `{}`",
ty,
texpr.0.ty(),
),
texpr.1,
).with_note(format!(
"expected this binding to be of type `{}`",
ty,
)));
}
tbindings.push((name, ty.clone(), sbox!(texpr)));
new_env.bind(name, ty);
}
// Type check the body.
let tbody = type_expr(&mut new_env, unbox!(body))?;
// Return the typed let expression.
oks!(TExpr::Let {
bindings: tbindings,
body: sbox!(tbody),
})
}
Expr::Assign(bindings) => {
// Create a new type environment.
let mut new_env = env.clone();
// Type check the bindings.
let mut tbindings = Vec::new();
for (name, maybe_ty, expr) in bindings {
let ty = match maybe_ty {
Some(ty) => ty,
None => todo!(), // TODO: infer.
};
let texpr = type_expr(&mut new_env, unbox!(expr))?;
// Check if the type of the binding matches the type of the expression.
if texpr.0.ty() != &ty {
return Err(TypeError::new(
format!(
"expected the binding to be of type `{}`, got `{}`",
ty,
texpr.0.ty(),
),
texpr.1,
).with_note(format!(
"expected this binding to be of type `{}`",
ty,
)));
}
tbindings.push((name, ty.clone(), sbox!(texpr)));
new_env.bind(name, ty);
}
// Return the typed assign expression.
oks!(TExpr::Assign(tbindings))
}
_ => todo!(),
}
}
/// Type checks a sequence of expressions in order, sharing one fresh
/// environment between them.
///
/// Returns the typed expressions, or the first `TypeError` encountered;
/// expressions after a failing one are not checked.
pub fn check(es: Vec<Spanned<Expr<'_>>>) -> Result<Vec<Spanned<TExpr<'_>>>, TypeError> {
    let mut env = TypeEnv::new();
    // Collecting into a `Result` stops at the first error.
    es.into_iter()
        .map(|e| type_expr(&mut env, e))
        .collect()
}

3
src/typing/mod.rs Normal file
View File

@ -0,0 +1,3 @@
pub mod ty;
pub mod check;
pub mod typed;

38
src/typing/ty.rs Normal file
View File

@ -0,0 +1,38 @@
use std::fmt::{self, Display, Formatter};
// TODO: Introduce lifetime here to reduce cloning.
/// A type in the language.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Type {
    Unit, Bool, Num, Str,
    /// A function type: parameter types and return type.
    Func(Vec<Type>, Box<Type>),
    /// A tuple of element types.
    Tuple(Vec<Type>),
    /// An array of one element type.
    Array(Box<Type>),
    /// A named type variable.
    Var(String),
}

/// Writes `tys` separated by single spaces; writes nothing for an empty slice.
fn write_spaced(f: &mut Formatter, tys: &[Type]) -> fmt::Result {
    for (i, ty) in tys.iter().enumerate() {
        if i > 0 {
            write!(f, " ")?;
        }
        write!(f, "{}", ty)?;
    }
    Ok(())
}

impl Display for Type {
    /// Formats the type for diagnostics.
    ///
    /// Function parameters and tuple elements are separated by single spaces
    /// inside parentheses. An empty parameter or element list is written as
    /// `()` instead of panicking on an out-of-bounds index.
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        match *self {
            Type::Unit => write!(f, "Unit"),
            Type::Bool => write!(f, "Bool"),
            Type::Num => write!(f, "Num"),
            Type::Str => write!(f, "Str"),
            Type::Func(ref args, ref ret) => {
                write!(f, "(")?;
                write_spaced(f, args)?;
                write!(f, ") -> {}", ret)
            }
            Type::Tuple(ref tys) => {
                write!(f, "(")?;
                write_spaced(f, tys)?;
                write!(f, ")")
            }
            Type::Array(ref ty) => write!(f, "[{}]", ty),
            Type::Var(ref id) => write!(f, "{}", id),
        }
    }
}

94
src/typing/typed.rs Normal file
View File

@ -0,0 +1,94 @@
use super::ty::Type;
use crate::parse::parser::{
BinaryOp,
UnaryOp,
Lit,
Spanned,
};
/// One type-checked `let` binding: name, declared type and initializer.
type TypedBinding<'src> = (
    &'src str,
    Type,
    Spanned<Box<TExpr<'src>>>,
);

/// Typed version of the expression.
///
/// Mirrors the parser's expression tree, with type information attached to
/// the nodes that need it.
#[derive(Clone, Debug)]
pub enum TExpr<'src> {
    /// A literal; its type follows from the literal kind.
    Lit(Lit<'src>),
    /// A name together with the type it is bound to.
    Ident(&'src str, Type),
    /// A prefix operator application and its result type.
    Unary {
        op: UnaryOp,
        expr: Spanned<Box<Self>>,
        ret_ty: Type,
    },
    /// An infix operator application and its result type.
    Binary {
        op: BinaryOp,
        lhs: Spanned<Box<Self>>,
        rhs: Spanned<Box<Self>>,
        ret_ty: Type,
    },
    /// A lambda: typed parameters, body, and the type of the body.
    Lambda {
        params: Vec<(&'src str, Type)>,
        body: Spanned<Box<Self>>,
        ret_ty: Type,
    },
    /// A call: callee, arguments, and the type the call evaluates to.
    Call {
        func: Spanned<Box<Self>>,
        args: Vec<Spanned<Self>>,
        ret_ty: Type,
    },
    /// `if cond then t else f`, with the type of the branches.
    If {
        cond: Spanned<Box<Self>>,
        t: Spanned<Box<Self>>,
        f: Spanned<Box<Self>>,
        br_ty: Type,
    },
    /// `let bindings in body`.
    Let {
        bindings: Vec<TypedBinding<'src>>,
        body: Spanned<Box<Self>>,
    },
    /// `let bindings` without a body.
    Assign(Vec<TypedBinding<'src>>),
    /// A block of expressions and the type it evaluates to.
    Block {
        exprs: Vec<Self>,
        ret_ty: Type,
    },
}
impl<'src> TExpr<'src> {
    /// Returns the type recorded for this expression.
    ///
    /// NOTE(review): for a lambda this is the type of its body (`ret_ty`),
    /// not a function type. Confirm this is what callers expect.
    pub fn ty(&self) -> &Type {
        match self {
            // A literal's type follows from its kind.
            TExpr::Lit(Lit::Unit) => &Type::Unit,
            TExpr::Lit(Lit::Bool(_)) => &Type::Bool,
            TExpr::Lit(Lit::Num(_)) => &Type::Num,
            TExpr::Lit(Lit::Str(_)) => &Type::Str,

            TExpr::Ident(_, ty) => ty,

            // These nodes all store their result type in `ret_ty`.
            TExpr::Unary { ret_ty, .. }
            | TExpr::Binary { ret_ty, .. }
            | TExpr::Lambda { ret_ty, .. }
            | TExpr::Call { ret_ty, .. }
            | TExpr::Block { ret_ty, .. } => ret_ty,

            TExpr::If { br_ty, .. } => br_ty,

            // Get the type from the body.
            TExpr::Let { body, .. } => body.0.ty(),

            // Assignment is always unit.
            TExpr::Assign(_) => &Type::Unit,
        }
    }

    /// Consumes the expression and returns its parameter types and return
    /// type when it is callable: an identifier bound to a function type, or a
    /// lambda. Returns `None` for every other expression.
    pub fn as_fn(self) -> Option<(Vec<Type>, Type)> {
        match self {
            TExpr::Ident(_, Type::Func(params, ret_ty)) => Some((params, *ret_ty)),
            TExpr::Lambda { params, ret_ty, .. } => {
                // Keep only the parameter types, dropping the names.
                let mut param_tys = Vec::with_capacity(params.len());
                for (_, ty) in params {
                    param_tys.push(ty);
                }
                Some((param_tys, ret_ty))
            }
            _ => None,
        }
    }
}

46
test.hlm Normal file
View File

@ -0,0 +1,46 @@
-- Source
fun make_add() : (num, num) -> num =
\a, b -> num = {
println(a + b);
return a + b;
};
let add = make_add();
fun main() = {
let foo = 34;
let bar = 35 in {
let r = add(foo, bar);
match r
| 69 -> println(r);
| 42 -> println("What");
| _ -> println("Unreachable");
};
};
--- Alpha
Convert `match` to `if-else`
Convert `let` to `call`
---
(fun make_add []
(lambda [a b] (do
(println (+ a b))
(+ a b)
)))
(fun add [a b] (+ a b))
(fun main [] (do
(def foo 34)
((lambda [bar] (do
(def r (add foo bar))
(if (= r 69) (println r)
(= r 42) (println "What")
true (println "Unreachable"))
)) bar)
))