Rename stuff, Initialize IRs

tc
azur 2023-04-26 00:50:40 +07:00
parent 019bc88186
commit ca469f096e
15 changed files with 299 additions and 306 deletions

4
Cargo.lock generated
View File

@ -212,6 +212,10 @@ dependencies = [
"windows-sys",
]
[[package]]
name = "ir"
version = "0.1.0"
[[package]]
name = "is-terminal"
version = "0.4.7"

View File

@ -4,4 +4,5 @@ members = [
"bin",
"syntax",
"typing",
"ir",
]

View File

@ -9,3 +9,7 @@ chumsky = "1.0.0-alpha.3"
clap = { version = "4.2.4", features = ["derive"] }
syntax = { path = "../syntax" }
typing = { path = "../typing" }
[[bin]]
name = "hc"
path = "src/main.rs"

8
ir/Cargo.toml Normal file
View File

@ -0,0 +1,8 @@
[package]
name = "ir"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]

190
ir/src/lib.rs Normal file
View File

@ -0,0 +1,190 @@
use std::fmt::{Display, Formatter, Result as FmtResult};
/// An expression node of the intermediate representation.
///
/// `'src` is the lifetime of the string slices borrowed for variable and
/// callee names.
///
/// `PartialEq`/`Eq` are derived so expressions can be compared directly
/// (for example with `assert_eq!` in tests).
#[derive(Debug, Clone, PartialEq, Eq)]
enum IRExpr<'src> {
    /// A signed 64-bit integer literal.
    Int(i64),
    /// A name, rendered verbatim by `Display`.
    Var(&'src str),
    /// A call: the callee name followed by its argument expressions.
    Call(&'src str, Vec<Self>),
}
impl Display for IRExpr<'_> {
fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
match self {
IRExpr::Int(x) => write!(f, "{x}"),
IRExpr::Var(x) => write!(f, "{x}"),
IRExpr::Call(name, args) => {
write!(f, "{name}(")?;
for (i, arg) in args.iter().enumerate() {
if i > 0 { write!(f, ", ")?; }
write!(f, "{arg}")?;
}
write!(f, ")")
}
}
}
}
/// A statement-level node of the intermediate representation.
///
/// `'src` is the lifetime of the string slices borrowed for names.
#[derive(Debug, Clone)]
enum IR<'src> {
    /// Binds the result of `value` to `name`; rendered as `name = value`.
    Define {
        name: &'src str,
        value: Box<IRExpr<'src>>,
    },

    /// A bare expression used as a statement.
    IRExpr(IRExpr<'src>),

    /// A numbered block; rendered as `id:` followed by its body.
    Block {
        id: usize,
        body: Vec<IR<'src>>,
    },

    /// A function; rendered as its name and space-separated argument names,
    /// a colon, and then its body.
    Func {
        name: &'src str,
        args: Vec<&'src str>,
        body: Vec<IR<'src>>,
    },
}
/// Renders `ir` as text, with its first line prefixed by `indent` spaces.
///
/// * `Define` becomes `name = value` and a bare expression is written as-is;
///   neither ends with a newline.
/// * `Block` becomes `id:` and `Func` becomes `name arg0 arg1:`, each on its
///   own line and followed by the body. Every body item is rendered four
///   spaces deeper and terminated by a newline.
///
/// A nested `Block`/`Func` already ends with a newline, so when it appears
/// inside another body the extra terminator leaves an empty line after it.
fn display_ir(ir: &IR, indent: usize) -> String {
    // Renders every child one level deeper, each followed by a newline.
    fn render_body(body: &[IR], indent: usize) -> String {
        body.iter()
            .map(|child| display_ir(child, indent + 4) + "\n")
            .collect()
    }

    let pad = " ".repeat(indent);
    match ir {
        IR::Define { name, value } => format!("{pad}{name} = {value}"),
        IR::IRExpr(expr) => format!("{pad}{expr}"),
        IR::Block { id, body } => {
            format!("{pad}{id}:\n{}", render_body(body, indent))
        }
        IR::Func { name, args, body } => {
            // The space after the name is unconditional, so a function
            // without arguments renders as `name :`.
            format!(
                "{pad}{name} {}:\n{}",
                args.join(" "),
                render_body(body, indent)
            )
        }
    }
}
/// Writes the node exactly as `display_ir` renders it at indentation zero.
impl Display for IR<'_> {
    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
        let rendered = display_ir(self, 0);
        f.write_str(&rendered)
    }
}
#[cfg(test)]
mod tests {
    use super::{
        IR::*,
        IRExpr::*
    };

    /// Builds two sample functions and checks their exact textual rendering.
    ///
    /// The expected text includes the empty line that follows every block:
    /// a block's rendering already ends with a newline and the enclosing
    /// function appends one more after each body item.
    #[test]
    fn test_ir() {
        let my_add = Func {
            name: "my_add",
            args: vec!["a", "b"],
            body: vec![
                Block {
                    id: 0,
                    body: vec![
                        Define {
                            name: "v0",
                            value: Call("add", vec![Var("a"), Var("b")]).into(),
                        },
                    ],
                },
            ],
        };

        let factorial = Func {
            name: "factorial",
            args: vec!["n"],
            body: vec![
                Block {
                    id: 0,
                    body: vec![
                        Define {
                            name: "v0",
                            value: Call("eq", vec![Var("n"), Int(1)]).into(),
                        },
                        IRExpr(Call("jf", vec![Var("v0"), Int(1)])),
                        IRExpr(Call("ret", vec![Var("n")])),
                    ],
                },
                Block {
                    id: 1,
                    body: vec![
                        Define {
                            name: "v0",
                            value: Call("isub", vec![Var("n"), Int(1)]).into(),
                        },
                        Define {
                            name: "v1",
                            value: Call("call", vec![Var("factorial"), Var("v0")]).into(),
                        },
                        Define {
                            name: "v2",
                            value: Call("imul", vec![Var("n"), Var("v1")]).into(),
                        },
                        IRExpr(Call("ret", vec![Var("v2")])),
                    ],
                },
            ],
        };

        assert_eq!(
            my_add.to_string(),
            concat!(
                "my_add a b:\n",
                "    0:\n",
                "        v0 = add(a, b)\n",
                "\n",
            )
        );

        assert_eq!(
            factorial.to_string(),
            concat!(
                "factorial n:\n",
                "    0:\n",
                "        v0 = eq(n, 1)\n",
                "        jf(v0, 1)\n",
                "        ret(n)\n",
                "\n",
                "    1:\n",
                "        v0 = isub(n, 1)\n",
                "        v1 = call(factorial, v0)\n",
                "        v2 = imul(n, v1)\n",
                "        ret(v2)\n",
                "\n",
            )
        );
    }
}

View File

@ -1 +0,0 @@
let add = \x : num, y : num -> num = x + y;

229
spec.md
View File

@ -1,229 +0,0 @@
# Specification
## Syntax
---
### Expressions
- Literals
A literal is a value that is written directly
into the source code.
- Number
A number literal is of type `f64` and can
be expressed with or without a decimal point.
- Examples: `1`, `3.14`, `.5`
```ebnf
Number:
Digits + (maybe '.' + Digits).
(* Optional whole number, e.g. .5 *)
('.' + Digits).
Digits:
one or more of 0..9.
```
- String
A string literal consists of zero or more
characters enclosed in double quotes (`"`).
- Examples: `"Hello, World"`,
`"They said \"Hi\""`,
`"Foo\nBar"`
```ebnf
String:
'"' + (zero or more of Character) + '"'.
Character:
any character except '"' or '\'.
escape sequences.
```
- Boolean
A boolean literal can be either `true` or
`false`.
```ebnf
Boolean:
'true' or 'false'.
```
- Unit
A unit literal is a value that represents
the absence of a value.
```ebnf
Unit:
'()'.
```
- Identifiers
An identifier is a name that is used to refer
to a variable, function, or other entity.
- Examples: `foo`, `barBaz`, `add2`
```ebnf
Identifier:
(Letter + zero or more of LetterOrDigit) but
not any of Keywords.
Letter:
one of a..z or A..Z.
LetterOrDigit:
Letter or one of 0..9.
```
- Operators
An operator is a symbol that is used to
represent an operation.
```ebnf
Binary:
one of (
(* Arithmetic *)
+ - * / %
(* Comparison *)
== != < <= > >=
(* Logical *)
&& ||
).
Unary:
one of (- !).
```
- Application (Function Call)
An application is an expression that calls a
function with a list of arguments.
The callee does not have to be a named
function, but it must be an expression that
evaluates to a function.
```ebnf
Arguments:
zero or more of Expression delimited by ','.
Application:
Expression + '(' + Arguments + ')'.
```
- Examples:
```rust
foo(1, 2, 3)
(\x -> x + 1)(2)
```
- If-Else
An if-else expression is an expression that
evaluates to one of two expressions depending
on the value of a condition.
```ebnf
IfElse:
'if' + Expression + 'then' + Expression + 'else' + Expression.
```
- Examples:
```rust
if true then 1 else 2
if 1 == 2 then "foo" else "bar"
```
- Let Binding(s)
There are 2 types of let bindings:
- "Imperative" let bindings, which are
similar to variable declarations in
imperative languages (Javascript, Rust, etc.).
```ebnf
Bindings:
one or more of Binding delimited by ','.
Let:
'let' + Bindings.
```
- Example:
```rust
let x = 1 // -> ()
x + 1 // -> 2
```
- "Functional" let bindings, which are
similar to variable declarations in
functional languages (ML-family, etc.).
```ebnf
LetIn:
'let' + Bindings + 'in' + Expression.
```
- Example:
```rust
let x = 1, y = 2 in
x + y // -> 3
```
- Block & Return
A block is a sequence of expressions that are
evaluated in order and the value of the last
expression is returned (if not ended with a
semicolon).
A return expression is an expression that
will exit the current block and return the
value of the expression. It is not necessary
to use a return expression in a block, but
it could be useful for early termination.
Any use of a return expression outside of a
block is not allowed.
```ebnf
Block:
'{' + zero or more of Expression + '}'.
Return:
'return' + Expression.
```
- Examples:
```rust
{
let x = 1;
let y = 2;
x + y
}
```
```rust
fun foo(): num = {
if true then
return 1;
let bar = 42;
bar
};
```
### Keywords
Keywords are reserved words that cannot be
used as identifiers. They are used to
represent constructs of the language.
```ebnf
Keywords:
if then else
let fun return
```

View File

@ -10,7 +10,7 @@ pub enum Delim { Paren, Brack, Brace }
// 'src is the lifetime of the source code string.
#[derive(Clone, Debug, PartialEq)]
pub enum Token<'src> {
Unit, Bool(bool), Num(f64), Str(&'src str),
Unit, Bool(bool), Int(i64), Str(&'src str),
Ident(&'src str),
Add, Sub, Mul, Div, Rem,
@ -29,7 +29,7 @@ impl<'src> Display for Token<'src> {
match self {
Token::Unit => write!(f, "()"),
Token::Bool(b) => write!(f, "{}", b),
Token::Num(n) => write!(f, "{}", n),
Token::Int(n) => write!(f, "{}", n),
Token::Str(s) => write!(f, "\"{}\"", s),
Token::Ident(s) => write!(f, "{}", s),
@ -82,7 +82,7 @@ pub type Span = SimpleSpan<usize>;
pub enum Lit<'src> {
Unit,
Bool(bool),
Num(f64),
Int(i64),
Str(&'src str),
}

View File

@ -1,37 +1,3 @@
pub mod expr;
pub mod parser;
pub mod ty;
#[cfg(test)]
mod tests {
    use chumsky::prelude::*;
    use super::{ expr::*, parser::* };

    /// Lexes a small `let ... in ...` expression and checks that every token
    /// and its span come out as expected, with no lexer errors.
    #[test]
    fn simple() {
        let source = "let x = 1 + (), y = foo in x + !(y)";
        let (tokens, errors) = lexer().parse(source).into_output_errors();
        assert!(errors.is_empty());

        // Each row is (token, span start, span end) within `source`.
        let expected: Vec<_> = vec![
            (Token::Let, 0, 3),
            (Token::Ident("x"), 4, 5),
            (Token::Assign, 6, 7),
            (Token::Num(1.0), 8, 9),
            (Token::Add, 10, 11),
            (Token::Unit, 12, 14),
            (Token::Comma, 14, 15),
            (Token::Ident("y"), 16, 17),
            (Token::Assign, 18, 19),
            (Token::Ident("foo"), 20, 23),
            (Token::In, 24, 26),
            (Token::Ident("x"), 27, 28),
            (Token::Add, 29, 30),
            (Token::Not, 31, 32),
            (Token::Open(Delim::Paren), 32, 33),
            (Token::Ident("y"), 33, 34),
            (Token::Close(Delim::Paren), 34, 35),
        ]
        .into_iter()
        .map(|(token, start, end)| (token, Span::new(start, end)))
        .collect();

        assert_eq!(tokens, Some(expected));
    }
}

View File

@ -3,12 +3,17 @@ use chumsky::prelude::*;
use super::{ expr::*, ty::Type };
pub fn lexer<'src>() -> impl Parser<'src, &'src str, Vec<(Token<'src>, Span)>, extra::Err<Rich<'src, char, Span>>> {
let num = text::int(10)
.then(just('.').then(text::digits(10)).or_not())
// let num = text::int(10)
// .then(just('.').then(text::digits(10)).or_not())
// .slice()
// .from_str()
// .unwrapped()
// .map(Token::Int);
let int = text::int(10)
.slice()
.from_str()
.unwrapped()
.map(Token::Num);
.map(Token::Int);
let strn = just('"')
.ignore_then(none_of('"').repeated())
@ -31,7 +36,7 @@ pub fn lexer<'src>() -> impl Parser<'src, &'src str, Vec<(Token<'src>, Span)>, e
"false" => Token::Bool(false),
"let" => Token::Let,
"in" => Token::In,
"func" => Token::Func,
"fn" => Token::Func,
"return" => Token::Return,
"if" => Token::If,
"then" => Token::Then,
@ -75,15 +80,20 @@ pub fn lexer<'src>() -> impl Parser<'src, &'src str, Vec<(Token<'src>, Span)>, e
));
let token = choice((
num,
int,
strn,
word,
sym,
delim,
));
let comment = just("//")
.then(any().and_is(just('\n').not()).repeated())
.padded();
token
.map_with_span(move |tok, span| (tok, span))
.padded_by(comment.repeated())
.padded()
// If we get an error, skip to the next character and try again.
.recover_with(skip_then_retry_until(any().ignored(), end()))
@ -114,7 +124,7 @@ pub fn expr_parser<'tokens, 'src: 'tokens>() -> impl Parser<
let lit = select! {
Token::Unit => Expr::Lit(Lit::Unit),
Token::Bool(b) => Expr::Lit(Lit::Bool(b)),
Token::Num(n) => Expr::Lit(Lit::Num(n)),
Token::Int(n) => Expr::Lit(Lit::Int(n)),
Token::Str(s) => Expr::Lit(Lit::Str(s)),
};
@ -132,20 +142,25 @@ pub fn expr_parser<'tokens, 'src: 'tokens>() -> impl Parser<
)
.map(|e: Spanned<Expr>| e.0);
// \x : t, y : t -> rt = e
let lambda = just(Token::Lambda)
// func (x t, y t) : rt = e
// func x, y = e
let lambda = just(Token::Func)
.ignore_then(
(
symbol.then(
just(Token::Colon)
.ignore_then(type_parser())
.or_not())
).separated_by(just(Token::Comma))
.allow_trailing()
symbol
.map(|s| (s, None))
.or(symbol
.then(type_parser())
.delimited_by(
just(Token::Open(Delim::Paren)),
just(Token::Close(Delim::Paren)),
)
.map(|(s, t)| (s, Some(t)))
)
.repeated()
.collect::<Vec<_>>()
)
.then(
just(Token::Arrow)
just(Token::Colon)
.ignore_then(type_parser())
.or_not()
)
@ -219,7 +234,8 @@ pub fn expr_parser<'tokens, 'src: 'tokens>() -> impl Parser<
.or(if_)
.or(block)
.map_with_span(|e, s| (e, s))
.boxed();
.boxed()
.labelled("(atomic) expression");
let call = atom
.then(
@ -322,7 +338,7 @@ pub fn type_parser<'tokens, 'src: 'tokens>() -> impl Parser<
recursive(|ty| {
let lit_ty = select! {
Token::Ident("Bool") => Type::Bool,
Token::Ident("Num") => Type::Num,
Token::Ident("Int") => Type::Int,
Token::Ident("Str") => Type::Str,
// TODO: Support type variables in both the parser and the type checker.
Token::Ident(_) => Type::Var(69),
@ -362,9 +378,11 @@ pub fn type_parser<'tokens, 'src: 'tokens>() -> impl Parser<
})
.map(Type::Tuple);
let array = just(Token::Open(Delim::Brack))
.ignore_then(ty.clone())
.then_ignore(just(Token::Close(Delim::Brack)))
let array = ty.clone()
.delimited_by(
just(Token::Open(Delim::Brack)),
just(Token::Close(Delim::Brack)),
)
.map(|t| Type::Array(Box::new(t)));
lit_ty

View File

@ -3,7 +3,7 @@ use std::fmt::{self, Display, Formatter};
// TODO: Introduce lifetime here to reduce cloning.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Type {
Unit, Bool, Num, Str,
Unit, Bool, Int, Str,
Var(usize), // This type is only used during type inference.
Func(Vec<Type>, Box<Type>),
Tuple(Vec<Type>),
@ -15,7 +15,7 @@ impl Display for Type {
match *self {
Type::Unit => write!(f, "Unit"),
Type::Bool => write!(f, "Bool"),
Type::Num => write!(f, "Num"),
Type::Int => write!(f, "Int"),
Type::Str => write!(f, "Str"),
Type::Var(id) => write!(f, "{}", itoa(id)),
Type::Func(ref args, ref ret) => {

View File

@ -1 +1,11 @@
let f = \f, g, h, a, b, c, d = ;
let addi = fn x y = x + y;
let factorial = fn x =
if x == 1
then x
else x * factorial(x - 1);
let result = factorial(addi(2, 3));
let println = fn x = ();
println(result);

18
test.ssa Normal file
View File

@ -0,0 +1,18 @@
my_add x y:
0:
v0 = iadd x y
ret v0
factorial x:
0:
v0 = eq x 1
jf v0 1
ret x
1:
v0 = isub x 1
v1 = call factorial v0
v2 = imul x v1
ret v2
v0 = call my_add 2 3
v1 = call factorial v0

4
test2.hlm Normal file
View File

@ -0,0 +1,4 @@
let factorial = fn x =
if x == 1
then x
else x * factorial(x - 1);

View File

@ -68,7 +68,7 @@ impl<'src> Infer<'src> {
fn occurs(&self, i: usize, t: Type) -> bool {
use Type::*;
match t {
Unit | Bool | Num | Str => false,
Unit | Bool | Int | Str => false,
Var(j) => {
if let Some(t) = self.subst(j) {
if t != Var(j) {
@ -92,7 +92,7 @@ impl<'src> Infer<'src> {
// Literal types
(Unit, Unit)
| (Bool, Bool)
| (Num, Num)
| (Int, Int)
| (Str, Str) => Ok(()),
// Variable
@ -298,9 +298,9 @@ impl<'src> Infer<'src> {
self.add_constraint(expected, Type::Bool, span);
ok!(TExpr::Lit(Lit::Bool(b)))
}
Lit::Num(i) => {
self.add_constraint(expected, Type::Num, span);
ok!(TExpr::Lit(Lit::Num(i)))
Lit::Int(i) => {
self.add_constraint(expected, Type::Int, span);
ok!(TExpr::Lit(Lit::Int(i)))
}
Lit::Str(s) => {
self.add_constraint(expected, Type::Str, span);
@ -326,14 +326,14 @@ impl<'src> Infer<'src> {
// The type of the left and right hand side are inferred and
// the expected type is determined by the operator
Expr::Unary(op, e) => match op {
// Numeric operators (Num -> Num)
// Numeric operators (Int -> Int)
UnaryOp::Neg => {
let (te, err) = self.infer(unbox!(e), Type::Num);
self.add_constraint(expected, Type::Num, span);
let (te, err) = self.infer(unbox!(e), Type::Int);
self.add_constraint(expected, Type::Int, span);
(TExpr::Unary {
op,
expr: (Box::new(te), span),
ret_ty: Type::Num,
ret_ty: Type::Int,
}, err)
},
// Boolean operators (Bool -> Bool)
@ -348,22 +348,22 @@ impl<'src> Infer<'src> {
},
}
Expr::Binary(op, lhs, rhs) => match op {
// Numeric operators (Num -> Num -> Num)
// Numeric operators (Int -> Int -> Int)
BinaryOp::Add
| BinaryOp::Sub
| BinaryOp::Mul
| BinaryOp::Div
| BinaryOp::Rem
=> {
let (lt, mut errs0) = self.infer(unbox!(lhs), Type::Num);
let (rt, errs1) = self.infer(unbox!(rhs), Type::Num);
let (lt, mut errs0) = self.infer(unbox!(lhs), Type::Int);
let (rt, errs1) = self.infer(unbox!(rhs), Type::Int);
errs0.extend(errs1);
self.add_constraint(expected, Type::Num, span);
self.add_constraint(expected, Type::Int, span);
(TExpr::Binary {
op,
lhs: (Box::new(lt), lhs.1),
rhs: (Box::new(rt), rhs.1),
ret_ty: Type::Num,
ret_ty: Type::Int,
}, errs0)
},
// Boolean operators (Bool -> Bool -> Bool)
@ -528,8 +528,8 @@ impl<'src> Infer<'src> {
},
Expr::Define { name, ty, value } => {
let ty = ty.unwrap_or(self.fresh());
let (val_ty, errs) = self.infer(unbox!(value), ty.clone());
self.env.insert(name.clone(), ty.clone());
let (val_ty, errs) = self.infer(unbox!(value), ty.clone());
self.constraints.push((expected, Type::Unit, e.1));