From 291d18a92da7be19b8b8e710f567fe906534ad17 Mon Sep 17 00:00:00 2001 From: azur Date: Wed, 17 May 2023 03:00:38 +0700 Subject: [PATCH] IR pipeline --- Cargo.lock | 10 +++++ Cargo.toml | 1 + bin/Cargo.toml | 1 + bin/src/main.rs | 9 +++- ir/Cargo.toml | 9 ++++ ir/src/lib.rs | 100 +++++++++++++++++++++++++++++++++++++++++++ ref.hlm | 2 + simple.hlm | 7 +-- simple.ir | 28 ++++++++++++ syntax/src/expr.rs | 2 +- syntax/src/parser.rs | 28 ++++-------- test.hlm | 12 +----- test.ssa | 18 -------- ty.hlm | 2 + typing/src/infer.rs | 74 ++++++++++++++++++++++---------- 15 files changed, 227 insertions(+), 76 deletions(-) create mode 100644 ir/Cargo.toml create mode 100644 ir/src/lib.rs create mode 100644 ref.hlm create mode 100644 simple.ir delete mode 100644 test.ssa create mode 100644 ty.hlm diff --git a/Cargo.lock b/Cargo.lock index 177147d..eeb5a5a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -79,6 +79,7 @@ dependencies = [ "ariadne", "chumsky", "clap", + "ir", "syntax", "typing", ] @@ -212,6 +213,15 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "ir" +version = "0.1.0" +dependencies = [ + "chumsky", + "syntax", + "typing", +] + [[package]] name = "is-terminal" version = "0.4.7" diff --git a/Cargo.toml b/Cargo.toml index 4a07dc3..2384fd5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,4 +4,5 @@ members = [ "bin", "syntax", "typing", + "ir", ] \ No newline at end of file diff --git a/bin/Cargo.toml b/bin/Cargo.toml index a8ed189..9e4c661 100644 --- a/bin/Cargo.toml +++ b/bin/Cargo.toml @@ -9,6 +9,7 @@ chumsky = "1.0.0-alpha.3" clap = { version = "4.2.4", features = ["derive"] } syntax = { path = "../syntax" } typing = { path = "../typing" } +ir = { path = "../ir" } [[bin]] name = "hc" diff --git a/bin/src/main.rs b/bin/src/main.rs index f98bdf6..778b065 100644 --- a/bin/src/main.rs +++ b/bin/src/main.rs @@ -1,7 +1,9 @@ use ariadne::{sources, Color, Label, Report, ReportKind}; use chumsky::{Parser, prelude::Input}; + use syntax::parser::{lexer, exprs_parser}; use typing::infer::{infer_exprs, InferErrorKind}; +use ir::Lowerer; pub mod args; @@ -27,6 +29,7 @@ fn main() { // Typecheck if there are no lexing or parsing errors if let Some(ast) = ast.filter(|_| errs.len() + parse_errs.len() == 0) { let (ast, e) = infer_exprs(ast.0); + // If there is an error, print it if !e.is_empty() { e.into_iter() .for_each(|e| { @@ -49,8 +52,12 @@ fn main() { .print(sources([(filename.clone(), src.clone())])) .unwrap() }); + // Else go to the next stage } else { - ast.iter().for_each(|node| println!("{:?}", node.0)); + // ast.iter().for_each(|node| println!("{:?}", node.0)); + let mut l = Lowerer::new(); + let irs = l.lower_texprs(ast); + irs.iter().for_each(|ir| println!("{:?}", ir)); } }; diff --git a/ir/Cargo.toml b/ir/Cargo.toml new file mode 100644 index 0000000..6a69a64 --- /dev/null +++ b/ir/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "ir" +version = "0.1.0" +edition = "2021" + +[dependencies] +chumsky = "1.0.0-alpha.3" +syntax = { path = "../syntax" } +typing = { path = "../typing" } \ No newline at end of file diff --git a/ir/src/lib.rs b/ir/src/lib.rs new file mode 100644 index 0000000..6216aaf --- /dev/null +++ b/ir/src/lib.rs @@ -0,0 +1,100 @@ +use chumsky::span::SimpleSpan; +use syntax::expr::{Lit, UnaryOp, BinaryOp}; +use typing::typed::TExpr; + +#[derive(Clone, Debug)] +pub enum IExpr<'src> { + IntPush(i64), + IntAdd, + IntSub, + VarLoad(&'src str), + VarStore(&'src str), + FnPush(Vec), + Call, + Ret, +} + +#[derive(Clone, Debug)] +pub struct Lowerer { +} + +impl Lowerer { + pub fn new() -> Self { + Self {} + } + + fn lower_texpr<'a>(self: &mut Self, e: TExpr<'a>) -> Vec> { + use IExpr::*; + match e { + TExpr::Lit(l) => match l { + Lit::Unit => todo!(), + Lit::Bool(_) => todo!(), + Lit::Int(n) => vec![IntPush(n)], + Lit::Str(_) => todo!(), + } + TExpr::Ident(s) => vec![VarLoad(s)], + TExpr::Unary { op, expr, .. } => { + let mut expr = self.lower_texpr(*expr.0); + expr.push(match op { + UnaryOp::Neg => IntSub, + UnaryOp::Not => todo!(), + }); + expr + } + TExpr::Binary { op, lhs, rhs, .. } if op == BinaryOp::Pipe => { + println!("{lhs:?}"); + println!("{rhs:?}"); + todo!() + } + TExpr::Binary { op, lhs, rhs, .. } => { + let mut lhs = self.lower_texpr(*lhs.0); + let mut rhs = self.lower_texpr(*rhs.0); + lhs.append(&mut rhs); + lhs.push(match op { + BinaryOp::Add => IExpr::IntAdd, + BinaryOp::Sub => IExpr::IntSub, + BinaryOp::Mul => todo!(), + BinaryOp::Div => todo!(), + BinaryOp::Rem => todo!(), + BinaryOp::Eq => todo!(), + BinaryOp::Ne => todo!(), + BinaryOp::Lt => todo!(), + BinaryOp::Gt => todo!(), + BinaryOp::Le => todo!(), + BinaryOp::Ge => todo!(), + BinaryOp::And => todo!(), + BinaryOp::Or => todo!(), + BinaryOp::Pipe => unreachable!(), + }); + lhs + } + + TExpr::Lambda { body, .. } => { + let mut es = self.lower_texpr(*body.0); + es.push(IExpr::Ret); + vec![IExpr::FnPush(es)] + }, + TExpr::Call { func, args } => { + let mut es: Vec = args.into_iter() + .flat_map(|(e, _)| self.lower_texpr(e)) + .collect(); + es.append(&mut self.lower_texpr(*func.0)); + es.push(IExpr::Call); + es + }, + TExpr::Define { name, value, .. } => { + let mut es = self.lower_texpr(*value.0); + es.push(IExpr::VarStore(name)); + es + }, + + e => unimplemented!("{:?}", e) + } + } + + pub fn lower_texprs<'a>(self: &mut Self, e: Vec<(TExpr<'a>, SimpleSpan)>) -> Vec> { + e.into_iter() + .flat_map(|(e, _)| self.lower_texpr(e)) + .collect() + } +} \ No newline at end of file diff --git a/ref.hlm b/ref.hlm new file mode 100644 index 0000000..4539473 --- /dev/null +++ b/ref.hlm @@ -0,0 +1,2 @@ +let by_ref = fn (&x: int) = *x + 1; +let by_value = fn (x: int) = x + 1; \ No newline at end of file diff --git a/simple.hlm b/simple.hlm index 79dadc7..9b197c1 100644 --- a/simple.hlm +++ b/simple.hlm @@ -1,4 +1,5 @@ -let succ = fn x = x + 1; -let add = fn a b = a + b; +let a = fun (x Int, y) Int -> x + y; +let b = fun (x, y) -> x + y; +a(34, 35); -let res = 10 |> succ |> fn a = add(a, 1) |> add; \ No newline at end of file +fun (x Int, y) Int -> x + y; \ No newline at end of file diff --git a/simple.ir b/simple.ir new file mode 100644 index 0000000..3ac4b1e --- /dev/null +++ b/simple.ir @@ -0,0 +1,28 @@ +succ x: + var_load x + int_push 1 + int_add + ret + +add a b: + var_load a + var_load b + int_add + ret + +dedu x: + var_load x + int_push 1 + int_sub + ret + +_lambda_0 a: + var_load a + int_push 1 + call add + ret + +int_push 10 +call succ +call _lambda_0 +call dedu \ No newline at end of file diff --git a/syntax/src/expr.rs b/syntax/src/expr.rs index 22fae5b..71fca1b 100644 --- a/syntax/src/expr.rs +++ b/syntax/src/expr.rs @@ -100,7 +100,7 @@ impl Display for UnaryOp { } } -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq)] pub enum BinaryOp { Add, Sub, Mul, Div, Rem, And, Or, diff --git a/syntax/src/parser.rs b/syntax/src/parser.rs index 0c7bc01..c327ab8 100644 --- a/syntax/src/parser.rs +++ b/syntax/src/parser.rs @@ -36,7 +36,7 @@ pub fn lexer<'src>() -> impl Parser<'src, &'src str, Vec<(Token<'src>, Span)>, e "false" => Token::Bool(false), "let" => Token::Let, "in" => Token::In, - "fn" => Token::Func, + "fun" => Token::Func, "return" => Token::Return, "if" => Token::If, "then" => Token::Then, @@ -143,29 +143,19 @@ pub fn expr_parser<'tokens, 'src: 'tokens>() -> impl Parser< ) .map(|e: Spanned| e.0); - // func (x t, y t) : rt = e - // func x, y = e let lambda = just(Token::Func) .ignore_then( symbol - .map(|s| (s, None)) - .or(symbol - .then(type_parser()) - .delimited_by( - just(Token::Open(Delim::Paren)), - just(Token::Close(Delim::Paren)), - ) - .map(|(s, t)| (s, Some(t))) - ) - .repeated() + .then(type_parser().or_not()) + .separated_by(just(Token::Comma)) .collect::>() + .delimited_by( + just(Token::Open(Delim::Paren)), + just(Token::Close(Delim::Paren)), + ) ) - .then( - just(Token::Colon) - .ignore_then(type_parser()) - .or_not() - ) - .then_ignore(just(Token::Assign)) + .then(type_parser().or_not()) + .then_ignore(just(Token::Arrow)) .then(expr.clone()) .map(|((args, ret), body)| Expr::Lambda(args, ret, boxspan(body))); diff --git a/test.hlm b/test.hlm index 15b2506..266df6a 100644 --- a/test.hlm +++ b/test.hlm @@ -1,11 +1 @@ -let addi = fn x y = x + y; - -let factorial = fn x = - if x == 1 - then x - else x * factorial(x - 1); - -let result = factorial(addi(2, 3)); - -let println = fn x = (); -println(result); +(fun (x) -> x + 1)(68); \ No newline at end of file diff --git a/test.ssa b/test.ssa deleted file mode 100644 index d2acfa6..0000000 --- a/test.ssa +++ /dev/null @@ -1,18 +0,0 @@ -my_add x y: - 0: - v0 = iadd x y - ret v0 - -factorial x: - 0: - v0 = eq x 1 - jf v0 1 - ret x - 1: - v0 = isub x 1 - v1 = call factorial v0 - v2 = imul x v1 - ret v2 - -v0 = call my_add 2 3 -v1 = call factorial v0 \ No newline at end of file diff --git a/ty.hlm b/ty.hlm new file mode 100644 index 0000000..26ec1e6 --- /dev/null +++ b/ty.hlm @@ -0,0 +1,2 @@ +let f = fn x y z = x + y + z in f(1); +let g = fn x = x + 1; g(1, 2); \ No newline at end of file diff --git a/typing/src/infer.rs b/typing/src/infer.rs index 341cfcc..d07ff95 100644 --- a/typing/src/infer.rs +++ b/typing/src/infer.rs @@ -187,11 +187,23 @@ impl<'src> Infer<'src> { (Func(a1, r1), Func(a2, r2)) => { // Check the number of arguments if a1.len() != a2.len() { - return Err(InferError::new("Argument length mismatch", c.span) + let mut e = InferError::new("Argument length mismatch", c.span) .add_error(format!( - "This function should take {} arguments, found {}", - a1.len(), a2.len() - ), c.span)); + "This function is expected to take {} arguments, found {}", + a2.len(), a1.len() + ), c.span); + if a2.len() > a1.len() { + // Get the types of the needed arguments + let mut args = Vec::new(); + for i in a1.len()..a2.len() { + args.push(self.substitute(a2[i].clone()).to_string()); + } + e = e.add_hint(format!( + "Need arguments of type `{}` to call this function", + args.join(", ") + ), c.span); + } + return Err(e); } // Unify the arguments for (a1, a2) in a1.into_iter().zip(a2.into_iter()) { @@ -231,11 +243,15 @@ impl<'src> Infer<'src> { } /// Solve the constraints by unifying them - fn solve(&mut self) -> Result<(), InferError> { + + fn solve(&mut self) -> Vec { + let mut errors = Vec::new(); for c in self.constraints.clone().into_iter() { - self.unify(c)?; + if let Err(e) = self.unify(c) { + errors.push(e); + } } - Ok(()) + errors } /// Substitute the type variables with the substitutions @@ -394,10 +410,10 @@ impl<'src> Infer<'src> { Type::Func(_, _) => "function", _ => "value", }; - ( - TExpr::Ident(x), - vec![InferError::new(format!("Undefined {}", kind), span)] - ) + (TExpr::Ident(x), vec![ + InferError::new(format!("Undefined {}", kind), span) + .add_error(format!("`{}` is not defined", x), span) + ]) } } @@ -620,6 +636,17 @@ impl<'src> Infer<'src> { let (bt, berrs) = inf.infer(unbox!(body), expected.clone()); errs.extend(berrs); + for s in inf.subst { + if !self.subst.contains(&s) { + self.subst.push(s); + } + } + for c in inf.constraints { + if !self.constraints.contains(&c) { + self.constraints.push(c); + } + } + (TExpr::Let { name, ty, value: (Box::new(vt), value.1), @@ -688,29 +715,30 @@ impl<'src> Infer<'src> { /// Infer a list of expressions pub fn infer_exprs(es: Vec<(Expr, SimpleSpan)>) -> (Vec<(TExpr, SimpleSpan)>, Vec) { let mut inf = Infer::new(); - let mut typed_exprs = vec![]; + // Type expressions + let mut tes = vec![]; + // Unsubstituted typed expressions let mut errors = vec![]; for e in es { let span = e.1; let fresh = inf.fresh(); + // Infer the types let (te, err) = inf.infer(e, fresh); - typed_exprs.push((te, span)); + + // Push the expression to the list + tes.push((te.clone(), span)); + if !err.is_empty() { errors.extend(err); } } - match inf.solve() { - Ok(_) => { - typed_exprs = typed_exprs.into_iter() - .map(|(x, s)| (inf.substitute_texp(x), s)) - .collect(); - } - Err(e) => { - errors.push(e); - } + let solve_errors = inf.solve(); + if !solve_errors.is_empty() { + errors.extend(solve_errors); } - (rename_exprs(typed_exprs), errors) + + (rename_exprs(tes), errors) } \ No newline at end of file