From ca469f096e97b3601f322c963827ae57ee64f4f6 Mon Sep 17 00:00:00 2001 From: azur Date: Wed, 26 Apr 2023 00:50:40 +0700 Subject: [PATCH] Rename stuff, Initialize IRs --- Cargo.lock | 4 + Cargo.toml | 1 + bin/Cargo.toml | 4 + ir/Cargo.toml | 8 ++ ir/src/lib.rs | 190 +++++++++++++++++++++++++++++++++++ sketch.hlm | 1 - spec.md | 229 ------------------------------------------- syntax/src/expr.rs | 6 +- syntax/src/lib.rs | 34 ------- syntax/src/parser.rs | 60 ++++++++---- syntax/src/ty.rs | 4 +- test.hlm | 12 ++- test.ssa | 18 ++++ test2.hlm | 4 + typing/src/infer.rs | 30 +++--- 15 files changed, 299 insertions(+), 306 deletions(-) create mode 100644 ir/Cargo.toml create mode 100644 ir/src/lib.rs delete mode 100644 sketch.hlm delete mode 100644 spec.md create mode 100644 test.ssa create mode 100644 test2.hlm diff --git a/Cargo.lock b/Cargo.lock index 177147d..ade12bc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -212,6 +212,10 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "ir" +version = "0.1.0" + [[package]] name = "is-terminal" version = "0.4.7" diff --git a/Cargo.toml b/Cargo.toml index 4a07dc3..2384fd5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,4 +4,5 @@ members = [ "bin", "syntax", "typing", + "ir", ] \ No newline at end of file diff --git a/bin/Cargo.toml b/bin/Cargo.toml index a02598e..a8ed189 100644 --- a/bin/Cargo.toml +++ b/bin/Cargo.toml @@ -9,3 +9,7 @@ chumsky = "1.0.0-alpha.3" clap = { version = "4.2.4", features = ["derive"] } syntax = { path = "../syntax" } typing = { path = "../typing" } + +[[bin]] +name = "hc" +path = "src/main.rs" \ No newline at end of file diff --git a/ir/Cargo.toml b/ir/Cargo.toml new file mode 100644 index 0000000..d3ea0de --- /dev/null +++ b/ir/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "ir" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] diff --git a/ir/src/lib.rs b/ir/src/lib.rs new file mode 100644 index 0000000..3af472c --- /dev/null +++ b/ir/src/lib.rs @@ -0,0 +1,190 @@ +use std::fmt::{Display, Formatter, Result as FmtResult}; + +#[derive(Debug, Clone)] +enum IRExpr<'src> { + Int(i64), + Var(&'src str), + Call(&'src str, Vec), +} + +impl Display for IRExpr<'_> { + fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult { + match self { + IRExpr::Int(x) => write!(f, "{x}"), + IRExpr::Var(x) => write!(f, "{x}"), + IRExpr::Call(name, args) => { + write!(f, "{name}(")?; + for (i, arg) in args.iter().enumerate() { + if i > 0 { write!(f, ", ")?; } + write!(f, "{arg}")?; + } + write!(f, ")") + } + } + } +} + +#[derive(Debug, Clone)] +enum IR<'src> { + Define { + name: &'src str, + value: Box>, + }, + IRExpr(IRExpr<'src>), + Block { + id: usize, + body: Vec, + }, + Func { + name: &'src str, + args: Vec<&'src str>, + body: Vec, + }, +} + +fn display_ir(ir: &IR, indent: usize) -> String { + let mut s = String::new(); + for _ in 0..indent { s.push(' '); } + match ir { + IR::Define { name, value } => s.push_str(&format!("{name} = {value}")), + IR::IRExpr(expr) => s.push_str(&format!("{expr}")), + IR::Block { id, body } => { + s.push_str(&format!("{id}:\n")); + for ir in body { + s.push_str(&display_ir(ir, indent + 4)); + s.push_str("\n"); + } + }, + IR::Func { name, args, body } => { + s.push_str(&format!("{name} ")); + for (i, arg) in args.iter().enumerate() { + if i > 0 { s.push_str(" "); } + s.push_str(&format!("{arg}")); + } + s.push_str(":\n"); + for ir in body { + s.push_str(&display_ir(ir, indent + 4)); + s.push_str("\n"); + } + } + } + s +} + +impl Display for IR<'_> { + fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult { + write!(f, "{}", display_ir(self, 0)) + } +} + +#[cfg(test)] +mod tests { + use super::{ + IR::*, + IRExpr::* + }; + + #[test] + fn test_ir() { + let fns = [ + Func { + name: "my_add", + args: vec!["a", "b"], + body: vec![ + Block { + id: 0, + body: vec![ + Define { + name: "v0", + value: Call( + "add", + vec![ + Var("a"), + Var("b"), + ] + ).into(), + } + ] + }, + ] + }, + Func { + name: "factorial", + args: vec!["n"], + body: vec![ + Block { + id: 0, + body: vec![ + Define { + name: "v0", + value: Call( + "eq", + vec![ + Var("n"), + Int(1), + ] + ).into(), + }, + IRExpr(Call( + "jf", + vec![ + Var("v0"), + Int(1), + ] + )), + IRExpr(Call( + "ret", + vec![ + Var("n"), + ] + )), + ] + }, + Block { + id: 1, + body: vec![ + Define { + name: "v0", + value: Call( + "isub", + vec![ + Var("n"), + Int(1), + ] + ).into(), + }, + Define { + name: "v1", + value: Call( + "call", + vec![ + Var("factorial"), + Var("v0"), + ] + ).into(), + }, + Define { + name: "v2", + value: Call( + "imul", + vec![ + Var("n"), + Var("v1"), + ] + ).into(), + }, + IRExpr(Call( + "ret", + vec![ + Var("v2"), + ] + )), + ] + }, + ] + } + ]; + + fns.iter().for_each(|ir| println!("{}", ir)); + } +} \ No newline at end of file diff --git a/sketch.hlm b/sketch.hlm deleted file mode 100644 index 9c6cf0c..0000000 --- a/sketch.hlm +++ /dev/null @@ -1 +0,0 @@ -let add = \x : num, y : num -> num = x + y; \ No newline at end of file diff --git a/spec.md b/spec.md deleted file mode 100644 index bf22340..0000000 --- a/spec.md +++ /dev/null @@ -1,229 +0,0 @@ -# Specification - -## Syntax - ---- - -### Expressions - -- Literals - - A literal is a value that is written directly - into the source code. - - - Number - - An number literal is of type `f64` and can - be expressed with or without a decimal point. - - Examples: `1`, `3.14`, `.5` - - ```ebnf - Number: - Digits + (maybe '.' + Digits). - (* Optional whole number, e.g. .5 *) - ('.' + Digits). - Digits: - one or more of 0..9. - ``` - - - String - - A string literal can consist of zero or more - characters enclosed in double quotes (`"`) - - Examples: `"Hello, World"`, - `"They said \"Hi\""`, - `"Foo\nBar"` - - ```ebnf - String: - '"' + (zero or more of Character) + '"'. - Character: - any character except '"' or '\'. - escape sequences. - ``` - - - Boolean - - A boolean literal can be either `true` or - `false`. - - ```ebnf - Boolean: - 'true' or 'false'. - ``` - - - Unit - - A unit literal is a value that represents - the absence of a value. - - ```ebnf - Unit: - '()'. - ``` - -- Identifiers - - An identifier is a name that is used to refer - to a variable, function, or other entity. - - Examples: `foo`, `barBaz`, `add2` - - ```ebnf - Identifier: - (Letter + zero or more of LetterOrDigit) but - not any of Keywords. - Letter: - one of a..z or A..Z. - LetterOrDigit: - Letter or one of 0..9. - ``` - -- Operators - - An operator is a symbol that is used to - represent an operation. - - ```ebnf - Binary: - one of ( - (* Arithmetic *) - + - * / % - (* Comparison *) - == != < <= > >= - (* Logical *) - && || - ). - Unary: - one of (- !). - ``` - -- Application (Function Call) - - An application is an expression that calls a - function with a list of arguments. - It is not necessary that the callee is a - function, but it must be an expression that - evaluates to a function. - - ```ebnf - Arguments: - zero or more of Expression delimited by ','. - Application: - Expression + '(' + Arguments + ')'. - ``` - - - Examples: - - ```rust - foo(1, 2, 3) - (\x -> x + 1)(2) - ``` - -- If-Else - - An if-else expression is an expression that - evaluates to one of two expressions depending - on the value of a condition. - - ```ebnf - IfElse: - 'if' + Expression + 'then' + Expression + 'else' + Expression. - ``` - - - Examples: - - ```rust - if true then 1 else 2 - if 1 == 2 then "foo" else "bar" - ``` - -- Let Binding(s) - - There are 2 types of let bindings: - - "Imperative" let bindings, which are - similar to variable declarations in - imperative languages (Javascript, Rust, etc.). - - ```ebnf - Bindings: - one or more of Binding delimited by ','. - Let: - 'let' + Bindings. - ``` - - - Example: - - ```rust - let x = 1 // -> () - x + 1 // -> 2 - ``` - - - "Functional" let bindings, which are - similar to variable declarations in - functional languages (ML-family, etc.). - - ```ebnf - LetIn: - 'let' + Bindings + 'in' + Expression. - ``` - - - Example: - - ```rust - let x = 1, y = 2 in - x + y // -> 3 - ``` - -- Block & Return - - A block is a sequence of expressions that are - evaluated in order and the value of the last - expression is returned (if not ended with a - semicolon). - - A return expression is an expression that - will exit the current block and return the - value of the expression. It is not necessary - to use a return expression in a block, but - it could be useful for early termination. - - Any use of a return expression outside of a - block is not allowed. - - ```ebnf - Block: - '{' + zero or more of Expression + '}'. - Return: - 'return' + Expression. - ``` - - - Examples: - - ```rust - { - let x = 1; - let y = 2; - x + y - } - ``` - ```rust - fun foo(): num = { - if true then - return 1; - - let bar = 42; - bar - }; - ``` - -### Keywords - -Keywords are reserved words that cannot be -used as identifiers. They are used to -represent constructs of the language. - -```ebnf -Keywords: - if then else - let fun return -``` diff --git a/syntax/src/expr.rs b/syntax/src/expr.rs index 34f6a8d..e311f2c 100644 --- a/syntax/src/expr.rs +++ b/syntax/src/expr.rs @@ -10,7 +10,7 @@ pub enum Delim { Paren, Brack, Brace } // 'src is the lifetime of the source code string. #[derive(Clone, Debug, PartialEq)] pub enum Token<'src> { - Unit, Bool(bool), Num(f64), Str(&'src str), + Unit, Bool(bool), Int(i64), Str(&'src str), Ident(&'src str), Add, Sub, Mul, Div, Rem, @@ -29,7 +29,7 @@ impl<'src> Display for Token<'src> { match self { Token::Unit => write!(f, "()"), Token::Bool(b) => write!(f, "{}", b), - Token::Num(n) => write!(f, "{}", n), + Token::Int(n) => write!(f, "{}", n), Token::Str(s) => write!(f, "\"{}\"", s), Token::Ident(s) => write!(f, "{}", s), @@ -82,7 +82,7 @@ pub type Span = SimpleSpan; pub enum Lit<'src> { Unit, Bool(bool), - Num(f64), + Int(i64), Str(&'src str), } diff --git a/syntax/src/lib.rs b/syntax/src/lib.rs index 907b709..c9a032c 100644 --- a/syntax/src/lib.rs +++ b/syntax/src/lib.rs @@ -1,37 +1,3 @@ pub mod expr; pub mod parser; pub mod ty; - -#[cfg(test)] -mod tests { - use chumsky::prelude::*; - use super::{ expr::*, parser::* }; - - #[test] - fn simple() { - let src = "let x = 1 + (), y = foo in x + !(y)"; - - let (ts, errs) = lexer().parse(src).into_output_errors(); - - assert!(errs.is_empty()); - assert_eq!(ts, Some(vec![ - (Token::Let, Span::new(0, 3)), - (Token::Ident("x"), Span::new(4, 5)), - (Token::Assign, Span::new(6, 7)), - (Token::Num(1.0), Span::new(8, 9)), - (Token::Add, Span::new(10, 11)), - (Token::Unit, Span::new(12, 14)), - (Token::Comma, Span::new(14, 15)), - (Token::Ident("y"), Span::new(16, 17)), - (Token::Assign, Span::new(18, 19)), - (Token::Ident("foo"), Span::new(20, 23)), - (Token::In, Span::new(24, 26)), - (Token::Ident("x"), Span::new(27, 28)), - (Token::Add, Span::new(29, 30)), - (Token::Not, Span::new(31, 32)), - (Token::Open(Delim::Paren), Span::new(32, 33)), - (Token::Ident("y"), Span::new(33, 34)), - (Token::Close(Delim::Paren), Span::new(34, 35)), - ])); - } -} \ No newline at end of file diff --git a/syntax/src/parser.rs b/syntax/src/parser.rs index dba1bcd..2b2384c 100644 --- a/syntax/src/parser.rs +++ b/syntax/src/parser.rs @@ -3,12 +3,17 @@ use chumsky::prelude::*; use super::{ expr::*, ty::Type }; pub fn lexer<'src>() -> impl Parser<'src, &'src str, Vec<(Token<'src>, Span)>, extra::Err>> { - let num = text::int(10) - .then(just('.').then(text::digits(10)).or_not()) + // let num = text::int(10) + // .then(just('.').then(text::digits(10)).or_not()) + // .slice() + // .from_str() + // .unwrapped() + // .map(Token::Int); + let int = text::int(10) .slice() .from_str() .unwrapped() - .map(Token::Num); + .map(Token::Int); let strn = just('"') .ignore_then(none_of('"').repeated()) @@ -31,7 +36,7 @@ pub fn lexer<'src>() -> impl Parser<'src, &'src str, Vec<(Token<'src>, Span)>, e "false" => Token::Bool(false), "let" => Token::Let, "in" => Token::In, - "func" => Token::Func, + "fn" => Token::Func, "return" => Token::Return, "if" => Token::If, "then" => Token::Then, @@ -75,15 +80,20 @@ pub fn lexer<'src>() -> impl Parser<'src, &'src str, Vec<(Token<'src>, Span)>, e )); let token = choice(( - num, + int, strn, word, sym, delim, )); + let comment = just("//") + .then(any().and_is(just('\n').not()).repeated()) + .padded(); + token .map_with_span(move |tok, span| (tok, span)) + .padded_by(comment.repeated()) .padded() // If we get an error, skip to the next character and try again. .recover_with(skip_then_retry_until(any().ignored(), end())) @@ -114,7 +124,7 @@ pub fn expr_parser<'tokens, 'src: 'tokens>() -> impl Parser< let lit = select! { Token::Unit => Expr::Lit(Lit::Unit), Token::Bool(b) => Expr::Lit(Lit::Bool(b)), - Token::Num(n) => Expr::Lit(Lit::Num(n)), + Token::Int(n) => Expr::Lit(Lit::Int(n)), Token::Str(s) => Expr::Lit(Lit::Str(s)), }; @@ -132,20 +142,25 @@ pub fn expr_parser<'tokens, 'src: 'tokens>() -> impl Parser< ) .map(|e: Spanned| e.0); - // \x : t, y : t -> rt = e - let lambda = just(Token::Lambda) + // func (x t, y t) : rt = e + // func x, y = e + let lambda = just(Token::Func) .ignore_then( - ( - symbol.then( - just(Token::Colon) - .ignore_then(type_parser()) - .or_not()) - ).separated_by(just(Token::Comma)) - .allow_trailing() + symbol + .map(|s| (s, None)) + .or(symbol + .then(type_parser()) + .delimited_by( + just(Token::Open(Delim::Paren)), + just(Token::Close(Delim::Paren)), + ) + .map(|(s, t)| (s, Some(t))) + ) + .repeated() .collect::>() ) .then( - just(Token::Arrow) + just(Token::Colon) .ignore_then(type_parser()) .or_not() ) @@ -219,7 +234,8 @@ pub fn expr_parser<'tokens, 'src: 'tokens>() -> impl Parser< .or(if_) .or(block) .map_with_span(|e, s| (e, s)) - .boxed(); + .boxed() + .labelled("(atomic) expression"); let call = atom .then( @@ -322,7 +338,7 @@ pub fn type_parser<'tokens, 'src: 'tokens>() -> impl Parser< recursive(|ty| { let lit_ty = select! { Token::Ident("Bool") => Type::Bool, - Token::Ident("Num") => Type::Num, + Token::Ident("Int") => Type::Int, Token::Ident("Str") => Type::Str, // TODO: Support type variables in both the parser and the type checker. Token::Ident(_) => Type::Var(69), @@ -362,9 +378,11 @@ pub fn type_parser<'tokens, 'src: 'tokens>() -> impl Parser< }) .map(Type::Tuple); - let array = just(Token::Open(Delim::Brack)) - .ignore_then(ty.clone()) - .then_ignore(just(Token::Close(Delim::Brack))) + let array = ty.clone() + .delimited_by( + just(Token::Open(Delim::Brack)), + just(Token::Close(Delim::Brack)), + ) .map(|t| Type::Array(Box::new(t))); lit_ty diff --git a/syntax/src/ty.rs b/syntax/src/ty.rs index dace70a..a7300df 100644 --- a/syntax/src/ty.rs +++ b/syntax/src/ty.rs @@ -3,7 +3,7 @@ use std::fmt::{self, Display, Formatter}; // TODO: Introduce lifetime here to reduce cloning. #[derive(Clone, Debug, Eq, PartialEq)] pub enum Type { - Unit, Bool, Num, Str, + Unit, Bool, Int, Str, Var(usize), // This type is only used during type inference. Func(Vec, Box), Tuple(Vec), @@ -15,7 +15,7 @@ impl Display for Type { match *self { Type::Unit => write!(f, "Unit"), Type::Bool => write!(f, "Bool"), - Type::Num => write!(f, "Num"), + Type::Int => write!(f, "Int"), Type::Str => write!(f, "Str"), Type::Var(id) => write!(f, "{}", itoa(id)), Type::Func(ref args, ref ret) => { diff --git a/test.hlm b/test.hlm index 306b566..15b2506 100644 --- a/test.hlm +++ b/test.hlm @@ -1 +1,11 @@ -let f = \f, g, h, a, b, c, d = ; \ No newline at end of file +let addi = fn x y = x + y; + +let factorial = fn x = + if x == 1 + then x + else x * factorial(x - 1); + +let result = factorial(addi(2, 3)); + +let println = fn x = (); +println(result); diff --git a/test.ssa b/test.ssa new file mode 100644 index 0000000..d2acfa6 --- /dev/null +++ b/test.ssa @@ -0,0 +1,18 @@ +my_add x y: + 0: + v0 = iadd x y + ret v0 + +factorial x: + 0: + v0 = eq x 1 + jf v0 1 + ret x + 1: + v0 = isub x 1 + v1 = call factorial v0 + v2 = imul x v1 + ret v2 + +v0 = call my_add 2 3 +v1 = call factorial v0 \ No newline at end of file diff --git a/test2.hlm b/test2.hlm new file mode 100644 index 0000000..d35dac1 --- /dev/null +++ b/test2.hlm @@ -0,0 +1,4 @@ +let factorial = fn x = + if x == 1 + then x + else x * factorial(x - 1); diff --git a/typing/src/infer.rs b/typing/src/infer.rs index 7e3212e..0cdfe7c 100644 --- a/typing/src/infer.rs +++ b/typing/src/infer.rs @@ -68,7 +68,7 @@ impl<'src> Infer<'src> { fn occurs(&self, i: usize, t: Type) -> bool { use Type::*; match t { - Unit | Bool | Num | Str => false, + Unit | Bool | Int | Str => false, Var(j) => { if let Some(t) = self.subst(j) { if t != Var(j) { @@ -92,7 +92,7 @@ impl<'src> Infer<'src> { // Literal types (Unit, Unit) | (Bool, Bool) - | (Num, Num) + | (Int, Int) | (Str, Str) => Ok(()), // Variable @@ -298,9 +298,9 @@ impl<'src> Infer<'src> { self.add_constraint(expected, Type::Bool, span); ok!(TExpr::Lit(Lit::Bool(b))) } - Lit::Num(i) => { - self.add_constraint(expected, Type::Num, span); - ok!(TExpr::Lit(Lit::Num(i))) + Lit::Int(i) => { + self.add_constraint(expected, Type::Int, span); + ok!(TExpr::Lit(Lit::Int(i))) } Lit::Str(s) => { self.add_constraint(expected, Type::Str, span); @@ -326,14 +326,14 @@ impl<'src> Infer<'src> { // The type of the left and right hand side are inferred and // the expected type is determined by the operator Expr::Unary(op, e) => match op { - // Numeric operators (Num -> Num) + // Numeric operators (Int -> Int) UnaryOp::Neg => { - let (te, err) = self.infer(unbox!(e), Type::Num); - self.add_constraint(expected, Type::Num, span); + let (te, err) = self.infer(unbox!(e), Type::Int); + self.add_constraint(expected, Type::Int, span); (TExpr::Unary { op, expr: (Box::new(te), span), - ret_ty: Type::Num, + ret_ty: Type::Int, }, err) }, // Boolean operators (Bool -> Bool) @@ -348,22 +348,22 @@ impl<'src> Infer<'src> { }, } Expr::Binary(op, lhs, rhs) => match op { - // Numeric operators (Num -> Num -> Num) + // Numeric operators (Int -> Int -> Int) BinaryOp::Add | BinaryOp::Sub | BinaryOp::Mul | BinaryOp::Div | BinaryOp::Rem => { - let (lt, mut errs0) = self.infer(unbox!(lhs), Type::Num); - let (rt, errs1) = self.infer(unbox!(rhs), Type::Num); + let (lt, mut errs0) = self.infer(unbox!(lhs), Type::Int); + let (rt, errs1) = self.infer(unbox!(rhs), Type::Int); errs0.extend(errs1); - self.add_constraint(expected, Type::Num, span); + self.add_constraint(expected, Type::Int, span); (TExpr::Binary { op, lhs: (Box::new(lt), lhs.1), rhs: (Box::new(rt), rhs.1), - ret_ty: Type::Num, + ret_ty: Type::Int, }, errs0) }, // Boolean operators (Bool -> Bool -> Bool) @@ -528,8 +528,8 @@ impl<'src> Infer<'src> { }, Expr::Define { name, ty, value } => { let ty = ty.unwrap_or(self.fresh()); - let (val_ty, errs) = self.infer(unbox!(value), ty.clone()); self.env.insert(name.clone(), ty.clone()); + let (val_ty, errs) = self.infer(unbox!(value), ty.clone()); self.constraints.push((expected, Type::Unit, e.1));