From a98cc8ca277c74a8d6c4d9b1370c2a7c02bcb534 Mon Sep 17 00:00:00 2001 From: Natapat Samutpong Date: Fri, 25 Feb 2022 02:58:50 +0700 Subject: [PATCH] refactor to have IR in compilation --- example/ex.hyc | 14 ++--- src/back/c.rs | 130 ++++++++++++++++++++++++++++----------------- src/front/parse.rs | 85 ++++++++++++----------------- src/main.rs | 12 +++-- src/middle/ir.rs | 107 +++++++++++++++++++++++++++++++++++++ src/middle/mod.rs | 2 + 6 files changed, 241 insertions(+), 109 deletions(-) create mode 100644 src/middle/ir.rs create mode 100644 src/middle/mod.rs diff --git a/example/ex.hyc b/example/ex.hyc index 6d64d19..b19f784 100644 --- a/example/ex.hyc +++ b/example/ex.hyc @@ -1,9 +1,9 @@ -let foo = 1; -let bar = true; -let baz = "qux"; +let foo: int = 1; +let bar: string = "str"; +let baz: bool = true; -if (bar == true) then - print(baz); -else - print("quux"); +fun qux (lhs: int rhs: int) -> int = lhs + rhs; +fun main () -> int = do + puts("Hello, World"); + 0; end; \ No newline at end of file diff --git a/src/back/c.rs b/src/back/c.rs index 02d661c..29c39f4 100644 --- a/src/back/c.rs +++ b/src/back/c.rs @@ -1,14 +1,16 @@ use std::fmt::Display; -use crate::front::parse::Expr; +use crate::middle::ir::{IR, Value}; +#[derive(Debug, Clone)] pub struct Codegen { pub emitted: String, } -const HEADER_INCLUDES: [&str; 2] = [ - "#include ", - "#include ", +const HEADER_INCLUDES: [&str; 3] = [ + "", + "", + "", ]; impl Codegen { @@ -22,63 +24,95 @@ impl Codegen { self.emitted.push_str(&s.to_string()); } - pub fn gen(&mut self, exprs: &[Expr]) { + pub fn gen(&mut self, irs: &[IR]) { for header in HEADER_INCLUDES.iter() { + self.emit("#include "); self.emit(header); + self.emit("\n"); } - self.emit("int main() {"); - for expr in exprs { - self.gen_expr(expr); + for ir in irs { + self.gen_ir(ir); } - self.emit("return 0;"); - self.emit("}"); } - - fn gen_expr(&mut self, expr: &Expr) { - match expr { - Expr::Let { name, value } => { - match &**value { - Expr::Int(i) => 
self.emit(format!("int {} = {};", name, i)), - Expr::Float(f) => self.emit(format!("double {} = {};", name, f)), - Expr::Boolean(b) => self.emit(format!("bool {} = {};", name, b)), - Expr::String(s) => self.emit(format!("char *{} = \"{}\";", name, s)), - _ => todo!(), - } + + fn gen_ir(&mut self, ir: &IR) { + match ir { + IR::Define { name, type_hint, value } => { + self.emit(format!("{} {} = ", type_hint, name)); + self.gen_ir(value); + self.emit(";\n"); }, - Expr::Call { name, args } => { - match &**name { - Expr::Ident(func) => { - match func.as_str() { - "print" => { - self.emit(format!("printf({});", match &args[0] { - Expr::String(s) => format!("\"{}\"", s), - Expr::Ident(s) => format!("\"%s\", {}", s), - _ => todo!(), - })); - }, - _ => todo!(), + IR::Fun { name, return_type_hint, args, body } => { + let args = args.iter().map(|(name, type_hint)| { + format!("{} {}", type_hint, name) + }).collect::>().join(", "); + self.emit(format!("{} {}({}) {{", return_type_hint, name, args)); + match &**body { + IR::Value { value } => { + self.emit("return "); + self.gen_value(&value); + self.emit(";"); + }, + IR::Do { body } => { + for (i, node) in body.iter().enumerate() { + if i == body.len() - 1 { + self.emit("return "); + }; + self.gen_ir(node); + self.emit(";"); } }, + IR::Binary { op, left, right } => { + self.emit("return "); + self.gen_ir(left); + self.emit(op); + self.gen_ir(right); + self.emit(";"); + }, _ => todo!(), } + self.emit("}\n"); }, - Expr::If { cond, then, else_ } => { - self.emit("if (".to_string()); - self.gen_expr(&cond); - self.emit(") {".to_string()); - self.gen_expr(&then); - self.emit("} else {".to_string()); - self.gen_expr(&else_); - self.emit("}".to_string()); + IR::Call { name, args } => { + match name.as_str() { + "puts" => { + self.emit("printf("); + self.gen_ir(&args[0]); + self.emit(")"); + }, + _ => { + self.emit(format!("{}(", name)); + for (i, arg) in args.iter().enumerate() { + if i != 0 { + self.emit(", "); + } + 
self.gen_ir(arg); + } + self.emit(")"); + } + } }, - Expr::Binary { left, op, right } => { - self.gen_expr(&left); - self.emit(format!(" {} ", op.to_string())); - self.gen_expr(&right); + IR::Value { value } => { + self.gen_value(value); }, - Expr::Ident(s) => self.emit(s.to_string()), - Expr::Boolean(b) => self.emit(format!("{}", b)), - _ => { println!("{:?}", expr); todo!() }, + IR::Binary { op, left, right } => { + self.gen_ir(left); + self.emit(op); + self.gen_ir(right); + self.emit(";"); + }, + _ => todo!() + } + } + + fn gen_value(&mut self, value: &Value) { + match value { + Value::Int(i) => self.emit(format!("{}", i)), + Value::Float(f) => self.emit(format!("{}", f)), + Value::Double(d) => self.emit(format!("{}", d)), + Value::Bool(b) => self.emit(format!("{}", b)), + Value::String(s) => self.emit(format!("\"{}\"", s)), + Value::Ident(s) => self.emit(format!("{}", s)), } } diff --git a/src/front/parse.rs b/src/front/parse.rs index b9ab89b..6f1c29e 100644 --- a/src/front/parse.rs +++ b/src/front/parse.rs @@ -4,7 +4,7 @@ use chumsky::prelude::*; pub enum Token { // Types Int(i64), Float(String), - Boolean(bool), String(String), + Bool(bool), String(String), Ident(String), // Symbols @@ -13,6 +13,7 @@ pub enum Token { Semicolon, Assign, Colon, Comma, + ReturnHint, // Keywords Import, @@ -38,6 +39,14 @@ pub fn lexer() -> impl Parser, Error = Simple> { .then_ignore(just('"')) .collect::() .map(|s: String| Token::String(s)); + + let symbol = choice(( + just(';').to(Token::Semicolon), + just('=').to(Token::Assign), + just(':').to(Token::Colon), + just(',').to(Token::Comma), + just("->").to(Token::ReturnHint), + )); let operator = choice(( just("+"), @@ -62,16 +71,9 @@ pub fn lexer() -> impl Parser, Error = Simple> { just('}'), )).map(|c| Token::Delimiter(c)); - let symbol = choice(( - just(';').to(Token::Semicolon), - just('=').to(Token::Assign), - just(':').to(Token::Colon), - just(',').to(Token::Comma), - )); - let keyword = text::ident().map(|s: String| match 
s.as_str() { - "true" => Token::Boolean(true), - "false" => Token::Boolean(false), + "true" => Token::Bool(true), + "false" => Token::Bool(false), "import" => Token::Import, "let" => Token::Let, @@ -87,9 +89,9 @@ pub fn lexer() -> impl Parser, Error = Simple> { let token = int .or(float) .or(string) + .or(symbol) .or(operator) .or(delimiter) - .or(symbol) .or(keyword) .recover_with(skip_then_retry_until([])); @@ -107,7 +109,7 @@ pub fn lexer() -> impl Parser, Error = Simple> { #[derive(Clone, Debug)] pub enum Expr { Int(i64), Float(f64), - Boolean(bool), String(String), + Bool(bool), String(String), Ident(String), Unary { op: String, expr: Box }, @@ -116,11 +118,13 @@ pub enum Expr { Let { name: String, + type_hint: String, value: Box, }, Fun { name: String, - args: Vec, + type_hint: String, + args: Vec<(String, String)>, body: Box, }, @@ -143,7 +147,7 @@ fn expr_parser() -> impl Parser> + Clone { let literal = filter_map(|span, token| match token { Token::Int(i) => Ok(Expr::Int(i)), Token::Float(f) => Ok(Expr::Float(f.parse().unwrap())), - Token::Boolean(b) => Ok(Expr::Boolean(b)), + Token::Bool(b) => Ok(Expr::Bool(b)), Token::String(s) => Ok(Expr::String(s)), _ => Err(Simple::expected_input_found(span, Vec::new(), Some(token))), }).labelled("literal"); @@ -240,26 +244,39 @@ fn expr_parser() -> impl Parser> + Clone { let declare_var = just(Token::Let) .ignore_then(ident) + .then_ignore(just(Token::Colon)) + .then(ident) .then_ignore(just(Token::Assign)) .then( do_block.clone() .or(decl.clone()) ) - .map(|(name, value)| Expr::Let { + .map(|((name, type_hint), value)| Expr::Let { name, + type_hint, value: Box::new(value), }).labelled("variable"); let declare_fun = just(Token::Fun) .ignore_then(ident) - .then(ident.repeated()) + .then_ignore(just(Token::Delimiter('('))) + .then( + (ident + .then_ignore(just(Token::Colon)) + .then(ident)) + .repeated() + ) + .then_ignore(just(Token::Delimiter(')'))) + .then_ignore(just(Token::ReturnHint)) + .then(ident) 
.then_ignore(just(Token::Assign)) .then( do_block.clone() .or(decl.clone()) ) - .map(|((name, args), body)| Expr::Fun { + .map(|(((name, args), type_hint), body)| Expr::Fun { name, + type_hint, args, body: Box::new(body), }).labelled("function"); @@ -304,38 +321,4 @@ pub fn parser() -> impl Parser, Error = Simple> + Clone .then_ignore(just(Token::Semicolon)) .repeated() .then_ignore(end()) -} - -impl Expr { - pub fn to_sexpr(&self) -> String { - let mut out = String::new(); - match self { - Self::Int(x) => out.push_str(&x.to_string()), - Self::Float(x) => out.push_str(&x.to_string()), - Self::Boolean(x) => out.push_str(&x.to_string()), - Self::String(x) => out.push_str(&format!("\"{}\"", x)), - Self::Ident(x) => out.push_str(&x), - - Self::Unary{ op, expr } => out.push_str(&format!("({} {})", op, expr.to_sexpr())), - Self::Binary{ op, left, right } => out.push_str( - &format!("({} {} {})", op, left.to_sexpr(), right.to_sexpr()) - ), - Self::Call{ name, args } => out.push_str( - &format!("({} {})", name.to_sexpr(), args.iter().map(|x| x.to_sexpr()).collect::>().join(" "))), - - Self::Let{ name, value } => out.push_str( - &format!("(let {}\n {})", name, value.clone().to_sexpr())), - Self::Fun{ name, args, body } => out.push_str( - &format!("(fun {} ({})\n {})", name, args.join(" "), body.to_sexpr())), - - Self::If { cond, then, else_ } => out.push_str( - &format!("(if {} {} {})", cond.to_sexpr(), then.to_sexpr(), else_.to_sexpr())), - - Self::Do { body } => out.push_str( - &format!("(do {})", body.iter().map(|x| x.to_sexpr()).collect::>().join(" "))), - - _ => todo!(), - } - out - } } \ No newline at end of file diff --git a/src/main.rs b/src/main.rs index 3e269e0..e180fb9 100644 --- a/src/main.rs +++ b/src/main.rs @@ -12,6 +12,11 @@ use args::{Args, Options}; pub mod front; use front::parse::{lexer, parser}; +/// Middle-end of the language. +/// Contains the intermediate representation. +pub mod middle; +use middle::ir; + /// Back-end of the language. 
/// Contains code generator. pub mod back; @@ -42,12 +47,13 @@ fn main() { Some(ast) => { let start = time::Instant::now(); - let mut compiler = back::c::Codegen::new(); - compiler.gen(&ast); + let ir = ir::ast_to_ir(&ast); + let mut codegen = back::c::Codegen::new(); + codegen.gen(&ir); let out_file_name = file_name.file_stem().unwrap().to_str().unwrap().to_string() + ".c"; let mut out_file = fs::File::create(&out_file_name).expect("Failed to create file"); - write!(out_file, "{}", compiler.emitted).expect("Failed to write to file"); + write!(out_file, "{}", codegen.emitted).expect("Failed to write to file"); let compile_elapsed = start.elapsed(); diff --git a/src/middle/ir.rs b/src/middle/ir.rs new file mode 100644 index 0000000..fe50f9d --- /dev/null +++ b/src/middle/ir.rs @@ -0,0 +1,107 @@ +use core::fmt; + +use crate::front::parse::Expr; + +#[derive(Debug, Clone)] +pub enum TypeHint { + Int, + Float, Double, + Bool, + String, +} + +impl fmt::Display for TypeHint { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + TypeHint::Int => write!(f, "int"), + TypeHint::Float => write!(f, "float"), + TypeHint::Double => write!(f, "double"), + TypeHint::Bool => write!(f, "bool"), + TypeHint::String => write!(f, "char*"), + } + } +} + +#[derive(Debug, Clone)] +pub enum Value { + Int(i64), + Float(f32), + Double(f64), + Bool(bool), + String(String), + Ident(String), +} + +#[derive(Debug, Clone)] +pub enum IR { + Define { name: String, type_hint: TypeHint, value: Box }, + Fun { name: String, return_type_hint: TypeHint, args: Vec<(String, TypeHint)>, body: Box }, + Call { name: String, args: Vec }, + Do { body: Vec }, + If { cond: Box, body: Box, else_body: Box }, + Value { value: Value }, + Binary { op: String, left: Box, right: Box }, +} + +pub fn ast_to_ir(ast: &[Expr]) -> Vec { + let mut ir = Vec::new(); + for expr in ast { + ir.push(expr_to_ir(expr)); + } + ir +} + +pub fn expr_to_ir(expr: &Expr) -> IR { + match expr { + Expr::Let { name, 
type_hint, value } => IR::Define { + name: name.clone(), + type_hint: get_typehint(type_hint), + value: Box::new(expr_to_ir(value)), + }, + Expr::Fun { name, type_hint, args, body } => IR::Fun { + name: name.clone(), + return_type_hint: get_typehint(type_hint), + args: args + .iter() + .map(|(name, type_hint)| (name.to_string(), get_typehint(type_hint))) + .collect::>(), + body: Box::new(expr_to_ir(body)), + }, + Expr::Call { name, args } => IR::Call { + name: match &**name { + Expr::Ident(s) => s.clone(), + _ => panic!("Expected ident in call"), + }, + args: args.iter().map(|arg| expr_to_ir(arg)).collect(), + }, + Expr::Do { body } => IR::Do { + body: body + .iter() + .map(|expr| expr_to_ir(expr)) + .collect::>(), + }, + Expr::Binary { op, left, right } => IR::Binary { + op: op.to_string(), + left: Box::new(expr_to_ir(left)), + right: Box::new(expr_to_ir(right)), + }, + Expr::Int(value) => IR::Value { value: Value::Int(*value) }, + Expr::Float(value) => IR::Value { value: Value::Double(*value) }, // TODO: Actually use float + // Expr::Double(value) => IR::Value { value: Value::Double(*value) }, + Expr::Bool(value) => IR::Value { value: Value::Bool(*value) }, + Expr::String(value) => IR::Value { value: Value::String(value.clone()) }, + Expr::Ident(name) => IR::Value { value: Value::Ident(name.clone()) }, + _ => { println!("{:?}", expr); todo!() } + } +} + +fn get_typehint(from: &String) -> TypeHint { + match from.as_str() { + "int" => TypeHint::Int, + "float" => TypeHint::Float, + "double" => TypeHint::Double, + "bool" => TypeHint::Bool, + "string" => TypeHint::String, + _ => panic!("Unsupported type hint: {}", from) + } +} \ No newline at end of file diff --git a/src/middle/mod.rs b/src/middle/mod.rs new file mode 100644 index 0000000..f919949 --- /dev/null +++ b/src/middle/mod.rs @@ -0,0 +1,2 @@ +// The intermediate representation of the AST +pub mod ir; \ No newline at end of file