From 6e464be33d214f2802bcfa7f97d9d5f19152b9c3 Mon Sep 17 00:00:00 2001 From: mlokr Date: Thu, 1 Feb 2024 16:11:10 +0100 Subject: [PATCH] adding more parsing with a sanity test --- Cargo.lock | 2 +- hblang/Cargo.toml | 4 +- hblang/src/codegen.rs | 40 ++++++ hblang/src/lexer.rs | 1 + hblang/src/lib.rs | 2 + hblang/src/parser.rs | 317 +++++++++++++++++++++++++++++++++--------- hblang/src/typechk.rs | 20 +++ 7 files changed, 320 insertions(+), 66 deletions(-) create mode 100644 hblang/src/codegen.rs create mode 100644 hblang/src/typechk.rs diff --git a/Cargo.lock b/Cargo.lock index c1181a46..95a1210c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -217,7 +217,7 @@ dependencies = [ name = "hblang" version = "0.1.0" dependencies = [ - "hbvm", + "hbbytecode", "logos", ] diff --git a/hblang/Cargo.toml b/hblang/Cargo.toml index 42880ab8..0e19b10a 100644 --- a/hblang/Cargo.toml +++ b/hblang/Cargo.toml @@ -6,8 +6,6 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -#hbbytecode = { version = "0.1.0", path = "../hbbytecode" } +hbbytecode = { version = "0.1.0", path = "../hbbytecode" } logos = "0.13.0" -[dev-dependencies] -hbvm = { path = "../hbvm", features = ["nightly"] } diff --git a/hblang/src/codegen.rs b/hblang/src/codegen.rs new file mode 100644 index 00000000..42e5b799 --- /dev/null +++ b/hblang/src/codegen.rs @@ -0,0 +1,40 @@ +use crate::parser::Type; + +struct RegAlloc { + pub regs: Box<[Option; 256]>, +} + +struct Variable { + name: String, + location: usize, +} + +enum Symbol { + Type(String, Type), + Func(String, Vec, Type), +} + +struct Slot { + ty: Type, + value: Value, +} + +enum Value { + Reg(u8), + Stack(i32), + Imm(u64), +} + +type Label = usize; + +pub struct Generator { + regs: RegAlloc, + symbols: Vec, + variables: Vec, + slots: Vec, + relocations: Vec<(Label, usize)>, +} + +impl Generator { + pub fn gen(); +} diff --git a/hblang/src/lexer.rs b/hblang/src/lexer.rs index 6af6ce78..d04b97c3 100644 --- a/hblang/src/lexer.rs +++ b/hblang/src/lexer.rs @@ -51,6 +51,7 @@ macro_rules! gen_token { gen_token! { TokenKind { keywords: { + Use = "use", Fn = "fn", Let = "let", If = "if", diff --git a/hblang/src/lib.rs b/hblang/src/lib.rs index 5333bc75..17e1be45 100644 --- a/hblang/src/lib.rs +++ b/hblang/src/lib.rs @@ -1,2 +1,4 @@ +mod codegen; mod lexer; mod parser; +mod typechk; diff --git a/hblang/src/parser.rs b/hblang/src/parser.rs index 4629bc0a..799ef5b6 100644 --- a/hblang/src/parser.rs +++ b/hblang/src/parser.rs @@ -1,29 +1,37 @@ use {core::panic, std::iter}; +use std::array; + use logos::{Lexer, Logos}; use crate::lexer::{Op, TokenKind, Ty}; +#[derive(Clone, Debug)] pub enum Item { + Import(String), Struct(Struct), Function(Function), } +#[derive(Clone, Debug)] pub enum Type { Builtin(Ty), Struct(String), + Pinter(Box), } +#[derive(Clone, Debug)] pub struct Struct { - pub name: String, pub fields: Vec, } +#[derive(Clone, Debug)] pub struct Field { pub name: String, pub ty: Type, } +#[derive(Clone, Debug)] pub struct Function { pub name: String, pub args: Vec, @@ -31,11 +39,19 @@ pub struct Function { pub body: Vec, } +#[derive(Clone, Debug)] pub struct Arg { pub name: String, pub ty: Type, } +#[derive(Clone, Debug)] +pub struct CtorField { + pub name: String, + pub value: Exp, +} + +#[derive(Clone, Debug)] pub enum Exp { Literal(Literal), Variable(String), @@ -43,6 +59,10 @@ pub enum Exp { name: Box, args: Vec, }, + Ctor { + name: Option>, + fields: Vec, + }, Index { base: Box, index: Box, @@ -65,12 +85,24 @@ pub enum Exp { then: Box, else_: Option>, }, + Let { + name: String, + ty: Option, + value: Box, + }, + For { + init: Option>, + cond: Option>, + step: Option>, + block: Box, + }, Block(Vec), - Return(Box), + Return(Option>), Break, Continue, } +#[derive(Clone, Debug)] pub enum Literal { Int(i64), Bool(bool), @@ -110,7 +142,8 @@ impl<'a> Parser<'a> { }) .unwrap_or_else(|e| { let (line, col) = Self::pos_to_line_col_low(lexer.source(), lexer.span().start); - panic!("Lexer error: {}:{}", line, col,) + println!("Lexer error: {}:{}: {:?}", line, col, e); + std::process::exit(1); }) }) } @@ -126,7 +159,7 @@ impl<'a> Parser<'a> { } pub fn expect(&mut self, kind: TokenKind) -> Token { - let token = self.next().unwrap_or_else(|| panic!("Unexpected EOF")); + let token = self.expect_any(); if token.kind == kind { token } else { @@ -138,6 +171,10 @@ impl<'a> Parser<'a> { } } + pub fn expect_any(&mut self) -> Token { + self.next().unwrap_or_else(|| panic!("Unexpected EOF")) + } + pub fn peek(&self) -> Option<&Token> { self.next_token.as_ref() } @@ -170,22 +207,16 @@ impl<'a> Parser<'a> { fn parse_struct(&mut self) -> Item { let name = self.expect(TokenKind::Ident).value; self.expect(TokenKind::LBrace); - let fields = iter::from_fn(|| self.parse_field()).collect(); - self.expect(TokenKind::RBrace); + let fields = self.sequence(TokenKind::Comma, TokenKind::RBrace, Self::parse_field); Item::Struct(Struct { name, fields }) } - fn parse_field(&mut self) -> Option { - if self.peek()?.kind == TokenKind::RBrace { - return None; - } - + fn parse_field(&mut self) -> Field { let name = self.expect(TokenKind::Ident).value; self.expect(TokenKind::Colon); let ty = self.type_(); - self.try_advance(TokenKind::Comma); - Some(Field { name, ty }) + Field { name, ty } } fn type_(&mut self) -> Type { @@ -193,6 +224,10 @@ impl<'a> Parser<'a> { match token.kind { TokenKind::Ty(ty) => Type::Builtin(ty), TokenKind::Ident => Type::Struct(token.value), + TokenKind::Op(Op::Band) => { + let ty = self.type_(); + Type::Pinter(Box::new(ty)) + } tkn => { let (line, col) = self.pos_to_line_col(token.span.start); panic!("Unexpected {:?} at {}:{}", tkn, line, col) @@ -203,43 +238,23 @@ impl<'a> Parser<'a> { fn parse_function(&mut self) -> Item { let name = self.expect(TokenKind::Ident).value; self.expect(TokenKind::LParen); - let args = iter::from_fn(|| self.parse_arg()).collect(); - self.expect(TokenKind::RParen); + let args = self.sequence(TokenKind::Comma, TokenKind::RParen, Self::parse_arg); self.expect(TokenKind::Colon); let ret = self.type_(); - self.expect(TokenKind::LBrace); - let body = iter::from_fn(|| self.parse_stmt()).collect(); - self.expect(TokenKind::RBrace); Item::Function(Function { name, args, ret, - body, + body: self.parse_block(), }) } - fn parse_arg(&mut self) -> Option { - if self.peek()?.kind == TokenKind::RParen { - return None; - } - + fn parse_arg(&mut self) -> Arg { let name = self.expect(TokenKind::Ident).value; self.expect(TokenKind::Colon); let ty = self.type_(); self.try_advance(TokenKind::Comma); - - Some(Arg { name, ty }) - } - - fn parse_stmt(&mut self) -> Option { - if self.peek()?.kind == TokenKind::RBrace { - return None; - } - - let expr = self.parse_expr(); - self.expect(TokenKind::Semicolon); - - Some(expr) + Arg { name, ty } } fn parse_expr(&mut self) -> Exp { @@ -251,7 +266,7 @@ impl<'a> Parser<'a> { while let Some(TokenKind::Op(op)) = self.peek().map(|t| t.kind) { let prec = op.prec(); - if prec <= min_prec { + if prec > min_prec { break; } @@ -275,9 +290,7 @@ impl<'a> Parser<'a> { TokenKind::False => Exp::Literal(Literal::Bool(false)), TokenKind::Ident => Exp::Variable(token.value), TokenKind::LBrace => { - let body = iter::from_fn(|| self.parse_stmt()).collect(); - self.expect(TokenKind::RBrace); - Exp::Block(body) + Exp::Block(self.sequence(TokenKind::Semicolon, TokenKind::LBrace, Self::parse_expr)) } TokenKind::LParen => { let expr = self.parse_expr(); @@ -289,11 +302,89 @@ impl<'a> Parser<'a> { Exp::Literal(Literal::Int(value)) } TokenKind::Fn => todo!(), - TokenKind::Let => todo!(), - TokenKind::If => todo!(), + TokenKind::Let => { + let name = self.expect(TokenKind::Ident).value; + let ty = self.try_advance(TokenKind::Colon).then(|| self.type_()); + self.expect(TokenKind::Op(Op::Assign)); + let value = self.parse_expr(); + Exp::Let { + name, + ty, + value: Box::new(value), + } + } + TokenKind::If => { + let cond = self.parse_expr(); + let then = Exp::Block(self.parse_block()); + let else_ = self + .try_advance(TokenKind::Else) + .then(|| { + if self.peek().is_some_and(|t| t.kind == TokenKind::If) { + self.parse_expr() + } else { + Exp::Block(self.parse_block()) + } + }) + .map(Box::new); + Exp::If { + cond: Box::new(cond), + then: Box::new(then), + else_, + } + } TokenKind::Else => todo!(), - TokenKind::For => todo!(), - TokenKind::Return => todo!(), + TokenKind::For => { + let params = + self.sequence(TokenKind::Semicolon, TokenKind::LBrace, Self::parse_expr); + let mut exprs = Vec::new(); + while !self.try_advance(TokenKind::RBrace) { + exprs.push(self.parse_expr()); + self.try_advance(TokenKind::Semicolon); + } + let block = Exp::Block(exprs); + let len = params.len(); + let mut exprs = params.into_iter(); + let [init, consd, step] = array::from_fn(|_| exprs.next()); + match len { + 0 => Exp::For { + init: None, + cond: None, + step: None, + block: Box::new(block), + }, + 1 => Exp::For { + init: None, + cond: init.map(Box::new), + step: None, + block: Box::new(block), + }, + 3 => Exp::For { + init: init.map(Box::new), + cond: consd.map(Box::new), + step: step.map(Box::new), + block: Box::new(block), + }, + _ => { + let (line, col) = self.pos_to_line_col(token.span.start); + panic!("Invalid loop syntax at {}:{}, loop accepts 1 (while), 0 (loop), or 3 (for) statements separated by semicolon", line, col) + } + } + } + TokenKind::Return => { + let value = self + .peek() + .is_some_and(|t| { + !matches!( + t.kind, + TokenKind::Semicolon + | TokenKind::RBrace + | TokenKind::RParen + | TokenKind::Comma + ) + }) + .then(|| Box::new(self.parse_expr())); + Exp::Return(value) + } TokenKind::Break => todo!(), TokenKind::Continue => todo!(), TokenKind::Struct => todo!(), @@ -304,20 +395,37 @@ impl<'a> Parser<'a> { TokenKind::Colon => todo!(), TokenKind::Semicolon => todo!(), TokenKind::Comma => todo!(), - TokenKind::Op(_) => todo!(), + TokenKind::Op(op) => Exp::Unary { + op, + exp: Box::new(self.parse_expr()), + }, TokenKind::Ty(_) => todo!(), - TokenKind::Dot => todo!(), + TokenKind::Dot => { + let token = self.expect_any(); + match token.kind { + TokenKind::LBrace => { + let fields = self.sequence( + TokenKind::Comma, + TokenKind::RBrace, + Self::parse_ctor_field, + ); + Exp::Ctor { name: None, fields } + } + tkn => { + let (line, col) = self.pos_to_line_col(token.span.start); + panic!("Unexpected {:?} at {}:{}", tkn, line, col) + } + } + } }; loop { match self.peek().map(|t| t.kind) { Some(TokenKind::LParen) => { self.next(); - let args = iter::from_fn(|| self.parse_call_arg()).collect(); - self.expect(TokenKind::RParen); expr = Exp::Call { name: Box::new(expr), - args, + args: self.sequence(TokenKind::Comma, TokenKind::RParen, Self::parse_expr), }; } Some(TokenKind::LBracket) => { @@ -331,29 +439,114 @@ impl<'a> Parser<'a> { } Some(TokenKind::Dot) => { self.next(); - let field = self.expect(TokenKind::Ident).value; - expr = Exp::Field { - base: Box::new(expr), - field, - }; + + let token = self.expect_any(); + match token.kind { + TokenKind::Ident => { + expr = Exp::Field { + base: Box::new(expr), + field: token.value, + }; + } + TokenKind::LBrace => { + let fields = self.sequence( + TokenKind::Comma, + TokenKind::RBrace, + Self::parse_ctor_field, + ); + expr = Exp::Ctor { + name: Some(Box::new(expr)), + fields, + }; + } + tkn => { + let (line, col) = self.pos_to_line_col(token.span.start); + panic!("Unexpected {:?} at {}:{}", tkn, line, col) + } + } } _ => break expr, } } } - pub fn parse_call_arg(&mut self) -> Option { - if self.peek()?.kind == TokenKind::RParen { - return None; + pub fn parse_ctor_field(&mut self) -> CtorField { + let name = self.expect(TokenKind::Ident).value; + self.expect(TokenKind::Colon); + let value = self.parse_expr(); + CtorField { name, value } + } + + pub fn parse_block(&mut self) -> Vec { + self.expect(TokenKind::LBrace); + let mut exprs = Vec::new(); + while !self.try_advance(TokenKind::RBrace) { + exprs.push(self.parse_expr()); + self.try_advance(TokenKind::Semicolon); } + exprs + } - let expr = self.parse_expr(); - self.try_advance(TokenKind::Comma); - - Some(expr) + pub fn sequence( + &mut self, + sep: TokenKind, + term: TokenKind, + mut parser: impl FnMut(&mut Self) -> T, + ) -> Vec { + let mut items = Vec::new(); + while !self.try_advance(term) { + items.push(parser(self)); + if self.try_advance(term) { + break; + } + self.expect(sep); + } + items } } pub fn parse(input: &str) -> Vec { Parser::new(input).parse() } + +#[cfg(test)] +mod test { + #[test] + fn sanity() { + let input = r#" + struct Foo { + x: i32, + y: i32, + } + + fn main(): void { + let foo = Foo.{ x: 1, y: 2 }; + if foo.x > 0 { + return foo.x; + } else { + return foo.y; + } + for i < 10 { + i = i + 1; + } + for let i = 0; i < 10; i = i + 1 { + i = i + 1; + } + i + 1 * 3 / 4 % 5 == 2 + 3 - 4 * 5 / 6 % 7; + fomething(); + pahum(&foo); + lupa(*soo); + return foo.x + foo.y; + } + + fn lupa(x: i32): i32 { + return x; + } + + fn pahum(x: &Foo): void { + return; + } + "#; + let _ = super::parse(input); + } +} diff --git a/hblang/src/typechk.rs b/hblang/src/typechk.rs new file mode 100644 index 00000000..d2f95bc8 --- /dev/null +++ b/hblang/src/typechk.rs @@ -0,0 +1,20 @@ +use crate::lexer::Ty; + +#[derive(Clone, Debug)] +pub enum Type { + Builtin(Ty), + Struct(StructType), + Pointer(Box), +} + +#[derive(Clone, Debug)] +pub struct StructType { + pub name: String, + pub fields: Vec, +} + +#[derive(Clone, Debug)] +pub struct Field { + pub name: String, + pub ty: Type, +}