adding more parsing with a sanity test

This commit is contained in:
mlokr 2024-02-01 16:11:10 +01:00
parent 09aacff161
commit 6e464be33d
7 changed files with 320 additions and 66 deletions

2
Cargo.lock generated
View file

@ -217,7 +217,7 @@ dependencies = [
name = "hblang" name = "hblang"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"hbvm", "hbbytecode",
"logos", "logos",
] ]

View file

@ -6,8 +6,6 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies] [dependencies]
#hbbytecode = { version = "0.1.0", path = "../hbbytecode" } hbbytecode = { version = "0.1.0", path = "../hbbytecode" }
logos = "0.13.0" logos = "0.13.0"
[dev-dependencies]
hbvm = { path = "../hbvm", features = ["nightly"] }

40
hblang/src/codegen.rs Normal file
View file

@ -0,0 +1,40 @@
use crate::parser::Type;
/// Register-allocation state owned by [`Generator`] (its `regs` field).
struct RegAlloc {
/// Heap-allocated, fixed-size table of 256 optional `usize` entries.
/// NOTE(review): presumably one entry per machine register, with `None`
/// meaning "free" — the meaning of the stored `usize` is not visible here; confirm.
pub regs: Box<[Option<usize>; 256]>,
}
/// A named variable tracked by [`Generator`] (stored in its `variables` list).
struct Variable {
/// Name of the variable.
name: String,
/// Where the variable lives, expressed as a `usize`.
/// NOTE(review): presumably an index into `Generator::slots` — not
/// established by the visible code; confirm.
location: usize,
}
/// An entry of the generator's symbol list: either a type or a function.
enum Symbol {
/// A type symbol: a `String` paired with a parser-level [`Type`].
/// NOTE(review): presumably (name, denoted type) — confirm.
Type(String, Type),
/// A function symbol: a `String`, a list of [`Type`]s and a single [`Type`].
/// NOTE(review): presumably (name, argument types, return type) — confirm.
Func(String, Vec<Type>, Type),
}
/// A typed value location: pairs a [`Type`] with the [`Value`] describing
/// where (or what) the value is.
struct Slot {
/// Type of the value held in this slot.
ty: Type,
/// Storage/operand description of the value.
value: Value,
}
/// The three operand kinds a [`Slot`] can refer to.
enum Value {
/// Identified by a `u8` (NOTE(review): presumably a register number — confirm).
Reg(u8),
/// Identified by an `i32` (NOTE(review): presumably a signed stack offset — confirm).
Stack(i32),
/// Carries a `u64` payload (NOTE(review): presumably an immediate constant — confirm).
Imm(u64),
}
/// Identifier of a label; used as the first element of `Generator::relocations` entries.
type Label = usize;
/// State of the code generator.
///
/// Only the data layout exists so far; the behaviour is still to be written
/// in the accompanying `impl` block.
pub struct Generator {
/// Register-allocation table.
regs: RegAlloc,
/// Known type and function symbols.
symbols: Vec<Symbol>,
/// Named variables and their locations.
variables: Vec<Variable>,
/// Typed value locations.
slots: Vec<Slot>,
/// Pairs of a [`Label`] and a `usize`.
/// NOTE(review): presumably (target label, code offset to patch) — confirm.
relocations: Vec<(Label, usize)>,
}
impl Generator {
pub fn gen();
}

View file

@ -51,6 +51,7 @@ macro_rules! gen_token {
gen_token! { gen_token! {
TokenKind { TokenKind {
keywords: { keywords: {
Use = "use",
Fn = "fn", Fn = "fn",
Let = "let", Let = "let",
If = "if", If = "if",

View file

@ -1,2 +1,4 @@
mod codegen;
mod lexer; mod lexer;
mod parser; mod parser;
mod typechk;

View file

@ -1,29 +1,37 @@
use {core::panic, std::iter}; use {core::panic, std::iter};
use std::array;
use logos::{Lexer, Logos}; use logos::{Lexer, Logos};
use crate::lexer::{Op, TokenKind, Ty}; use crate::lexer::{Op, TokenKind, Ty};
#[derive(Clone, Debug)]
pub enum Item { pub enum Item {
Import(String),
Struct(Struct), Struct(Struct),
Function(Function), Function(Function),
} }
#[derive(Clone, Debug)]
pub enum Type { pub enum Type {
Builtin(Ty), Builtin(Ty),
Struct(String), Struct(String),
Pinter(Box<Type>),
} }
#[derive(Clone, Debug)]
pub struct Struct { pub struct Struct {
pub name: String,
pub fields: Vec<Field>, pub fields: Vec<Field>,
} }
#[derive(Clone, Debug)]
pub struct Field { pub struct Field {
pub name: String, pub name: String,
pub ty: Type, pub ty: Type,
} }
#[derive(Clone, Debug)]
pub struct Function { pub struct Function {
pub name: String, pub name: String,
pub args: Vec<Arg>, pub args: Vec<Arg>,
@ -31,11 +39,19 @@ pub struct Function {
pub body: Vec<Exp>, pub body: Vec<Exp>,
} }
#[derive(Clone, Debug)]
pub struct Arg { pub struct Arg {
pub name: String, pub name: String,
pub ty: Type, pub ty: Type,
} }
#[derive(Clone, Debug)]
pub struct CtorField {
pub name: String,
pub value: Exp,
}
#[derive(Clone, Debug)]
pub enum Exp { pub enum Exp {
Literal(Literal), Literal(Literal),
Variable(String), Variable(String),
@ -43,6 +59,10 @@ pub enum Exp {
name: Box<Exp>, name: Box<Exp>,
args: Vec<Exp>, args: Vec<Exp>,
}, },
Ctor {
name: Option<Box<Exp>>,
fields: Vec<CtorField>,
},
Index { Index {
base: Box<Exp>, base: Box<Exp>,
index: Box<Exp>, index: Box<Exp>,
@ -65,12 +85,24 @@ pub enum Exp {
then: Box<Exp>, then: Box<Exp>,
else_: Option<Box<Exp>>, else_: Option<Box<Exp>>,
}, },
Let {
name: String,
ty: Option<Type>,
value: Box<Exp>,
},
For {
init: Option<Box<Exp>>,
cond: Option<Box<Exp>>,
step: Option<Box<Exp>>,
block: Box<Exp>,
},
Block(Vec<Exp>), Block(Vec<Exp>),
Return(Box<Exp>), Return(Option<Box<Exp>>),
Break, Break,
Continue, Continue,
} }
#[derive(Clone, Debug)]
pub enum Literal { pub enum Literal {
Int(i64), Int(i64),
Bool(bool), Bool(bool),
@ -110,7 +142,8 @@ impl<'a> Parser<'a> {
}) })
.unwrap_or_else(|e| { .unwrap_or_else(|e| {
let (line, col) = Self::pos_to_line_col_low(lexer.source(), lexer.span().start); let (line, col) = Self::pos_to_line_col_low(lexer.source(), lexer.span().start);
panic!("Lexer error: {}:{}", line, col,) println!("Lexer error: {}:{}: {:?}", line, col, e);
std::process::exit(1);
}) })
}) })
} }
@ -126,7 +159,7 @@ impl<'a> Parser<'a> {
} }
pub fn expect(&mut self, kind: TokenKind) -> Token { pub fn expect(&mut self, kind: TokenKind) -> Token {
let token = self.next().unwrap_or_else(|| panic!("Unexpected EOF")); let token = self.expect_any();
if token.kind == kind { if token.kind == kind {
token token
} else { } else {
@ -138,6 +171,10 @@ impl<'a> Parser<'a> {
} }
} }
pub fn expect_any(&mut self) -> Token {
self.next().unwrap_or_else(|| panic!("Unexpected EOF"))
}
pub fn peek(&self) -> Option<&Token> { pub fn peek(&self) -> Option<&Token> {
self.next_token.as_ref() self.next_token.as_ref()
} }
@ -170,22 +207,16 @@ impl<'a> Parser<'a> {
fn parse_struct(&mut self) -> Item { fn parse_struct(&mut self) -> Item {
let name = self.expect(TokenKind::Ident).value; let name = self.expect(TokenKind::Ident).value;
self.expect(TokenKind::LBrace); self.expect(TokenKind::LBrace);
let fields = iter::from_fn(|| self.parse_field()).collect(); let fields = self.sequence(TokenKind::Comma, TokenKind::RBrace, Self::parse_field);
self.expect(TokenKind::RBrace);
Item::Struct(Struct { name, fields }) Item::Struct(Struct { name, fields })
} }
fn parse_field(&mut self) -> Option<Field> { fn parse_field(&mut self) -> Field {
if self.peek()?.kind == TokenKind::RBrace {
return None;
}
let name = self.expect(TokenKind::Ident).value; let name = self.expect(TokenKind::Ident).value;
self.expect(TokenKind::Colon); self.expect(TokenKind::Colon);
let ty = self.type_(); let ty = self.type_();
self.try_advance(TokenKind::Comma);
Some(Field { name, ty }) Field { name, ty }
} }
fn type_(&mut self) -> Type { fn type_(&mut self) -> Type {
@ -193,6 +224,10 @@ impl<'a> Parser<'a> {
match token.kind { match token.kind {
TokenKind::Ty(ty) => Type::Builtin(ty), TokenKind::Ty(ty) => Type::Builtin(ty),
TokenKind::Ident => Type::Struct(token.value), TokenKind::Ident => Type::Struct(token.value),
TokenKind::Op(Op::Band) => {
let ty = self.type_();
Type::Pinter(Box::new(ty))
}
tkn => { tkn => {
let (line, col) = self.pos_to_line_col(token.span.start); let (line, col) = self.pos_to_line_col(token.span.start);
panic!("Unexpected {:?} at {}:{}", tkn, line, col) panic!("Unexpected {:?} at {}:{}", tkn, line, col)
@ -203,43 +238,23 @@ impl<'a> Parser<'a> {
fn parse_function(&mut self) -> Item { fn parse_function(&mut self) -> Item {
let name = self.expect(TokenKind::Ident).value; let name = self.expect(TokenKind::Ident).value;
self.expect(TokenKind::LParen); self.expect(TokenKind::LParen);
let args = iter::from_fn(|| self.parse_arg()).collect(); let args = self.sequence(TokenKind::Comma, TokenKind::RParen, Self::parse_arg);
self.expect(TokenKind::RParen);
self.expect(TokenKind::Colon); self.expect(TokenKind::Colon);
let ret = self.type_(); let ret = self.type_();
self.expect(TokenKind::LBrace);
let body = iter::from_fn(|| self.parse_stmt()).collect();
self.expect(TokenKind::RBrace);
Item::Function(Function { Item::Function(Function {
name, name,
args, args,
ret, ret,
body, body: self.parse_block(),
}) })
} }
fn parse_arg(&mut self) -> Option<Arg> { fn parse_arg(&mut self) -> Arg {
if self.peek()?.kind == TokenKind::RParen {
return None;
}
let name = self.expect(TokenKind::Ident).value; let name = self.expect(TokenKind::Ident).value;
self.expect(TokenKind::Colon); self.expect(TokenKind::Colon);
let ty = self.type_(); let ty = self.type_();
self.try_advance(TokenKind::Comma); self.try_advance(TokenKind::Comma);
Arg { name, ty }
Some(Arg { name, ty })
}
fn parse_stmt(&mut self) -> Option<Exp> {
if self.peek()?.kind == TokenKind::RBrace {
return None;
}
let expr = self.parse_expr();
self.expect(TokenKind::Semicolon);
Some(expr)
} }
fn parse_expr(&mut self) -> Exp { fn parse_expr(&mut self) -> Exp {
@ -251,7 +266,7 @@ impl<'a> Parser<'a> {
while let Some(TokenKind::Op(op)) = self.peek().map(|t| t.kind) { while let Some(TokenKind::Op(op)) = self.peek().map(|t| t.kind) {
let prec = op.prec(); let prec = op.prec();
if prec <= min_prec { if prec > min_prec {
break; break;
} }
@ -275,9 +290,7 @@ impl<'a> Parser<'a> {
TokenKind::False => Exp::Literal(Literal::Bool(false)), TokenKind::False => Exp::Literal(Literal::Bool(false)),
TokenKind::Ident => Exp::Variable(token.value), TokenKind::Ident => Exp::Variable(token.value),
TokenKind::LBrace => { TokenKind::LBrace => {
let body = iter::from_fn(|| self.parse_stmt()).collect(); Exp::Block(self.sequence(TokenKind::Semicolon, TokenKind::LBrace, Self::parse_expr))
self.expect(TokenKind::RBrace);
Exp::Block(body)
} }
TokenKind::LParen => { TokenKind::LParen => {
let expr = self.parse_expr(); let expr = self.parse_expr();
@ -289,11 +302,89 @@ impl<'a> Parser<'a> {
Exp::Literal(Literal::Int(value)) Exp::Literal(Literal::Int(value))
} }
TokenKind::Fn => todo!(), TokenKind::Fn => todo!(),
TokenKind::Let => todo!(), TokenKind::Let => {
TokenKind::If => todo!(), let name = self.expect(TokenKind::Ident).value;
let ty = self.try_advance(TokenKind::Colon).then(|| self.type_());
self.expect(TokenKind::Op(Op::Assign));
let value = self.parse_expr();
Exp::Let {
name,
ty,
value: Box::new(value),
}
}
TokenKind::If => {
let cond = self.parse_expr();
let then = Exp::Block(self.parse_block());
let else_ = self
.try_advance(TokenKind::Else)
.then(|| {
if self.peek().is_some_and(|t| t.kind == TokenKind::If) {
self.parse_expr()
} else {
Exp::Block(self.parse_block())
}
})
.map(Box::new);
Exp::If {
cond: Box::new(cond),
then: Box::new(then),
else_,
}
}
TokenKind::Else => todo!(), TokenKind::Else => todo!(),
TokenKind::For => todo!(), TokenKind::For => {
TokenKind::Return => todo!(), let params =
self.sequence(TokenKind::Semicolon, TokenKind::LBrace, Self::parse_expr);
let mut exprs = Vec::new();
while !self.try_advance(TokenKind::RBrace) {
exprs.push(self.parse_expr());
self.try_advance(TokenKind::Semicolon);
}
let block = Exp::Block(exprs);
let len = params.len();
let mut exprs = params.into_iter();
let [init, consd, step] = array::from_fn(|_| exprs.next());
match len {
0 => Exp::For {
init: None,
cond: None,
step: None,
block: Box::new(block),
},
1 => Exp::For {
init: None,
cond: init.map(Box::new),
step: None,
block: Box::new(block),
},
3 => Exp::For {
init: init.map(Box::new),
cond: consd.map(Box::new),
step: step.map(Box::new),
block: Box::new(block),
},
_ => {
let (line, col) = self.pos_to_line_col(token.span.start);
panic!("Invalid loop syntax at {}:{}, loop accepts 1 (while), 0 (loop), or 3 (for) statements separated by semicolon", line, col)
}
}
}
TokenKind::Return => {
let value = self
.peek()
.is_some_and(|t| {
!matches!(
t.kind,
TokenKind::Semicolon
| TokenKind::RBrace
| TokenKind::RParen
| TokenKind::Comma
)
})
.then(|| Box::new(self.parse_expr()));
Exp::Return(value)
}
TokenKind::Break => todo!(), TokenKind::Break => todo!(),
TokenKind::Continue => todo!(), TokenKind::Continue => todo!(),
TokenKind::Struct => todo!(), TokenKind::Struct => todo!(),
@ -304,20 +395,37 @@ impl<'a> Parser<'a> {
TokenKind::Colon => todo!(), TokenKind::Colon => todo!(),
TokenKind::Semicolon => todo!(), TokenKind::Semicolon => todo!(),
TokenKind::Comma => todo!(), TokenKind::Comma => todo!(),
TokenKind::Op(_) => todo!(), TokenKind::Op(op) => Exp::Unary {
op,
exp: Box::new(self.parse_expr()),
},
TokenKind::Ty(_) => todo!(), TokenKind::Ty(_) => todo!(),
TokenKind::Dot => todo!(), TokenKind::Dot => {
let token = self.expect_any();
match token.kind {
TokenKind::LBrace => {
let fields = self.sequence(
TokenKind::Comma,
TokenKind::RBrace,
Self::parse_ctor_field,
);
Exp::Ctor { name: None, fields }
}
tkn => {
let (line, col) = self.pos_to_line_col(token.span.start);
panic!("Unexpected {:?} at {}:{}", tkn, line, col)
}
}
}
}; };
loop { loop {
match self.peek().map(|t| t.kind) { match self.peek().map(|t| t.kind) {
Some(TokenKind::LParen) => { Some(TokenKind::LParen) => {
self.next(); self.next();
let args = iter::from_fn(|| self.parse_call_arg()).collect();
self.expect(TokenKind::RParen);
expr = Exp::Call { expr = Exp::Call {
name: Box::new(expr), name: Box::new(expr),
args, args: self.sequence(TokenKind::Comma, TokenKind::RParen, Self::parse_expr),
}; };
} }
Some(TokenKind::LBracket) => { Some(TokenKind::LBracket) => {
@ -331,29 +439,114 @@ impl<'a> Parser<'a> {
} }
Some(TokenKind::Dot) => { Some(TokenKind::Dot) => {
self.next(); self.next();
let field = self.expect(TokenKind::Ident).value;
expr = Exp::Field { let token = self.expect_any();
base: Box::new(expr), match token.kind {
field, TokenKind::Ident => {
}; expr = Exp::Field {
base: Box::new(expr),
field: token.value,
};
}
TokenKind::LBrace => {
let fields = self.sequence(
TokenKind::Comma,
TokenKind::RBrace,
Self::parse_ctor_field,
);
expr = Exp::Ctor {
name: Some(Box::new(expr)),
fields,
};
}
tkn => {
let (line, col) = self.pos_to_line_col(token.span.start);
panic!("Unexpected {:?} at {}:{}", tkn, line, col)
}
}
} }
_ => break expr, _ => break expr,
} }
} }
} }
pub fn parse_call_arg(&mut self) -> Option<Exp> { pub fn parse_ctor_field(&mut self) -> CtorField {
if self.peek()?.kind == TokenKind::RParen { let name = self.expect(TokenKind::Ident).value;
return None; self.expect(TokenKind::Colon);
let value = self.parse_expr();
CtorField { name, value }
}
pub fn parse_block(&mut self) -> Vec<Exp> {
self.expect(TokenKind::LBrace);
let mut exprs = Vec::new();
while !self.try_advance(TokenKind::RBrace) {
exprs.push(self.parse_expr());
self.try_advance(TokenKind::Semicolon);
} }
exprs
}
let expr = self.parse_expr(); pub fn sequence<T>(
self.try_advance(TokenKind::Comma); &mut self,
sep: TokenKind,
Some(expr) term: TokenKind,
mut parser: impl FnMut(&mut Self) -> T,
) -> Vec<T> {
let mut items = Vec::new();
while !self.try_advance(term) {
items.push(parser(self));
if self.try_advance(term) {
break;
}
self.expect(sep);
}
items
} }
} }
pub fn parse(input: &str) -> Vec<Item> { pub fn parse(input: &str) -> Vec<Item> {
Parser::new(input).parse() Parser::new(input).parse()
} }
#[cfg(test)]
mod test {
    /// Program text fed to the parser by [`sanity`]: one struct, three
    /// functions, and a mix of `let`, `if`/`else`, both `for` forms,
    /// operator expressions, calls, unary operators and `return`.
    const SANITY_SOURCE: &str = r#"
struct Foo {
x: i32,
y: i32,
}
fn main(): void {
let foo = Foo.{ x: 1, y: 2 };
if foo.x > 0 {
return foo.x;
} else {
return foo.y;
}
for i < 10 {
i = i + 1;
}
for let i = 0; i < 10; i = i + 1 {
i = i + 1;
}
i + 1 * 3 / 4 % 5 == 2 + 3 - 4 * 5 / 6 % 7;
fomething();
pahum(&foo);
lupa(*soo);
return foo.x + foo.y;
}
fn lupa(x: i32): i32 {
return x;
}
fn pahum(x: &Foo): void {
return;
}
"#;

    /// Smoke test: runs the parser over [`SANITY_SOURCE`] and discards the
    /// result. Nothing is asserted about the produced items; the test only
    /// checks that the call returns.
    #[test]
    fn sanity() {
        drop(super::parse(SANITY_SOURCE));
    }
}

20
hblang/src/typechk.rs Normal file
View file

@ -0,0 +1,20 @@
use crate::lexer::Ty;
/// A type as represented in the type-checking module.
///
/// NOTE(review): unlike the parser's `Type`, whose struct variant holds only
/// a name, `Struct` here carries a full [`StructType`] — presumably the
/// resolved form of the parsed type; confirm.
#[derive(Clone, Debug)]
pub enum Type {
/// One of the lexer-level builtin types.
Builtin(Ty),
/// A struct type together with its name and fields.
Struct(StructType),
/// A pointer to another type.
Pointer(Box<Type>),
}
/// A struct type: its name plus the list of its fields.
#[derive(Clone, Debug)]
pub struct StructType {
/// Name of the struct.
pub name: String,
/// Fields of the struct, each with a name and a [`Type`].
pub fields: Vec<Field>,
}
/// A single field of a [`StructType`].
#[derive(Clone, Debug)]
pub struct Field {
/// Name of the field.
pub name: String,
/// Type of the field.
pub ty: Type,
}