adding more parsing with a sanity test

This commit is contained in:
mlokr 2024-02-01 16:11:10 +01:00
parent 09aacff161
commit 6e464be33d
7 changed files with 320 additions and 66 deletions

2
Cargo.lock generated
View file

@ -217,7 +217,7 @@ dependencies = [
name = "hblang"
version = "0.1.0"
dependencies = [
"hbvm",
"hbbytecode",
"logos",
]

View file

@ -6,8 +6,6 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
#hbbytecode = { version = "0.1.0", path = "../hbbytecode" }
hbbytecode = { version = "0.1.0", path = "../hbbytecode" }
logos = "0.13.0"
[dev-dependencies]
hbvm = { path = "../hbvm", features = ["nightly"] }

40
hblang/src/codegen.rs Normal file
View file

@ -0,0 +1,40 @@
use crate::parser::Type;
/// Register bookkeeping table for the code generator.
///
/// Nothing in this file reads or writes it yet; it is only stored in `Generator`.
struct RegAlloc {
// Heap-allocated table of 256 optional `usize` entries.
// NOTE(review): presumably one entry per machine register (`Value::Reg` is a `u8`),
// with `Some(..)` marking an occupied register — confirm once the allocator is implemented.
pub regs: Box<[Option<usize>; 256]>,
}
/// A named variable tracked by the code generator.
struct Variable {
// Identifier of the variable.
name: String,
// NOTE(review): presumably an index into `Generator::slots` — the meaning is not
// established by any code in this file; confirm.
location: usize,
}
/// An entry of the generator's symbol list (`Generator::symbols`).
enum Symbol {
// A string paired with a parser-level `Type`; presumably a named type — TODO confirm.
Type(String, Type),
// A string, a list of types and a single type; presumably function name,
// argument types and return type — TODO confirm.
Func(String, Vec<Type>, Type),
}
/// A typed value: pairs a parser-level `Type` with the `Value` describing where/what it is.
struct Slot {
ty: Type,
value: Value,
}
/// The three forms a `Slot` value can take.
enum Value {
// Presumably a register number — TODO confirm.
Reg(u8),
// Presumably a (signed) stack offset — TODO confirm.
Stack(i32),
// Presumably an immediate constant — TODO confirm.
Imm(u64),
}
/// Plain `usize` alias; used as the first element of `Generator::relocations` entries.
type Label = usize;
/// State of the code generator.
///
/// All fields are private and no code in this file populates them yet.
pub struct Generator {
// Register bookkeeping table.
regs: RegAlloc,
// Known type and function symbols.
symbols: Vec<Symbol>,
// Named variables.
variables: Vec<Variable>,
// Typed values.
slots: Vec<Slot>,
// Pairs of a label and a `usize`.
// NOTE(review): the `usize` is presumably a code offset to patch — confirm.
relocations: Vec<(Label, usize)>,
}
impl Generator {
pub fn gen();
}

View file

@ -51,6 +51,7 @@ macro_rules! gen_token {
gen_token! {
TokenKind {
keywords: {
Use = "use",
Fn = "fn",
Let = "let",
If = "if",

View file

@ -1,2 +1,4 @@
mod codegen;
mod lexer;
mod parser;
mod typechk;

View file

@ -1,29 +1,37 @@
use {core::panic, std::iter};
use std::array;
use logos::{Lexer, Logos};
use crate::lexer::{Op, TokenKind, Ty};
/// A top-level item of a parsed source file; `parse` returns a `Vec<Item>`.
#[derive(Clone, Debug)]
pub enum Item {
// NOTE(review): presumably produced by a `use` declaration, with the string naming
// the imported module/path — the code building it is not visible here; confirm.
Import(String),
// A struct definition.
Struct(Struct),
// A function definition.
Function(Function),
}
/// A type as written in source code (unresolved: structs are referenced by name only).
#[derive(Clone, Debug)]
pub enum Type {
// A builtin type token from the lexer.
Builtin(Ty),
// A user type referenced by its identifier.
Struct(String),
// A pointer to the boxed type; parsed from `&` followed by a type.
// NOTE(review): `Pinter` looks like a typo for `Pointer`; renaming requires updating
// every use of the variant, so it is only flagged here.
Pinter(Box<Type>),
}
/// A struct definition: its name and its declared fields.
#[derive(Clone, Debug)]
pub struct Struct {
// Identifier following the `struct` keyword.
pub name: String,
// Fields listed between the braces, in source order.
pub fields: Vec<Field>,
}
/// A single `name: type` field of a struct definition.
#[derive(Clone, Debug)]
pub struct Field {
pub name: String,
pub ty: Type,
}
#[derive(Clone, Debug)]
pub struct Function {
pub name: String,
pub args: Vec<Arg>,
@ -31,11 +39,19 @@ pub struct Function {
pub body: Vec<Exp>,
}
/// A single `name: type` parameter of a function definition.
#[derive(Clone, Debug)]
pub struct Arg {
pub name: String,
pub ty: Type,
}
/// A single `name: value` initializer inside a constructor expression (`Exp::Ctor`).
#[derive(Clone, Debug)]
pub struct CtorField {
// Name of the field being initialized.
pub name: String,
// Expression providing the field's value.
pub value: Exp,
}
#[derive(Clone, Debug)]
pub enum Exp {
Literal(Literal),
Variable(String),
@ -43,6 +59,10 @@ pub enum Exp {
name: Box<Exp>,
args: Vec<Exp>,
},
Ctor {
name: Option<Box<Exp>>,
fields: Vec<CtorField>,
},
Index {
base: Box<Exp>,
index: Box<Exp>,
@ -65,12 +85,24 @@ pub enum Exp {
then: Box<Exp>,
else_: Option<Box<Exp>>,
},
Let {
name: String,
ty: Option<Type>,
value: Box<Exp>,
},
For {
init: Option<Box<Exp>>,
cond: Option<Box<Exp>>,
step: Option<Box<Exp>>,
block: Box<Exp>,
},
Block(Vec<Exp>),
Return(Box<Exp>),
Return(Option<Box<Exp>>),
Break,
Continue,
}
#[derive(Clone, Debug)]
pub enum Literal {
Int(i64),
Bool(bool),
@ -110,7 +142,8 @@ impl<'a> Parser<'a> {
})
.unwrap_or_else(|e| {
let (line, col) = Self::pos_to_line_col_low(lexer.source(), lexer.span().start);
panic!("Lexer error: {}:{}", line, col,)
println!("Lexer error: {}:{}: {:?}", line, col, e);
std::process::exit(1);
})
})
}
@ -126,7 +159,7 @@ impl<'a> Parser<'a> {
}
pub fn expect(&mut self, kind: TokenKind) -> Token {
let token = self.next().unwrap_or_else(|| panic!("Unexpected EOF"));
let token = self.expect_any();
if token.kind == kind {
token
} else {
@ -138,6 +171,10 @@ impl<'a> Parser<'a> {
}
}
/// Consumes and returns the next token, whatever its kind.
///
/// # Panics
///
/// Panics with `Unexpected EOF` when `next` yields no token.
pub fn expect_any(&mut self) -> Token {
    match self.next() {
        Some(token) => token,
        None => panic!("Unexpected EOF"),
    }
}
/// Returns a reference to the buffered lookahead token (`next_token`) without
/// consuming it, or `None` when no token is buffered.
pub fn peek(&self) -> Option<&Token> {
self.next_token.as_ref()
}
@ -170,22 +207,16 @@ impl<'a> Parser<'a> {
fn parse_struct(&mut self) -> Item {
let name = self.expect(TokenKind::Ident).value;
self.expect(TokenKind::LBrace);
let fields = iter::from_fn(|| self.parse_field()).collect();
self.expect(TokenKind::RBrace);
let fields = self.sequence(TokenKind::Comma, TokenKind::RBrace, Self::parse_field);
Item::Struct(Struct { name, fields })
}
fn parse_field(&mut self) -> Option<Field> {
if self.peek()?.kind == TokenKind::RBrace {
return None;
}
fn parse_field(&mut self) -> Field {
let name = self.expect(TokenKind::Ident).value;
self.expect(TokenKind::Colon);
let ty = self.type_();
self.try_advance(TokenKind::Comma);
Some(Field { name, ty })
Field { name, ty }
}
fn type_(&mut self) -> Type {
@ -193,6 +224,10 @@ impl<'a> Parser<'a> {
match token.kind {
TokenKind::Ty(ty) => Type::Builtin(ty),
TokenKind::Ident => Type::Struct(token.value),
TokenKind::Op(Op::Band) => {
let ty = self.type_();
Type::Pinter(Box::new(ty))
}
tkn => {
let (line, col) = self.pos_to_line_col(token.span.start);
panic!("Unexpected {:?} at {}:{}", tkn, line, col)
@ -203,43 +238,23 @@ impl<'a> Parser<'a> {
fn parse_function(&mut self) -> Item {
let name = self.expect(TokenKind::Ident).value;
self.expect(TokenKind::LParen);
let args = iter::from_fn(|| self.parse_arg()).collect();
self.expect(TokenKind::RParen);
let args = self.sequence(TokenKind::Comma, TokenKind::RParen, Self::parse_arg);
self.expect(TokenKind::Colon);
let ret = self.type_();
self.expect(TokenKind::LBrace);
let body = iter::from_fn(|| self.parse_stmt()).collect();
self.expect(TokenKind::RBrace);
Item::Function(Function {
name,
args,
ret,
body,
body: self.parse_block(),
})
}
fn parse_arg(&mut self) -> Option<Arg> {
if self.peek()?.kind == TokenKind::RParen {
return None;
}
fn parse_arg(&mut self) -> Arg {
let name = self.expect(TokenKind::Ident).value;
self.expect(TokenKind::Colon);
let ty = self.type_();
self.try_advance(TokenKind::Comma);
Some(Arg { name, ty })
}
fn parse_stmt(&mut self) -> Option<Exp> {
if self.peek()?.kind == TokenKind::RBrace {
return None;
}
let expr = self.parse_expr();
self.expect(TokenKind::Semicolon);
Some(expr)
Arg { name, ty }
}
fn parse_expr(&mut self) -> Exp {
@ -251,7 +266,7 @@ impl<'a> Parser<'a> {
while let Some(TokenKind::Op(op)) = self.peek().map(|t| t.kind) {
let prec = op.prec();
if prec <= min_prec {
if prec > min_prec {
break;
}
@ -275,9 +290,7 @@ impl<'a> Parser<'a> {
TokenKind::False => Exp::Literal(Literal::Bool(false)),
TokenKind::Ident => Exp::Variable(token.value),
TokenKind::LBrace => {
let body = iter::from_fn(|| self.parse_stmt()).collect();
self.expect(TokenKind::RBrace);
Exp::Block(body)
Exp::Block(self.sequence(TokenKind::Semicolon, TokenKind::LBrace, Self::parse_expr))
}
TokenKind::LParen => {
let expr = self.parse_expr();
@ -289,11 +302,89 @@ impl<'a> Parser<'a> {
Exp::Literal(Literal::Int(value))
}
TokenKind::Fn => todo!(),
TokenKind::Let => todo!(),
TokenKind::If => todo!(),
TokenKind::Let => {
let name = self.expect(TokenKind::Ident).value;
let ty = self.try_advance(TokenKind::Colon).then(|| self.type_());
self.expect(TokenKind::Op(Op::Assign));
let value = self.parse_expr();
Exp::Let {
name,
ty,
value: Box::new(value),
}
}
TokenKind::If => {
let cond = self.parse_expr();
let then = Exp::Block(self.parse_block());
let else_ = self
.try_advance(TokenKind::Else)
.then(|| {
if self.peek().is_some_and(|t| t.kind == TokenKind::If) {
self.parse_expr()
} else {
Exp::Block(self.parse_block())
}
})
.map(Box::new);
Exp::If {
cond: Box::new(cond),
then: Box::new(then),
else_,
}
}
TokenKind::Else => todo!(),
TokenKind::For => todo!(),
TokenKind::Return => todo!(),
TokenKind::For => {
let params =
self.sequence(TokenKind::Semicolon, TokenKind::LBrace, Self::parse_expr);
let mut exprs = Vec::new();
while !self.try_advance(TokenKind::RBrace) {
exprs.push(self.parse_expr());
self.try_advance(TokenKind::Semicolon);
}
let block = Exp::Block(exprs);
let len = params.len();
let mut exprs = params.into_iter();
let [init, consd, step] = array::from_fn(|_| exprs.next());
match len {
0 => Exp::For {
init: None,
cond: None,
step: None,
block: Box::new(block),
},
1 => Exp::For {
init: None,
cond: init.map(Box::new),
step: None,
block: Box::new(block),
},
3 => Exp::For {
init: init.map(Box::new),
cond: consd.map(Box::new),
step: step.map(Box::new),
block: Box::new(block),
},
_ => {
let (line, col) = self.pos_to_line_col(token.span.start);
panic!("Invalid loop syntax at {}:{}, loop accepts 1 (while), 0 (loop), or 3 (for) statements separated by semicolon", line, col)
}
}
}
TokenKind::Return => {
let value = self
.peek()
.is_some_and(|t| {
!matches!(
t.kind,
TokenKind::Semicolon
| TokenKind::RBrace
| TokenKind::RParen
| TokenKind::Comma
)
})
.then(|| Box::new(self.parse_expr()));
Exp::Return(value)
}
TokenKind::Break => todo!(),
TokenKind::Continue => todo!(),
TokenKind::Struct => todo!(),
@ -304,20 +395,37 @@ impl<'a> Parser<'a> {
TokenKind::Colon => todo!(),
TokenKind::Semicolon => todo!(),
TokenKind::Comma => todo!(),
TokenKind::Op(_) => todo!(),
TokenKind::Op(op) => Exp::Unary {
op,
exp: Box::new(self.parse_expr()),
},
TokenKind::Ty(_) => todo!(),
TokenKind::Dot => todo!(),
TokenKind::Dot => {
let token = self.expect_any();
match token.kind {
TokenKind::LBrace => {
let fields = self.sequence(
TokenKind::Comma,
TokenKind::RBrace,
Self::parse_ctor_field,
);
Exp::Ctor { name: None, fields }
}
tkn => {
let (line, col) = self.pos_to_line_col(token.span.start);
panic!("Unexpected {:?} at {}:{}", tkn, line, col)
}
}
}
};
loop {
match self.peek().map(|t| t.kind) {
Some(TokenKind::LParen) => {
self.next();
let args = iter::from_fn(|| self.parse_call_arg()).collect();
self.expect(TokenKind::RParen);
expr = Exp::Call {
name: Box::new(expr),
args,
args: self.sequence(TokenKind::Comma, TokenKind::RParen, Self::parse_expr),
};
}
Some(TokenKind::LBracket) => {
@ -331,29 +439,114 @@ impl<'a> Parser<'a> {
}
Some(TokenKind::Dot) => {
self.next();
let field = self.expect(TokenKind::Ident).value;
expr = Exp::Field {
base: Box::new(expr),
field,
};
let token = self.expect_any();
match token.kind {
TokenKind::Ident => {
expr = Exp::Field {
base: Box::new(expr),
field: token.value,
};
}
TokenKind::LBrace => {
let fields = self.sequence(
TokenKind::Comma,
TokenKind::RBrace,
Self::parse_ctor_field,
);
expr = Exp::Ctor {
name: Some(Box::new(expr)),
fields,
};
}
tkn => {
let (line, col) = self.pos_to_line_col(token.span.start);
panic!("Unexpected {:?} at {}:{}", tkn, line, col)
}
}
}
_ => break expr,
}
}
}
pub fn parse_call_arg(&mut self) -> Option<Exp> {
if self.peek()?.kind == TokenKind::RParen {
return None;
/// Parses one `name: value` initializer of a constructor expression.
///
/// Expects an identifier, then a colon, then an arbitrary expression.
///
/// # Panics
///
/// Panics through `expect` when the identifier or the colon is missing.
pub fn parse_ctor_field(&mut self) -> CtorField {
    let ident = self.expect(TokenKind::Ident);
    self.expect(TokenKind::Colon);
    CtorField {
        name: ident.value,
        value: self.parse_expr(),
    }
}
/// Parses a brace-delimited block and returns its expressions in source order.
///
/// The opening `{` is required. Expressions are then read until `try_advance`
/// succeeds on `}`; the `;` after each expression is optional.
///
/// # Panics
///
/// Panics through `expect` when the opening brace is missing.
pub fn parse_block(&mut self) -> Vec<Exp> {
    self.expect(TokenKind::LBrace);
    let mut body = Vec::new();
    loop {
        if self.try_advance(TokenKind::RBrace) {
            break body;
        }
        let stmt = self.parse_expr();
        body.push(stmt);
        // The separator is optional, so the result is deliberately ignored.
        self.try_advance(TokenKind::Semicolon);
    }
}
let expr = self.parse_expr();
self.try_advance(TokenKind::Comma);
Some(expr)
/// Parses a `sep`-separated list of items that ends with `term`.
///
/// `parser` is invoked once per item. The list may be empty, and a trailing
/// separator directly before `term` is accepted. The opening delimiter is not
/// handled here; callers consume it before calling.
///
/// # Panics
///
/// Panics through `expect` when an item is followed by neither `term` nor `sep`.
pub fn sequence<T>(
    &mut self,
    sep: TokenKind,
    term: TokenKind,
    mut parser: impl FnMut(&mut Self) -> T,
) -> Vec<T> {
    let mut collected = Vec::new();
    loop {
        // Terminator right away: empty list, or the end after a trailing separator.
        if self.try_advance(term) {
            return collected;
        }
        collected.push(parser(self));
        // Terminator directly after an item: list ends without trailing separator.
        if self.try_advance(term) {
            return collected;
        }
        self.expect(sep);
    }
}
}
/// Parses `input` into its top-level items by constructing a `Parser` over it and
/// running `Parser::parse`.
///
/// Errors are not reported through the return value: the parser methods visible in
/// this file panic on malformed input (for example on an unexpected end of input).
pub fn parse(input: &str) -> Vec<Item> {
Parser::new(input).parse()
}
#[cfg(test)]
mod test {
    /// Smoke test: feeds a program covering struct definitions, constructors,
    /// `if`/`else`, both `for` forms, operators, calls and `return` to the parser.
    /// Only the absence of a panic is checked; the resulting items are discarded.
    #[test]
    fn sanity() {
        const SOURCE: &str = r#"
struct Foo {
x: i32,
y: i32,
}
fn main(): void {
let foo = Foo.{ x: 1, y: 2 };
if foo.x > 0 {
return foo.x;
} else {
return foo.y;
}
for i < 10 {
i = i + 1;
}
for let i = 0; i < 10; i = i + 1 {
i = i + 1;
}
i + 1 * 3 / 4 % 5 == 2 + 3 - 4 * 5 / 6 % 7;
fomething();
pahum(&foo);
lupa(*soo);
return foo.x + foo.y;
}
fn lupa(x: i32): i32 {
return x;
}
fn pahum(x: &Foo): void {
return;
}
"#;
        drop(super::parse(SOURCE));
    }
}

20
hblang/src/typechk.rs Normal file
View file

@ -0,0 +1,20 @@
use crate::lexer::Ty;
/// A type as seen by the type checker.
///
/// Unlike a bare name, the `Struct` variant carries the full `StructType` (name and fields).
#[derive(Clone, Debug)]
pub enum Type {
// A builtin type token from the lexer.
Builtin(Ty),
// A struct type together with its field list.
Struct(StructType),
// A pointer to the boxed type.
Pointer(Box<Type>),
}
/// A struct type: its name and its fields, each with a type-checker `Type`.
#[derive(Clone, Debug)]
pub struct StructType {
pub name: String,
pub fields: Vec<Field>,
}
/// A named field of a `StructType`, paired with its type-checker `Type`.
#[derive(Clone, Debug)]
pub struct Field {
pub name: String,
pub ty: Type,
}