holey-bytes/hblang/src/parser.rs

561 lines
16 KiB
Rust
Raw Normal View History

2024-01-31 13:11:57 -06:00
use {core::panic, std::iter};
2024-02-01 09:11:10 -06:00
use std::array;
2024-01-31 13:11:57 -06:00
use logos::{Lexer, Logos};
use crate::lexer::{Op, TokenKind, Ty};
2024-02-01 09:11:10 -06:00
#[derive(Clone, Debug)]
2024-01-31 13:11:57 -06:00
pub enum Item {
2024-02-01 09:11:10 -06:00
Import(String),
2024-01-31 13:11:57 -06:00
Struct(Struct),
Function(Function),
}
2024-02-03 14:43:30 -06:00
#[derive(Clone, Debug, PartialEq, Eq)]
2024-01-31 13:11:57 -06:00
pub enum Type {
Builtin(Ty),
Struct(String),
2024-02-01 09:11:10 -06:00
Pinter(Box<Type>),
2024-01-31 13:11:57 -06:00
}
2024-02-01 09:11:10 -06:00
#[derive(Clone, Debug)]
2024-01-31 13:11:57 -06:00
pub struct Struct {
2024-02-03 14:43:30 -06:00
pub name: String,
2024-01-31 13:11:57 -06:00
pub fields: Vec<Field>,
}
2024-02-01 09:11:10 -06:00
#[derive(Clone, Debug)]
2024-01-31 13:11:57 -06:00
pub struct Field {
pub name: String,
pub ty: Type,
}
2024-02-01 09:11:10 -06:00
#[derive(Clone, Debug)]
2024-01-31 13:11:57 -06:00
pub struct Function {
pub name: String,
pub args: Vec<Arg>,
pub ret: Type,
pub body: Vec<Exp>,
}
2024-02-01 09:11:10 -06:00
#[derive(Clone, Debug)]
2024-01-31 13:11:57 -06:00
pub struct Arg {
pub name: String,
pub ty: Type,
}
2024-02-01 09:11:10 -06:00
/// One `name: value` initializer inside a `.{ ... }` constructor expression.
#[derive(Clone, Debug)]
pub struct CtorField {
// Identifier of the field being initialized.
pub name: String,
// Expression providing the field's value.
pub value: Exp,
}
#[derive(Clone, Debug)]
2024-01-31 13:11:57 -06:00
pub enum Exp {
Literal(Literal),
Variable(String),
Call {
2024-02-04 03:34:16 -06:00
name: String,
2024-01-31 13:11:57 -06:00
args: Vec<Exp>,
},
2024-02-01 09:11:10 -06:00
Ctor {
name: Option<Box<Exp>>,
fields: Vec<CtorField>,
},
2024-01-31 13:11:57 -06:00
Index {
base: Box<Exp>,
index: Box<Exp>,
},
Field {
base: Box<Exp>,
field: String,
},
Unary {
op: Op,
exp: Box<Exp>,
},
Binary {
op: Op,
left: Box<Exp>,
right: Box<Exp>,
},
If {
cond: Box<Exp>,
then: Box<Exp>,
else_: Option<Box<Exp>>,
},
2024-02-01 09:11:10 -06:00
Let {
name: String,
ty: Option<Type>,
value: Box<Exp>,
},
For {
init: Option<Box<Exp>>,
cond: Option<Box<Exp>>,
step: Option<Box<Exp>>,
block: Box<Exp>,
},
2024-01-31 13:11:57 -06:00
Block(Vec<Exp>),
2024-02-01 09:11:10 -06:00
Return(Option<Box<Exp>>),
2024-01-31 13:11:57 -06:00
Break,
Continue,
}
2024-02-01 09:11:10 -06:00
/// A literal value appearing in source.
#[derive(Clone, Debug)]
pub enum Literal {
    /// An unsigned integer literal.
    Int(u64),
    /// `true` or `false`.
    Bool(bool),
}
/// A lexed token together with its location and source text.
#[derive(Debug, PartialEq, Clone)]
pub struct Token {
// Kind reported by the lexer.
pub kind: TokenKind,
// Span reported by the lexer for this token.
pub span: std::ops::Range<usize>,
// Owned copy of the lexer slice for this token.
pub value: String,
}
/// Parser state: the underlying lexer plus one token of lookahead.
struct Parser<'a> {
// The token that the next call to `next` will return; `None` once the lexer is exhausted.
next_token: Option<Token>,
// Token source.
lexer: logos::Lexer<'a, TokenKind>,
}
impl<'a> Parser<'a> {
/// Creates a parser over `input` and immediately lexes the first token so
/// that the lookahead slot is filled before any parsing starts.
pub fn new(input: &'a str) -> Self {
    let mut lexer = TokenKind::lexer(input);
    let lookahead = Self::next_token(&mut lexer);
    Self {
        next_token: lookahead,
        lexer,
    }
}
/// Returns the current lookahead token and refills the lookahead slot from
/// the lexer. Returns `None` once the input is exhausted.
pub fn next(&mut self) -> Option<Token> {
    let upcoming = Self::next_token(&mut self.lexer);
    // Swap the freshly lexed token in and hand the previous lookahead out.
    std::mem::replace(&mut self.next_token, upcoming)
}
/// Pulls one token from `lexer`, capturing its kind, span and source slice.
///
/// Returns `None` when the lexer is exhausted. On a lexer error the
/// position is reported on standard error and the process exits with
/// status 1.
pub fn next_token(lexer: &mut Lexer<TokenKind>) -> Option<Token> {
    match lexer.next()? {
        Ok(kind) => Some(Token {
            kind,
            span: lexer.span(),
            value: lexer.slice().to_owned(),
        }),
        Err(e) => {
            let (line, col) = Self::pos_to_line_col_low(lexer.source(), lexer.span().start);
            // Diagnostics belong on stderr so they do not mix with regular output.
            eprintln!("Lexer error: {}:{}: {:?}", line, col, e);
            std::process::exit(1);
        }
    }
}
/// Translates a byte offset into the lexer's source into a `(line, column)` pair.
pub fn pos_to_line_col(&self, pos: usize) -> (usize, usize) {
    let source = self.lexer.source();
    Self::pos_to_line_col_low(source, pos)
}
/// Translates the byte offset `pos` within `source` into `(line, column)`.
///
/// Lines are 1-based; the column is the 0-based byte offset from the start
/// of the line. An offset past the end of `source` is clamped to its length.
pub fn pos_to_line_col_low(source: &str, pos: usize) -> (usize, usize) {
    // Work on bytes so an offset that is not a char boundary cannot panic.
    let bytes = source.as_bytes();
    let prefix = &bytes[..pos.min(bytes.len())];
    // Counting newlines (rather than `lines()`) keeps a position that sits
    // right after a '\n' on the new line instead of the previous one.
    let line = 1 + prefix.iter().filter(|&&b| b == b'\n').count();
    let line_start = prefix
        .iter()
        .rposition(|&b| b == b'\n')
        .map_or(0, |newline| newline + 1);
    (line, prefix.len() - line_start)
}
/// Consumes the next token and returns it if it is of the given `kind`.
///
/// # Panics
/// Panics with the token's line and column when the kind differs, and with
/// "Unexpected EOF" (via `expect_any`) when no token is left.
pub fn expect(&mut self, kind: TokenKind) -> Token {
    let token = self.expect_any();
    if token.kind != kind {
        let (line, col) = self.pos_to_line_col(token.span.start);
        panic!(
            "Expected {:?} at {}:{}, found {:?}",
            kind, line, col, token.kind
        )
    }
    token
}
2024-02-01 09:11:10 -06:00
/// Consumes and returns the next token, whatever its kind.
///
/// # Panics
/// Panics with "Unexpected EOF" when the input is exhausted.
pub fn expect_any(&mut self) -> Token {
    match self.next() {
        Some(token) => token,
        None => panic!("Unexpected EOF"),
    }
}
2024-01-31 13:11:57 -06:00
/// Borrows the lookahead token without consuming it; `None` at end of input.
pub fn peek(&self) -> Option<&Token> {
    Option::as_ref(&self.next_token)
}
/// Consumes the lookahead token only if it is of the given `kind`.
/// Returns whether a token was consumed.
pub fn try_advance(&mut self, kind: TokenKind) -> bool {
    let matched = matches!(self.peek(), Some(tok) if tok.kind == kind);
    if matched {
        self.next();
    }
    matched
}
pub fn parse(&mut self) -> Vec<Item> {
iter::from_fn(|| self.parse_item()).collect()
}
fn parse_item(&mut self) -> Option<Item> {
let token = self.next()?;
match token.kind {
TokenKind::Struct => Some(self.parse_struct()),
TokenKind::Fn => Some(self.parse_function()),
2024-02-03 14:43:30 -06:00
TokenKind::Use => Some(Item::Import(self.expect(TokenKind::String).value)),
2024-01-31 13:11:57 -06:00
tkn => {
let (line, col) = self.pos_to_line_col(token.span.start);
panic!("Unexpected {:?} at {}:{}", tkn, line, col)
}
}
}
fn parse_struct(&mut self) -> Item {
let name = self.expect(TokenKind::Ident).value;
self.expect(TokenKind::LBrace);
2024-02-01 09:11:10 -06:00
let fields = self.sequence(TokenKind::Comma, TokenKind::RBrace, Self::parse_field);
2024-01-31 13:11:57 -06:00
Item::Struct(Struct { name, fields })
}
2024-02-01 09:11:10 -06:00
fn parse_field(&mut self) -> Field {
2024-01-31 13:11:57 -06:00
let name = self.expect(TokenKind::Ident).value;
self.expect(TokenKind::Colon);
let ty = self.type_();
2024-02-01 09:11:10 -06:00
Field { name, ty }
2024-01-31 13:11:57 -06:00
}
fn type_(&mut self) -> Type {
let token = self.next().unwrap();
match token.kind {
TokenKind::Ty(ty) => Type::Builtin(ty),
TokenKind::Ident => Type::Struct(token.value),
2024-02-01 09:11:10 -06:00
TokenKind::Op(Op::Band) => {
let ty = self.type_();
Type::Pinter(Box::new(ty))
}
2024-01-31 13:11:57 -06:00
tkn => {
let (line, col) = self.pos_to_line_col(token.span.start);
panic!("Unexpected {:?} at {}:{}", tkn, line, col)
}
}
}
fn parse_function(&mut self) -> Item {
let name = self.expect(TokenKind::Ident).value;
self.expect(TokenKind::LParen);
2024-02-01 09:11:10 -06:00
let args = self.sequence(TokenKind::Comma, TokenKind::RParen, Self::parse_arg);
2024-01-31 13:11:57 -06:00
self.expect(TokenKind::Colon);
let ret = self.type_();
Item::Function(Function {
name,
args,
ret,
2024-02-01 09:11:10 -06:00
body: self.parse_block(),
2024-01-31 13:11:57 -06:00
})
}
2024-02-01 09:11:10 -06:00
fn parse_arg(&mut self) -> Arg {
2024-01-31 13:11:57 -06:00
let name = self.expect(TokenKind::Ident).value;
self.expect(TokenKind::Colon);
let ty = self.type_();
self.try_advance(TokenKind::Comma);
2024-02-01 09:11:10 -06:00
Arg { name, ty }
2024-01-31 13:11:57 -06:00
}
/// Parses a full expression, admitting binary operators of every precedence
/// (the limit passed down is the largest `u8` value).
fn parse_expr(&mut self) -> Exp {
    self.parse_binary_expr(u8::MAX)
}
fn parse_binary_expr(&mut self, min_prec: u8) -> Exp {
let mut lhs = self.parse_unit_expr();
while let Some(TokenKind::Op(op)) = self.peek().map(|t| t.kind) {
let prec = op.prec();
2024-02-01 09:11:10 -06:00
if prec > min_prec {
2024-01-31 13:11:57 -06:00
break;
}
self.next();
let rhs = self.parse_binary_expr(prec);
lhs = Exp::Binary {
op,
left: Box::new(lhs),
right: Box::new(rhs),
};
}
lhs
}
fn parse_unit_expr(&mut self) -> Exp {
let token = self.next().unwrap();
let mut expr = match token.kind {
TokenKind::True => Exp::Literal(Literal::Bool(true)),
TokenKind::False => Exp::Literal(Literal::Bool(false)),
TokenKind::Ident => Exp::Variable(token.value),
TokenKind::LBrace => {
2024-02-01 09:11:10 -06:00
Exp::Block(self.sequence(TokenKind::Semicolon, TokenKind::LBrace, Self::parse_expr))
2024-01-31 13:11:57 -06:00
}
TokenKind::LParen => {
let expr = self.parse_expr();
self.expect(TokenKind::RParen);
expr
}
TokenKind::Number => {
let value = token.value.parse().unwrap();
Exp::Literal(Literal::Int(value))
}
2024-02-01 09:11:10 -06:00
TokenKind::Let => {
let name = self.expect(TokenKind::Ident).value;
let ty = self.try_advance(TokenKind::Colon).then(|| self.type_());
self.expect(TokenKind::Op(Op::Assign));
let value = self.parse_expr();
Exp::Let {
name,
ty,
value: Box::new(value),
}
}
TokenKind::If => {
let cond = self.parse_expr();
let then = Exp::Block(self.parse_block());
let else_ = self
.try_advance(TokenKind::Else)
.then(|| {
if self.peek().is_some_and(|t| t.kind == TokenKind::If) {
self.parse_expr()
} else {
Exp::Block(self.parse_block())
}
})
.map(Box::new);
Exp::If {
cond: Box::new(cond),
then: Box::new(then),
else_,
}
}
TokenKind::For => {
let params =
self.sequence(TokenKind::Semicolon, TokenKind::LBrace, Self::parse_expr);
let mut exprs = Vec::new();
while !self.try_advance(TokenKind::RBrace) {
exprs.push(self.parse_expr());
self.try_advance(TokenKind::Semicolon);
}
let block = Exp::Block(exprs);
let len = params.len();
let mut exprs = params.into_iter();
let [init, consd, step] = array::from_fn(|_| exprs.next());
match len {
0 => Exp::For {
init: None,
cond: None,
step: None,
block: Box::new(block),
},
1 => Exp::For {
init: None,
cond: init.map(Box::new),
step: None,
block: Box::new(block),
},
3 => Exp::For {
init: init.map(Box::new),
cond: consd.map(Box::new),
step: step.map(Box::new),
block: Box::new(block),
},
_ => {
let (line, col) = self.pos_to_line_col(token.span.start);
panic!("Invalid loop syntax at {}:{}, loop accepts 1 (while), 0 (loop), or 3 (for) statements separated by semicolon", line, col)
}
}
}
TokenKind::Return => {
let value = self
.peek()
.is_some_and(|t| {
!matches!(
t.kind,
TokenKind::Semicolon
| TokenKind::RBrace
| TokenKind::RParen
| TokenKind::Comma
)
})
.then(|| Box::new(self.parse_expr()));
Exp::Return(value)
}
TokenKind::Op(op) => Exp::Unary {
op,
exp: Box::new(self.parse_expr()),
},
TokenKind::Dot => {
let token = self.expect_any();
match token.kind {
TokenKind::LBrace => {
let fields = self.sequence(
TokenKind::Comma,
TokenKind::RBrace,
Self::parse_ctor_field,
);
Exp::Ctor { name: None, fields }
}
tkn => {
let (line, col) = self.pos_to_line_col(token.span.start);
panic!("Unexpected {:?} at {}:{}", tkn, line, col)
}
}
}
2024-02-03 14:43:30 -06:00
TokenKind::Ty(_)
| TokenKind::String
| TokenKind::Use
| TokenKind::Break
| TokenKind::Continue
| TokenKind::Struct
| TokenKind::RBrace
| TokenKind::RParen
| TokenKind::LBracket
| TokenKind::RBracket
| TokenKind::Colon
| TokenKind::Semicolon
| TokenKind::Comma
| TokenKind::Fn
| TokenKind::Else => {
let (line, col) = self.pos_to_line_col(token.span.start);
panic!("Unexpected {:?} at {}:{}", token.kind, line, col)
}
2024-01-31 13:11:57 -06:00
};
loop {
match self.peek().map(|t| t.kind) {
Some(TokenKind::LParen) => {
self.next();
expr = Exp::Call {
name: Box::new(expr),
2024-02-01 09:11:10 -06:00
args: self.sequence(TokenKind::Comma, TokenKind::RParen, Self::parse_expr),
2024-01-31 13:11:57 -06:00
};
}
Some(TokenKind::LBracket) => {
self.next();
let index = self.parse_expr();
self.expect(TokenKind::RBracket);
expr = Exp::Index {
base: Box::new(expr),
index: Box::new(index),
};
}
Some(TokenKind::Dot) => {
self.next();
2024-02-01 09:11:10 -06:00
let token = self.expect_any();
match token.kind {
TokenKind::Ident => {
expr = Exp::Field {
base: Box::new(expr),
field: token.value,
};
}
TokenKind::LBrace => {
let fields = self.sequence(
TokenKind::Comma,
TokenKind::RBrace,
Self::parse_ctor_field,
);
expr = Exp::Ctor {
name: Some(Box::new(expr)),
fields,
};
}
tkn => {
let (line, col) = self.pos_to_line_col(token.span.start);
panic!("Unexpected {:?} at {}:{}", tkn, line, col)
}
}
2024-01-31 13:11:57 -06:00
}
_ => break expr,
}
}
}
2024-02-01 09:11:10 -06:00
pub fn parse_ctor_field(&mut self) -> CtorField {
let name = self.expect(TokenKind::Ident).value;
self.expect(TokenKind::Colon);
let value = self.parse_expr();
CtorField { name, value }
}
2024-01-31 13:11:57 -06:00
2024-02-01 09:11:10 -06:00
/// Parses `{ expr; expr; ... }` and returns the contained expressions.
/// The semicolon after each expression is optional.
pub fn parse_block(&mut self) -> Vec<Exp> {
    self.expect(TokenKind::LBrace);
    let mut body = Vec::new();
    loop {
        if self.try_advance(TokenKind::RBrace) {
            break body;
        }
        body.push(self.parse_expr());
        // Consume a trailing semicolon when there is one.
        self.try_advance(TokenKind::Semicolon);
    }
}
2024-01-31 13:11:57 -06:00
2024-02-01 09:11:10 -06:00
/// Parses elements with `parser`, separated by `sep`, until `term` is
/// consumed. The opening delimiter must already have been consumed.
///
/// An empty sequence and a trailing separator before `term` are both
/// accepted.
///
/// # Panics
/// Panics (via `expect`) when an element is followed by neither `term`
/// nor `sep`.
pub fn sequence<T>(
    &mut self,
    sep: TokenKind,
    term: TokenKind,
    mut parser: impl FnMut(&mut Self) -> T,
) -> Vec<T> {
    let mut items = Vec::new();
    loop {
        // Covers both the empty sequence and a trailing separator.
        if self.try_advance(term) {
            break;
        }
        items.push(parser(self));
        if self.try_advance(term) {
            break;
        }
        self.expect(sep);
    }
    items
}
}
/// Parses `input` into its list of top-level items.
pub fn parse(input: &str) -> Vec<Item> {
    let mut parser = Parser::new(input);
    parser.parse()
}
2024-02-01 09:11:10 -06:00
#[cfg(test)]
mod test {
    /// Smoke test: feeds a program through the parser and only checks that
    /// parsing finishes; the resulting items are not inspected.
    #[test]
    fn sanity() {
        let program = r#"
struct Foo {
x: i32,
y: i32,
}
fn main(): void {
let foo = Foo.{ x: 1, y: 2 };
if foo.x > 0 {
return foo.x;
} else {
return foo.y;
}
for i < 10 {
i = i + 1;
}
for let i = 0; i < 10; i = i + 1 {
i = i + 1;
}
i + 1 * 3 / 4 % 5 == 2 + 3 - 4 * 5 / 6 % 7;
fomething();
pahum(&foo);
lupa(*soo);
return foo.x + foo.y;
}
fn lupa(x: i32): i32 {
return x;
}
fn pahum(x: &Foo): void {
return;
}
"#;
        let _items = super::parse(program);
    }
}