From 028c58980c192c80213bb2cd7741b81866fdc52f Mon Sep 17 00:00:00 2001 From: Natapat Samutpong Date: Sat, 12 Feb 2022 14:29:33 +0700 Subject: [PATCH] some AST generation (let, ident, etc.) also no more formal commit message because im lazy --- example/hello_world.hyc | 5 +- src/front/mod.rs | 3 +- src/front/model.rs | 125 +++++++++++++++++++++++++++++++++++ src/front/parser.rs | 142 ++++++++++++++++++++++++++++++++++++++++ src/main.rs | 8 ++- 5 files changed, 276 insertions(+), 7 deletions(-) create mode 100644 src/front/parser.rs diff --git a/example/hello_world.hyc b/example/hello_world.hyc index 5e30c0c..98e246c 100644 --- a/example/hello_world.hyc +++ b/example/hello_world.hyc @@ -1,3 +1,2 @@ -let msg :: String = "Hello, World"; -func add2 :: (a: Int, b: Int) -> Int = a + b; -func main :: () = puts (msg); \ No newline at end of file +let foo :: String = "Hello, "; +let bar :: String = "World!"; \ No newline at end of file diff --git a/src/front/mod.rs b/src/front/mod.rs index 5bc78eb..95506e6 100644 --- a/src/front/mod.rs +++ b/src/front/mod.rs @@ -1,4 +1,5 @@ pub mod model; pub mod helper; -pub mod lex; \ No newline at end of file +pub mod lex; +pub mod parser; \ No newline at end of file diff --git a/src/front/model.rs b/src/front/model.rs index c1720e1..3103c23 100644 --- a/src/front/model.rs +++ b/src/front/model.rs @@ -1,3 +1,7 @@ +use std::iter::Enumerate; + +use nom::{InputTake, Needed, InputIter, InputLength}; + #[derive(Clone, Debug, PartialEq)] pub enum Token { Illegal, EndOfFile, @@ -26,4 +30,125 @@ impl<'a> Tokens<'a> { pub fn new(tokens: &'a [Token]) -> Self { Tokens { tokens, start: 0, end: tokens.len(), } } +} + +impl<'a> InputTake for Tokens<'a> { + #[inline] + fn take(&self, count: usize) -> Self { + Tokens { + tokens: &self.tokens[0..count], + start: 0, + end: count, + } + } + + #[inline] + fn take_split(&self, count: usize) -> (Self, Self) { + let (prefix, suffix) = self.tokens.split_at(count); + let first = Tokens { + tokens: prefix, + 
start: 0, + end: prefix.len(), + }; + let second = Tokens { + tokens: suffix, + start: 0, + end: suffix.len(), + }; + (second, first) + } +} + +impl<'a> InputLength for Tokens<'a> { + #[inline] + fn input_len(&self) -> usize { + self.tokens.len() + } +} + +impl<'a> InputIter for Tokens<'a> { + type Item = &'a Token; + type Iter = Enumerate<::std::slice::Iter<'a, Token>>; + type IterElem = ::std::slice::Iter<'a, Token>; + + #[inline] + fn iter_indices(&self) -> Enumerate<::std::slice::Iter<'a, Token>> { + self.tokens.iter().enumerate() + } + + #[inline] + fn iter_elements(&self) -> ::std::slice::Iter<'a, Token> { + self.tokens.iter() + } + + #[inline] + fn position
<P>
(&self, predicate: P) -> Option + where P: Fn(Self::Item) -> bool { + self.tokens.iter().position(predicate) + } + + #[inline] + fn slice_index(&self, count: usize) -> Result { + if self.tokens.len() >= count { Ok(count) } + else { Err(Needed::Unknown) } + } +} + +pub type Program = Vec; + +#[derive(Clone, Debug, PartialEq)] +pub enum Stmt { + Let(Ident, Ident, Expr), + Func(Ident, Vec, Vec), +} + +#[derive(Clone, Debug, PartialEq)] +pub enum Expr { + Ident(Ident), Literal(Literal), + Array(Vec), + Prefix(Prefix, Box), + Infix(Infix, Box, Box), + If { + cond: Box, + then: Program, + else_: Option, + }, + Func { + name: Ident, + args: Vec, + body: Program, + }, + Call { + func: Box, + args: Vec, + }, +} + +#[derive(Clone, Debug, PartialEq)] +pub enum Literal { + Int(i64), Bool(bool), String(String), +} + +#[derive(Clone, Debug, PartialEq)] +pub struct Ident(pub String); + +#[derive(Clone, Debug, PartialEq)] +pub enum Prefix { + Not, +} + +#[derive(Clone, Debug, PartialEq)] +pub enum Infix { + Plus, Minus, Mul, Div, + Eq, NEq, Lt, Gt, Lte, Gte, +} + +#[derive(Clone, Debug, PartialEq, PartialOrd)] +pub enum Precedence { + Lowest, + Equals, + LessGreater, + Sum, + Product, + Call, } \ No newline at end of file diff --git a/src/front/parser.rs b/src/front/parser.rs new file mode 100644 index 0000000..4fc1b72 --- /dev/null +++ b/src/front/parser.rs @@ -0,0 +1,142 @@ +use nom::{ + bytes::complete::take, + combinator::{verify, map}, + Err, + IResult, sequence::{terminated, tuple}, multi::many0, branch::alt, error::{Error, ErrorKind}, +}; + +use super::model::{Token, Tokens, Precedence, Infix, Program, Stmt, Expr, Ident, Literal}; + +macro_rules! 
tag_token ( + ($func_name:ident, $tag: expr) => ( + fn $func_name(tokens: Tokens) -> IResult { + verify(take(1usize), |t: &Tokens| t.tokens[0] == $tag)(tokens) + } + ) +); + +tag_token!(tag_let, Token::Let); +tag_token!(tag_assign, Token::Assign); +tag_token!(tag_typehint, Token::Typehint); +tag_token!(tag_semicolon, Token::Semicolon); +tag_token!(tag_end_of_file, Token::EndOfFile); + +fn infix_operator(token: &Token) -> (Precedence, Option) { + match *token { + Token::Eq => (Precedence::Equals, Some(Infix::Eq)), + Token::NEq => (Precedence::Equals, Some(Infix::NEq)), + Token::Lt => (Precedence::LessGreater, Some(Infix::Lt)), + Token::Gt => (Precedence::LessGreater, Some(Infix::Gt)), + Token::Lte => (Precedence::LessGreater, Some(Infix::Lte)), + Token::Gte => (Precedence::LessGreater, Some(Infix::Gte)), + Token::Plus => (Precedence::Sum, Some(Infix::Plus)), + Token::Minus => (Precedence::Sum, Some(Infix::Minus)), + Token::Mul => (Precedence::Product, Some(Infix::Mul)), + Token::Div => (Precedence::Product, Some(Infix::Div)), + Token::LParen => (Precedence::Call, None), + _ => (Precedence::Lowest, None), + } +} + +fn parse_literal(input: Tokens) -> IResult { + let (i1, t1) = take(1usize)(input)?; + if t1.tokens.is_empty() { Err(Err::Error(Error::new(input, ErrorKind::Tag))) } + else { + match t1.tokens[0].clone() { + Token::Int(i) => Ok((i1, Literal::Int(i))), + Token::String(s) => Ok((i1, Literal::String(s))), + Token::Bool(b) => Ok((i1, Literal::Bool(b))), + _ => Err(Err::Error(Error::new(input, ErrorKind::Tag))), + } + } +} + +fn parse_literal_expr(input: Tokens) -> IResult { + map(parse_literal, Expr::Literal)(input) +} + +fn parse_atom_expr(input: Tokens) -> IResult { + alt(( + parse_literal_expr, + parse_ident_expr, + ))(input) +} + +fn parse_ident(input: Tokens) -> IResult { + let (i1, t1) = take(1usize)(input)?; + if t1.tokens.is_empty() { Err(Err::Error(Error::new(input, ErrorKind::Tag))) } + else { + match t1.tokens[0].clone() { + Token::Identifier(name) 
=> Ok((i1, Ident(name))), + _ => Err(Err::Error(Error::new(input, ErrorKind::Tag))), + } + } +} + +fn parse_ident_expr(input: Tokens) -> IResult { + map(parse_ident, Expr::Ident)(input) +} + +fn parse_let(input: Tokens) -> IResult { + map( + tuple(( + tag_let, + parse_ident, + tag_typehint, + parse_ident, + tag_assign, + parse_expr_lowest, + tag_semicolon, + )), + |(_, ident, _, typehint, _, expr, _)| Stmt::Let(ident, typehint, expr), + )(input) +} + +fn parse_expr(input: Tokens, precedence: Precedence, left: Expr) -> IResult { + let (i1, t1) = take(1usize)(input)?; + + if t1.tokens.is_empty() { Ok((i1, left)) } + else { + let p = infix_operator(&t1.tokens[0]); + match p { + (Precedence::Call, _) if precedence < Precedence::Call => { + // let (i2, left2) = parse_call_expr(input, left)?; + // parse_expr(i2, precedence, left2) + todo!() + }, + (ref peek, _) if precedence < *peek => { + // let (i2, left2) = parse_infix_expr(input, left)?; + // parse_expr(i2, precedence, left2) + todo!() + }, + _ => Ok((input, left)), + } + } +} + +fn parse_expr_with(input: Tokens, precedence: Precedence) -> IResult { + let (i1, left) = parse_atom_expr(input)?; + parse_expr(i1, precedence, left) +} + +fn parse_expr_lowest(input: Tokens) -> IResult { + parse_expr_with(input, Precedence::Lowest) +} + +fn parse_stmt(input: Tokens) -> IResult { + alt(( + parse_let, + ))(input) +} + +fn parse_program(input: Tokens) -> IResult { + terminated(many0(parse_stmt), tag_end_of_file)(input) +} + +pub struct Parser; + +impl Parser { + pub fn parse(tokens: Tokens) -> IResult { + parse_program(tokens) + } +} \ No newline at end of file diff --git a/src/main.rs b/src/main.rs index 2d15824..b88030b 100644 --- a/src/main.rs +++ b/src/main.rs @@ -7,15 +7,17 @@ pub mod args; use args::{Args, Options}; pub mod front; -use front::lex::Lexer; +use front::{lex::Lexer, parser::Parser, model::Tokens}; fn main() { let args = Args::parse(); match args.options { Options::Compile { input: src, ast: _print_ast } => { 
let bytes: Vec<u8> = fs::read(src).unwrap(); - let tokens = Lexer::lex_tokens(&bytes); - println!("{:?}", tokens); + let (_errs_, tokens) = Lexer::lex_tokens(&bytes).unwrap(); + let tokens = Tokens::new(&tokens); + let (_errs_, ast) = Parser::parse(tokens).unwrap(); + println!("{:#?}", ast); }, } } \ No newline at end of file