From 09aacff161e0927acb0f72526f9bb3ee4cb9baae Mon Sep 17 00:00:00 2001 From: mlokr Date: Wed, 31 Jan 2024 20:11:57 +0100 Subject: [PATCH] establishing some syntax --- Cargo.lock | 55 +++++++ hbbytecode/Cargo.toml | 1 + hbbytecode/src/lib.rs | 59 ++++++- hblang/Cargo.toml | 5 + hblang/src/lexer.rs | 149 ++++++++++++++++++ hblang/src/lib.rs | 16 +- hblang/src/parser.rs | 359 ++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 629 insertions(+), 15 deletions(-) create mode 100644 hblang/src/lexer.rs create mode 100644 hblang/src/parser.rs diff --git a/Cargo.lock b/Cargo.lock index 8cc8505..c1181a4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -83,6 +83,12 @@ dependencies = [ "rustc-demangle", ] +[[package]] +name = "beef" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a8241f3ebb85c056b509d4327ad0358fbbba6ffb340bf388f26350aeda225b1" + [[package]] name = "bitflags" version = "2.4.1" @@ -167,6 +173,12 @@ dependencies = [ "once_cell", ] +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + [[package]] name = "getrandom" version = "0.2.10" @@ -197,12 +209,17 @@ dependencies = [ name = "hbbytecode" version = "0.1.0" dependencies = [ + "paste", "with_builtin_macros", ] [[package]] name = "hblang" version = "0.1.0" +dependencies = [ + "hbvm", + "logos", +] [[package]] name = "hbvm" @@ -254,6 +271,38 @@ version = "0.2.149" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a08173bc88b7955d1b3145aa561539096c421ac8debde8cbc3612ec635fee29b" +[[package]] +name = "logos" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c000ca4d908ff18ac99b93a062cb8958d331c3220719c52e77cb19cc6ac5d2c1" +dependencies = [ + "logos-derive", +] + +[[package]] +name = "logos-codegen" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc487311295e0002e452025d6b580b77bb17286de87b57138f3b5db711cded68" +dependencies = [ + "beef", + "fnv", + "proc-macro2", + "quote", + "regex-syntax", + "syn 2.0.38", +] + +[[package]] +name = "logos-derive" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbfc0d229f1f42d790440136d941afd806bc9e949e2bcb8faa813b0f00d1267e" +dependencies = [ + "logos-codegen", +] + [[package]] name = "memchr" version = "2.6.4" @@ -349,6 +398,12 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "regex-syntax" +version = "0.6.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" + [[package]] name = "rhai" version = "1.16.2" diff --git a/hbbytecode/Cargo.toml b/hbbytecode/Cargo.toml index ecfb1db..4c19884 100644 --- a/hbbytecode/Cargo.toml +++ b/hbbytecode/Cargo.toml @@ -4,4 +4,5 @@ version = "0.1.0" edition = "2018" [dependencies] +paste = "1.0.14" with_builtin_macros = "0.0.3" diff --git a/hbbytecode/src/lib.rs b/hbbytecode/src/lib.rs index 806e3f9..6a91fbc 100644 --- a/hbbytecode/src/lib.rs +++ b/hbbytecode/src/lib.rs @@ -23,6 +23,21 @@ macro_rules! define_items { #[repr(packed)] pub struct $name($(pub $item),*); unsafe impl BytecodeItem for $name {} + + impl Encodable for $name { + fn encode(self, buffer: &mut impl Buffer) { + let array = unsafe { + core::mem::transmute::()]>(self) + }; + for byte in array { + unsafe { buffer.write(byte) }; + } + } + + fn encode_len(self) -> usize { + core::mem::size_of::() + } + } )* }; } @@ -85,13 +100,55 @@ unsafe impl BytecodeItem for u8 {} } } +pub trait Buffer { + fn reserve(&mut self, bytes: usize); + /// # Safety + /// Reserve needs to be called before this function, and only reserved amount can be written. + unsafe fn write(&mut self, byte: u8); +} + +pub trait Encodable { + fn encode(self, buffer: &mut impl Buffer); + fn encode_len(self) -> usize; +} + macro_rules! gen_opcodes { - ($($opcode:expr, $mnemonic:ident, $_ty:ident, $doc:literal;)*) => { + ($($opcode:expr, $mnemonic:ident, $ty:ident, $doc:literal;)*) => { pub mod opcode { $( #[doc = $doc] pub const $mnemonic: u8 = $opcode; )* + + paste::paste! { + #[derive(Clone, Copy, Debug)] + pub enum Op { $( + [< $mnemonic:lower:camel >](super::[]), + )* } + + impl crate::Encodable for Op { + fn encode(self, buffer: &mut impl crate::Buffer) { + match self { + $( + Self::[< $mnemonic:lower:camel >](op) => { + unsafe { buffer.write($opcode) }; + op.encode(buffer); + } + )* + } + } + + fn encode_len(self) -> usize { + match self { + $( + Self::[< $mnemonic:lower:camel >](op) => { + 1 + crate::Encodable::encode_len(op) + } + )* + } + } + } + } } }; } diff --git a/hblang/Cargo.toml b/hblang/Cargo.toml index ea320ae..42880ab 100644 --- a/hblang/Cargo.toml +++ b/hblang/Cargo.toml @@ -6,3 +6,8 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +#hbbytecode = { version = "0.1.0", path = "../hbbytecode" } +logos = "0.13.0" + +[dev-dependencies] +hbvm = { path = "../hbvm", features = ["nightly"] } diff --git a/hblang/src/lexer.rs b/hblang/src/lexer.rs new file mode 100644 index 0000000..6af6ce7 --- /dev/null +++ b/hblang/src/lexer.rs @@ -0,0 +1,149 @@ +use logos::Logos; + +macro_rules! gen_token { + ($name:ident { + keywords: { + $($keyword:ident = $lit:literal,)* + }, + operators: $op_name:ident { + $($prec:literal: {$( + $op:ident = $op_lit:literal, + )*},)* + }, + types: $ty_type:ident { + $($ty:ident = $ty_lit:literal,)* + }, + regexes: { + $($regex:ident = $regex_lit:literal,)* + }, + }) => { + #[derive(Debug, Clone, PartialEq, Eq, Copy, Logos)] + #[logos(skip "[ \t\n]+")] + pub enum $name { + $(#[token($lit)] $keyword,)* + $($(#[token($op_lit, |_| $op_name::$op)])*)* + Op($op_name), + $(#[token($ty_lit, |_| $ty_type::$ty)])* + Ty($ty_type), + $(#[regex($regex_lit)] $regex,)* + } + + #[derive(Debug, Clone, PartialEq, Eq, Copy)] + pub enum $op_name { + $($($op,)*)* + } + + #[derive(Debug, Clone, PartialEq, Eq, Copy)] + pub enum $ty_type { + $($ty,)* + } + + impl $op_name { + pub fn prec(&self) -> u8 { + match self { + $($($op_name::$op => $prec,)*)* + } + } + } + }; +} + +gen_token! { + TokenKind { + keywords: { + Fn = "fn", + Let = "let", + If = "if", + Else = "else", + For = "for", + Return = "return", + Break = "break", + Continue = "continue", + Struct = "struct", + + True = "true", + False = "false", + + LBrace = "{", + RBrace = "}", + LParen = "(", + RParen = ")", + LBracket = "[", + RBracket = "]", + + Colon = ":", + Semicolon = ";", + Comma = ",", + Dot = ".", + }, + operators: Op { + 14: { + Assign = "=", + AddAssign = "+=", + SubAssign = "-=", + MulAssign = "*=", + DivAssign = "/=", + ModAssign = "%=", + AndAssign = "&=", + OrAssign = "|=", + XorAssign = "^=", + ShlAssign = "<<=", + ShrAssign = ">>=", + }, + 12: { + Or = "||", + }, + 11: { + And = "&&", + }, + 10: { + Bor = "|", + }, + 9: { + Xor = "^", + }, + 8: { + Band = "&", + }, + 7: { + Eq = "==", + Neq = "!=", + }, + 6: { + Lt = "<", + Gt = ">", + Le = "<=", + Ge = ">=", + }, + 5: { + Shl = "<<", + Shr = ">>", + }, + 4: { + Add = "+", + Sub = "-", + }, + 3: { + Mul = "*", + Div = "/", + Mod = "%", + }, + }, + types: Ty { + U8 = "u8", + U16 = "u16", + U32 = "u32", + U64 = "u64", + I8 = "i8", + I16 = "i16", + I32 = "i32", + I64 = "i64", + Bool = "bool", + Void = "void", + }, + regexes: { + Ident = "[a-zA-Z_][a-zA-Z0-9_]*", + Number = "[0-9]+", + }, + } +} diff --git a/hblang/src/lib.rs b/hblang/src/lib.rs index 7d12d9a..5333bc7 100644 --- a/hblang/src/lib.rs +++ b/hblang/src/lib.rs @@ -1,14 +1,2 @@ -pub fn add(left: usize, right: usize) -> usize { - left + right -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn it_works() { - let result = add(2, 2); - assert_eq!(result, 4); - } -} +mod lexer; +mod parser; diff --git a/hblang/src/parser.rs b/hblang/src/parser.rs new file mode 100644 index 0000000..4629bc0 --- /dev/null +++ b/hblang/src/parser.rs @@ -0,0 +1,359 @@ +use {core::panic, std::iter}; + +use logos::{Lexer, Logos}; + +use crate::lexer::{Op, TokenKind, Ty}; + +pub enum Item { + Struct(Struct), + Function(Function), +} + +pub enum Type { + Builtin(Ty), + Struct(String), +} + +pub struct Struct { + pub name: String, + pub fields: Vec, +} + +pub struct Field { + pub name: String, + pub ty: Type, +} + +pub struct Function { + pub name: String, + pub args: Vec, + pub ret: Type, + pub body: Vec, +} + +pub struct Arg { + pub name: String, + pub ty: Type, +} + +pub enum Exp { + Literal(Literal), + Variable(String), + Call { + name: Box, + args: Vec, + }, + Index { + base: Box, + index: Box, + }, + Field { + base: Box, + field: String, + }, + Unary { + op: Op, + exp: Box, + }, + Binary { + op: Op, + left: Box, + right: Box, + }, + If { + cond: Box, + then: Box, + else_: Option>, + }, + Block(Vec), + Return(Box), + Break, + Continue, +} + +pub enum Literal { + Int(i64), + Bool(bool), +} + +#[derive(Debug, PartialEq, Clone)] +pub struct Token { + pub kind: TokenKind, + pub span: std::ops::Range, + pub value: String, +} + +struct Parser<'a> { + next_token: Option, + lexer: logos::Lexer<'a, TokenKind>, +} + +impl<'a> Parser<'a> { + pub fn new(input: &'a str) -> Self { + let mut lexer = TokenKind::lexer(input); + let next_token = Self::next_token(&mut lexer); + Self { next_token, lexer } + } + + pub fn next(&mut self) -> Option { + let token = self.next_token.clone(); + self.next_token = Self::next_token(&mut self.lexer); + token + } + + pub fn next_token(lexer: &mut Lexer) -> Option { + lexer.next().map(|r| { + r.map(|e| Token { + kind: e, + span: lexer.span(), + value: lexer.slice().to_owned(), + }) + .unwrap_or_else(|e| { + let (line, col) = Self::pos_to_line_col_low(lexer.source(), lexer.span().start); + panic!("Lexer error: {}:{}", line, col,) + }) + }) + } + + pub fn pos_to_line_col(&self, pos: usize) -> (usize, usize) { + Self::pos_to_line_col_low(self.lexer.source(), pos) + } + + pub fn pos_to_line_col_low(source: &str, pos: usize) -> (usize, usize) { + let line = source[..pos].lines().count(); + let col = source[..pos].lines().last().map(|l| l.len()).unwrap_or(0); + (line, col) + } + + pub fn expect(&mut self, kind: TokenKind) -> Token { + let token = self.next().unwrap_or_else(|| panic!("Unexpected EOF")); + if token.kind == kind { + token + } else { + let (line, col) = self.pos_to_line_col(token.span.start); + panic!( + "Expected {:?} at {}:{}, found {:?}", + kind, line, col, token.kind + ) + } + } + + pub fn peek(&self) -> Option<&Token> { + self.next_token.as_ref() + } + + pub fn try_advance(&mut self, kind: TokenKind) -> bool { + if self.peek().is_some_and(|t| t.kind == kind) { + self.next(); + true + } else { + false + } + } + + pub fn parse(&mut self) -> Vec { + iter::from_fn(|| self.parse_item()).collect() + } + + fn parse_item(&mut self) -> Option { + let token = self.next()?; + match token.kind { + TokenKind::Struct => Some(self.parse_struct()), + TokenKind::Fn => Some(self.parse_function()), + tkn => { + let (line, col) = self.pos_to_line_col(token.span.start); + panic!("Unexpected {:?} at {}:{}", tkn, line, col) + } + } + } + + fn parse_struct(&mut self) -> Item { + let name = self.expect(TokenKind::Ident).value; + self.expect(TokenKind::LBrace); + let fields = iter::from_fn(|| self.parse_field()).collect(); + self.expect(TokenKind::RBrace); + Item::Struct(Struct { name, fields }) + } + + fn parse_field(&mut self) -> Option { + if self.peek()?.kind == TokenKind::RBrace { + return None; + } + + let name = self.expect(TokenKind::Ident).value; + self.expect(TokenKind::Colon); + let ty = self.type_(); + self.try_advance(TokenKind::Comma); + + Some(Field { name, ty }) + } + + fn type_(&mut self) -> Type { + let token = self.next().unwrap(); + match token.kind { + TokenKind::Ty(ty) => Type::Builtin(ty), + TokenKind::Ident => Type::Struct(token.value), + tkn => { + let (line, col) = self.pos_to_line_col(token.span.start); + panic!("Unexpected {:?} at {}:{}", tkn, line, col) + } + } + } + + fn parse_function(&mut self) -> Item { + let name = self.expect(TokenKind::Ident).value; + self.expect(TokenKind::LParen); + let args = iter::from_fn(|| self.parse_arg()).collect(); + self.expect(TokenKind::RParen); + self.expect(TokenKind::Colon); + let ret = self.type_(); + self.expect(TokenKind::LBrace); + let body = iter::from_fn(|| self.parse_stmt()).collect(); + self.expect(TokenKind::RBrace); + Item::Function(Function { + name, + args, + ret, + body, + }) + } + + fn parse_arg(&mut self) -> Option { + if self.peek()?.kind == TokenKind::RParen { + return None; + } + + let name = self.expect(TokenKind::Ident).value; + self.expect(TokenKind::Colon); + let ty = self.type_(); + self.try_advance(TokenKind::Comma); + + Some(Arg { name, ty }) + } + + fn parse_stmt(&mut self) -> Option { + if self.peek()?.kind == TokenKind::RBrace { + return None; + } + + let expr = self.parse_expr(); + self.expect(TokenKind::Semicolon); + + Some(expr) + } + + fn parse_expr(&mut self) -> Exp { + self.parse_binary_expr(255) + } + + fn parse_binary_expr(&mut self, min_prec: u8) -> Exp { + let mut lhs = self.parse_unit_expr(); + + while let Some(TokenKind::Op(op)) = self.peek().map(|t| t.kind) { + let prec = op.prec(); + if prec <= min_prec { + break; + } + + self.next(); + let rhs = self.parse_binary_expr(prec); + + lhs = Exp::Binary { + op, + left: Box::new(lhs), + right: Box::new(rhs), + }; + } + + lhs + } + + fn parse_unit_expr(&mut self) -> Exp { + let token = self.next().unwrap(); + let mut expr = match token.kind { + TokenKind::True => Exp::Literal(Literal::Bool(true)), + TokenKind::False => Exp::Literal(Literal::Bool(false)), + TokenKind::Ident => Exp::Variable(token.value), + TokenKind::LBrace => { + let body = iter::from_fn(|| self.parse_stmt()).collect(); + self.expect(TokenKind::RBrace); + Exp::Block(body) + } + TokenKind::LParen => { + let expr = self.parse_expr(); + self.expect(TokenKind::RParen); + expr + } + TokenKind::Number => { + let value = token.value.parse().unwrap(); + Exp::Literal(Literal::Int(value)) + } + TokenKind::Fn => todo!(), + TokenKind::Let => todo!(), + TokenKind::If => todo!(), + TokenKind::Else => todo!(), + TokenKind::For => todo!(), + TokenKind::Return => todo!(), + TokenKind::Break => todo!(), + TokenKind::Continue => todo!(), + TokenKind::Struct => todo!(), + TokenKind::RBrace => todo!(), + TokenKind::RParen => todo!(), + TokenKind::LBracket => todo!(), + TokenKind::RBracket => todo!(), + TokenKind::Colon => todo!(), + TokenKind::Semicolon => todo!(), + TokenKind::Comma => todo!(), + TokenKind::Op(_) => todo!(), + TokenKind::Ty(_) => todo!(), + TokenKind::Dot => todo!(), + }; + + loop { + match self.peek().map(|t| t.kind) { + Some(TokenKind::LParen) => { + self.next(); + let args = iter::from_fn(|| self.parse_call_arg()).collect(); + self.expect(TokenKind::RParen); + expr = Exp::Call { + name: Box::new(expr), + args, + }; + } + Some(TokenKind::LBracket) => { + self.next(); + let index = self.parse_expr(); + self.expect(TokenKind::RBracket); + expr = Exp::Index { + base: Box::new(expr), + index: Box::new(index), + }; + } + Some(TokenKind::Dot) => { + self.next(); + let field = self.expect(TokenKind::Ident).value; + expr = Exp::Field { + base: Box::new(expr), + field, + }; + } + _ => break expr, + } + } + } + + pub fn parse_call_arg(&mut self) -> Option { + if self.peek()?.kind == TokenKind::RParen { + return None; + } + + let expr = self.parse_expr(); + self.try_advance(TokenKind::Comma); + + Some(expr) + } +} + +pub fn parse(input: &str) -> Vec { + Parser::new(input).parse() +}