establishing some syntax

mlokr 2024-01-31 20:11:57 +01:00
parent 433f2db4d1
commit 09aacff161
7 changed files with 629 additions and 15 deletions

Cargo.lock generated

@@ -83,6 +83,12 @@ dependencies = [
"rustc-demangle",
]
[[package]]
name = "beef"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3a8241f3ebb85c056b509d4327ad0358fbbba6ffb340bf388f26350aeda225b1"
[[package]]
name = "bitflags"
version = "2.4.1"
@@ -167,6 +173,12 @@ dependencies = [
"once_cell",
]
[[package]]
name = "fnv"
version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
[[package]]
name = "getrandom"
version = "0.2.10"
@@ -197,12 +209,17 @@ dependencies = [
name = "hbbytecode"
version = "0.1.0"
dependencies = [
"paste",
"with_builtin_macros",
]
[[package]]
name = "hblang"
version = "0.1.0"
dependencies = [
"hbvm",
"logos",
]
[[package]]
name = "hbvm"
@@ -254,6 +271,38 @@ version = "0.2.149"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a08173bc88b7955d1b3145aa561539096c421ac8debde8cbc3612ec635fee29b"
[[package]]
name = "logos"
version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c000ca4d908ff18ac99b93a062cb8958d331c3220719c52e77cb19cc6ac5d2c1"
dependencies = [
"logos-derive",
]
[[package]]
name = "logos-codegen"
version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc487311295e0002e452025d6b580b77bb17286de87b57138f3b5db711cded68"
dependencies = [
"beef",
"fnv",
"proc-macro2",
"quote",
"regex-syntax",
"syn 2.0.38",
]
[[package]]
name = "logos-derive"
version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dbfc0d229f1f42d790440136d941afd806bc9e949e2bcb8faa813b0f00d1267e"
dependencies = [
"logos-codegen",
]
[[package]]
name = "memchr"
version = "2.6.4"
@@ -349,6 +398,12 @@ dependencies = [
"proc-macro2",
]
[[package]]
name = "regex-syntax"
version = "0.6.29"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1"
[[package]]
name = "rhai"
version = "1.16.2"

hbbytecode/Cargo.toml

@@ -4,4 +4,5 @@ version = "0.1.0"
edition = "2018"
[dependencies]
paste = "1.0.14"
with_builtin_macros = "0.0.3"

hbbytecode/src/lib.rs

@@ -23,6 +23,21 @@ macro_rules! define_items {
#[repr(packed)]
pub struct $name($(pub $item),*);
unsafe impl BytecodeItem for $name {}
impl Encodable for $name {
fn encode(self, buffer: &mut impl Buffer) {
let array = unsafe {
core::mem::transmute::<Self, [u8; core::mem::size_of::<Self>()]>(self)
};
for byte in array {
unsafe { buffer.write(byte) };
}
}
fn encode_len(self) -> usize {
core::mem::size_of::<Self>()
}
}
)*
};
}
@@ -85,13 +100,55 @@ unsafe impl BytecodeItem for u8 {}
}
}
pub trait Buffer {
fn reserve(&mut self, bytes: usize);
/// # Safety
/// Reserve needs to be called before this function, and only reserved amount can be written.
unsafe fn write(&mut self, byte: u8);
}
pub trait Encodable {
fn encode(self, buffer: &mut impl Buffer);
fn encode_len(self) -> usize;
}
macro_rules! gen_opcodes {
($($opcode:expr, $mnemonic:ident, $_ty:ident, $doc:literal;)*) => {
($($opcode:expr, $mnemonic:ident, $ty:ident, $doc:literal;)*) => {
pub mod opcode {
$(
#[doc = $doc]
pub const $mnemonic: u8 = $opcode;
)*
paste::paste! {
#[derive(Clone, Copy, Debug)]
pub enum Op { $(
[< $mnemonic:lower:camel >](super::[<Ops $ty>]),
)* }
impl crate::Encodable for Op {
fn encode(self, buffer: &mut impl crate::Buffer) {
match self {
$(
Self::[< $mnemonic:lower:camel >](op) => {
unsafe { buffer.write($opcode) };
op.encode(buffer);
}
)*
}
}
fn encode_len(self) -> usize {
match self {
$(
Self::[< $mnemonic:lower:camel >](op) => {
1 + crate::Encodable::encode_len(op)
}
)*
}
}
}
}
}
};
}
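Not part of the diff, but for context: a minimal sketch of how the Buffer and Encodable traits introduced above could be wired together, assuming a hypothetical Vec<u8>-backed buffer (the names VecBuffer and encode_to_vec are illustrative, not from the commit):
/// Hypothetical in-memory buffer satisfying the `Buffer` contract above.
struct VecBuffer {
    data: Vec<u8>,
}
impl Buffer for VecBuffer {
    fn reserve(&mut self, bytes: usize) {
        self.data.reserve(bytes);
    }
    /// # Safety
    /// The caller must have reserved enough capacity beforehand.
    unsafe fn write(&mut self, byte: u8) {
        // Capacity is already reserved, so this push never reallocates.
        self.data.push(byte);
    }
}
/// Usage sketch: reserve first, then encode. This works for the generated
/// `opcode::Op`, which derives `Copy`, so calling both `encode_len` and
/// `encode` by value is fine.
fn encode_to_vec(item: impl Encodable + Copy) -> Vec<u8> {
    let mut buf = VecBuffer { data: Vec::new() };
    buf.reserve(item.encode_len());
    item.encode(&mut buf);
    buf.data
}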

hblang/Cargo.toml

@@ -6,3 +6,8 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
#hbbytecode = { version = "0.1.0", path = "../hbbytecode" }
logos = "0.13.0"
[dev-dependencies]
hbvm = { path = "../hbvm", features = ["nightly"] }

hblang/src/lexer.rs Normal file

@@ -0,0 +1,149 @@
use logos::Logos;
macro_rules! gen_token {
($name:ident {
keywords: {
$($keyword:ident = $lit:literal,)*
},
operators: $op_name:ident {
$($prec:literal: {$(
$op:ident = $op_lit:literal,
)*},)*
},
types: $ty_type:ident {
$($ty:ident = $ty_lit:literal,)*
},
regexes: {
$($regex:ident = $regex_lit:literal,)*
},
}) => {
#[derive(Debug, Clone, PartialEq, Eq, Copy, Logos)]
#[logos(skip "[ \t\n]+")]
pub enum $name {
$(#[token($lit)] $keyword,)*
$($(#[token($op_lit, |_| $op_name::$op)])*)*
Op($op_name),
$(#[token($ty_lit, |_| $ty_type::$ty)])*
Ty($ty_type),
$(#[regex($regex_lit)] $regex,)*
}
#[derive(Debug, Clone, PartialEq, Eq, Copy)]
pub enum $op_name {
$($($op,)*)*
}
#[derive(Debug, Clone, PartialEq, Eq, Copy)]
pub enum $ty_type {
$($ty,)*
}
impl $op_name {
pub fn prec(&self) -> u8 {
match self {
$($($op_name::$op => $prec,)*)*
}
}
}
};
}
gen_token! {
TokenKind {
keywords: {
Fn = "fn",
Let = "let",
If = "if",
Else = "else",
For = "for",
Return = "return",
Break = "break",
Continue = "continue",
Struct = "struct",
True = "true",
False = "false",
LBrace = "{",
RBrace = "}",
LParen = "(",
RParen = ")",
LBracket = "[",
RBracket = "]",
Colon = ":",
Semicolon = ";",
Comma = ",",
Dot = ".",
},
operators: Op {
14: {
Assign = "=",
AddAssign = "+=",
SubAssign = "-=",
MulAssign = "*=",
DivAssign = "/=",
ModAssign = "%=",
AndAssign = "&=",
OrAssign = "|=",
XorAssign = "^=",
ShlAssign = "<<=",
ShrAssign = ">>=",
},
12: {
Or = "||",
},
11: {
And = "&&",
},
10: {
Bor = "|",
},
9: {
Xor = "^",
},
8: {
Band = "&",
},
7: {
Eq = "==",
Neq = "!=",
},
6: {
Lt = "<",
Gt = ">",
Le = "<=",
Ge = ">=",
},
5: {
Shl = "<<",
Shr = ">>",
},
4: {
Add = "+",
Sub = "-",
},
3: {
Mul = "*",
Div = "/",
Mod = "%",
},
},
types: Ty {
U8 = "u8",
U16 = "u16",
U32 = "u32",
U64 = "u64",
I8 = "i8",
I16 = "i16",
I32 = "i32",
I64 = "i64",
Bool = "bool",
Void = "void",
},
regexes: {
Ident = "[a-zA-Z_][a-zA-Z0-9_]*",
Number = "[0-9]+",
},
}
}
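Not part of the commit, but a quick way to sanity-check the generated lexer would be an in-crate test along these lines (the `lexer()` constructor and `slice()` accessor come from the logos 0.13 derive; the test itself is a hypothetical sketch):
#[cfg(test)]
mod tests {
    use {super::TokenKind, logos::Logos};
    #[test]
    fn lexes_a_simple_statement() {
        // Each item is a Result<TokenKind, ()>; slice() is the matched source text.
        let mut lex = TokenKind::lexer("let x: u32 = 1;");
        while let Some(token) = lex.next() {
            println!("{:?} -> {:?}", token, lex.slice());
        }
    }
}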

hblang/src/lib.rs

@@ -1,14 +1,2 @@
pub fn add(left: usize, right: usize) -> usize {
left + right
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn it_works() {
let result = add(2, 2);
assert_eq!(result, 4);
}
}
mod lexer;
mod parser;

hblang/src/parser.rs Normal file

@@ -0,0 +1,359 @@
use {core::panic, std::iter};
use logos::{Lexer, Logos};
use crate::lexer::{Op, TokenKind, Ty};
pub enum Item {
Struct(Struct),
Function(Function),
}
pub enum Type {
Builtin(Ty),
Struct(String),
}
pub struct Struct {
pub name: String,
pub fields: Vec<Field>,
}
pub struct Field {
pub name: String,
pub ty: Type,
}
pub struct Function {
pub name: String,
pub args: Vec<Arg>,
pub ret: Type,
pub body: Vec<Exp>,
}
pub struct Arg {
pub name: String,
pub ty: Type,
}
pub enum Exp {
Literal(Literal),
Variable(String),
Call {
name: Box<Exp>,
args: Vec<Exp>,
},
Index {
base: Box<Exp>,
index: Box<Exp>,
},
Field {
base: Box<Exp>,
field: String,
},
Unary {
op: Op,
exp: Box<Exp>,
},
Binary {
op: Op,
left: Box<Exp>,
right: Box<Exp>,
},
If {
cond: Box<Exp>,
then: Box<Exp>,
else_: Option<Box<Exp>>,
},
Block(Vec<Exp>),
Return(Box<Exp>),
Break,
Continue,
}
pub enum Literal {
Int(i64),
Bool(bool),
}
#[derive(Debug, PartialEq, Clone)]
pub struct Token {
pub kind: TokenKind,
pub span: std::ops::Range<usize>,
pub value: String,
}
struct Parser<'a> {
next_token: Option<Token>,
lexer: logos::Lexer<'a, TokenKind>,
}
impl<'a> Parser<'a> {
pub fn new(input: &'a str) -> Self {
let mut lexer = TokenKind::lexer(input);
let next_token = Self::next_token(&mut lexer);
Self { next_token, lexer }
}
pub fn next(&mut self) -> Option<Token> {
let token = self.next_token.clone();
self.next_token = Self::next_token(&mut self.lexer);
token
}
pub fn next_token(lexer: &mut Lexer<TokenKind>) -> Option<Token> {
lexer.next().map(|r| {
r.map(|e| Token {
kind: e,
span: lexer.span(),
value: lexer.slice().to_owned(),
})
.unwrap_or_else(|e| {
let (line, col) = Self::pos_to_line_col_low(lexer.source(), lexer.span().start);
panic!("Lexer error: {}:{}", line, col,)
})
})
}
pub fn pos_to_line_col(&self, pos: usize) -> (usize, usize) {
Self::pos_to_line_col_low(self.lexer.source(), pos)
}
pub fn pos_to_line_col_low(source: &str, pos: usize) -> (usize, usize) {
let line = source[..pos].lines().count();
let col = source[..pos].lines().last().map(|l| l.len()).unwrap_or(0);
(line, col)
}
pub fn expect(&mut self, kind: TokenKind) -> Token {
let token = self.next().unwrap_or_else(|| panic!("Unexpected EOF"));
if token.kind == kind {
token
} else {
let (line, col) = self.pos_to_line_col(token.span.start);
panic!(
"Expected {:?} at {}:{}, found {:?}",
kind, line, col, token.kind
)
}
}
pub fn peek(&self) -> Option<&Token> {
self.next_token.as_ref()
}
pub fn try_advance(&mut self, kind: TokenKind) -> bool {
if self.peek().is_some_and(|t| t.kind == kind) {
self.next();
true
} else {
false
}
}
pub fn parse(&mut self) -> Vec<Item> {
iter::from_fn(|| self.parse_item()).collect()
}
fn parse_item(&mut self) -> Option<Item> {
let token = self.next()?;
match token.kind {
TokenKind::Struct => Some(self.parse_struct()),
TokenKind::Fn => Some(self.parse_function()),
tkn => {
let (line, col) = self.pos_to_line_col(token.span.start);
panic!("Unexpected {:?} at {}:{}", tkn, line, col)
}
}
}
fn parse_struct(&mut self) -> Item {
let name = self.expect(TokenKind::Ident).value;
self.expect(TokenKind::LBrace);
let fields = iter::from_fn(|| self.parse_field()).collect();
self.expect(TokenKind::RBrace);
Item::Struct(Struct { name, fields })
}
fn parse_field(&mut self) -> Option<Field> {
if self.peek()?.kind == TokenKind::RBrace {
return None;
}
let name = self.expect(TokenKind::Ident).value;
self.expect(TokenKind::Colon);
let ty = self.type_();
self.try_advance(TokenKind::Comma);
Some(Field { name, ty })
}
fn type_(&mut self) -> Type {
let token = self.next().unwrap();
match token.kind {
TokenKind::Ty(ty) => Type::Builtin(ty),
TokenKind::Ident => Type::Struct(token.value),
tkn => {
let (line, col) = self.pos_to_line_col(token.span.start);
panic!("Unexpected {:?} at {}:{}", tkn, line, col)
}
}
}
fn parse_function(&mut self) -> Item {
let name = self.expect(TokenKind::Ident).value;
self.expect(TokenKind::LParen);
let args = iter::from_fn(|| self.parse_arg()).collect();
self.expect(TokenKind::RParen);
self.expect(TokenKind::Colon);
let ret = self.type_();
self.expect(TokenKind::LBrace);
let body = iter::from_fn(|| self.parse_stmt()).collect();
self.expect(TokenKind::RBrace);
Item::Function(Function {
name,
args,
ret,
body,
})
}
fn parse_arg(&mut self) -> Option<Arg> {
if self.peek()?.kind == TokenKind::RParen {
return None;
}
let name = self.expect(TokenKind::Ident).value;
self.expect(TokenKind::Colon);
let ty = self.type_();
self.try_advance(TokenKind::Comma);
Some(Arg { name, ty })
}
fn parse_stmt(&mut self) -> Option<Exp> {
if self.peek()?.kind == TokenKind::RBrace {
return None;
}
let expr = self.parse_expr();
self.expect(TokenKind::Semicolon);
Some(expr)
}
fn parse_expr(&mut self) -> Exp {
self.parse_binary_expr(255)
}
fn parse_binary_expr(&mut self, min_prec: u8) -> Exp {
let mut lhs = self.parse_unit_expr();
while let Some(TokenKind::Op(op)) = self.peek().map(|t| t.kind) {
let prec = op.prec();
if prec <= min_prec {
break;
}
self.next();
let rhs = self.parse_binary_expr(prec);
lhs = Exp::Binary {
op,
left: Box::new(lhs),
right: Box::new(rhs),
};
}
lhs
}
fn parse_unit_expr(&mut self) -> Exp {
let token = self.next().unwrap();
let mut expr = match token.kind {
TokenKind::True => Exp::Literal(Literal::Bool(true)),
TokenKind::False => Exp::Literal(Literal::Bool(false)),
TokenKind::Ident => Exp::Variable(token.value),
TokenKind::LBrace => {
let body = iter::from_fn(|| self.parse_stmt()).collect();
self.expect(TokenKind::RBrace);
Exp::Block(body)
}
TokenKind::LParen => {
let expr = self.parse_expr();
self.expect(TokenKind::RParen);
expr
}
TokenKind::Number => {
let value = token.value.parse().unwrap();
Exp::Literal(Literal::Int(value))
}
TokenKind::Fn => todo!(),
TokenKind::Let => todo!(),
TokenKind::If => todo!(),
TokenKind::Else => todo!(),
TokenKind::For => todo!(),
TokenKind::Return => todo!(),
TokenKind::Break => todo!(),
TokenKind::Continue => todo!(),
TokenKind::Struct => todo!(),
TokenKind::RBrace => todo!(),
TokenKind::RParen => todo!(),
TokenKind::LBracket => todo!(),
TokenKind::RBracket => todo!(),
TokenKind::Colon => todo!(),
TokenKind::Semicolon => todo!(),
TokenKind::Comma => todo!(),
TokenKind::Op(_) => todo!(),
TokenKind::Ty(_) => todo!(),
TokenKind::Dot => todo!(),
};
loop {
match self.peek().map(|t| t.kind) {
Some(TokenKind::LParen) => {
self.next();
let args = iter::from_fn(|| self.parse_call_arg()).collect();
self.expect(TokenKind::RParen);
expr = Exp::Call {
name: Box::new(expr),
args,
};
}
Some(TokenKind::LBracket) => {
self.next();
let index = self.parse_expr();
self.expect(TokenKind::RBracket);
expr = Exp::Index {
base: Box::new(expr),
index: Box::new(index),
};
}
Some(TokenKind::Dot) => {
self.next();
let field = self.expect(TokenKind::Ident).value;
expr = Exp::Field {
base: Box::new(expr),
field,
};
}
_ => break expr,
}
}
}
pub fn parse_call_arg(&mut self) -> Option<Exp> {
if self.peek()?.kind == TokenKind::RParen {
return None;
}
let expr = self.parse_expr();
self.try_advance(TokenKind::Comma);
Some(expr)
}
}
pub fn parse(input: &str) -> Vec<Item> {
Parser::new(input).parse()
}
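The new entry point could be exercised with an in-crate test along these lines (a sketch, not part of the commit; it avoids binary operators because parse_binary_expr starts from precedence 255 and breaks whenever prec <= min_prec, which appears to reject every operator in the current table):
#[cfg(test)]
mod tests {
    use super::parse;
    #[test]
    fn parses_a_struct_and_an_empty_function() {
        // Two top-level items: a struct with two fields and a function with an empty body.
        let items = parse("struct Point { x: i64, y: i64 } fn main(): void { }");
        assert_eq!(items.len(), 2);
    }
}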