normal syntax tree i guess

This commit is contained in:
nothendev 2023-05-04 15:44:49 +03:00
parent b0020ff838
commit f0a7166470
8 changed files with 197 additions and 94 deletions

View file

@ -6,4 +6,5 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies] [dependencies]
logos = "0.13.0" logos = "0"
thiserror = "1"

View file

@ -11,8 +11,6 @@ Union Option<T>{
Some<T> Some<T>
} }
Structure Version { Structure Version {
major: Byte, major: Byte,
minor: Byte, minor: Byte,

View file

@ -1,3 +1,4 @@
// core provides lots of useful types like String and Byte
Use core; Use core;
Constant VERSION Version { Constant VERSION Version {
@ -6,7 +7,7 @@ Constant VERSION Version{
patch: 0, patch: 0,
} }
Type Path = String; Alias Path = String;
Structure File { Structure File {
name: String, name: String,

View file

@ -0,0 +1,3 @@
Use core;
Alias Thing = Byte;

View file

@ -10,34 +10,67 @@ pub struct IDLModule {
// why: only allow use before other items // why: only allow use before other items
// parser will error if use is present in any other place // parser will error if use is present in any other place
pub uses: Vec<UseDecl>, pub uses: Vec<UseDecl>,
pub items: Vec<Item> pub items: Vec<Item>,
} }
#[derive(Debug)] #[derive(Debug)]
pub enum Item { pub enum Item {
Interface(ItemInterface), Interface(ItemInterface),
Type(ItemType) Type(ItemAlias),
Constant(ItemConstant),
} }
#[derive(Debug)] #[derive(Debug)]
pub struct Function { pub struct Function {
pub name: String, pub name: String,
} }
#[derive(Debug)] #[derive(Debug)]
pub struct ItemInterface { pub struct ItemInterface {
pub name: String, pub name: String,
pub functions: Vec<Function> pub functions: Vec<Function>,
} }
#[derive(Debug)] #[derive(Debug)]
pub struct ItemType { pub struct ItemAlias {
pub name: String,
pub referree: String,
}
#[derive(Debug)]
pub struct ItemConstant {
pub name: String, pub name: String,
pub referree: String
} }
#[derive(Debug)] #[derive(Debug)]
pub struct UseDecl { pub struct UseDecl {
pub module: String pub module: String,
}
/// An expression node of the syntax tree.
///
/// The only form declared here is a literal value.
#[derive(Debug)]
pub enum Expr {
/// A literal value (string or number).
Literal(Literal),
}
/// A literal value appearing in an expression.
#[derive(Debug)]
pub enum Literal {
/// A string literal, stored as an owned `String`.
String(String),
/// A numeric literal; the concrete integer type is carried by `NumberLiteral`.
Number(NumberLiteral),
}
/// A numeric literal tagged with the integer type that stores its value.
///
/// Each variant wraps the Rust integer type named in its payload.
#[derive(Debug)]
pub enum NumberLiteral {
// Stored as a `usize`.
Ptr(usize),
// Unsigned / signed 8-bit.
U8(u8),
I8(i8),
// Unsigned / signed 16-bit.
U16(u16),
I16(i16),
// Unsigned / signed 32-bit.
U32(u32),
I32(i32),
// Unsigned / signed 64-bit.
U64(u64),
I64(i64),
} }

View file

@ -1,4 +1,4 @@
use std::ops::{Range, Add}; use std::{ops::{Range, Add}, fmt::Display};
use logos::Logos; use logos::Logos;
@ -35,9 +35,6 @@ pub enum Token {
#[token("=")] #[token("=")]
Equals, Equals,
//#[regex(r#"[A-z]+"#, |lex| lex.slice().parse().ok())]
//Literal(String),
#[regex(r#"[A-z]+"#, |lex| Ident::lexer(lex.slice()).next().and_then(Result::ok))] #[regex(r#"[A-z]+"#, |lex| Ident::lexer(lex.slice()).next().and_then(Result::ok))]
Ident(Ident), Ident(Ident),
@ -64,8 +61,8 @@ pub enum Ident {
Constant, Constant,
#[token("Structure")] #[token("Structure")]
Structure, Structure,
#[token("Type")] #[token("Alias")]
Type, Alias,
#[token("Use")] #[token("Use")]
Use, Use,
#[regex(r"[A-z]+", |lex| lex.slice().parse().ok())] #[regex(r"[A-z]+", |lex| lex.slice().parse().ok())]
@ -90,6 +87,11 @@ impl Span {
Self(min(self.lower(), other.lower())..max(self.upper(), other.upper())) Self(min(self.lower(), other.lower())..max(self.upper(), other.upper()))
} }
} }
/// Formats a span as `lower..upper`, using the values returned by
/// `lower()` and `upper()`.
impl Display for Span {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let start = self.lower();
        let end = self.upper();
        write!(f, "{}..{}", start, end)
    }
}
impl Add for Span { impl Add for Span {
type Output = Self; type Output = Self;
@ -109,3 +111,9 @@ impl<T> Spanned<T> {
Spanned(f(self.0), self.1) Spanned(f(self.0), self.1)
} }
} }
impl<T: Display> Display for Spanned<T> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{} @ {}", self.0, self.1)
}
}

View file

@ -1,18 +1,19 @@
#![allow(non_snake_case)] #![allow(non_snake_case)]
use logos::Logos;
use parser::Parser; use parser::Parser;
use crate::lexer::Token;
mod ast; mod ast;
mod lexer; mod lexer;
mod parser; mod parser;
const VFS: &str = include_str!("../assets/vfs.idl"); const TEST: &str = include_str!("../assets/why.idl");
fn main() { fn main() {
dbg!(Token::lexer(VFS).for_each(|a| println!("{:#?}", a))); let res = Parser::new(TEST).parse();
dbg!(Parser::new(VFS).parse()); match res {
Ok(ast) => { dbg!(ast); }
Err(e) => println!("{}", e)
}
} }
#[macro_export] #[macro_export]

View file

@ -1,53 +1,56 @@
use logos::{Logos, SpannedIter}; use logos::{Lexer, Logos, SpannedIter};
use crate::{ use crate::{
ast::{IDLModule, Item, ItemInterface, ItemType, UseDecl}, ast::{IDLModule, Item, ItemAlias, ItemConstant, ItemInterface, UseDecl},
lexer::{Span, Spanned, Token, Ident}, lexer::{Ident, Span, Spanned, Token},
};
use std::{
iter::{Iterator, Peekable, Filter},
ops::Range,
}; };
use std::iter::{Filter, Iterator, Peekable};
type Wtf<'a> = Peekable< type Wtf<'a> = Peekable<Filter<&'a mut Lexer<'a, Token>, Box<dyn Fn(&Result<Token, ()>) -> bool>>>;
Filter<SpannedIter<'a, Token>, Box<dyn Fn(&(Result<Token, ()>, Range<usize>)) -> bool>>,
>;
struct TokenIterator<'a> { struct TokenIterator<'a> {
spanned: Wtf<'a>, lexer: Lexer<'a, Token>,
peeked: Option<Option<Token>>,
} }
fn token_is_not_comment((ref a, ..): &(Result<Token, ()>, Range<usize>)) -> bool { fn token_is_not_comment(a: &Result<Token, ()>) -> bool {
!matches!(a, Err(_) | Ok(Token::Comment(..))) !matches!(a, Err(_) | Ok(Token::Comment(..)))
} }
impl<'a> TokenIterator<'a> { impl<'a> TokenIterator<'a> {
pub fn new(src: &'a str) -> Self { pub fn new(src: &'a str) -> Self {
let spanned = Token::lexer(src) let lexer = Token::lexer(src);
.spanned()
.filter(
Box::new(token_is_not_comment) as Box<dyn Fn(&(Result<Token, ()>, Range<usize>)) -> bool>,
)
.peekable();
Self { spanned } Self {
lexer,
peeked: None,
} }
pub fn next(&mut self) -> Option<Spanned<Token>> { }
let nxt = self pub fn next(&mut self) -> Result<Spanned<Token>, ParserError> {
.spanned let n = match self.peeked.take() {
.next() Some(thing) => thing,
.and_then(|(token, span)| Some(Spanned(token.ok()?, Span(span)))); None => self.lexer.find(token_is_not_comment).and_then(Result::ok),
};
let nxt = n.map(|token| Spanned(token, Span(self.lexer.span())));
println!("[NEXT] {:#?}", nxt); println!("[NEXT] {:#?}", nxt);
nxt nxt.ok_or(ParserError::UnexpectedEOF)
} }
pub fn peek(&mut self) -> Option<Spanned<&Token>> { fn _peek(&mut self) -> Option<&Token> {
let peek = self self.peeked
.spanned .get_or_insert_with(|| self.lexer.find(token_is_not_comment).and_then(Result::ok))
.peek() .as_ref()
.and_then(|(token, span)| Some(Spanned(token.as_ref().ok()?, Span(span.clone())))); }
pub fn peek(&mut self) -> Result<Spanned<&Token>, ParserError> {
let span = Span(self.lexer.span());
let peek = self._peek().map(|token| Spanned(token, span));
println!("[PEEK] {:#?}", peek); println!("[PEEK] {:#?}", peek);
peek peek.ok_or(ParserError::UnexpectedEOF)
}
pub fn current(&self) -> Spanned<String> {
Spanned(self.lexer.slice().to_owned(), Span(self.lexer.span()))
} }
} }
@ -62,32 +65,44 @@ impl<'a> Parser<'a> {
} }
} }
fn get_real(&mut self, matcher: impl Fn(&Token) -> bool) -> Option<Spanned<Token>> { fn get_real(
&mut self,
matcher: impl Fn(&Token) -> bool,
expected: &'static str,
) -> Result<Spanned<Token>, ParserError> {
if matcher(self.tokens.peek()?.0) { if matcher(self.tokens.peek()?.0) {
self.tokens.next() self.tokens.next()
} else { } else {
None Err(self.unexpected(expected))
} }
} }
fn semi(&mut self) -> Option<Span> { fn unexpected(&self, expected: &'static str) -> ParserError {
Some(self.get_real(|token| matches!(token, Token::Semicolon))?.1) ParserError::Unexpected(expected.to_owned(), self.tokens.current())
} }
fn ask_ident(&mut self) -> Option<Spanned<String>> { fn semi(&mut self) -> Result<Span, ParserError> {
Some(crate::unwrap_match!( Ok(self
self.get_real(|token| matches!(token, Token::Ident(Ident::Other(_))))?, .get_real(|token| matches!(token, Token::Semicolon), "a semicolon")?
.1)
}
fn ask_ident(&mut self) -> Result<Spanned<String>, ParserError> {
Ok(crate::unwrap_match!(
self.get_real(|token| matches!(token, Token::Ident(Ident::Other(_))), "an identifier")?,
Spanned(Token::Ident(Ident::Other(ident)), span) => Spanned(Token::Ident(Ident::Other(ident)), span) =>
Spanned(ident, span) Spanned(ident, span)
)) ))
} }
fn ask_interface(&mut self) -> Option<Spanned<ItemInterface>> { fn _ask_interface(&mut self) -> Result<Spanned<ItemInterface>, ParserError> {
let Spanned(_, kSp) = let Spanned(_, kSp) = self.get_real(
self.get_real(|token| matches!(token, Token::Ident(Ident::Interface)))?; |token| matches!(token, Token::Ident(Ident::Interface)),
"`Interface`",
)?;
let Spanned(ident, iSp) = self.ask_ident()?; let Spanned(ident, iSp) = self.ask_ident()?;
Some(Spanned::new( Ok(Spanned::new(
ItemInterface { ItemInterface {
name: ident, name: ident,
functions: vec![], functions: vec![],
@ -96,55 +111,98 @@ impl<'a> Parser<'a> {
)) ))
} }
fn ask_typealias(&mut self) -> Option<Spanned<ItemType>> { fn ask_alias(&mut self) -> Result<Spanned<ItemAlias>, ParserError> {
let Spanned(_, kSp) = let Spanned(_, kSp) = self.get_real(
self.get_real(|token| matches!(token, Token::Ident(Ident::Type)))?; |token| matches!(token, Token::Ident(Ident::Alias)),
"`Alias`",
)?;
let Spanned(name, nSp) = self.ask_ident()?; let Spanned(name, nSp) = self.ask_ident()?;
let Spanned(_, eqSp) = self.get_real(|token| matches!(token, Token::Equals))?; let Spanned(_, eqSp) = self.get_real(|token| matches!(token, Token::Equals), "`=`")?;
let Spanned(referree, rSp) = self.ask_ident()?; let Spanned(referree, rSp) = self.ask_ident()?;
Some(Spanned::new( Ok(Spanned::new(
ItemType { name, referree }, ItemAlias { name, referree },
[kSp, nSp, eqSp, rSp, self.semi()?], [kSp, nSp, eqSp, rSp, self.semi()?],
)) ))
} }
fn ask_item(&mut self) -> Option<Spanned<Item>> { fn ask_constant(&mut self) -> Result<Spanned<ItemConstant>, ParserError> {
Some(match self.tokens.peek()?.0 { Err(self.unexpected(""))
Token::Ident(Ident::Other(_)) => None?, }
fn ask_item(&mut self) -> Result<Spanned<Item>, ParserError> {
Ok(match self.tokens.peek()?.0 {
Token::Ident(Ident::Other(_)) => {
Err(self.unexpected("a keyword, not just an identifier"))?
}
Token::Ident(keyword) => match keyword { Token::Ident(keyword) => match keyword {
Ident::Interface => self.ask_interface()?.map(Item::Interface), //Ident::Interface => self.ask_interface()?.map(Item::Interface),
Ident::Type => self.ask_typealias()?.map(Item::Type), Ident::Alias => self.ask_alias()?.map(Item::Type),
_ => None?, Ident::Constant => self.ask_constant()?.map(Item::Constant),
_ => Err(self.unexpected("`Alias` or `Constant`"))?,
}, },
_ => None?, _ => Err(self.unexpected("a keyword"))?,
}) })
} }
fn ask_use(&mut self) -> Option<Spanned<UseDecl>> { fn ask_use(&mut self) -> Result<Spanned<UseDecl>, ParserError> {
let Spanned(_, kSp) = let Spanned(_, kSp) = {
self.get_real(|token| matches!(token, Token::Ident(Ident::Use)))?; match self.tokens.peek()? {
Spanned(Token::Ident(Ident::Use), _) => Ok(self.tokens.next()?),
_ => Err(ParserError::PleaseStopParsingUse),
}
}?;
let Spanned(name, nSp) = self.ask_ident()?; let Spanned(name, nSp) = self.ask_ident()?;
Some(Spanned::new( Ok(Spanned::new(
UseDecl { module: name }, UseDecl { module: name },
[kSp, nSp, self.semi()?], [kSp, nSp, self.semi()?],
)) ))
} }
pub fn parse(mut self) -> IDLModule { pub fn parse(mut self) -> Result<IDLModule, ParserError> {
IDLModule { Ok(IDLModule {
uses: fill_while(|| self.ask_use()), uses: {
items: fill_while(|| self.ask_item()), let mut real = vec![];
loop {
let r = self.ask_use();
match r {
Ok(Spanned(a, _)) => real.push(a),
Err(ParserError::UnexpectedEOF) => return Err(ParserError::UnexpectedEOF),
Err(ParserError::PleaseStopParsingUse) => break,
Err(unexpected @ ParserError::Unexpected(..)) => return Err(unexpected),
} }
} }
Ok(real)
}?,
items: fill_while(|| self.ask_item())?,
})
}
} }
fn fill_while<T>(mut f: impl FnMut() -> Option<Spanned<T>>) -> Vec<T> { fn fill_while<T>(
mut f: impl FnMut() -> Result<Spanned<T>, ParserError>,
) -> Result<Vec<T>, ParserError> {
let mut real = vec![]; let mut real = vec![];
while let Some(Spanned(t, _)) = f() { loop {
real.push(t); match f() {
Ok(Spanned(next, _)) => real.push(next),
Err(unexpected @ ParserError::Unexpected(..)) => return Err(unexpected),
Err(ParserError::UnexpectedEOF) => break,
Err(ParserError::PleaseStopParsingUse) => unreachable!(),
} }
real }
Ok(real)
}
/// Errors produced while parsing a token stream into a module.
#[derive(thiserror::Error, Debug)]
pub enum ParserError {
/// A token was found that does not fit at this point.
///
/// Field 0 is a human-readable description of what was expected; field 1 is
/// the offending source text together with its span.
#[error("Unexpected `{_1}`, expected {_0}")]
Unexpected(String, Spanned<String>),
/// The token stream ended while another token was still required.
#[error("Unexpected end of file")]
UnexpectedEOF,
/// Control-flow signal rather than a user-facing error: returned by the
/// `Use` parser when the next token is not `Use`, so the caller stops
/// collecting `Use` declarations and moves on to items.
#[error("please stop.")]
PleaseStopParsingUse,
} }