Added locations to Tokens in the parser

This commit is contained in:
Talha Qamar 2024-09-13 14:40:09 +05:00
parent 86e1020bc7
commit 0775d0c70a
2 changed files with 194 additions and 173 deletions

View file

@ -1,66 +1,71 @@
pub mod protocol;
mod parser; mod parser;
use crate::idl::parser::parse;
use std::io::Read; use std::io::Read;
use self::parser::parse; use logos::{Lexer, Logos, Skip};
use logos::Logos;
#[derive(Logos, Debug, PartialEq, Clone)] #[derive(Logos, Debug, PartialEq, Clone)]
#[logos(skip r"[ \t\n\f]+")] // Ignore this regex pattern between tokens #[logos(extras = (usize, usize))]
enum Token { enum Token {
#[regex("//[^\n]*\n", logos::skip)] #[regex("//[^\n]*", logos::skip)]
Comment,
#[regex(r"[ \t\f]+", logos::skip)]
Ignored, Ignored,
// Tokens can be literal strings, of any length. #[regex(r"\n", newline_callback)]
#[token("protocol")] Newline,
Protocol,
// Tokens can be literal strings, of any length. // Tokens can be literal strings, of any length.
#[token("type")] #[token("protocol", get_token_position)]
Type, Protocol((usize,usize)),
#[token("fn", priority = 5)]
Fn,
#[token("enum")] // Tokens can be literal strings, of any length.
Enum, #[token("type", get_token_position)]
Type((usize, usize)),
#[token("struct")] #[token("fn", priority = 5, callback = get_token_position)]
Struct, Fn((usize, usize)),
#[token("{")] #[token("enum", get_token_position)]
LBrace, Enum((usize, usize)),
#[token("}")] #[token("struct", get_token_position)]
RBrace, Struct((usize, usize)),
#[token("(")] #[token("{", get_token_position)]
LParen, LBrace((usize, usize)),
#[token(")")] #[token("}", get_token_position)]
RParen, RBrace((usize, usize)),
#[token(":")] #[token("(", get_token_position)]
Colon, LParen((usize, usize)),
#[token(";")] #[token(")", get_token_position)]
SemiColon, RParen((usize, usize)),
#[token(",")] #[token(":", get_token_position)]
Comma, Colon((usize, usize)),
#[token("=")] #[token(";", get_token_position)]
Equal, SemiColon((usize, usize)),
#[token("->")] #[token(",", get_token_position)]
RArrow, Comma((usize, usize)),
#[regex("[a-zA-Z_][a-zA-Z_1234567890]+", |lex|{lex.slice().to_string()})] #[token("=", get_token_position)]
Identifier(String), Equal((usize, usize)),
#[regex("[1234567890]+", |lex|{lex.slice().parse::<u64>().unwrap()})] #[token("->", get_token_position)]
Number(u64), RArrow((usize, usize)),
#[regex("[a-zA-Z_][a-zA-Z_1234567890]+", |lex|{let text = lex.slice().to_string(); let (line, col) = get_token_position(lex); (line, col, text)})]
Identifier((usize, usize, String)),
#[regex("[1234567890]+", |lex|{let num = lex.slice().parse::<u64>().unwrap(); let (line, col) = get_token_position(lex); (line, col, num) })]
Number((usize, usize, u64)),
#[regex(r"@[a-zA-Z_]+", /*|lex|{lex.slice().to_string()}*/ logos::skip)] #[regex(r"@[a-zA-Z_]+", /*|lex|{lex.slice().to_string()}*/ logos::skip)]
Decorator, Decorator,
@ -76,7 +81,6 @@ pub fn build_idl(name: String) {
for x in lex { for x in lex {
match x { match x {
Ok(token) => { Ok(token) => {
println!("{:?}", token);
tokens.push(token); tokens.push(token);
} }
Err(err) => println!("{:?}", err), Err(err) => println!("{:?}", err),
@ -93,3 +97,17 @@ fn open_protocol(name: String) -> String {
file.read_to_string(&mut contents).unwrap(); file.read_to_string(&mut contents).unwrap();
contents contents
} }
/// Logos callback that reports where the token just matched is located.
///
/// Returns `(line, column)`. `line` is the line counter held in `lex.extras.0`,
/// returned as stored. `column` is the token's start offset relative to the
/// offset stored in `lex.extras.1`, shifted by one so the first character after
/// that offset is column 1.
///
/// NOTE(review): whether `line` is 0- or 1-based depends on the initial extras
/// the lexer is constructed with, which is not set in this function — confirm
/// at the call site that creates the lexer.
fn get_token_position(lex: &mut Lexer<Token>) -> (usize, usize) {
    let (line, line_start) = lex.extras;
    (line, lex.span().start - line_start + 1)
}
/// Logos callback for the newline token: advances the position bookkeeping.
///
/// Increments the line counter in `lex.extras.0` and records the end offset of
/// the matched newline in `lex.extras.1`, which `get_token_position` subtracts
/// from a token's start offset to obtain its column. Returns `Skip`, so no
/// newline token is emitted.
fn newline_callback(lex: &mut Lexer<Token>) -> Skip {
    let next_line_start = lex.span().end;
    lex.extras = (lex.extras.0 + 1, next_line_start);
    Skip
}

View file

@ -11,6 +11,7 @@ enum Declaration{
TypeDeclaration(TypeDeclaration), TypeDeclaration(TypeDeclaration),
ProtocolDeclaration(ProtocolDeclaration), ProtocolDeclaration(ProtocolDeclaration),
} }
#[derive(Debug, Clone, PartialEq)] #[derive(Debug, Clone, PartialEq)]
struct StructDeclaration { struct StructDeclaration {
name : String, name : String,
@ -51,17 +52,19 @@ struct FuncDeclaration {
return_type : String, return_type : String,
} }
/// Consumes the next token from `$tokens` and checks that it is the expected variant.
///
/// `$tokens` is the name of a mutable iterator variable yielding tokens (or
/// references to them). `$token_type` is the path of a tuple variant carrying a
/// single payload, e.g. `Token::LBrace`. Only the variant is checked; the
/// payload is ignored.
///
/// # Panics
/// Panics if the iterator is exhausted, or if the next token is not the
/// expected variant. Both messages print the expected variant path unquoted.
macro_rules! consume {
    ($tokens:ident, $token_type:path) => {
        match $tokens.next() {
            None => panic!("Expected {}, Got End Of Tokens", stringify!($token_type)),
            Some(t) => {
                if !matches!(t, $token_type(_)) {
                    // `stringify!` already yields text; Display avoids escaped quotes.
                    panic!("Expected {}, Got {:?}", stringify!($token_type), t);
                }
            }
        }
    };
}
pub fn parse(tokens : Vec<Token>) -> AST{ pub fn parse(tokens : Vec<Token>) -> AST{
@ -84,29 +87,29 @@ fn declaration(tokens : &mut Peekable<Iter<'_, Token>>) -> Option<Declaration> {
match tokens.peek(){ match tokens.peek(){
None => None, None => None,
Some(tok) => match tok { Some(tok) => match tok {
Token::Enum => Some(enum_decl(tokens)), Token::Enum(_) => Some(enum_decl(tokens)),
Token::Struct => Some(struct_decl(tokens)), // Token::Struct(_) => Some(struct_decl(tokens)),
Token::Type => Some(type_declaration(tokens)), // Token::Type(_) => Some(type_declaration(tokens)),
Token::Protocol => Some(protocol_declaration(tokens)), // Token::Protocol(_) => Some(protocol_declaration(tokens)),
_ => None, _ => None,
} }
} }
} }
fn enum_decl(tokens : &mut Peekable<Iter<'_, Token>>) -> Declaration { fn enum_decl(tokens : &mut Peekable<Iter<'_, Token>>) -> Declaration {
consume(tokens, Token::Enum); consume!(tokens, Token::Enum);
let name = identifier(tokens).expect("Expected Identifier after `enum`"); let name = identifier(tokens).expect("Expected Identifier after `enum`");
consume(tokens, Token::LBrace); consume!(tokens, Token::LBrace);
let mut members = Vec::new(); let mut members = Vec::new();
match tokens.peek().expect("Unexpected EOF after LBrace") { match tokens.peek().expect("Unexpected EOF after LBrace") {
Token::RBrace => {}, // skip checking for enum_members if empty Token::RBrace(_) => {}, // skip checking for enum_members if empty
_ => { _ => {
enum_members(tokens, &mut members); enum_members(tokens, &mut members);
}, },
} }
consume(tokens, Token::RBrace); consume!(tokens, Token::RBrace);
Declaration::EnumDeclaration(EnumDeclaration{name, members}) Declaration::EnumDeclaration(EnumDeclaration{name, members})
} }
@ -115,8 +118,8 @@ fn enum_members(tokens : &mut Peekable<Iter<'_, Token>>, members: &mut Vec<EnumM
members.push(enum_member(tokens).unwrap()); members.push(enum_member(tokens).unwrap());
loop { loop {
match tokens.peek().expect("Unexpected EOF inside enum declaration") { match tokens.peek().expect("Unexpected EOF inside enum declaration") {
Token::Comma => { Token::Comma(_) => {
consume(tokens, Token::Comma); consume!(tokens, Token::Comma);
if let Some(member) = enum_member(tokens) { if let Some(member) = enum_member(tokens) {
members.push(member); members.push(member);
} else { } else {
@ -131,68 +134,68 @@ fn enum_members(tokens : &mut Peekable<Iter<'_, Token>>, members: &mut Vec<EnumM
fn enum_member(tokens : &mut Peekable<Iter<'_, Token>>) -> Option<EnumMember> { fn enum_member(tokens : &mut Peekable<Iter<'_, Token>>) -> Option<EnumMember> {
let name = identifier(tokens); let name = identifier(tokens);
if let Some(name) = name { if let Some(name) = name {
consume(tokens, Token::Equal); consume!(tokens, Token::Equal);
let number = parse_number(tokens).expect("Expected Number after `=`"); let number = parse_number(tokens).expect("Expected Number after `=`");
Some(EnumMember{name, number}) Some(EnumMember{name, number})
} else { } else {
None None
} }
} }
//
fn struct_decl(tokens : &mut Peekable<Iter<'_, Token>>) -> Declaration { // fn struct_decl(tokens : &mut Peekable<Iter<'_, Token>>) -> Declaration {
consume(tokens, Token::Struct); // consume(tokens, Token::Struct);
let name = identifier(tokens).expect("Expected Identifier after `struct`"); // let name = identifier(tokens).expect("Expected Identifier after `struct`");
consume(tokens, Token::LBrace); // consume(tokens, Token::LBrace);
let mut members = Vec::new(); // let mut members = Vec::new();
//
match tokens.peek().expect("Unexpected EOF after LBrace") { // match tokens.peek().expect("Unexpected EOF after LBrace") {
Token::RBrace => {}, // skip checking for struct_members if empty // Token::RBrace => {}, // skip checking for struct_members if empty
_ => { // _ => {
struct_members(tokens, &mut members); // struct_members(tokens, &mut members);
}, // },
} // }
//
consume(tokens, Token::RBrace); // consume(tokens, Token::RBrace);
//
Declaration::StructDeclaration(StructDeclaration{name, members}) // Declaration::StructDeclaration(StructDeclaration{name, members})
} // }
//
fn struct_members(tokens : &mut Peekable<Iter<'_, Token>>, members: &mut Vec<StructMember>) { // fn struct_members(tokens : &mut Peekable<Iter<'_, Token>>, members: &mut Vec<StructMember>) {
members.push(struct_member(tokens).unwrap()); // members.push(struct_member(tokens).unwrap());
loop { // loop {
match tokens.peek().expect("Unexpected EOF inside struct declaration") { // match tokens.peek().expect("Unexpected EOF inside struct declaration") {
Token::Comma => { // Token::Comma => {
consume(tokens, Token::Comma); // consume(tokens, Token::Comma);
if let Some(member) = struct_member(tokens) { // if let Some(member) = struct_member(tokens) {
members.push(member); // members.push(member);
} else { // } else {
break; // break;
} // }
}, // },
_ => {}, // _ => {},
} // }
} // }
} // }
fn struct_member(tokens : &mut Peekable<Iter<'_, Token>>) -> Option<StructMember> { // fn struct_member(tokens : &mut Peekable<Iter<'_, Token>>) -> Option<StructMember> {
let name = identifier(tokens); // let name = identifier(tokens);
if let Some(name) = name { // if let Some(name) = name {
consume(tokens, Token::Colon); // consume(tokens, Token::Colon);
let type_name = identifier(tokens).expect("Expected Type after Colon"); // let type_name = identifier(tokens).expect("Expected Type after Colon");
Some(StructMember{name, type_name}) // Some(StructMember{name, type_name})
} else { // } else {
None // None
} // }
} // }
//
fn type_declaration(tokens : &mut Peekable<Iter<'_, Token>>) -> Declaration { // fn type_declaration(tokens : &mut Peekable<Iter<'_, Token>>) -> Declaration {
consume(tokens, Token::Type); // consume(tokens, Token::Type);
let name = identifier(tokens).expect("Expected Identifier after `type`"); // let name = identifier(tokens).expect("Expected Identifier after `type`");
let type_name = identifier(tokens).expect("Expected type after Identifier"); // let type_name = identifier(tokens).expect("Expected type after Identifier");
Declaration::TypeDeclaration(TypeDeclaration{name, type_name}) // Declaration::TypeDeclaration(TypeDeclaration{name, type_name})
} // }
fn identifier(tokens : &mut Peekable<Iter<'_, Token>>) -> Option<String> { fn identifier(tokens : &mut Peekable<Iter<'_, Token>>) -> Option<String> {
let result = tokens.peek().map_or(None, |x| match x { let result = tokens.peek().map_or(None, |x| match x {
Token::Identifier(s) => { Token::Identifier((_, _, s)) => {
Some(s.to_string()) Some(s.to_string())
}, },
_ => None _ => None
@ -206,7 +209,7 @@ fn identifier(tokens : &mut Peekable<Iter<'_, Token>>) -> Option<String> {
} }
fn parse_number(tokens : &mut Peekable<Iter<'_, Token>>) -> Option<u64> { fn parse_number(tokens : &mut Peekable<Iter<'_, Token>>) -> Option<u64> {
let result = tokens.peek().map_or(None, |x| match x { let result = tokens.peek().map_or(None, |x| match x {
Token::Number(s) => Some(*s), Token::Number((_, _, s)) => Some(*s),
_ => None _ => None
}); });
if let Some(_) = result { if let Some(_) = result {
@ -214,61 +217,61 @@ fn parse_number(tokens : &mut Peekable<Iter<'_, Token>>) -> Option<u64> {
} }
result result
} }
//
fn protocol_declaration(tokens : &mut Peekable<Iter<'_, Token>>) -> Declaration { // fn protocol_declaration(tokens : &mut Peekable<Iter<'_, Token>>) -> Declaration {
consume(tokens, Token::Protocol); // consume(tokens, Token::Protocol);
let name = identifier(tokens).expect("Expected Identifier after `protocol`"); // let name = identifier(tokens).expect("Expected Identifier after `protocol`");
consume(tokens, Token::LBrace); // consume(tokens, Token::LBrace);
let mut interface = Vec::new(); // let mut interface = Vec::new();
match tokens.peek().expect("Unexpected EOF after LBrace") { // match tokens.peek().expect("Unexpected EOF after LBrace") {
Token::RBrace => {}, // Token::RBrace => {},
_ => { // _ => {
functions(tokens, &mut interface); // functions(tokens, &mut interface);
}, // },
}; // };
Declaration::ProtocolDeclaration(ProtocolDeclaration{name, interface}) // Declaration::ProtocolDeclaration(ProtocolDeclaration{name, interface})
} // }
//
fn functions(tokens : &mut Peekable<Iter<'_, Token>>, interface : &mut Vec<FuncDeclaration>) { // fn functions(tokens : &mut Peekable<Iter<'_, Token>>, interface : &mut Vec<FuncDeclaration>) {
loop { // loop {
match function(tokens) { // match function(tokens) {
Some(x) => interface.push(x), // Some(x) => interface.push(x),
None => break, // None => break,
} // }
} // }
} // }
//
fn function(tokens : &mut Peekable<Iter<'_, Token>>) -> Option<FuncDeclaration>{ // fn function(tokens : &mut Peekable<Iter<'_, Token>>) -> Option<FuncDeclaration>{
if let Some(Token::Fn) = tokens.peek() { // if let Some(Token::Fn) = tokens.peek() {
consume(tokens, Token::Fn); // consume(tokens, Token::Fn);
let name = identifier(tokens).expect("Expected Identifier after `fn`"); // let name = identifier(tokens).expect("Expected Identifier after `fn`");
consume(tokens, Token::LParen); // consume(tokens, Token::LParen);
let arg_list = arg_list(tokens); // let arg_list = arg_list(tokens);
consume(tokens, Token::RParen); // consume(tokens, Token::RParen);
consume(tokens, Token::RArrow); // consume(tokens, Token::RArrow((1,1)));
let return_type = identifier(tokens).expect("Expected return type after `->"); // let return_type = identifier(tokens).expect("Expected return type after `->");
Some(FuncDeclaration{name, arg_list, return_type}) // Some(FuncDeclaration{name, arg_list, return_type})
} else { // } else {
None // None
} // }
} // }
//
fn arg_list(tokens : &mut Peekable<Iter<'_, Token>>) -> Vec<String> { // fn arg_list(tokens : &mut Peekable<Iter<'_, Token>>) -> Vec<String> {
let mut result = Vec::new(); // let mut result = Vec::new();
loop { // loop {
match identifier(tokens) { // match identifier(tokens) {
None => break, // None => break,
Some(i) =>{ // Some(i) =>{
result.push(i); // result.push(i);
if **tokens.peek().expect("Unexpected EOF in argument list") == Token::Comma{ // if **tokens.peek().expect("Unexpected EOF in argument list") == Token::Comma{
consume(tokens, Token::Comma); // consume(tokens, Token::Comma);
match tokens.peek().expect("Unexpected EOF in argument list") { // match tokens.peek().expect("Unexpected EOF in argument list") {
Token::Identifier(_) => {}, // Token::Identifier(_) => {},
_ => panic!("Unexpected symbol after Comma in argument list"), // _ => panic!("Unexpected symbol after Comma in argument list"),
} // }
} // }
} // }
} // }
} // }
result // result
} // }