Added locations to Tokens in the parser

This commit is contained in:
Talha Qamar 2024-09-13 14:40:09 +05:00
parent 86e1020bc7
commit 0775d0c70a
2 changed files with 194 additions and 173 deletions

View file

@ -1,66 +1,71 @@
pub mod protocol;
mod parser;
use crate::idl::parser::parse;
use std::io::Read;
use self::parser::parse;
use logos::Logos;
use logos::{Lexer, Logos, Skip};
#[derive(Logos, Debug, PartialEq, Clone)]
#[logos(skip r"[ \t\n\f]+")] // Ignore this regex pattern between tokens
#[logos(extras = (usize, usize))]
enum Token {
#[regex("//[^\n]*\n", logos::skip)]
#[regex("//[^\n]*", logos::skip)]
Comment,
#[regex(r"[ \t\f]+", logos::skip)]
Ignored,
// Tokens can be literal strings, of any length.
#[token("protocol")]
Protocol,
#[regex(r"\n", newline_callback)]
Newline,
// Tokens can be literal strings, of any length.
#[token("type")]
Type,
#[token("fn", priority = 5)]
Fn,
#[token("protocol", get_token_position)]
Protocol((usize,usize)),
#[token("enum")]
Enum,
// Tokens can be literal strings, of any length.
#[token("type", get_token_position)]
Type((usize, usize)),
#[token("struct")]
Struct,
#[token("fn", priority = 5, callback = get_token_position)]
Fn((usize, usize)),
#[token("{")]
LBrace,
#[token("enum", get_token_position)]
Enum((usize, usize)),
#[token("}")]
RBrace,
#[token("struct", get_token_position)]
Struct((usize, usize)),
#[token("(")]
LParen,
#[token("{", get_token_position)]
LBrace((usize, usize)),
#[token(")")]
RParen,
#[token("}", get_token_position)]
RBrace((usize, usize)),
#[token(":")]
Colon,
#[token("(", get_token_position)]
LParen((usize, usize)),
#[token(";")]
SemiColon,
#[token(")", get_token_position)]
RParen((usize, usize)),
#[token(",")]
Comma,
#[token(":", get_token_position)]
Colon((usize, usize)),
#[token("=")]
Equal,
#[token(";", get_token_position)]
SemiColon((usize, usize)),
#[token("->")]
RArrow,
#[token(",", get_token_position)]
Comma((usize, usize)),
#[regex("[a-zA-Z_][a-zA-Z_1234567890]+", |lex|{lex.slice().to_string()})]
Identifier(String),
#[token("=", get_token_position)]
Equal((usize, usize)),
#[regex("[1234567890]+", |lex|{lex.slice().parse::<u64>().unwrap()})]
Number(u64),
#[token("->", get_token_position)]
RArrow((usize, usize)),
#[regex("[a-zA-Z_][a-zA-Z_1234567890]+", |lex|{let text = lex.slice().to_string(); let (line, col) = get_token_position(lex); (line, col, text)})]
Identifier((usize, usize, String)),
#[regex("[1234567890]+", |lex|{let num = lex.slice().parse::<u64>().unwrap(); let (line, col) = get_token_position(lex); (line, col, num) })]
Number((usize, usize, u64)),
#[regex(r"@[a-zA-Z_]+", /*|lex|{lex.slice().to_string()}*/ logos::skip)]
Decorator,
@ -76,7 +81,6 @@ pub fn build_idl(name: String) {
for x in lex {
match x {
Ok(token) => {
println!("{:?}", token);
tokens.push(token);
}
Err(err) => println!("{:?}", err),
@ -93,3 +97,17 @@ fn open_protocol(name: String) -> String {
file.read_to_string(&mut contents).unwrap();
contents
}
/// Compute the `(line, column)` position of the token the lexer just matched.
///
/// The line is taken verbatim from `lex.extras.0`, the counter that
/// `newline_callback` bumps on every newline. The column is the distance from
/// `lex.extras.1` (the offset recorded just past the latest newline) to the
/// start of the current span, shifted by one so the first column is 1.
// NOTE(review): whether lines are 0- or 1-based depends on the initial value
// of `extras`, which is set where the lexer is created — confirm.
fn get_token_position(lex : &mut Lexer<Token>) -> (usize, usize) {
    let (current_line, line_start) = lex.extras;
    let offset_in_line = lex.span().start - line_start;
    (current_line, offset_in_line + 1)
}
/// Lexer callback for a newline: advance the line counter, remember the offset
/// at which the next line starts, and tell the lexer to skip the token.
fn newline_callback(lex: &mut Lexer<Token>) -> Skip {
    // The end of the newline's span is where the following line begins.
    let next_line_start = lex.span().end;
    lex.extras = (lex.extras.0 + 1, next_line_start);
    Skip
}

View file

@ -11,6 +11,7 @@ enum Declaration{
TypeDeclaration(TypeDeclaration),
ProtocolDeclaration(ProtocolDeclaration),
}
#[derive(Debug, Clone, PartialEq)]
struct StructDeclaration {
name : String,
@ -51,17 +52,19 @@ struct FuncDeclaration {
return_type : String,
}
/// Consumes the next token, which is expected to match the given token.
/// Panics if the consumed token is not the expected one or if the input has ended.
fn consume(tokens : &mut Peekable<Iter<'_, Token>>, token : Token) {
let a = tokens.next();
macro_rules! consume {
($tokens:ident, $token_type:path) => {
let a = $tokens.next();
match a {
None => panic!("Expected {:?}, Got End Of File", token),
Some(a) => if *a != token {
panic!("Expected {:?}, Got {:?}", token, *a);
None => panic!("Expected {}, Got End Of Tokens", stringify!($token_type)),
Some(t) => {
if matches!(t, $token_type(_)){}
else {
panic!("Expected {:?}, Got {:?}", stringify!($token_type), t);
}
}
}
};
}
pub fn parse(tokens : Vec<Token>) -> AST{
@ -84,29 +87,29 @@ fn declaration(tokens : &mut Peekable<Iter<'_, Token>>) -> Option<Declaration> {
match tokens.peek(){
None => None,
Some(tok) => match tok {
Token::Enum => Some(enum_decl(tokens)),
Token::Struct => Some(struct_decl(tokens)),
Token::Type => Some(type_declaration(tokens)),
Token::Protocol => Some(protocol_declaration(tokens)),
Token::Enum(_) => Some(enum_decl(tokens)),
// Token::Struct(_) => Some(struct_decl(tokens)),
// Token::Type(_) => Some(type_declaration(tokens)),
// Token::Protocol(_) => Some(protocol_declaration(tokens)),
_ => None,
}
}
}
fn enum_decl(tokens : &mut Peekable<Iter<'_, Token>>) -> Declaration {
consume(tokens, Token::Enum);
consume!(tokens, Token::Enum);
let name = identifier(tokens).expect("Expected Identifier after `enum`");
consume(tokens, Token::LBrace);
consume!(tokens, Token::LBrace);
let mut members = Vec::new();
match tokens.peek().expect("Unexpected EOF after LBrace") {
Token::RBrace => {}, // skip checking for enum_members if empty
Token::RBrace(_) => {}, // skip checking for enum_members if empty
_ => {
enum_members(tokens, &mut members);
},
}
consume(tokens, Token::RBrace);
consume!(tokens, Token::RBrace);
Declaration::EnumDeclaration(EnumDeclaration{name, members})
}
@ -115,8 +118,8 @@ fn enum_members(tokens : &mut Peekable<Iter<'_, Token>>, members: &mut Vec<EnumM
members.push(enum_member(tokens).unwrap());
loop {
match tokens.peek().expect("Unexpected EOF inside enum declaration") {
Token::Comma => {
consume(tokens, Token::Comma);
Token::Comma(_) => {
consume!(tokens, Token::Comma);
if let Some(member) = enum_member(tokens) {
members.push(member);
} else {
@ -131,68 +134,68 @@ fn enum_members(tokens : &mut Peekable<Iter<'_, Token>>, members: &mut Vec<EnumM
fn enum_member(tokens : &mut Peekable<Iter<'_, Token>>) -> Option<EnumMember> {
let name = identifier(tokens);
if let Some(name) = name {
consume(tokens, Token::Equal);
consume!(tokens, Token::Equal);
let number = parse_number(tokens).expect("Expected Number after `=`");
Some(EnumMember{name, number})
} else {
None
}
}
/// Parse `struct <name> { <members> }` into a `Declaration::StructDeclaration`.
///
/// Panics if the struct name is missing, if the input ends right after `{`,
/// or if `consume` does not find the expected token.
fn struct_decl(tokens : &mut Peekable<Iter<'_, Token>>) -> Declaration {
    consume(tokens, Token::Struct);
    let name = identifier(tokens).expect("Expected Identifier after `struct`");
    consume(tokens, Token::LBrace);

    let mut members = Vec::new();
    // An immediately following `}` means the body is empty, so there is no
    // member list to parse.
    let body_is_empty = matches!(
        tokens.peek().expect("Unexpected EOF after LBrace"),
        Token::RBrace
    );
    if !body_is_empty {
        struct_members(tokens, &mut members);
    }

    consume(tokens, Token::RBrace);
    Declaration::StructDeclaration(StructDeclaration{name, members})
}
/// Parse a comma-separated list of struct members and append them to `members`.
///
/// At least one member is required. After each member a `,` may follow; a
/// trailing comma (a `,` not followed by another member) ends the list, as does
/// any token other than `,`.
///
/// Panics if the first member is missing or if the input ends inside the list.
fn struct_members(tokens : &mut Peekable<Iter<'_, Token>>, members: &mut Vec<StructMember>) {
    members.push(struct_member(tokens).expect("Expected at least one struct member"));

    // Keep going only while a comma follows. Previously a non-comma token fell
    // into a no-op arm and, because `peek` does not advance, the loop spun
    // forever on the same token.
    while matches!(
        tokens.peek().expect("Unexpected EOF inside struct declaration"),
        Token::Comma
    ) {
        consume(tokens, Token::Comma);
        match struct_member(tokens) {
            Some(member) => members.push(member),
            // Trailing comma: nothing more to parse.
            None => break,
        }
    }
}
/// Parse a single `<name> : <type>` struct member.
///
/// Returns `None` when the next token is not an identifier. Panics if the
/// colon or the type name after it is missing.
fn struct_member(tokens : &mut Peekable<Iter<'_, Token>>) -> Option<StructMember> {
    let name = identifier(tokens)?;
    consume(tokens, Token::Colon);
    let type_name = identifier(tokens).expect("Expected Type after Colon");
    Some(StructMember{name, type_name})
}
/// Parse `type <name> <type_name>` into a `Declaration::TypeDeclaration`.
///
/// Panics if either identifier is missing.
fn type_declaration(tokens : &mut Peekable<Iter<'_, Token>>) -> Declaration {
    consume(tokens, Token::Type);
    let alias = identifier(tokens).expect("Expected Identifier after `type`");
    let target = identifier(tokens).expect("Expected type after Identifier");
    Declaration::TypeDeclaration(TypeDeclaration{name: alias, type_name: target})
}
//
// fn struct_decl(tokens : &mut Peekable<Iter<'_, Token>>) -> Declaration {
// consume(tokens, Token::Struct);
// let name = identifier(tokens).expect("Expected Identifier after `struct`");
// consume(tokens, Token::LBrace);
// let mut members = Vec::new();
//
// match tokens.peek().expect("Unexpected EOF after LBrace") {
// Token::RBrace => {}, // skip checking for struct_members if empty
// _ => {
// struct_members(tokens, &mut members);
// },
// }
//
// consume(tokens, Token::RBrace);
//
// Declaration::StructDeclaration(StructDeclaration{name, members})
// }
//
// fn struct_members(tokens : &mut Peekable<Iter<'_, Token>>, members: &mut Vec<StructMember>) {
// members.push(struct_member(tokens).unwrap());
// loop {
// match tokens.peek().expect("Unexpected EOF inside struct declaration") {
// Token::Comma => {
// consume(tokens, Token::Comma);
// if let Some(member) = struct_member(tokens) {
// members.push(member);
// } else {
// break;
// }
// },
// _ => {},
// }
// }
// }
// fn struct_member(tokens : &mut Peekable<Iter<'_, Token>>) -> Option<StructMember> {
// let name = identifier(tokens);
// if let Some(name) = name {
// consume(tokens, Token::Colon);
// let type_name = identifier(tokens).expect("Expected Type after Colon");
// Some(StructMember{name, type_name})
// } else {
// None
// }
// }
//
// fn type_declaration(tokens : &mut Peekable<Iter<'_, Token>>) -> Declaration {
// consume(tokens, Token::Type);
// let name = identifier(tokens).expect("Expected Identifier after `type`");
// let type_name = identifier(tokens).expect("Expected type after Identifier");
// Declaration::TypeDeclaration(TypeDeclaration{name, type_name})
// }
fn identifier(tokens : &mut Peekable<Iter<'_, Token>>) -> Option<String> {
let result = tokens.peek().map_or(None, |x| match x {
Token::Identifier(s) => {
Token::Identifier((_, _, s)) => {
Some(s.to_string())
},
_ => None
@ -206,7 +209,7 @@ fn identifier(tokens : &mut Peekable<Iter<'_, Token>>) -> Option<String> {
}
fn parse_number(tokens : &mut Peekable<Iter<'_, Token>>) -> Option<u64> {
let result = tokens.peek().map_or(None, |x| match x {
Token::Number(s) => Some(*s),
Token::Number((_, _, s)) => Some(*s),
_ => None
});
if let Some(_) = result {
@ -214,61 +217,61 @@ fn parse_number(tokens : &mut Peekable<Iter<'_, Token>>) -> Option<u64> {
}
result
}
/// Parse `protocol <name> { <functions> }` into a
/// `Declaration::ProtocolDeclaration`.
///
/// Panics if the protocol name is missing, if the input ends right after `{`,
/// or if `consume` does not find the expected token.
fn protocol_declaration(tokens : &mut Peekable<Iter<'_, Token>>) -> Declaration {
    consume(tokens, Token::Protocol);
    let name = identifier(tokens).expect("Expected Identifier after `protocol`");
    consume(tokens, Token::LBrace);

    let mut interface = Vec::new();
    // An immediately following `}` means the protocol declares no functions.
    let body_is_empty = matches!(
        tokens.peek().expect("Unexpected EOF after LBrace"),
        Token::RBrace
    );
    if !body_is_empty {
        functions(tokens, &mut interface);
    }

    // Consume the closing brace, as `enum_decl` and `struct_decl` do; otherwise
    // the `}` is left in the stream for whatever parses the next declaration.
    consume(tokens, Token::RBrace);
    Declaration::ProtocolDeclaration(ProtocolDeclaration{name, interface})
}
/// Parse consecutive function declarations, appending each one to `interface`
/// until `function` reports that no further declaration follows.
fn functions(tokens : &mut Peekable<Iter<'_, Token>>, interface : &mut Vec<FuncDeclaration>) {
    while let Some(decl) = function(tokens) {
        interface.push(decl);
    }
}
/// Parse one `fn <name> ( <args> ) -> <return_type>` declaration.
///
/// Returns `None` without consuming anything when the next token is not `fn`.
/// Panics if the name, the return type, or any expected token is missing.
fn function(tokens : &mut Peekable<Iter<'_, Token>>) -> Option<FuncDeclaration>{
    // Guard: only proceed when a `fn` keyword is next.
    if !matches!(tokens.peek(), Some(Token::Fn)) {
        return None;
    }

    consume(tokens, Token::Fn);
    let name = identifier(tokens).expect("Expected Identifier after `fn`");
    consume(tokens, Token::LParen);
    let args = arg_list(tokens);
    consume(tokens, Token::RParen);
    consume(tokens, Token::RArrow);
    let return_type = identifier(tokens).expect("Expected return type after `->");
    Some(FuncDeclaration{name, arg_list: args, return_type})
}
/// Collect the identifiers of an argument list into a vector.
///
/// Reads identifiers until the next token is not one. A `,` after an argument
/// must be followed by another identifier.
///
/// Panics if the input ends inside the list or if a comma is followed by
/// something other than an identifier.
fn arg_list(tokens : &mut Peekable<Iter<'_, Token>>) -> Vec<String> {
    let mut args = Vec::new();
    while let Some(arg) = identifier(tokens) {
        args.push(arg);

        let comma_follows =
            **tokens.peek().expect("Unexpected EOF in argument list") == Token::Comma;
        if !comma_follows {
            continue;
        }

        consume(tokens, Token::Comma);
        // A comma promises another argument; anything else is an error.
        let after_comma = tokens.peek().expect("Unexpected EOF in argument list");
        if !matches!(after_comma, Token::Identifier(_)) {
            panic!("Unexpected symbol after Comma in argument list");
        }
    }
    args
}
//
// fn protocol_declaration(tokens : &mut Peekable<Iter<'_, Token>>) -> Declaration {
// consume(tokens, Token::Protocol);
// let name = identifier(tokens).expect("Expected Identifier after `protocol`");
// consume(tokens, Token::LBrace);
// let mut interface = Vec::new();
// match tokens.peek().expect("Unexpected EOF after LBrace") {
// Token::RBrace => {},
// _ => {
// functions(tokens, &mut interface);
// },
// };
// Declaration::ProtocolDeclaration(ProtocolDeclaration{name, interface})
// }
//
// fn functions(tokens : &mut Peekable<Iter<'_, Token>>, interface : &mut Vec<FuncDeclaration>) {
// loop {
// match function(tokens) {
// Some(x) => interface.push(x),
// None => break,
// }
// }
// }
//
// fn function(tokens : &mut Peekable<Iter<'_, Token>>) -> Option<FuncDeclaration>{
// if let Some(Token::Fn) = tokens.peek() {
// consume(tokens, Token::Fn);
// let name = identifier(tokens).expect("Expected Identifier after `fn`");
// consume(tokens, Token::LParen);
// let arg_list = arg_list(tokens);
// consume(tokens, Token::RParen);
// consume(tokens, Token::RArrow((1,1)));
// let return_type = identifier(tokens).expect("Expected return type after `->");
// Some(FuncDeclaration{name, arg_list, return_type})
// } else {
// None
// }
// }
//
// fn arg_list(tokens : &mut Peekable<Iter<'_, Token>>) -> Vec<String> {
// let mut result = Vec::new();
// loop {
// match identifier(tokens) {
// None => break,
// Some(i) =>{
// result.push(i);
// if **tokens.peek().expect("Unexpected EOF in argument list") == Token::Comma{
// consume(tokens, Token::Comma);
// match tokens.peek().expect("Unexpected EOF in argument list") {
// Token::Identifier(_) => {},
// _ => panic!("Unexpected symbol after Comma in argument list"),
// }
// }
// }
// }
// }
// result
// }