From 0775d0c70ab6e1545d081cc454b47d274da830f9 Mon Sep 17 00:00:00 2001 From: Talha Qamar Date: Fri, 13 Sep 2024 14:40:09 +0500 Subject: [PATCH] Added locations to Tokens in the parser --- dev/src/idl/mod.rs | 98 ++++++++------- dev/src/idl/parser.rs | 269 +++++++++++++++++++++--------------------- 2 files changed, 194 insertions(+), 173 deletions(-) diff --git a/dev/src/idl/mod.rs b/dev/src/idl/mod.rs index 47963335..ee16818d 100644 --- a/dev/src/idl/mod.rs +++ b/dev/src/idl/mod.rs @@ -1,66 +1,71 @@ -pub mod protocol; mod parser; +use crate::idl::parser::parse; use std::io::Read; -use self::parser::parse; - -use logos::Logos; +use logos::{Lexer, Logos, Skip}; #[derive(Logos, Debug, PartialEq, Clone)] -#[logos(skip r"[ \t\n\f]+")] // Ignore this regex pattern between tokens +#[logos(extras = (usize, usize))] enum Token { - #[regex("//[^\n]*\n", logos::skip)] + #[regex("//[^\n]*", logos::skip)] + Comment, + + #[regex(r"[ \t\f]+", logos::skip)] Ignored, - // Tokens can be literal strings, of any length. - #[token("protocol")] - Protocol, + #[regex(r"\n", newline_callback)] + Newline, // Tokens can be literal strings, of any length. - #[token("type")] - Type, - #[token("fn", priority = 5)] - Fn, + #[token("protocol", get_token_position)] + Protocol((usize,usize)), - #[token("enum")] - Enum, + // Tokens can be literal strings, of any length. + #[token("type", get_token_position)] + Type((usize, usize)), - #[token("struct")] - Struct, + #[token("fn", priority = 5, callback = get_token_position)] + Fn((usize, usize)), - #[token("{")] - LBrace, + #[token("enum", get_token_position)] + Enum((usize, usize)), - #[token("}")] - RBrace, + #[token("struct", get_token_position)] + Struct((usize, usize)), - #[token("(")] - LParen, + #[token("{", get_token_position)] + LBrace((usize, usize)), - #[token(")")] - RParen, + #[token("}", get_token_position)] + RBrace((usize, usize)), - #[token(":")] - Colon, + #[token("(", get_token_position)] + LParen((usize, usize)), - #[token(";")] - SemiColon, + #[token(")", get_token_position)] + RParen((usize, usize)), - #[token(",")] - Comma, + #[token(":", get_token_position)] + Colon((usize, usize)), - #[token("=")] - Equal, + #[token(";", get_token_position)] + SemiColon((usize, usize)), - #[token("->")] - RArrow, + #[token(",", get_token_position)] + Comma((usize, usize)), - #[regex("[a-zA-Z_][a-zA-Z_1234567890]+", |lex|{lex.slice().to_string()})] - Identifier(String), + #[token("=", get_token_position)] + Equal((usize, usize)), - #[regex("[1234567890]+", |lex|{lex.slice().parse::().unwrap()})] - Number(u64), + #[token("->", get_token_position)] + RArrow((usize, usize)), + + #[regex("[a-zA-Z_][a-zA-Z_1234567890]+", |lex|{let text = lex.slice().to_string(); let (line, col) = get_token_position(lex); (line, col, text)})] + Identifier((usize, usize, String)), + + #[regex("[1234567890]+", |lex|{let num = lex.slice().parse::().unwrap(); let (line, col) = get_token_position(lex); (line, col, num) })] + Number((usize, usize, u64)), #[regex(r"@[a-zA-Z_]+", /*|lex|{lex.slice().to_string()}*/ logos::skip)] Decorator, @@ -76,7 +81,6 @@ pub fn build_idl(name: String) { for x in lex { match x { Ok(token) => { - println!("{:?}", token); tokens.push(token); } Err(err) => println!("{:?}", err), @@ -93,3 +97,17 @@ fn open_protocol(name: String) -> String { file.read_to_string(&mut contents).unwrap(); contents } + +fn get_token_position(lex : &mut Lexer) -> (usize, usize) { + let line = lex.extras.0; + let column = lex.span().start - lex.extras.1; + + (line, column + 1) +} + +/// Update the line count and the char index. +fn newline_callback(lex: &mut Lexer) -> Skip { + lex.extras.0 += 1; + lex.extras.1 = lex.span().end; + Skip +} diff --git a/dev/src/idl/parser.rs b/dev/src/idl/parser.rs index 78ae4a6a..fc1fc750 100644 --- a/dev/src/idl/parser.rs +++ b/dev/src/idl/parser.rs @@ -11,6 +11,7 @@ enum Declaration{ TypeDeclaration(TypeDeclaration), ProtocolDeclaration(ProtocolDeclaration), } + #[derive(Debug, Clone, PartialEq)] struct StructDeclaration { name : String, @@ -51,17 +52,19 @@ struct FuncDeclaration { return_type : String, } - -/// Consume's a token that's expected. If the token that's consumed is not -/// the given expected token then panic -fn consume(tokens : &mut Peekable>, token : Token) { - let a = tokens.next(); - match a { - None => panic!("Expected {:?}, Got End Of File", token), - Some(a) => if *a != token { - panic!("Expected {:?}, Got {:?}", token, *a); +macro_rules! consume { + ($tokens:ident, $token_type:path) => { + let a = $tokens.next(); + match a { + None => panic!("Expected {}, Got End Of Tokens", stringify!($token_type)), + Some(t) => { + if matches!(t, $token_type(_)){} + else { + panic!("Expected {:?}, Got {:?}", stringify!($token_type), t); + } + } } - } + }; } pub fn parse(tokens : Vec) -> AST{ @@ -84,29 +87,29 @@ fn declaration(tokens : &mut Peekable>) -> Option { match tokens.peek(){ None => None, Some(tok) => match tok { - Token::Enum => Some(enum_decl(tokens)), - Token::Struct => Some(struct_decl(tokens)), - Token::Type => Some(type_declaration(tokens)), - Token::Protocol => Some(protocol_declaration(tokens)), + Token::Enum(_) => Some(enum_decl(tokens)), +// Token::Struct(_) => Some(struct_decl(tokens)), +// Token::Type(_) => Some(type_declaration(tokens)), +// Token::Protocol(_) => Some(protocol_declaration(tokens)), _ => None, } } } fn enum_decl(tokens : &mut Peekable>) -> Declaration { - consume(tokens, Token::Enum); + consume!(tokens, Token::Enum); let name = identifier(tokens).expect("Expected Identifier after `enum`"); - consume(tokens, Token::LBrace); + consume!(tokens, Token::LBrace); let mut members = Vec::new(); match tokens.peek().expect("Unexpected EOF after LBrace") { - Token::RBrace => {}, // skip checking for enum_members if empty + Token::RBrace(_) => {}, // skip checking for enum_members if empty _ => { enum_members(tokens, &mut members); }, } - consume(tokens, Token::RBrace); + consume!(tokens, Token::RBrace); Declaration::EnumDeclaration(EnumDeclaration{name, members}) } @@ -115,8 +118,8 @@ fn enum_members(tokens : &mut Peekable>, members: &mut Vec { - consume(tokens, Token::Comma); + Token::Comma(_) => { + consume!(tokens, Token::Comma); if let Some(member) = enum_member(tokens) { members.push(member); } else { @@ -131,68 +134,68 @@ fn enum_members(tokens : &mut Peekable>, members: &mut Vec>) -> Option { let name = identifier(tokens); if let Some(name) = name { - consume(tokens, Token::Equal); + consume!(tokens, Token::Equal); let number = parse_number(tokens).expect("Expected Number after `=`"); Some(EnumMember{name, number}) } else { None } } - -fn struct_decl(tokens : &mut Peekable>) -> Declaration { - consume(tokens, Token::Struct); - let name = identifier(tokens).expect("Expected Identifier after `struct`"); - consume(tokens, Token::LBrace); - let mut members = Vec::new(); - - match tokens.peek().expect("Unexpected EOF after LBrace") { - Token::RBrace => {}, // skip checking for struct_members if empty - _ => { - struct_members(tokens, &mut members); - }, - } - - consume(tokens, Token::RBrace); - - Declaration::StructDeclaration(StructDeclaration{name, members}) -} - -fn struct_members(tokens : &mut Peekable>, members: &mut Vec) { - members.push(struct_member(tokens).unwrap()); - loop { - match tokens.peek().expect("Unexpected EOF inside struct declaration") { - Token::Comma => { - consume(tokens, Token::Comma); - if let Some(member) = struct_member(tokens) { - members.push(member); - } else { - break; - } - }, - _ => {}, - } - } -} -fn struct_member(tokens : &mut Peekable>) -> Option { - let name = identifier(tokens); - if let Some(name) = name { - consume(tokens, Token::Colon); - let type_name = identifier(tokens).expect("Expected Type after Colon"); - Some(StructMember{name, type_name}) - } else { - None - } -} - -fn type_declaration(tokens : &mut Peekable>) -> Declaration { - consume(tokens, Token::Type); - let name = identifier(tokens).expect("Expected Identifier after `type`"); - let type_name = identifier(tokens).expect("Expected type after Identifier"); - Declaration::TypeDeclaration(TypeDeclaration{name, type_name}) -} +// +// fn struct_decl(tokens : &mut Peekable>) -> Declaration { +// consume(tokens, Token::Struct); +// let name = identifier(tokens).expect("Expected Identifier after `struct`"); +// consume(tokens, Token::LBrace); +// let mut members = Vec::new(); +// +// match tokens.peek().expect("Unexpected EOF after LBrace") { +// Token::RBrace => {}, // skip checking for struct_members if empty +// _ => { +// struct_members(tokens, &mut members); +// }, +// } +// +// consume(tokens, Token::RBrace); +// +// Declaration::StructDeclaration(StructDeclaration{name, members}) +// } +// +// fn struct_members(tokens : &mut Peekable>, members: &mut Vec) { +// members.push(struct_member(tokens).unwrap()); +// loop { +// match tokens.peek().expect("Unexpected EOF inside struct declaration") { +// Token::Comma => { +// consume(tokens, Token::Comma); +// if let Some(member) = struct_member(tokens) { +// members.push(member); +// } else { +// break; +// } +// }, +// _ => {}, +// } +// } +// } +// fn struct_member(tokens : &mut Peekable>) -> Option { +// let name = identifier(tokens); +// if let Some(name) = name { +// consume(tokens, Token::Colon); +// let type_name = identifier(tokens).expect("Expected Type after Colon"); +// Some(StructMember{name, type_name}) +// } else { +// None +// } +// } +// +// fn type_declaration(tokens : &mut Peekable>) -> Declaration { +// consume(tokens, Token::Type); +// let name = identifier(tokens).expect("Expected Identifier after `type`"); +// let type_name = identifier(tokens).expect("Expected type after Identifier"); +// Declaration::TypeDeclaration(TypeDeclaration{name, type_name}) +// } fn identifier(tokens : &mut Peekable>) -> Option { let result = tokens.peek().map_or(None, |x| match x { - Token::Identifier(s) => { + Token::Identifier((_, _, s)) => { Some(s.to_string()) }, _ => None @@ -206,7 +209,7 @@ fn identifier(tokens : &mut Peekable>) -> Option { } fn parse_number(tokens : &mut Peekable>) -> Option { let result = tokens.peek().map_or(None, |x| match x { - Token::Number(s) => Some(*s), + Token::Number((_, _, s)) => Some(*s), _ => None }); if let Some(_) = result { @@ -214,61 +217,61 @@ fn parse_number(tokens : &mut Peekable>) -> Option { } result } - -fn protocol_declaration(tokens : &mut Peekable>) -> Declaration { - consume(tokens, Token::Protocol); - let name = identifier(tokens).expect("Expected Identifier after `protocol`"); - consume(tokens, Token::LBrace); - let mut interface = Vec::new(); - match tokens.peek().expect("Unexpected EOF after LBrace") { - Token::RBrace => {}, - _ => { - functions(tokens, &mut interface); - }, - }; - Declaration::ProtocolDeclaration(ProtocolDeclaration{name, interface}) -} - -fn functions(tokens : &mut Peekable>, interface : &mut Vec) { - loop { - match function(tokens) { - Some(x) => interface.push(x), - None => break, - } - } -} - -fn function(tokens : &mut Peekable>) -> Option{ - if let Some(Token::Fn) = tokens.peek() { - consume(tokens, Token::Fn); - let name = identifier(tokens).expect("Expected Identifier after `fn`"); - consume(tokens, Token::LParen); - let arg_list = arg_list(tokens); - consume(tokens, Token::RParen); - consume(tokens, Token::RArrow); - let return_type = identifier(tokens).expect("Expected return type after `->"); - Some(FuncDeclaration{name, arg_list, return_type}) - } else { - None - } -} - -fn arg_list(tokens : &mut Peekable>) -> Vec { - let mut result = Vec::new(); - loop { - match identifier(tokens) { - None => break, - Some(i) =>{ - result.push(i); - if **tokens.peek().expect("Unexpected EOF in argument list") == Token::Comma{ - consume(tokens, Token::Comma); - match tokens.peek().expect("Unexpected EOF in argument list") { - Token::Identifier(_) => {}, - _ => panic!("Unexpected symbol after Comma in argument list"), - } - } - } - } - } - result -} +// +// fn protocol_declaration(tokens : &mut Peekable>) -> Declaration { +// consume(tokens, Token::Protocol); +// let name = identifier(tokens).expect("Expected Identifier after `protocol`"); +// consume(tokens, Token::LBrace); +// let mut interface = Vec::new(); +// match tokens.peek().expect("Unexpected EOF after LBrace") { +// Token::RBrace => {}, +// _ => { +// functions(tokens, &mut interface); +// }, +// }; +// Declaration::ProtocolDeclaration(ProtocolDeclaration{name, interface}) +// } +// +// fn functions(tokens : &mut Peekable>, interface : &mut Vec) { +// loop { +// match function(tokens) { +// Some(x) => interface.push(x), +// None => break, +// } +// } +// } +// +// fn function(tokens : &mut Peekable>) -> Option{ +// if let Some(Token::Fn) = tokens.peek() { +// consume(tokens, Token::Fn); +// let name = identifier(tokens).expect("Expected Identifier after `fn`"); +// consume(tokens, Token::LParen); +// let arg_list = arg_list(tokens); +// consume(tokens, Token::RParen); +// consume(tokens, Token::RArrow((1,1))); +// let return_type = identifier(tokens).expect("Expected return type after `->"); +// Some(FuncDeclaration{name, arg_list, return_type}) +// } else { +// None +// } +// } +// +// fn arg_list(tokens : &mut Peekable>) -> Vec { +// let mut result = Vec::new(); +// loop { +// match identifier(tokens) { +// None => break, +// Some(i) =>{ +// result.push(i); +// if **tokens.peek().expect("Unexpected EOF in argument list") == Token::Comma{ +// consume(tokens, Token::Comma); +// match tokens.peek().expect("Unexpected EOF in argument list") { +// Token::Identifier(_) => {}, +// _ => panic!("Unexpected symbol after Comma in argument list"), +// } +// } +// } +// } +// } +// result +// }