Compare commits

...

11 commits

Author SHA1 Message Date
Talha Qamar 923e8b7218 added some basic validation 2024-09-19 14:00:25 +05:00
Talha Qamar 0e5d5f7de7 Brought parser back completely from the dead 2024-09-14 06:09:41 +05:00
Talha Qamar d0d6f0475e Added types 2024-09-14 05:57:18 +05:00
Talha Qamar 0775d0c70a Added locations to Tokens in the parser 2024-09-13 14:40:09 +05:00
Talha Qamar 86e1020bc7 fixed warnings 2024-09-11 11:11:46 +05:00
Talha Qamar 39f7d5aba4 Fixed lexer not taking identifiers with number in them and finished parsing support for protocols 2024-09-11 11:05:05 +05:00
Talha Qamar 45950fdb34 Added ProtocolDeclaration datatype 2024-09-11 09:00:08 +05:00
Talha Qamar f9451e3d7d preparing materials for adding protocol to parser and added type aliases to parser 2024-09-11 05:18:11 +05:00
Talha Qamar 6be6635e4e big rewrite 2024-09-11 04:50:51 +05:00
Talha Qamar b5cdc9c4fd changed file extension to .aldi and made it so that decorators don't work 2024-09-11 01:29:18 +05:00
Talha Qamar 94b7aabdec Added struct declarations to parsers 2024-09-11 01:17:36 +05:00
8 changed files with 657 additions and 64 deletions

31
dev/aldi.bnf Normal file
View file

@ -0,0 +1,31 @@
declarations ::= <declaration> <declarations>
declaration ::= <enum_decl> | <struct_decl> | <type_decl> | <protocol_decl>
type_decl ::= "type" <ident> <ident> ";"
enum_decl ::= "enum" <ident> "{" "}"
| "enum" <ident> "{" <enum_members> "}"
enum_members ::= <enum_member> ["," <enum_member>]+ [","]
enum_member ::= <ident> "=" <number>
struct_decl ::= "struct" <ident> "{" "}"
| "struct" <ident> "{" <struct_members> "}"
struct_members ::= <struct_member>
| <struct_member> ","
| <struct_member> "," <struct_members>
protocol_decl ::= "protocol" <ident> "{" "}"
| "protocol" <ident> "{" <protocol_member>+ "}"
protocol_member ::= "fn" <ident>"("[<arg_list>]")" "->" <ident> ";"
arg_list ::= <ident> ["," <ident>]+

View file

@ -1,56 +1,78 @@
pub mod protocol; mod parser;
mod types;
mod protocol;
use crate::idl::{parser::parse, types::get_protocols};
use std::io::Read; use std::io::Read;
use { use logos::{Lexer, Logos, Skip};
logos::{Lexer, Logos},
protocol::Protocol,
};
#[derive(Logos, Debug, PartialEq, Clone)] #[derive(Logos, Debug, PartialEq, Clone)]
#[logos(skip r"[ \t\n\f]+")] // Ignore this regex pattern between tokens #[logos(extras = (usize, usize))]
enum Token { enum Token {
#[regex("//[^\n]*", logos::skip)]
Comment,
#[regex(r"[ \t\f]+", logos::skip)]
Ignored,
#[regex(r"\n", newline_callback)]
Newline,
// Tokens can be literal strings, of any length. // Tokens can be literal strings, of any length.
#[token("protocol")] #[token("protocol", get_token_position)]
Protocol, Protocol((usize,usize)),
#[token("{")] // Tokens can be literal strings, of any length.
LBrace, #[token("type", get_token_position)]
Type((usize, usize)),
#[token("}")] #[token("fn", priority = 5, callback = get_token_position)]
RBrace, Fn((usize, usize)),
#[token("(")] #[token("enum", get_token_position)]
LParen, Enum((usize, usize)),
#[token(")")] #[token("struct", get_token_position)]
RParen, Struct((usize, usize)),
#[token(":")] #[token("{", get_token_position)]
Colon, LBrace((usize, usize)),
#[token(";")]
SemiColon,
#[token(",")] #[token("}", get_token_position)]
Comma, RBrace((usize, usize)),
#[token("=")] #[token("(", get_token_position)]
Equal, LParen((usize, usize)),
#[token("->")] #[token(")", get_token_position)]
RArrow, RParen((usize, usize)),
#[regex("[a-zA-Z_]+", |lex|{lex.slice().to_string()})] #[token(":", get_token_position)]
Text(String), Colon((usize, usize)),
#[regex("[1234567890]+", |lex|{lex.slice().parse::<u64>().unwrap()})] #[token(";", get_token_position)]
Number(u64), SemiColon((usize, usize)),
#[regex(r"@[a-zA-Z_]+", |lex|{lex.slice().to_string()})] #[token(",", get_token_position)] Comma((usize, usize)),
Decorator(String),
#[regex(r#"@[a-zA-Z_]+\([a-zA-Z,0-9=]+\)"#, |lex|{lex.slice().to_string()})] #[token("=", get_token_position)]
DecoratorOption(String), Equal((usize, usize)),
#[token("->", get_token_position)]
RArrow((usize, usize)),
#[regex("[a-zA-Z_][a-zA-Z_1234567890]+", |lex|{let text = lex.slice().to_string(); let (line, col) = get_token_position(lex); (line, col, text)})]
Identifier((usize, usize, String)),
#[regex("[1234567890]+", |lex|{let num = lex.slice().parse::<u64>().unwrap(); let (line, col) = get_token_position(lex); (line, col, num) })]
Number((usize, usize, u64)),
#[regex(r"@[a-zA-Z_]+", /*|lex|{lex.slice().to_string()}*/ logos::skip)]
Decorator,
#[regex(r#"@[a-zA-Z_]+\([a-zA-Z,0-9=]+\)"#, /*|lex|{lex.slice().to_string()}*/ logos::skip)]
DecoratorOption,
} }
pub fn build_idl(name: String) { pub fn build_idl(name: String) {
@ -60,25 +82,36 @@ pub fn build_idl(name: String) {
for x in lex { for x in lex {
match x { match x {
Ok(token) => { Ok(token) => {
println!("{:?}", token);
tokens.push(token); tokens.push(token);
} }
Err(err) => println!("{:?}", err), Err(err) => println!("{:?}", err),
} }
} }
build(tokens);
}
fn build(a: Vec<Token>) { let protocols = get_protocols(parse(tokens));
for toke in a { let data : Vec<u8> = vec![1, 5, 12, 12, 12, 12, 3, 28, 8, 28];
println!("{:?}", toke); println!("{:#?}", &protocols);
} protocols.validate("Foo", "bar" , data).unwrap();
} }
fn open_protocol(name: String) -> String { fn open_protocol(name: String) -> String {
let path = format!("sysdata/idl/{}/src/protocol.aidl", name); let path = format!("sysdata/idl/{}/src/protocol.aldi", name);
let mut file = std::fs::File::open(path).unwrap(); let mut file = std::fs::File::open(path).unwrap();
let mut contents = String::new(); let mut contents = String::new();
file.read_to_string(&mut contents).unwrap(); file.read_to_string(&mut contents).unwrap();
contents contents
} }
/// Computes the `(line, column)` pair stored in position-carrying tokens.
///
/// `lex.extras.0` is returned unchanged as the line, and the column is the
/// token's start offset relative to `lex.extras.1` (the offset recorded by
/// `newline_callback` at the last newline), shifted so the first column is 1.
fn get_token_position(lex: &mut Lexer<Token>) -> (usize, usize) {
    let (line, line_start) = lex.extras;
    let token_start = lex.span().start;
    (line, token_start - line_start + 1)
}
/// Lexer callback for `\n`: bumps the line counter in `extras.0`, records the
/// offset just past the newline in `extras.1`, and tells logos to skip the
/// token.
fn newline_callback(lex: &mut Lexer<Token>) -> Skip {
    let next_line_start = lex.span().end;
    let (line, line_start) = &mut lex.extras;
    *line += 1;
    *line_start = next_line_start;
    Skip
}

277
dev/src/idl/parser.rs Normal file
View file

@ -0,0 +1,277 @@
use std::{iter::Peekable, slice::Iter};
use super::Token;
/// Result of `parse`: the top-level declarations, in the order the parser
/// encountered them.
#[derive(Debug, Clone, PartialEq)]
pub struct AST(pub Vec<Declaration>);
/// One top-level item of an IDL file; each variant wraps the matching
/// declaration struct.
#[derive(Debug, Clone, PartialEq)]
pub enum Declaration {
    /// `enum Name { ... }`
    EnumDeclaration(EnumDeclaration),
    /// `struct Name { ... }`
    StructDeclaration(StructDeclaration),
    /// `type Name Target`
    TypeDeclaration(TypeDeclaration),
    /// `protocol Name { ... }`
    ProtocolDeclaration(ProtocolDeclaration),
}
/// A parsed `struct` declaration.
#[derive(Debug, Clone, PartialEq)]
pub struct StructDeclaration {
    /// Identifier following the `struct` keyword.
    pub name: String,
    /// Members in source order.
    pub members: Vec<StructMember>,
}
/// A parsed `type` declaration (a type alias).
#[derive(Debug, Clone, PartialEq)]
pub struct TypeDeclaration {
    /// First identifier after `type`: the name being introduced.
    pub name: String,
    /// Second identifier: the name of the type it stands for.
    pub type_name: String,
}
/// A single `name: type` entry inside a struct declaration.
#[derive(Debug, Clone, PartialEq)]
pub struct StructMember {
    /// Identifier before the colon.
    pub name: String,
    /// Identifier after the colon.
    pub type_name: String,
}
/// A parsed `enum` declaration.
#[derive(Debug, Clone, PartialEq)]
pub struct EnumDeclaration {
    /// Identifier following the `enum` keyword.
    pub name: String,
    /// Members in source order.
    pub members: Vec<EnumMember>,
}
/// A single `Name = number` entry inside an enum declaration.
#[derive(Debug, Clone, PartialEq)]
pub struct EnumMember {
    /// Identifier before the `=`.
    pub name: String,
    /// Numeric literal after the `=`.
    pub number: u64,
}
/// A parsed `protocol` declaration.
#[derive(Debug, Clone, PartialEq)]
pub struct ProtocolDeclaration {
    /// Identifier following the `protocol` keyword.
    pub name: String,
    /// Functions declared inside the braces, in source order.
    pub interface: Vec<FuncDeclaration>,
}
/// A parsed `fn` signature inside a protocol.
#[derive(Debug, Clone, PartialEq)]
pub struct FuncDeclaration {
    /// Identifier following the `fn` keyword.
    pub name: String,
    /// Identifiers listed between the parentheses, in order.
    pub arg_list: Vec<String>,
    /// Identifier following `->`.
    pub return_type: String,
}
/// Pulls the next token from `$tokens` and insists that it is the tuple
/// variant `$token_type`; its payload is ignored.
///
/// Panics when the iterator is exhausted or when the token is any other
/// variant.
macro_rules! consume {
    ($tokens:ident, $token_type:path) => {
        match $tokens.next() {
            Some($token_type(_)) => {}
            Some(unexpected) => {
                panic!("Expected {:?}, Got {:?}", stringify!($token_type), unexpected)
            }
            None => panic!("Expected {}, Got End Of Tokens", stringify!($token_type)),
        }
    };
}
/// Parses a token stream into an [`AST`].
///
/// Parsing is delegated to `declarations`; malformed input makes the
/// underlying parser functions panic.
pub fn parse(tokens: Vec<Token>) -> AST {
    let mut cursor = tokens.iter().peekable();
    let decls = declarations(&mut cursor);
    AST(decls)
}
/// Collects declarations until `declaration` reports that none follows.
fn declarations(tokens: &mut Peekable<Iter<'_, Token>>) -> Vec<Declaration> {
    let mut decls = Vec::new();
    while let Some(decl) = declaration(tokens) {
        decls.push(decl);
    }
    decls
}
/// Parses one top-level declaration, dispatching on the next token.
///
/// Returns `None` at end of input and also when the next token does not
/// start a declaration; in the latter case the token is left unconsumed.
fn declaration(tokens: &mut Peekable<Iter<'_, Token>>) -> Option<Declaration> {
    let decl = match tokens.peek()? {
        Token::Enum(_) => enum_decl(tokens),
        Token::Struct(_) => struct_decl(tokens),
        Token::Type(_) => type_declaration(tokens),
        Token::Protocol(_) => protocol_declaration(tokens),
        _ => return None,
    };
    Some(decl)
}
/// Parses `enum <ident> { <members>? }`.
///
/// Panics on any token that does not fit that shape or on premature end of
/// input.
fn enum_decl(tokens: &mut Peekable<Iter<'_, Token>>) -> Declaration {
    consume!(tokens, Token::Enum);
    let name = identifier(tokens).expect("Expected Identifier after `enum`");
    consume!(tokens, Token::LBrace);

    let mut members = Vec::new();
    // An immediately following `}` means an empty body: nothing to parse.
    let body_is_empty = matches!(
        tokens.peek().expect("Unexpected EOF after LBrace"),
        Token::RBrace(_)
    );
    if !body_is_empty {
        enum_members(tokens, &mut members);
    }

    consume!(tokens, Token::RBrace);
    Declaration::EnumDeclaration(EnumDeclaration { name, members })
}
/// Parses a comma-separated, non-empty list of enum members into `members`.
///
/// A trailing comma is allowed. Parsing stops, without consuming it, at the
/// first token that is not a comma (normally the closing `}`), so the caller
/// can check the terminator itself.
///
/// # Panics
/// Panics if the list does not start with a member, if a member is
/// malformed, or if the input ends inside the list.
fn enum_members(tokens: &mut Peekable<Iter<'_, Token>>, members: &mut Vec<EnumMember>) {
    members.push(enum_member(tokens).expect("Expected enum member inside enum declaration"));
    loop {
        match tokens.peek().expect("Unexpected EOF inside enum declaration") {
            Token::Comma(_) => {
                consume!(tokens, Token::Comma);
                match enum_member(tokens) {
                    Some(member) => members.push(member),
                    // Trailing comma: nothing follows it but the terminator.
                    None => break,
                }
            }
            // Anything other than a comma ends the list. Without this break
            // the loop would spin forever on e.g. `enum E { A = 0 }`.
            _ => break,
        }
    }
}
/// Parses `<ident> = <number>`.
///
/// Returns `None`, consuming nothing, when the next token is not an
/// identifier. Panics if the identifier is not followed by `=` and a number.
fn enum_member(tokens: &mut Peekable<Iter<'_, Token>>) -> Option<EnumMember> {
    let name = identifier(tokens)?;
    consume!(tokens, Token::Equal);
    let number = parse_number(tokens).expect("Expected Number after `=`");
    Some(EnumMember { name, number })
}
/// Parses `struct <ident> { <members>? }`.
///
/// Panics on any token that does not fit that shape or on premature end of
/// input.
fn struct_decl(tokens: &mut Peekable<Iter<'_, Token>>) -> Declaration {
    consume!(tokens, Token::Struct);
    let name = identifier(tokens).expect("Expected Identifier after `struct`");
    consume!(tokens, Token::LBrace);

    let mut members = Vec::new();
    // An immediately following `}` means an empty body: nothing to parse.
    let body_is_empty = matches!(
        tokens.peek().expect("Unexpected EOF after LBrace"),
        Token::RBrace(_)
    );
    if !body_is_empty {
        struct_members(tokens, &mut members);
    }

    consume!(tokens, Token::RBrace);
    Declaration::StructDeclaration(StructDeclaration { name, members })
}
/// Parses a comma-separated, non-empty list of struct members into `members`.
///
/// A trailing comma is allowed. Parsing stops, without consuming it, at the
/// first token that is not a comma (normally the closing `}`), so the caller
/// can check the terminator itself.
///
/// # Panics
/// Panics if the list does not start with a member, if a member is
/// malformed, or if the input ends inside the list.
fn struct_members(tokens: &mut Peekable<Iter<'_, Token>>, members: &mut Vec<StructMember>) {
    members.push(struct_member(tokens).expect("Expected struct member inside struct declaration"));
    loop {
        match tokens.peek().expect("Unexpected EOF inside struct declaration") {
            Token::Comma(_) => {
                consume!(tokens, Token::Comma);
                match struct_member(tokens) {
                    Some(member) => members.push(member),
                    // Trailing comma: nothing follows it but the terminator.
                    None => break,
                }
            }
            // Anything other than a comma ends the list. Without this break
            // the loop would spin forever on e.g. `struct S { a: u8 }`.
            _ => break,
        }
    }
}
/// Parses `<ident> : <ident>`.
///
/// Returns `None`, consuming nothing, when the next token is not an
/// identifier. Panics if the name is not followed by `:` and a type name.
fn struct_member(tokens: &mut Peekable<Iter<'_, Token>>) -> Option<StructMember> {
    let name = identifier(tokens)?;
    consume!(tokens, Token::Colon);
    let type_name = identifier(tokens).expect("Expected Type after Colon");
    Some(StructMember { name, type_name })
}
/// Parses `type <ident> <ident> ;`.
///
/// The terminating `;` is consumed here. If it were left in the stream, the
/// top-level dispatcher would see a token that starts no declaration and
/// silently stop parsing everything after the alias.
///
/// # Panics
/// Panics if either identifier or the `;` is missing.
fn type_declaration(tokens: &mut Peekable<Iter<'_, Token>>) -> Declaration {
    consume!(tokens, Token::Type);
    let name = identifier(tokens).expect("Expected Identifier after `type`");
    let type_name = identifier(tokens).expect("Expected type after Identifier");
    consume!(tokens, Token::SemiColon);
    Declaration::TypeDeclaration(TypeDeclaration { name, type_name })
}
/// Consumes the next token if it is an identifier and returns its text.
///
/// Any other token (or end of input) yields `None` and leaves the stream
/// untouched.
fn identifier(tokens: &mut Peekable<Iter<'_, Token>>) -> Option<String> {
    let name = match tokens.peek() {
        Some(Token::Identifier((_, _, text))) => text.to_string(),
        _ => return None,
    };
    tokens.next();
    Some(name)
}
/// Consumes the next token if it is a number and returns its value.
///
/// Any other token (or end of input) yields `None` and leaves the stream
/// untouched.
fn parse_number(tokens: &mut Peekable<Iter<'_, Token>>) -> Option<u64> {
    let value = match tokens.peek() {
        Some(Token::Number((_, _, n))) => *n,
        _ => return None,
    };
    tokens.next();
    Some(value)
}
/// Parses `protocol <ident> { <function>* }`.
///
/// The closing `}` is consumed here. If it were left in the stream, the
/// top-level dispatcher would stop at it and every declaration after the
/// protocol would be silently dropped.
///
/// # Panics
/// Panics on a missing name or brace, on a malformed function, or on
/// premature end of input.
fn protocol_declaration(tokens: &mut Peekable<Iter<'_, Token>>) -> Declaration {
    consume!(tokens, Token::Protocol);
    let name = identifier(tokens).expect("Expected Identifier after `protocol`");
    consume!(tokens, Token::LBrace);

    let mut interface = Vec::new();
    match tokens.peek().expect("Unexpected EOF after LBrace") {
        // Empty body: nothing to parse.
        Token::RBrace(_) => {}
        _ => functions(tokens, &mut interface),
    }

    consume!(tokens, Token::RBrace);
    Declaration::ProtocolDeclaration(ProtocolDeclaration { name, interface })
}

/// Appends consecutive function declarations to `interface` until the next
/// token no longer starts one.
fn functions(tokens: &mut Peekable<Iter<'_, Token>>, interface: &mut Vec<FuncDeclaration>) {
    while let Some(func) = function(tokens) {
        interface.push(func);
    }
}

/// Parses `fn <ident> ( <arg_list>? ) -> <ident> ;`.
///
/// Returns `None`, consuming nothing, when the next token is not `fn`.
/// The terminating `;` is consumed so that the following function in the
/// same protocol can be parsed.
///
/// # Panics
/// Panics if anything after the `fn` keyword does not fit the shape above.
fn function(tokens: &mut Peekable<Iter<'_, Token>>) -> Option<FuncDeclaration> {
    if !matches!(tokens.peek(), Some(Token::Fn(_))) {
        return None;
    }

    consume!(tokens, Token::Fn);
    let name = identifier(tokens).expect("Expected Identifier after `fn`");
    consume!(tokens, Token::LParen);
    let arg_list = arg_list(tokens);
    consume!(tokens, Token::RParen);
    consume!(tokens, Token::RArrow);
    let return_type = identifier(tokens).expect("Expected return type after `->");
    consume!(tokens, Token::SemiColon);

    Some(FuncDeclaration { name, arg_list, return_type })
}
/// Parses a possibly empty, comma-separated list of identifiers.
///
/// Stops, without consuming it, at the first token that is neither part of
/// the list nor a separating comma (normally the closing `)`). Two
/// identifiers must be separated by a comma: `a b` ends the list after `a`,
/// leaving `b` for the caller to reject.
///
/// # Panics
/// Panics if a comma is not followed by an identifier, or if the input ends
/// inside the list.
fn arg_list(tokens: &mut Peekable<Iter<'_, Token>>) -> Vec<String> {
    let mut result = Vec::new();
    while let Some(arg) = identifier(tokens) {
        result.push(arg);
        match tokens.peek().expect("Unexpected EOF in argument list") {
            Token::Comma(_) => {
                consume!(tokens, Token::Comma);
                // A comma promises another argument; no trailing comma here.
                match tokens.peek().expect("Unexpected EOF in argument list") {
                    Token::Identifier(_) => {}
                    _ => panic!("Unexpected symbol after Comma in argument list"),
                }
            }
            // No separator means the list is over.
            _ => break,
        }
    }
    result
}

View file

@ -1,17 +1,167 @@
pub enum ProtocolTypes { use std::collections::HashMap;
Byte, use crate::idl::types::Type;
#[derive(Debug, Clone, PartialEq)]
pub struct Function{
pub arguments : Vec<String>,
}
#[derive(Debug, Clone, PartialEq)]
pub struct Protocol{
interface : HashMap<String, Function>,
} }
pub struct Protocol {} #[derive(Debug, Clone, PartialEq)]
impl Protocol { pub struct Protocols {
pub fn is_empty(&self) -> bool { protocols : HashMap<String, Protocol>,
true symbol_table : HashMap<String, Type>,
}
#[derive(Debug, Clone, PartialEq)]
pub enum ValidationError{
IncorrectVersion,
InvalidHeader,
FunctionDoesNotExist,
ProtocolDoesNotExist,
InvalidSize,
InvalidArgument,
NonExistentType(String),
}
impl Protocols {
pub fn new(symbol_table: HashMap<String, Type>) -> Self {
let protocols = HashMap::new();
Self { protocols, symbol_table }
}
pub fn add_protocol(&mut self, name : String, interface : HashMap<String, Function>) {
self.protocols.insert(name, Protocol::new(interface));
} }
pub fn validate_data(&self, data: Vec<u8>) -> bool { pub fn validate(&self, protocol_name : &str, function_name : &str, data : Vec<u8>) -> Result<(), ValidationError>{
if !data.is_empty() && self.is_empty() { match self.protocols.get(protocol_name) {
return false; Some(s) => s.validate(function_name, data, &self.symbol_table),
None => Err(ValidationError::ProtocolDoesNotExist),
}
}
}
impl Protocol {
pub fn new(interface: HashMap<String, Function>) -> Self {
Self {interface}
}
fn validate(&self, function_name : &str, data : Vec<u8>, symbols : &HashMap<String, Type>) -> Result<(), ValidationError> {
match self.interface.get(function_name){
Some(s) => s.validate(data, symbols),
None => Err(ValidationError::FunctionDoesNotExist),
} }
true
} }
} }
impl Function {
    /// Checks that `data` is a well-formed encoding of a call to this function.
    ///
    /// Layout accepted by this implementation:
    /// * byte 0: version, which must be `1`;
    /// * then, for every declared argument in order, one tag byte followed
    ///   by a fixed-width payload. Tags: 0 = u64, 1 = u32, 2 = u16, 3 = u8,
    ///   4 = i64, 5 = i32, 6 = i16, 7 = i8, 8 = bool, 9 = f32, 10 = f64,
    ///   11 = str. Payload widths are 8, 4, 2 or 1 bytes according to the
    ///   type (bool is 1 byte); payload contents are not inspected.
    ///
    /// Each argument name is looked up in `symbols` and the tag must decode
    /// to exactly that type.
    ///
    /// # Errors
    /// * `NonExistentType(name)`: an argument type is not in `symbols`.
    /// * `InvalidHeader`: `data` is empty.
    /// * `IncorrectVersion`: the version byte is not `1`.
    /// * `InvalidArgument`: unknown tag, or the tag differs from the
    ///   declared type.
    /// * `InvalidSize`: more arguments than declared, fewer arguments than
    ///   declared, or a payload cut short.
    ///
    /// # Panics
    /// Validation of `str` arguments is not implemented and hits `todo!()`.
    fn validate(&self, data : Vec<u8>, symbols : &HashMap<String, Type>) -> Result<(), ValidationError> {
        // Resolve every declared argument type up front so that a bad
        // declaration is reported regardless of what the payload looks like.
        let mut declared_types = Vec::with_capacity(self.arguments.len());
        for arg in &self.arguments {
            match symbols.get(arg) {
                Some(ty) => declared_types.push(ty),
                None => return Err(ValidationError::NonExistentType(arg.to_string())),
            }
        }

        let mut bytes = data.iter();
        match bytes.next() {
            None => return Err(ValidationError::InvalidHeader),
            Some(1) => {}
            Some(_) => return Err(ValidationError::IncorrectVersion),
        }

        let mut remaining = declared_types.into_iter();
        // Not a `for` loop: the payload bytes are pulled from the same
        // iterator inside the body.
        while let Some(&tag) = bytes.next() {
            let declared = match remaining.next() {
                Some(ty) => ty,
                // Data continues after the last declared argument.
                None => return Err(ValidationError::InvalidSize),
            };

            let wire_type = match tag {
                0 => Type::U64,
                1 => Type::U32,
                2 => Type::U16,
                3 => Type::U8,
                4 => Type::I64,
                5 => Type::I32,
                6 => Type::I16,
                7 => Type::I8,
                8 => Type::Bool,
                9 => Type::F32,
                10 => Type::F64,
                11 => Type::Str,
                _ => return Err(ValidationError::InvalidArgument),
            };
            if wire_type != *declared {
                return Err(ValidationError::InvalidArgument);
            }

            let payload_len = match wire_type {
                Type::U64 | Type::I64 | Type::F64 => 8,
                Type::U32 | Type::I32 | Type::F32 => 4,
                Type::U16 | Type::I16 => 2,
                Type::U8 | Type::I8 | Type::Bool => 1,
                Type::Str => todo!(),
                // The tag table above only produces the primitive variants.
                _ => unreachable!("tag bytes never decode to struct, enum or alias types"),
            };
            for _ in 0..payload_len {
                if bytes.next().is_none() {
                    // Payload ends before the argument is complete.
                    return Err(ValidationError::InvalidSize);
                }
            }
        }

        // All bytes consumed: every declared argument must have been seen.
        if remaining.next().is_some() {
            return Err(ValidationError::InvalidSize);
        }
        Ok(())
    }
}

98
dev/src/idl/types.rs Normal file
View file

@ -0,0 +1,98 @@
use std::collections::HashMap;
use crate::idl::parser::AST;
use super::protocol::{Function, Protocols};
/// A type known to the IDL symbol table.
///
/// The variants from `U64` through `F64` are registered under their
/// lowercase names by `add_builtin_types`; `Str` has no builtin name
/// registered there.
#[derive(Debug, Clone, PartialEq)]
pub enum Type {
U64,
U32,
U16,
U8,
I64,
I32,
I16,
I8,
Bool,
F32,
F64,
Str,
// Created from a `struct` declaration.
Struct(StructType),
// Created from an `enum` declaration.
Enum(EnumType),
// Created from a `type` declaration; holds the name of the aliased type.
Alias(String),
}
/// Symbol-table entry for a struct: maps each member name to the name of
/// its type.
#[derive(Debug, Clone, PartialEq)]
pub struct StructType {
    members: HashMap<String, String>,
}
/// Symbol-table entry for an enum: maps each member name to its numeric
/// value, stored as a `u8`.
#[derive(Debug, Clone, PartialEq)]
pub struct EnumType {
    members: HashMap<String, u8>,
}
/// Registers the primitive integer, boolean and float types in
/// `symbol_table` under their lowercase names (`u8` … `f64`).
fn add_builtin_types(symbol_table: &mut HashMap<String, Type>) {
    let builtins = [
        ("u8", Type::U8),
        ("u16", Type::U16),
        ("u32", Type::U32),
        ("u64", Type::U64),
        ("i8", Type::I8),
        ("i16", Type::I16),
        ("i32", Type::I32),
        ("i64", Type::I64),
        ("bool", Type::Bool),
        ("f32", Type::F32),
        ("f64", Type::F64),
    ];
    for (name, ty) in builtins {
        symbol_table.insert(name.to_string(), ty);
    }
}
/// Builds the [`Protocols`] registry from a parsed [`AST`].
///
/// Works in two passes over the declarations:
/// 1. enums, structs and aliases are entered into a symbol table that
///    already contains the builtin types;
/// 2. every protocol is registered with its functions, each function
///    keeping the list of argument type names from the declaration.
///
/// A later declaration with the same name replaces an earlier one.
///
/// # Panics
/// Panics if an enum member's value does not fit in a `u8`, since enum
/// values are stored as `u8` and would otherwise be silently truncated.
pub fn get_protocols(ast: AST) -> Protocols {
    use super::parser::Declaration;

    let declarations = ast.0;
    let mut symbol_table: HashMap<String, Type> = HashMap::new();
    add_builtin_types(&mut symbol_table);

    // First pass: populate the symbol table.
    for decl in &declarations {
        match decl {
            Declaration::EnumDeclaration(e) => {
                let mut members = HashMap::new();
                for m in &e.members {
                    if m.number > u64::from(u8::MAX) {
                        panic!(
                            "Enum member `{}::{}` has value {} which does not fit in a u8",
                            e.name, m.name, m.number
                        );
                    }
                    // Lossless: range checked just above.
                    members.insert(m.name.to_string(), m.number as u8);
                }
                symbol_table.insert(e.name.to_string(), Type::Enum(EnumType { members }));
            }
            Declaration::StructDeclaration(s) => {
                let members = s
                    .members
                    .iter()
                    .map(|m| (m.name.to_string(), m.type_name.to_string()))
                    .collect();
                symbol_table.insert(s.name.to_string(), Type::Struct(StructType { members }));
            }
            Declaration::TypeDeclaration(t) => {
                symbol_table.insert(t.name.to_string(), Type::Alias(t.type_name.to_string()));
            }
            // Protocols are handled in the second pass.
            Declaration::ProtocolDeclaration(_) => {}
        }
    }

    // Second pass: register protocols against the finished symbol table.
    let mut protocols = Protocols::new(symbol_table);
    for decl in &declarations {
        if let Declaration::ProtocolDeclaration(p) = decl {
            let funcs: HashMap<String, Function> = p
                .interface
                .iter()
                .map(|f| (f.name.to_string(), Function { arguments: f.arg_list.clone() }))
                .collect();
            protocols.add_protocol(p.name.to_string(), funcs);
        }
    }
    protocols
}

View file

@ -57,7 +57,7 @@ pub fn new(development_type: DevelopmentType, name: String) {
let (folder_hierarchy, entry_name) = match development_type { let (folder_hierarchy, entry_name) = match development_type {
DevelopmentType::Program => ("programs", "main.hb"), DevelopmentType::Program => ("programs", "main.hb"),
DevelopmentType::Library => ("libraries", "lib.hb"), DevelopmentType::Library => ("libraries", "lib.hb"),
DevelopmentType::IDL => ("idl", "protocol.aidl"), DevelopmentType::IDL => ("idl", "protocol.aldi"),
}; };
let project_folder_path_string = format!("sysdata/{folder_hierarchy}/{name}"); let project_folder_path_string = format!("sysdata/{folder_hierarchy}/{name}");
@ -117,8 +117,8 @@ fn build(name: String) {
} }
} }
pub fn build_program(name: String) {} pub fn build_program(_name: String) {}
pub fn build_library(name: String) {} pub fn build_library(_name: String) {}
fn help() { fn help() {
println!( println!(

View file

@ -1,24 +1,25 @@
@auto_increment // A comment
//@auto_increment
enum LogLevel { enum LogLevel {
Error = 0, Error = 0,
Warn, Warn = 1,
Info, Info = 2,
Debug, Debug = 3,
Trace, Trace = 4,
} }
@auto_increment //@auto_increment
enum LogResult { enum LogResult {
Err = 0, Err = 0,
Ok, Ok = 1,
} }
struct Log { struct Log {
log_level: LogLevel, log_level: LogLevel,
} }
@visibility(public) //@visibility(public)
protocol Log { protocol Log {
fn log(Log) -> LogResult; fn log(Log) -> LogResult;
fn flush() -> LogResult; fn flush() -> LogResult;
} }

View file

@ -0,0 +1,3 @@
protocol Foo{
fn bar(i32, u8, bool) -> void;
}