1
0
Fork 0
forked from AbleOS/ableos

Compare commits

...

4 commits

Author SHA1 Message Date
Talha Qamar 923e8b7218 added some basic validation 2024-09-19 14:00:25 +05:00
Talha Qamar 0e5d5f7de7 Brought parser back completely from the dead 2024-09-14 06:09:41 +05:00
Talha Qamar d0d6f0475e Added types 2024-09-14 05:57:18 +05:00
Talha Qamar 0775d0c70a Added locations to Tokens in the parser 2024-09-13 14:40:09 +05:00
5 changed files with 395 additions and 119 deletions

View file

@ -1,66 +1,72 @@
pub mod protocol;
mod parser;
mod types;
mod protocol;
use crate::idl::{parser::parse, types::get_protocols};
use std::io::Read;
use self::parser::parse;
use logos::Logos;
use logos::{Lexer, Logos, Skip};
#[derive(Logos, Debug, PartialEq, Clone)]
#[logos(skip r"[ \t\n\f]+")] // Ignore this regex pattern between tokens
#[logos(extras = (usize, usize))]
enum Token {
#[regex("//[^\n]*\n", logos::skip)]
#[regex("//[^\n]*", logos::skip)]
Comment,
#[regex(r"[ \t\f]+", logos::skip)]
Ignored,
// Tokens can be literal strings, of any length.
#[token("protocol")]
Protocol,
#[regex(r"\n", newline_callback)]
Newline,
// Tokens can be literal strings, of any length.
#[token("type")]
Type,
#[token("fn", priority = 5)]
Fn,
#[token("protocol", get_token_position)]
Protocol((usize,usize)),
#[token("enum")]
Enum,
// Tokens can be literal strings, of any length.
#[token("type", get_token_position)]
Type((usize, usize)),
#[token("struct")]
Struct,
#[token("fn", priority = 5, callback = get_token_position)]
Fn((usize, usize)),
#[token("{")]
LBrace,
#[token("enum", get_token_position)]
Enum((usize, usize)),
#[token("}")]
RBrace,
#[token("struct", get_token_position)]
Struct((usize, usize)),
#[token("(")]
LParen,
#[token("{", get_token_position)]
LBrace((usize, usize)),
#[token(")")]
RParen,
#[token("}", get_token_position)]
RBrace((usize, usize)),
#[token(":")]
Colon,
#[token("(", get_token_position)]
LParen((usize, usize)),
#[token(";")]
SemiColon,
#[token(")", get_token_position)]
RParen((usize, usize)),
#[token(",")]
Comma,
#[token(":", get_token_position)]
Colon((usize, usize)),
#[token("=")]
Equal,
#[token(";", get_token_position)]
SemiColon((usize, usize)),
#[token("->")]
RArrow,
#[token(",", get_token_position)] Comma((usize, usize)),
#[regex("[a-zA-Z_][a-zA-Z_1234567890]+", |lex|{lex.slice().to_string()})]
Identifier(String),
#[token("=", get_token_position)]
Equal((usize, usize)),
#[regex("[1234567890]+", |lex|{lex.slice().parse::<u64>().unwrap()})]
Number(u64),
#[token("->", get_token_position)]
RArrow((usize, usize)),
#[regex("[a-zA-Z_][a-zA-Z_1234567890]+", |lex|{let text = lex.slice().to_string(); let (line, col) = get_token_position(lex); (line, col, text)})]
Identifier((usize, usize, String)),
#[regex("[1234567890]+", |lex|{let num = lex.slice().parse::<u64>().unwrap(); let (line, col) = get_token_position(lex); (line, col, num) })]
Number((usize, usize, u64)),
#[regex(r"@[a-zA-Z_]+", /*|lex|{lex.slice().to_string()}*/ logos::skip)]
Decorator,
@ -76,14 +82,16 @@ pub fn build_idl(name: String) {
for x in lex {
match x {
Ok(token) => {
println!("{:?}", token);
tokens.push(token);
}
Err(err) => println!("{:?}", err),
}
}
println!("{:#?}", parse(tokens));
let protocols = get_protocols(parse(tokens));
let data : Vec<u8> = vec![1, 5, 12, 12, 12, 12, 3, 28, 8, 28];
println!("{:#?}", &protocols);
protocols.validate("Foo", "bar" , data).unwrap();
}
fn open_protocol(name: String) -> String {
@ -93,3 +101,17 @@ fn open_protocol(name: String) -> String {
file.read_to_string(&mut contents).unwrap();
contents
}
/// Computes the `(line, column)` position of the token currently held by the lexer.
///
/// The line is taken verbatim from `lex.extras.0`. The column is the token's start
/// offset measured from `lex.extras.1` (the offset recorded at the last newline),
/// shifted by one so that the first character after a newline is column 1.
// NOTE(review): whether lines are 0- or 1-based depends on the initial extras passed
// when the lexer is created, which is not visible here — confirm.
fn get_token_position(lex : &mut Lexer<Token>) -> (usize, usize) {
    let (line, line_start) = lex.extras;
    let token_start = lex.span().start;
    (line, token_start - line_start + 1)
}
/// Lexer callback for newline characters.
///
/// Bumps the line counter kept in `extras.0`, records the offset just past the
/// newline in `extras.1` (used later to compute columns), and tells the lexer
/// to skip the newline instead of emitting a token for it.
fn newline_callback(lex: &mut Lexer<Token>) -> Skip {
    let (line, _) = lex.extras;
    let next_line_start = lex.span().end;
    lex.extras = (line + 1, next_line_start);
    Skip
}

View file

@ -2,67 +2,70 @@ use std::{iter::Peekable, slice::Iter};
use super::Token;
#[derive(Debug, Clone, PartialEq)]
pub struct AST(Vec<Declaration>);
pub struct AST(pub Vec<Declaration>);
#[derive(Debug, Clone, PartialEq)]
enum Declaration{
pub enum Declaration{
EnumDeclaration(EnumDeclaration),
StructDeclaration(StructDeclaration),
TypeDeclaration(TypeDeclaration),
ProtocolDeclaration(ProtocolDeclaration),
}
#[derive(Debug, Clone, PartialEq)]
struct StructDeclaration {
name : String,
members : Vec<StructMember>,
pub struct StructDeclaration {
pub name : String,
pub members : Vec<StructMember>,
}
#[derive(Debug, Clone, PartialEq)]
struct TypeDeclaration {
name : String,
type_name : String,
pub struct TypeDeclaration {
pub name : String,
pub type_name : String,
}
#[derive(Debug, Clone, PartialEq)]
struct StructMember {
name : String,
type_name : String,
pub struct StructMember {
pub name : String,
pub type_name : String,
}
#[derive(Debug, Clone, PartialEq)]
struct EnumDeclaration {
name : String,
members : Vec<EnumMember>,
pub struct EnumDeclaration {
pub name : String,
pub members : Vec<EnumMember>,
}
#[derive(Debug, Clone, PartialEq)]
struct EnumMember {
name : String,
number: u64,
pub struct EnumMember {
pub name : String,
pub number: u64,
}
#[derive(Debug, Clone, PartialEq)]
struct ProtocolDeclaration{
name : String,
interface : Vec<FuncDeclaration>,
pub struct ProtocolDeclaration{
pub name : String,
pub interface : Vec<FuncDeclaration>,
}
#[derive(Debug, Clone, PartialEq)]
struct FuncDeclaration {
name : String,
arg_list : Vec<String>,
return_type : String,
pub struct FuncDeclaration {
pub name : String,
pub arg_list : Vec<String>,
pub return_type : String,
}
/// Consumes the next token, which is expected to be of the given kind.
/// Panics if the consumed token is of a different kind, or if there are no tokens left.
fn consume(tokens : &mut Peekable<Iter<'_, Token>>, token : Token) {
let a = tokens.next();
macro_rules! consume {
($tokens:ident, $token_type:path) => {
let a = $tokens.next();
match a {
None => panic!("Expected {:?}, Got End Of File", token),
Some(a) => if *a != token {
panic!("Expected {:?}, Got {:?}", token, *a);
None => panic!("Expected {}, Got End Of Tokens", stringify!($token_type)),
Some(t) => {
if matches!(t, $token_type(_)){}
else {
panic!("Expected {:?}, Got {:?}", stringify!($token_type), t);
}
}
}
};
}
pub fn parse(tokens : Vec<Token>) -> AST{
let mut tokens_iter = tokens.iter().peekable();
@ -84,29 +87,29 @@ fn declaration(tokens : &mut Peekable<Iter<'_, Token>>) -> Option<Declaration> {
match tokens.peek(){
None => None,
Some(tok) => match tok {
Token::Enum => Some(enum_decl(tokens)),
Token::Struct => Some(struct_decl(tokens)),
Token::Type => Some(type_declaration(tokens)),
Token::Protocol => Some(protocol_declaration(tokens)),
Token::Enum(_) => Some(enum_decl(tokens)),
Token::Struct(_) => Some(struct_decl(tokens)),
Token::Type(_) => Some(type_declaration(tokens)),
Token::Protocol(_) => Some(protocol_declaration(tokens)),
_ => None,
}
}
}
fn enum_decl(tokens : &mut Peekable<Iter<'_, Token>>) -> Declaration {
consume(tokens, Token::Enum);
consume!(tokens, Token::Enum);
let name = identifier(tokens).expect("Expected Identifier after `enum`");
consume(tokens, Token::LBrace);
consume!(tokens, Token::LBrace);
let mut members = Vec::new();
match tokens.peek().expect("Unexpected EOF after LBrace") {
Token::RBrace => {}, // skip checking for enum_members if empty
Token::RBrace(_) => {}, // skip checking for enum_members if empty
_ => {
enum_members(tokens, &mut members);
},
}
consume(tokens, Token::RBrace);
consume!(tokens, Token::RBrace);
Declaration::EnumDeclaration(EnumDeclaration{name, members})
}
@ -115,8 +118,8 @@ fn enum_members(tokens : &mut Peekable<Iter<'_, Token>>, members: &mut Vec<EnumM
members.push(enum_member(tokens).unwrap());
loop {
match tokens.peek().expect("Unexpected EOF inside enum declaration") {
Token::Comma => {
consume(tokens, Token::Comma);
Token::Comma(_) => {
consume!(tokens, Token::Comma);
if let Some(member) = enum_member(tokens) {
members.push(member);
} else {
@ -131,7 +134,7 @@ fn enum_members(tokens : &mut Peekable<Iter<'_, Token>>, members: &mut Vec<EnumM
fn enum_member(tokens : &mut Peekable<Iter<'_, Token>>) -> Option<EnumMember> {
let name = identifier(tokens);
if let Some(name) = name {
consume(tokens, Token::Equal);
consume!(tokens, Token::Equal);
let number = parse_number(tokens).expect("Expected Number after `=`");
Some(EnumMember{name, number})
} else {
@ -140,19 +143,19 @@ fn enum_member(tokens : &mut Peekable<Iter<'_, Token>>) -> Option<EnumMember> {
}
fn struct_decl(tokens : &mut Peekable<Iter<'_, Token>>) -> Declaration {
consume(tokens, Token::Struct);
consume!(tokens, Token::Struct);
let name = identifier(tokens).expect("Expected Identifier after `struct`");
consume(tokens, Token::LBrace);
consume!(tokens, Token::LBrace);
let mut members = Vec::new();
match tokens.peek().expect("Unexpected EOF after LBrace") {
Token::RBrace => {}, // skip checking for struct_members if empty
Token::RBrace(_) => {}, // skip checking for struct_members if empty
_ => {
struct_members(tokens, &mut members);
},
}
consume(tokens, Token::RBrace);
consume!(tokens, Token::RBrace);
Declaration::StructDeclaration(StructDeclaration{name, members})
}
@ -161,8 +164,8 @@ fn struct_members(tokens : &mut Peekable<Iter<'_, Token>>, members: &mut Vec<Str
members.push(struct_member(tokens).unwrap());
loop {
match tokens.peek().expect("Unexpected EOF inside struct declaration") {
Token::Comma => {
consume(tokens, Token::Comma);
Token::Comma(_) => {
consume!(tokens, Token::Comma);
if let Some(member) = struct_member(tokens) {
members.push(member);
} else {
@ -176,23 +179,23 @@ fn struct_members(tokens : &mut Peekable<Iter<'_, Token>>, members: &mut Vec<Str
fn struct_member(tokens : &mut Peekable<Iter<'_, Token>>) -> Option<StructMember> {
let name = identifier(tokens);
if let Some(name) = name {
consume(tokens, Token::Colon);
consume!(tokens, Token::Colon);
let type_name = identifier(tokens).expect("Expected Type after Colon");
Some(StructMember{name, type_name})
} else {
None
}
}
}
fn type_declaration(tokens : &mut Peekable<Iter<'_, Token>>) -> Declaration {
consume(tokens, Token::Type);
consume!(tokens, Token::Type);
let name = identifier(tokens).expect("Expected Identifier after `type`");
let type_name = identifier(tokens).expect("Expected type after Identifier");
Declaration::TypeDeclaration(TypeDeclaration{name, type_name})
}
fn identifier(tokens : &mut Peekable<Iter<'_, Token>>) -> Option<String> {
let result = tokens.peek().map_or(None, |x| match x {
Token::Identifier(s) => {
Token::Identifier((_, _, s)) => {
Some(s.to_string())
},
_ => None
@ -206,7 +209,7 @@ fn identifier(tokens : &mut Peekable<Iter<'_, Token>>) -> Option<String> {
}
fn parse_number(tokens : &mut Peekable<Iter<'_, Token>>) -> Option<u64> {
let result = tokens.peek().map_or(None, |x| match x {
Token::Number(s) => Some(*s),
Token::Number((_, _, s)) => Some(*s),
_ => None
});
if let Some(_) = result {
@ -216,12 +219,12 @@ fn parse_number(tokens : &mut Peekable<Iter<'_, Token>>) -> Option<u64> {
}
fn protocol_declaration(tokens : &mut Peekable<Iter<'_, Token>>) -> Declaration {
consume(tokens, Token::Protocol);
consume!(tokens, Token::Protocol);
let name = identifier(tokens).expect("Expected Identifier after `protocol`");
consume(tokens, Token::LBrace);
consume!(tokens, Token::LBrace);
let mut interface = Vec::new();
match tokens.peek().expect("Unexpected EOF after LBrace") {
Token::RBrace => {},
Token::RBrace(_) => {},
_ => {
functions(tokens, &mut interface);
},
@ -239,13 +242,13 @@ fn functions(tokens : &mut Peekable<Iter<'_, Token>>, interface : &mut Vec<FuncD
}
fn function(tokens : &mut Peekable<Iter<'_, Token>>) -> Option<FuncDeclaration>{
if let Some(Token::Fn) = tokens.peek() {
consume(tokens, Token::Fn);
if let Some(Token::Fn(_)) = tokens.peek() {
consume!(tokens, Token::Fn);
let name = identifier(tokens).expect("Expected Identifier after `fn`");
consume(tokens, Token::LParen);
consume!(tokens, Token::LParen);
let arg_list = arg_list(tokens);
consume(tokens, Token::RParen);
consume(tokens, Token::RArrow);
consume!(tokens, Token::RParen);
consume!(tokens, Token::RArrow);
let return_type = identifier(tokens).expect("Expected return type after `->");
Some(FuncDeclaration{name, arg_list, return_type})
} else {
@ -260,8 +263,8 @@ fn arg_list(tokens : &mut Peekable<Iter<'_, Token>>) -> Vec<String> {
None => break,
Some(i) =>{
result.push(i);
if **tokens.peek().expect("Unexpected EOF in argument list") == Token::Comma{
consume(tokens, Token::Comma);
if let Token::Comma(_) = **tokens.peek().expect("Unexpected EOF in argument list"){
consume!(tokens, Token::Comma);
match tokens.peek().expect("Unexpected EOF in argument list") {
Token::Identifier(_) => {},
_ => panic!("Unexpected symbol after Comma in argument list"),

View file

@ -1,17 +1,167 @@
pub enum ProtocolTypes {
Byte,
use std::collections::HashMap;
use crate::idl::types::Type;
/// A single function of a protocol, as seen by the validator.
#[derive(Debug, Clone, PartialEq)]
pub struct Function{
/// Names of the argument types, in declaration order. Each name is looked up
/// in the symbol table when a call is validated.
pub arguments : Vec<String>,
}
/// One protocol: the set of functions it exposes.
#[derive(Debug, Clone, PartialEq)]
pub struct Protocol{
// Function name -> function description.
interface : HashMap<String, Function>,
}
pub struct Protocol {}
/// Registry of every known protocol together with the symbol table used to
/// resolve argument type names during validation.
#[derive(Debug, Clone, PartialEq)]
pub struct Protocols {
// Protocol name -> protocol.
protocols : HashMap<String, Protocol>,
// Type name -> type definition; handed to each protocol when validating.
symbol_table : HashMap<String, Type>,
}
/// Reasons an encoded call can be rejected by validation.
///
/// Implements [`std::fmt::Display`] and [`std::error::Error`] so it can be
/// reported to users and propagated with `?` through `Box<dyn Error>`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ValidationError{
    /// The leading version byte is not a supported version.
    IncorrectVersion,
    /// The data is too short to contain a header.
    InvalidHeader,
    /// The protocol has no function with the requested name.
    FunctionDoesNotExist,
    /// No protocol with the requested name is registered.
    ProtocolDoesNotExist,
    /// The amount of data does not match the declared arguments.
    InvalidSize,
    /// An encoded argument does not have the declared type.
    InvalidArgument,
    /// A declared argument names a type missing from the symbol table;
    /// carries the offending type name.
    NonExistentType(String),
}

impl std::fmt::Display for ValidationError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::IncorrectVersion => f.write_str("incorrect protocol version"),
            Self::InvalidHeader => f.write_str("invalid or missing header"),
            Self::FunctionDoesNotExist => f.write_str("function does not exist"),
            Self::ProtocolDoesNotExist => f.write_str("protocol does not exist"),
            Self::InvalidSize => f.write_str("data size does not match the declared arguments"),
            Self::InvalidArgument => f.write_str("argument does not match its declared type"),
            Self::NonExistentType(name) => write!(f, "type `{}` does not exist", name),
        }
    }
}

impl std::error::Error for ValidationError {}
impl Protocols {
    /// Creates a registry with no protocols that resolves type names through
    /// `symbol_table`.
    pub fn new(symbol_table: HashMap<String, Type>) -> Self {
        Self { protocols: HashMap::new(), symbol_table }
    }

    /// Registers a protocol under `name`, replacing any protocol previously
    /// stored under the same name.
    pub fn add_protocol(&mut self, name : String, interface : HashMap<String, Function>) {
        let protocol = Protocol::new(interface);
        self.protocols.insert(name, protocol);
    }

    /// Validates `data` as a call to `function_name` of `protocol_name`.
    ///
    /// # Errors
    /// Returns `ValidationError::ProtocolDoesNotExist` when the protocol is not
    /// registered; otherwise forwards whatever the protocol's own validation
    /// reports.
    pub fn validate(&self, protocol_name : &str, function_name : &str, data : Vec<u8>) -> Result<(), ValidationError>{
        if let Some(protocol) = self.protocols.get(protocol_name) {
            protocol.validate(function_name, data, &self.symbol_table)
        } else {
            Err(ValidationError::ProtocolDoesNotExist)
        }
    }
}
impl Protocol {
pub fn is_empty(&self) -> bool {
true
pub fn new(interface: HashMap<String, Function>) -> Self {
Self {interface}
}
fn validate(&self, function_name : &str, data : Vec<u8>, symbols : &HashMap<String, Type>) -> Result<(), ValidationError> {
match self.interface.get(function_name){
Some(s) => s.validate(data, symbols),
None => Err(ValidationError::FunctionDoesNotExist),
}
}
}
pub fn validate_data(&self, data: Vec<u8>) -> bool {
if !data.is_empty() && self.is_empty() {
return false;
impl Function {
    /// Checks that `data` is a well-formed encoded argument list for this function.
    ///
    /// The layout checked here is: one version byte (must be `1`), followed by one
    /// record per declared argument. Each record is a type-tag byte (`0..=11`)
    /// followed by a fixed-size payload of 8, 4, 2 or 1 bytes depending on the tag.
    /// Every declared argument must be present, in order, and nothing may follow
    /// the last one.
    ///
    /// # Errors
    /// - `NonExistentType` — a declared argument type is missing from `symbols`.
    /// - `InvalidHeader` — `data` is empty.
    /// - `IncorrectVersion` — the version byte is not `1`.
    /// - `InvalidSize` — more records than declared arguments, fewer records than
    ///   declared arguments, or a payload cut short.
    /// - `InvalidArgument` — unknown type tag, or a tag that differs from the
    ///   declared argument type.
    ///
    /// # Panics
    /// String payloads (`Type::Str`) are not implemented yet and hit `todo!()`.
    fn validate(&self, data : Vec<u8>, symbols : &HashMap<String, Type>) -> Result<(), ValidationError> {
        // Resolve every declared argument up front so an unknown type name is
        // reported regardless of what the data looks like.
        let mut expected = Vec::with_capacity(self.arguments.len());
        for arg in &self.arguments {
            match symbols.get(arg) {
                Some(ty) => expected.push(ty),
                None => return Err(ValidationError::NonExistentType(arg.to_string())),
            }
        }
        let mut expected = expected.into_iter();

        let mut bytes = data.iter().copied();
        match bytes.next() {
            None => return Err(ValidationError::InvalidHeader),
            Some(1) => {}
            Some(_) => return Err(ValidationError::IncorrectVersion),
        }

        // One iteration per encoded argument record.
        while let Some(tag) = bytes.next() {
            let wanted = match expected.next() {
                Some(ty) => ty,
                // More records in the data than declared arguments.
                None => return Err(ValidationError::InvalidSize),
            };

            let actual = match tag {
                0 => Type::U64,
                1 => Type::U32,
                2 => Type::U16,
                3 => Type::U8,
                4 => Type::I64,
                5 => Type::I32,
                6 => Type::I16,
                7 => Type::I8,
                8 => Type::Bool,
                9 => Type::F32,
                10 => Type::F64,
                11 => Type::Str,
                _ => return Err(ValidationError::InvalidArgument),
            };
            if actual != *wanted {
                return Err(ValidationError::InvalidArgument);
            }

            let payload_len: usize = match actual {
                Type::U64 | Type::I64 | Type::F64 => 8,
                Type::U32 | Type::I32 | Type::F32 => 4,
                Type::U16 | Type::I16 => 2,
                Type::U8 | Type::I8 | Type::Bool => 1,
                Type::Str => todo!(),
                _ => unreachable!("type tags only decode to primitive types"),
            };
            // The payload content is not inspected, but it has to be fully present.
            for _ in 0..payload_len {
                if bytes.next().is_none() {
                    return Err(ValidationError::InvalidSize);
                }
            }
        }

        // The data ran out: every declared argument must have been supplied.
        if expected.next().is_some() {
            return Err(ValidationError::InvalidSize);
        }
        Ok(())
    }
}

98
dev/src/idl/types.rs Normal file
View file

@ -0,0 +1,98 @@
use std::collections::HashMap;
use crate::idl::parser::AST;
use super::protocol::{Function, Protocols};
/// A type known to the IDL: either a primitive or something declared by the user.
#[derive(Debug, Clone, PartialEq)]
pub enum Type {
// Primitive types.
U64,
U32,
U16,
U8,
I64,
I32,
I16,
I8,
Bool,
F32,
F64,
Str,
// User-declared types.
/// A `struct` declaration with its members.
Struct(StructType),
/// An `enum` declaration with its members.
Enum(EnumType),
/// A `type` declaration; holds the name of the type it refers to.
Alias(String),
}
/// The members of a user-declared struct.
#[derive(Debug, Clone, PartialEq)]
pub struct StructType {
// Member name -> name of the member's type (kept as written, not resolved).
members : HashMap<String, String>
}
/// The members of a user-declared enum.
#[derive(Debug, Clone, PartialEq)]
pub struct EnumType {
// Member name -> numeric value of the member, stored as a `u8`.
members : HashMap<String, u8>
}
/// Seeds `symbol_table` with the primitive types, keyed by the name used for
/// them in IDL source (`u8` … `u64`, `i8` … `i64`, `bool`, `f32`, `f64`).
fn add_builtin_types(symbol_table : &mut HashMap<String, Type>) {
    let builtins = [
        ("u8", Type::U8),
        ("u16", Type::U16),
        ("u32", Type::U32),
        ("u64", Type::U64),
        ("i8", Type::I8),
        ("i16", Type::I16),
        ("i32", Type::I32),
        ("i64", Type::I64),
        ("bool", Type::Bool),
        ("f32", Type::F32),
        ("f64", Type::F64),
    ];
    for (name, builtin) in builtins {
        symbol_table.insert(name.to_string(), builtin);
    }
}
/// Builds the protocol registry described by a parsed IDL file.
///
/// Enum, struct and type-alias declarations are entered into a symbol table
/// (pre-seeded with the builtin primitive types); protocol declarations become
/// the protocols of the returned registry, each function keeping the list of
/// its argument type names. A later declaration with the same name replaces an
/// earlier one.
///
/// # Panics
/// Panics if an enum member's value does not fit in a `u8`. Enum values are
/// stored as `u8`, and narrowing silently could make two different members
/// collide.
pub fn get_protocols(ast : AST) -> Protocols{
    let mut symbol_table : HashMap<String, Type> = HashMap::new();
    add_builtin_types(&mut symbol_table);

    // Protocols are set aside until the symbol table is complete, because the
    // registry takes ownership of the finished table when it is created.
    let mut protocol_decls = Vec::new();

    for decl in ast.0 {
        match decl {
            super::parser::Declaration::EnumDeclaration(e) => {
                let enum_name = e.name;
                let members : HashMap<String, u8> = e.members
                    .into_iter()
                    .map(|m| {
                        let value = <u8 as std::convert::TryFrom<u64>>::try_from(m.number)
                            .unwrap_or_else(|_| panic!(
                                "Value {} of member `{}` in enum `{}` does not fit in a u8",
                                m.number, m.name, enum_name
                            ));
                        (m.name, value)
                    })
                    .collect();
                symbol_table.insert(enum_name, Type::Enum(EnumType{members}));
            },
            super::parser::Declaration::StructDeclaration(s) => {
                let members : HashMap<String, String> = s.members
                    .into_iter()
                    .map(|m| (m.name, m.type_name))
                    .collect();
                symbol_table.insert(s.name, Type::Struct(StructType{members}));
            },
            super::parser::Declaration::TypeDeclaration(t) => {
                symbol_table.insert(t.name, Type::Alias(t.type_name));
            },
            super::parser::Declaration::ProtocolDeclaration(p) => protocol_decls.push(p),
        }
    }

    let mut protocols = Protocols::new(symbol_table);
    for p in protocol_decls {
        let funcs : HashMap<String, Function> = p.interface
            .into_iter()
            .map(|f| (f.name, Function{arguments : f.arg_list}))
            .collect();
        protocols.add_protocol(p.name, funcs);
    }
    protocols
}

View file

@ -0,0 +1,3 @@
// Sample protocol `Foo` with a single function `bar` taking (i32, u8, bool).
protocol Foo{
fn bar(i32, u8, bool) -> void;
}