diff --git a/Cargo.lock b/Cargo.lock index 95801eb2..f593de01 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5,3 +5,160 @@ version = 3 [[package]] name = "ablescript" version = "0.6.66" +dependencies = [ + "lasso", + "logos", +] + +[[package]] +name = "ahash" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91429305e9f0a25f6205c5b8e0d2db09e0708a7a6df0f42212bb56c32c8ac97a" +dependencies = [ + "cfg-if", + "once_cell", + "version_check", + "zerocopy", +] + +[[package]] +name = "beef" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a8241f3ebb85c056b509d4327ad0358fbbba6ffb340bf388f26350aeda225b1" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "hashbrown" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43a3c133739dddd0d2990f9a4bdf8eb4b21ef50e4851ca85ab661199821d510e" +dependencies = [ + "ahash", +] + +[[package]] +name = "lasso" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4644821e1c3d7a560fe13d842d13f587c07348a1a05d3a797152d41c90c56df2" +dependencies = [ + "hashbrown", +] + +[[package]] +name = "logos" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c000ca4d908ff18ac99b93a062cb8958d331c3220719c52e77cb19cc6ac5d2c1" +dependencies = [ + "logos-derive", +] + +[[package]] +name = "logos-codegen" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc487311295e0002e452025d6b580b77bb17286de87b57138f3b5db711cded68" +dependencies = [ + "beef", + "fnv", + "proc-macro2", + "quote", + "regex-syntax", + "syn", +] + +[[package]] +name = "logos-derive" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbfc0d229f1f42d790440136d941afd806bc9e949e2bcb8faa813b0f00d1267e" +dependencies = [ + "logos-codegen", +] + +[[package]] +name = "once_cell" +version = "1.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" + +[[package]] +name = "proc-macro2" +version = "1.0.70" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39278fbbf5fb4f646ce651690877f89d1c5811a3d4acb27700c1cb3cdb78fd3b" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "regex-syntax" +version = "0.6.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" + +[[package]] +name = "syn" +version = "2.0.39" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23e78b90f2fcf45d3e842032ce32e3f2d1545ba6636271dcbf24fa306d87be7a" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "version_check" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" + +[[package]] +name = "zerocopy" +version = "0.7.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d6f15f7ade05d2a4935e34a457b936c23dc70a05cc1d97133dc99e7a3fe0f0e" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbbad221e3f78500350ecbd7dfa4e63ef945c05f4c61cb7f4d3f84cd0bba649b" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] diff --git a/Cargo.toml b/Cargo.toml index 4b24e1d9..675f6df1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,3 +4,5 @@ version = "0.6.66" edition = "2021" [dependencies] +lasso = "0.7" +logos = "0.13" diff --git a/src/lib.rs b/src/lib.rs index 72038fb6..d25031e1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,2 +1,7 @@ +pub mod syntax; + +/// We know that AbleScript 0.6.66 will be a polished language +pub use syntax as składnia; + /// AbleScript Language version pub const VERSION: &str = env!("CARGO_PKG_VERSION"); diff --git a/src/main.rs b/src/main.rs index 3c0f0945..7b6804cf 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,3 +1,4 @@ +use logos::{Lexer, Logos}; use std::io::Read; fn main() -> Result<(), Box> { @@ -5,6 +6,10 @@ fn main() -> Result<(), Box> { let mut buf = String::new(); std::io::stdin().read_to_string(&mut buf)?; + + for token in ablescript::składnia::token::Token::lexer_with_extras(&buf, Default::default()) { + print!("{token:?}, "); + } Ok(()) } diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs new file mode 100644 index 00000000..79c66ba6 --- /dev/null +++ b/src/syntax/mod.rs @@ -0,0 +1 @@ +pub mod token; diff --git a/src/syntax/token.rs b/src/syntax/token.rs new file mode 100644 index 00000000..3f41e4e1 --- /dev/null +++ b/src/syntax/token.rs @@ -0,0 +1,78 @@ +use lasso::{Rodeo, Spur}; +use logos::{Lexer, Logos, Skip}; + +#[derive(Default)] +pub struct Extras { + int: Rodeo, +} + +#[derive(Logos, Clone, Copy, Debug, PartialEq, Eq, Hash)] +#[logos(skip r"[ \t\n\f]+")] +#[logos(skip r"owo .*")] +#[logos(extras = Extras)] +#[rustfmt::skip] +pub enum Token { + #[token("(")] LParen, + #[token(")")] RParen, + #[token(",")] Comma, + #[token(";")] Semicolon, + #[token("`")] Tick, + #[token("+")] Plus, + #[token("-")] Minus, + #[token("*")] Star, + #[token("/")] Slash, + #[token("\\")] Backslash, + #[token("^")] Caret, + #[token("<")] Lt, + #[token(">")] Gt, + #[token("=")] Eq, + #[token("ain't")] Aint, + #[token("=:")] Assign, + + #[token("top")] Top, + #[token("bottom")] Bottom, + #[token("functio")] Functio, + #[token("unless")] Unless, + #[token("result")] Result, + + #[regex(r"\p{XID_Start}(\p{XID_Continue}|-)*", intern)] + Ident(Spur), + + #[regex(r"-?[0-9]+", integer)] + Int((bool, u64)), + + #[doc(hidden)] + #[token("uwu top", mlcomment)] + Invalid, +} + +#[inline] +fn intern(lexer: &mut Lexer) -> Spur { + lexer.extras.int.get_or_intern(lexer.slice()) +} + +#[inline] +fn integer(lexer: &Lexer) -> Option<(bool, u64)> { + let (sign, num) = match lexer.slice().strip_prefix('-') { + Some(n) => (true, n), + None => (false, lexer.slice()), + }; + + Some((sign, num.parse().ok()?)) +} + +#[inline] +fn mlcomment(lexer: &mut Lexer) -> Skip { + let mut count = 1; + loop { + match lexer.next() { + Some(Ok(Token::Bottom)) if count == 1 => break, + Some(Ok(Token::Top)) => count += 1, + Some(Ok(Token::Bottom)) => count -= 1, + Some(_) => (), + None => break, + } + } + + Skip +}