From 6410de9feb6d8d5aa4b45edc288976801b23d74f Mon Sep 17 00:00:00 2001 From: Elfein Landers Date: Fri, 12 Aug 2022 16:53:32 -0700 Subject: [PATCH] initial commit --- .gitignore | 2 + BRKWORDS.TXT | 224 +++++++++++++++++++++++++++ Cargo.toml | 11 ++ src/lex/cluster.rs | 370 +++++++++++++++++++++++++++++++++++++++++++++ src/lex/mod.rs | 260 +++++++++++++++++++++++++++++++ src/lex/pattern.rs | 156 +++++++++++++++++++ src/lib.rs | 72 +++++++++ src/lojbanic.rs | 241 +++++++++++++++++++++++++++++ src/strange.rs | 53 +++++++ 9 files changed, 1389 insertions(+) create mode 100644 .gitignore create mode 100644 BRKWORDS.TXT create mode 100644 Cargo.toml create mode 100644 src/lex/cluster.rs create mode 100644 src/lex/mod.rs create mode 100644 src/lex/pattern.rs create mode 100644 src/lib.rs create mode 100644 src/lojbanic.rs create mode 100644 src/strange.rs diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..96ef6c0 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +/target +Cargo.lock diff --git a/BRKWORDS.TXT b/BRKWORDS.TXT new file mode 100644 index 0000000..bf8c42c --- /dev/null +++ b/BRKWORDS.TXT @@ -0,0 +1,224 @@ +Morphology Algorithm +Internal Revision 4.1, 8 June 1992 + +The following will become the official baseline algorithm for resolution of +Lojban text into individual words from sounds, stress, and pause. As such, +it is the ultimate standard of Lojban's unambiguous resolvability, which +may make Lojban speech recognition by computers more possible than for +other languages. While the algorithm looks very complicated, almost all of +it is resolving special cases, and performing what error detection and +correction may be possible. + + +We have a string representing the speech stream, marked with stress and +pauses. We want to break it up into words. + + +1. First, break at all pauses (cannot pause in the middle of a word). +2. Then, pick the first piece that has not been uniquely resolved. + A. 
The first thing is to deal with some constructs which are required to + end with a pause: + 1) Names: + a) If the last letter of the piece is a consonant, we have a name. A + name must have a pause before it UNLESS it is immediately preceded by + a /la/, /lai/, /la'i/ or /doi/ as a marker, and it cannot contain any + of these markers unless the marker is immediately preceded by a conso- + nant. So, look backwards from the end of the piece for any of the + allowed markers. If we don't find one (e.g. /jonz/), then the whole + piece has been resolved as a name. + b) If you do find such a marker, then check what immediately precedes + it. If there is nothing (e.g. /ladjAn/), or if a vowel precedes (e.g. + /mivIskaladjAn./), break off the marker as a resolved piece (/la/), and + what follows it is also a resolved piece, a name (/djAn/), leaving us + with whatever preceded the marker, if anything, as still unresolved + (/mivIska/). + c) If what precedes the marker is a consonant (e.g. /karoslAInas/) + then ignore the marker and continue looking backwards. This exception + is allowed because /karos/ with no following pause cannot represent a + separate word. + 2) ".y.", the hesitation: + If the piece consists solely of /y/, then it resolves as the + hesitation word (which is required to be surrounded by pauses). + + 3) If the piece ends in "y", check for some lerfu words: specifically, the last lerfu word of a string, if it ends in a "y" (e.g. /abubycydy/ + or /y'y/), must be followed by a pause: + a) If the "y" is preceded by a consonant, break off the consonant+"y" + as a resolved lerfu word (e.g. /abubycydy/ gives /abubycy/ unresolved, + and /dy/ resolved as a lerfu word). Continue breaking off any Cy + pieces as lerfu words if they're there (e.g. unresolved /abubycy/ + gives unresolved /abuby/ + resolved /cy/; then /abuby/ gives un- + resolved /abu/ plus resolved /by/). 
+ Note that the Cy-type lerfu words will NEVER come before the other + lerfu word pieces in a breath-group - the "abu" and "y'y" types - + since they begin with vowels, they MUST be preceded by pauses; and Cy + followed by anything but another Cy must be followed by a pause + (because "y" is used as glue in lujvo, it could cause resolvability + problems if not separate; e.g. /micybusmAbru/ would not uniquely re- + solve). + b) If the "y" is preceded by "V'" or "y'" (e.g. /y'y/), break before + the "V", and the "V'y" is resolved as a lerfu word. + c) If the "y" is preceded by an "i" or "u" ("iy" and "uy" are + reserved) the piece cannot be resolved. + d) If the "y" is preceded by a vowel (V) other than "i" or "u", the + piece is in error and cannot be further resolved. + B. Next, see if the piece is composed entirely of cmavo. + 1) Check the piece to see if there are any consonant clusters (a + consonant cluster is of one of the forms CC or CyC). If there are none, + break up the piece before each consonant, resolving each piece as a + cmavo (e.g. /alenumibaca'a/ breaks into the cmavo /a/ + /le/ + /nu/ + + /mi/ + /ba/ + /ca'a/). If there are no consonants, the piece is a + single cmavo. In either case, the piece is completely resolved. + C. Now we have a piece which we are sure contains a brivla (a gismu, a + lujvo or a le'avla). We know that a brivla must have a consonant cluster + (CC or CyC) within the 1st five letters (ignoring apostrophes in the + count), and must have penultimate stress (ignoring "y" syllables, which + are not allowed to be stressed). + 1) First, let's check for a potential error (a form which shouldn't + arise): + a) If the piece contains no stress, but has a consonant cluster (CC + or CyC), it is in error. The consonant cluster indicates it contains + a brivla (gismu, lujvo or le'avla), which requires penultimate stress. + The only place this MIGHT validly occur is inside a zoi-quote (and + therefore need not be resolved at all). 
+ b) However, if stress information is not available, assume the brivla + ends at the end of the piece. (This rule gives the right behavior + with canonical written Lojban, where spaces separate all words except + for some cmavo compounds and stress is normally not marked.) + 2) Next, we need to find THE penultimate stress for the first brivla in + the piece (the brivla is expected to end after the syllable following + the stress, ignoring "y" syllables). Starting from the first consonant + cluster (CC or CyC): + a) If the previous letter is a stressed vowel, take that as THE + penultimate stress of the brivla. + b) If the previous letter is an unstressed vowel, but the letter + before that is a stressed vowel, then it is a stressed diphthong; + treat the entire diphthong as stressed (So that "find the next vowel" + will not get just the second half of the diphthong). Take that as THE + penultimate stress. + c) Otherwise, find the first stress after the consonant cluster. If + the stress is on a diphthong, treat the entire diphthong as stressed + (So that "find the next vowel" will not get just the second half of + the diphthong). Take that as THE penultimate stress. + 3) Next, let's find the end of the first brivla in the piece: a) If there is no vowel in the piece after the stress, it can't be a + penultimate stress, so the piece is in error (unresolvable). This is + also true if "y" is the only vowel after the stress (e.g. */stAsy/ is + not a valid breath-group). + b) If the NEXT vowel following the stress (skipping over "y"'s ) is + immediately followed by "'V" (as in /mlAtyci'a/), then the syllable + following the stress cannot be the last syllable of a word (since the + 'V cannot begin the next word). Ordinarily we would count this as an + error, but let's instead assume that this was a secondary stress and + ignore the fact that there is some stress on it. Go find the next + stress to use as THE penultimate stress for this brivla (e.g. 
in + /mlAtyci'abrIjuti/, assume the penultimate stress is "I", not "A"). + c) Having eliminated all the potential problems with finding the end, + let's cut the piece after the end of the brivla: + Find the first vowel (not counting "y") after the stress. If it is + part of a diphthong, break after the diphthong; otherwise, break + after the vowel itself. + 4) Now let's find the beginning of the brivla in the front part of the + piece we just broke off: + a) First, break off as many obvious cmavo pieces off the front as we + can: + 1] If there is no consonant cluster (CC or CyC) in the first 5 + letters (ignoring apostrophes in the count), then, if the piece + starts with a vowel, break off before the first consonant (e.g. + /alekArce/ becomes /a/ = cmavo + /lekArce/ = unresolved), otherwise + break off before the second consonant (e.g. /vilekArce/ becomes /vi/ + = cmavo + /lekArce/ = unresolved). The front piece is then resolved + as a cmavo. + 2] Repeat the above as many times as we can (so, /lekArce/ becomes + /le/ = cmavo + /kArce/ = unresolved. Since /kArce/ has a consonant + cluster in the first five letters, we can't go any further). + 3] If the piece we have left starts with a vowel, find the first + consonant. If the first consonant is part of a consonant cluster + (only CC-form this time), and this consonant cluster is NOT a valid + initial cluster (one in which each adjacent pair of consonants is a valid + initial pair), then we can resolve the entire piece as a le'avla + (e.g. /antipAsto/); otherwise (if the first consonant is NOT part of + a consonant cluster, or the consonant cluster IS a valid initial + cluster), break off before the first consonant as a cmavo (e.g. + /a'ofArlu/ becomes /a'o/ = cmavo + /fArlu/ = unresolved; or, + /aismAcu/ becomes /ai/ = cmavo + /smAcu/ = unresolved). + b) What's left begins with a consonant and has a consonant cluster + (CC or CyC) in the first 5 letters. 
The whole thing may be a brivla, + or there may be (at most) one consonant-initial cmavo in front. Here + are the possibilities for the start of the piece, and their + resolutions: + 1] CC... or CVCyC...: + Resolve whole thing as a brivla (a gismu, lujvo, or le'avla). + 2] CyC... : + Invalid form. Unresolvable. + 3] CVVCC... : (Note: stressing a cmavo on the final syllable before a brivla is + not allowed.) + a] If there is no stress on the VV and the consonant cluster + beginning with the CC is a valid initial cluster (i.e., each + adjacent pair of consonants is a valid initial pair), then break + off the CVV, and resolve it as a cmavo; the remaining piece can + then be resolved as a brivla (see "CC....", above). For example, + /leiprEnu/ becomes /lei/ = cmavo + /prEnu/ = brivla. + b] Otherwise (i.e. there IS a stress on the VV, or the first + consonant cluster is not a valid initial cluster), resolve the + whole thing as a brivla (e.g. /cAItro/ = brivla) + 4] CV'VCC... : + (Note: stressing a cmavo on the final syllable before a brivla is + not allowed.) + a] If there is no stress on the final vowel of the V'V and the + consonant cluster beginning with the CC is a valid initial cluster + (i.e., each adjacent pair of consonants is a valid initial pair), + then break off the CV'V, and resolve it as a cmavo; the remaining + piece can then be resolved as a brivla (see "CC....", above). For + example, /so'iprEnu/ becomes /so'i/ = cmavo + /prEnu/ = brivla. + b] Otherwise (i.e. there is a stress on the final vowel of the + V'V, or the first consonant cluster is not a valid initial + cluster), resolve the whole thing as a brivla (e.g. /cA'Itro/ = + brivla) + 5] CVCC... (This is the hard one. Is the front CV a separate + word?): + a] If the whole piece is CVCCV, then the whole thing resolves as a + gismu. 
+ b] If the consonant cluster beginning with the CC is not a valid + initial cluster (one in which each adjacent pair of consonants is a valid + initial pair), then the whole piece can be resolved as a brivla + (gismu, lujvo, or le'avla). For example, /selfArlu/, + /cidjrspagEti/. + c] If the penultimate stress is on the 1st vowel of the CVCC (e.g. + /mAtcti/), then resolve the whole thing as a brivla (a lujvo or + le'avla). + d] If there is a "y", we need to look at the sub-piece up to the + first "y": + 1> If the sub-piece consists entirely of CVC's repeating (at + least 2 needed: e.g. /cacric/), and all the CC's of the sub-piece + are valid initial clusters, then resolve the initial CV as a + cmavo, and the rest of the whole piece is a brivla (a lujvo or + le'avla). + 2> Otherwise, if the sub-piece can be broken down into any + number (including 0) of valid lujvo "front-middles" in front and + exactly one valid lujvo "end" thereafter, resolve the whole piece + as a brivla. + a> Valid front-middles (we've eliminated all but those starting + with CV): CVC CVV CV'V CCV + b> Valid ends: CVC CCVC CVCC + 3> Otherwise, the front CV should be resolved as a cmavo, and + the remaining piece is resolved as a brivla (a lujvo or le'avla) + e] If there is no "y": + 1> If the piece consists of CVC's repeating (at least 2 needed) + up to a final CV (e.g. /cacricfu/), and all the CC's of the sub- + piece are valid initial clusters, then resolve the initial CV as + a cmavo, and the rest of the piece is a brivla (a lujvo). + 2> Otherwise, if the piece can be broken down into any number + (including 0) of valid lujvo "front-middles" in front and exactly + one valid lujvo "end", then resolve the whole piece as a brivla (a lujvo). + a> Valid front-middles (we've eliminated all but those starting + with CV): CVC CVV CV'V CVC + b> Valid ends: CVV CV'V CCV CCVCV CVCCV + + 3> Otherwise, the front CV should be resolved as a cmavo, and + the remaining piece is resolved as a brivla (a le'avla). 
+ + 6] Any other beginning (e.g. CVVCyC): + Resolve the whole as an error. + + + +_______________________________________ diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..bbfc980 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "elf_lojban" +version = "0.1.0-beta" +edition = "2018" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] + +[dev-dependencies] +rand = "0.8.5" diff --git a/src/lex/cluster.rs b/src/lex/cluster.rs new file mode 100644 index 0000000..c225d47 --- /dev/null +++ b/src/lex/cluster.rs @@ -0,0 +1,370 @@ +use crate::{ + lojbanic::{is_valid_consonant_pair, is_valid_vowel_pair_name, Lojbanic}, + strange::StrRange, +}; + +fn split_consonants(s: StrRange) -> Vec { + let mut output = Vec::new(); + let mut temp_buf = StrRange::new(s.src(), s.start(), 0); + let mut previous_ch = None; + for (at, ch) in s.as_str().char_indices() { + let char_len = ch.len_utf8(); + if let Some(previous_ch) = previous_ch { + if !is_valid_consonant_pair(previous_ch, ch) { + output.push(temp_buf); + temp_buf = StrRange::new(s.src(), at, char_len); + } else { + temp_buf.increase_length(char_len); + } + } else { + temp_buf.increase_length(char_len); + } + previous_ch = ch.into(); + } + if !temp_buf.is_empty() { + output.push(temp_buf); + } + output +} + +#[test] +fn splitsonants() { + let src = "mmmmmmmmm"; + assert_eq![ + split_consonants(StrRange::new(src, 0, src.len())) + .iter() + .map(|s| s.as_str()) + .collect::>(), + ["m", "m", "m", "m", "m", "m", "m", "m", "m",] + ]; + let src = "bcdfgjk"; + assert_eq![ + split_consonants(StrRange::new(src, 0, src.len())) + .iter() + .map(|s| s.as_str()) + .collect::>(), + ["b", "c", "d", "f", "gj", "k",] + ]; +} + +fn split_vowels(s: StrRange) -> Vec { + let mut output = Vec::new(); + let mut temp_buf = StrRange::new(s.src(), s.start(), 0); + let mut previous_ch = None; + for (at, ch) in s.as_str().char_indices() { + let char_len 
= ch.len_utf8(); + if let Some(previous_ch) = previous_ch { + if !is_valid_vowel_pair_name(previous_ch, ch) { + output.push(temp_buf); + temp_buf = StrRange::new(s.src(), at, char_len); + } else { + temp_buf.increase_length(char_len); + } + } else { + temp_buf.increase_length(char_len); + } + previous_ch = ch.into(); + } + if !temp_buf.is_empty() { + output.push(temp_buf); + } + output +} + +#[test] +fn splowels() { + let src = "aaaaaaaaa"; + assert_eq![ + split_vowels(StrRange::new(src, 0, src.len())) + .iter() + .map(|s| s.as_str()) + .collect::>(), + ["a", "a", "a", "a", "a", "a", "a", "a", "a",] + ]; +} + +#[derive(Debug, PartialEq, Eq)] +pub struct Cluster<'src_buf> { + pub s: StrRange<'src_buf>, + pub kind: ClusterKind, +} + +macro_rules! gen_cluster_fns { + ($($nym:ident, $v:ident),*) => {$( + pub fn $nym(s: StrRange<'src_buf>) -> Self { + Self { + s, + kind: ClusterKind::$v, + } + })* + }; +} + +impl<'src_buf> Cluster<'src_buf> { + gen_cluster_fns![ + consonant, Consonant, number, Number, huhboo, Huhboo, glide, Glide, unknown, Unknown, vowel, + Vowel, whitespace, Whitespace + ]; +} + +#[derive(Debug, PartialEq, Eq)] +pub enum ClusterKind { + Consonant, + Number, + Huhboo, + Glide, + Unknown, + Vowel, + Whitespace, +} + +impl<'src_buf> Cluster<'src_buf> { + pub fn len(&self) -> usize { + self.s.len() + } +} + +enum ClusterState { + Any, + BetweenVowels, + Consonant, + Number, + Unknown, + Vowel, + Whitespace, +} + +pub fn clusterise(s: &str) -> Vec { + let mut output = vec![]; + let mut temp_buf = StrRange::new(s, 0, 0); + let mut state = ClusterState::Any; + for (at, ch) in s.char_indices() { + let char_len = ch.len_utf8(); + match state { + ClusterState::Any => { + temp_buf.increase_length(char_len); + match ch { + e if e.is_lojban_apostrophe() | e.is_lojban_glide() | !e.is_lojbanic() => { + state = ClusterState::Unknown + } + e if e.is_lojban_consonant() => state = ClusterState::Consonant, + e if e.is_lojban_vowel() => state = ClusterState::Vowel, + e if 
e.is_whitespace() | e.is_lojban_stop() => state = ClusterState::Whitespace, + e if e.is_numeric() => state = ClusterState::Number, + _ => unreachable![], + }; + } + ClusterState::BetweenVowels => match ch { + e if e.is_lojban_vowel() => { + if temp_buf.as_str() == "'" { + output.push(Cluster::huhboo(temp_buf)); + } else { + output.push(Cluster::glide(temp_buf)); + } + state = ClusterState::Vowel; + temp_buf = StrRange::new(s, at, char_len); + } + _ => { + state = ClusterState::Unknown; + temp_buf.increase_length(char_len); + } + }, + ClusterState::Consonant => match ch { + e if e.is_lojban_consonant() => { + temp_buf.increase_length(char_len); + } + e if e.is_lojban_vowel() | e.is_whitespace() | e.is_lojban_stop() => { + let clusters = split_consonants(temp_buf.clone()); + let len = clusters.len(); + let mut accum = 0..0; + for (j, c) in clusters.into_iter().enumerate() { + if j == 0 || j == len - 1 { + output.push(Cluster::consonant(c)); + if j != 0 && !accum.is_empty() { + output.push(Cluster::unknown(StrRange::new( + s, + temp_buf.start() + accum.start, + accum.len(), + ))); + break; + } + } else { + accum.end += c.len(); + } + } + state = if !ch.is_lojban_vowel() { + ClusterState::Whitespace + } else { + ClusterState::Vowel + }; + temp_buf = StrRange::new(s, at, char_len); + } + e if e.is_lojban_apostrophe() + | e.is_lojban_glide() + | !e.is_lojbanic() + | e.is_numeric() => + { + temp_buf.increase_length(char_len); + state = ClusterState::Unknown; + } + _ => unreachable![], + }, + ClusterState::Number => match ch { + e if e.is_numeric() => { + temp_buf.increase_length(char_len); + } + e if e.is_lojban_apostrophe() | e.is_lojban_glide() => { + state = ClusterState::Unknown; + temp_buf.increase_length(char_len); + } + e if e.is_lojban_vowel() + | e.is_lojban_consonant() + | e.is_whitespace() + | e.is_lojban_stop() => + { + output.push(Cluster::number(temp_buf)); + state = if ch.is_lojban_vowel() { + ClusterState::Vowel + } else if ch.is_lojban_consonant() { + 
ClusterState::Consonant + } else { + ClusterState::Whitespace + }; + temp_buf = StrRange::new(s, at, char_len); + } + e if !e.is_lojbanic() => { + state = ClusterState::Unknown; + temp_buf.increase_length(char_len); + } + _ => unreachable![], + }, + ClusterState::Unknown => match ch { + e if e.is_whitespace() => { + output.push(Cluster::unknown(temp_buf)); + state = ClusterState::Whitespace; + temp_buf = StrRange::new(s, at, char_len); + } + _ => temp_buf.increase_length(char_len), + }, + ClusterState::Vowel => match ch { + e if e.is_lojban_apostrophe() => { + for vowels in split_vowels(temp_buf) { + output.push(Cluster::vowel(vowels)); + } + state = ClusterState::BetweenVowels; + temp_buf = StrRange::new(s, at, char_len); + } + e if e.is_lojban_glide() => { + for vowels in split_vowels(temp_buf) { + output.push(Cluster::vowel(vowels)); + } + state = ClusterState::BetweenVowels; + temp_buf = StrRange::new(s, at, char_len); + } + e if e.is_lojban_vowel() => { + temp_buf.increase_length(char_len); + } + e if e.is_lojban_consonant() | e.is_lojban_stop() | e.is_whitespace() => { + for vowels in split_vowels(temp_buf) { + output.push(Cluster::vowel(vowels)); + } + state = if !ch.is_lojban_consonant() { + ClusterState::Whitespace + } else { + ClusterState::Consonant + }; + temp_buf = StrRange::new(s, at, char_len); + } + e if e.is_numeric() => { + todo![] + } + e if !e.is_lojbanic() => { + state = ClusterState::Unknown; + temp_buf.increase_length(char_len); + } + _ => unreachable![], + }, + ClusterState::Whitespace => match ch { + e if e.is_lojban_apostrophe() | e.is_lojban_glide() => { + output.push(Cluster::whitespace(temp_buf)); + state = ClusterState::Unknown; + temp_buf = StrRange::new(s, at, char_len); + } + e if e.is_lojban_consonant() => { + output.push(Cluster::whitespace(temp_buf)); + state = ClusterState::Consonant; + temp_buf = StrRange::new(s, at, char_len); + } + e if e.is_lojban_vowel() => { + output.push(Cluster::whitespace(temp_buf)); + state = 
ClusterState::Vowel; + temp_buf = StrRange::new(s, at, char_len); + } + e if e.is_whitespace() | e.is_lojban_stop() => { + temp_buf.increase_length(char_len); + } + e if e.is_numeric() => { + output.push(Cluster::whitespace(temp_buf)); + state = ClusterState::Number; + temp_buf = StrRange::new(s, at, char_len); + } + e if !e.is_lojbanic() => { + output.push(Cluster::whitespace(temp_buf)); + state = ClusterState::Unknown; + temp_buf = StrRange::new(s, at, char_len); + } + _ => unreachable![], + }, + } + } + if !temp_buf.is_empty() { + match state { + ClusterState::Any => {} + ClusterState::BetweenVowels => { + if temp_buf.clone().as_str() == "'" { + output.push(Cluster::huhboo(temp_buf)); + } else { + output.push(Cluster::glide(temp_buf)); + } + } + ClusterState::Consonant => { + let clusters = split_consonants(temp_buf.clone()); + let len = clusters.len(); + let mut accum = 0..0; + for (j, c) in clusters.into_iter().enumerate() { + if j == 0 || j == len - 1 { + output.push(Cluster::consonant(c)); + if j != 0 && !accum.clone().is_empty() { + output.push(Cluster::unknown(StrRange::new( + s, + temp_buf.start() + accum.start, + accum.len(), + ))); + break; + } + } else { + accum.end += c.len(); + } + } + } + ClusterState::Number => { + output.push(Cluster::number(temp_buf)); + } + ClusterState::Unknown => { + output.push(Cluster::unknown(temp_buf)); + } + ClusterState::Vowel => { + for vowels in split_vowels(temp_buf) { + output.push(Cluster::vowel(vowels)); + } + } + ClusterState::Whitespace => output.push(Cluster::whitespace(temp_buf)), + } + } + output +} + +// #[test] +// fn periods_are_whitespace() { +// assert_eq![clusterise("tssssssssssssssi."), [Cluster::whitespace(".")]] +// } diff --git a/src/lex/mod.rs b/src/lex/mod.rs new file mode 100644 index 0000000..ffdac5f --- /dev/null +++ b/src/lex/mod.rs @@ -0,0 +1,260 @@ +mod cluster; +mod pattern; +use crate::{lojbanic::starts_with_permissible_initial_pair, strange::StrRange, Token}; +use cluster::{clusterise, 
Cluster, ClusterKind}; +use pattern::Pattern; + +fn matches_gismu(clusters: &[Cluster]) -> bool { + if Pattern::CVCCV.matches_strict(clusters) || Pattern::CCVCV.matches_strict(clusters) { + if let Some(Cluster { s: _, kind }) = clusters.get(5) { + (match kind { + ClusterKind::Consonant | ClusterKind::Number | ClusterKind::Whitespace => true, + ClusterKind::Vowel => false, + // pretty sure these are unreachable at this point + ClusterKind::Huhboo => false, + ClusterKind::Glide => false, + ClusterKind::Unknown => false, + }) && { clusters.iter().take(4).fold(0, |a, c| c.len() + a) == 5 } + } else { + true + } + } else { + false + } +} + +fn matches_lujvo(clusters: &[Cluster]) -> bool { + Pattern::CCVCCV.matches(clusters) + || Pattern::CCV.matches(clusters) + || if Pattern::CVCCV.matches(clusters) { + clusters.iter().take(2).fold(0, |a, c| c.len() + a) <= 3 + } else { + false + } +} + +fn matches_cmavo(clusters: &[Cluster]) -> bool { + if Pattern::CVCCV.matches(clusters) { + starts_with_permissible_initial_pair(&clusters[2].s) + } else { + Pattern::CVCCVCCV.matches(clusters) + || Pattern::CV.matches(clusters) + || Pattern::V.matches(clusters) + } +} + +fn matches_cmevla(clusters: &[Cluster]) -> bool { + let mut previous_was_consonant = false; + for Cluster { s: _, kind } in clusters { + match kind { + ClusterKind::Consonant => previous_was_consonant = true, + ClusterKind::Huhboo | ClusterKind::Glide | ClusterKind::Vowel => { + previous_was_consonant = false + } + ClusterKind::Unknown | ClusterKind::Whitespace => return false, + ClusterKind::Number => break, + } + } + previous_was_consonant +} + +fn matches_unknown(clusters: &[Cluster]) -> bool { + for Cluster { s: _, kind } in clusters { + if let ClusterKind::Unknown = kind { + return true; + } + if let ClusterKind::Whitespace = kind { + return false; + } + } + false +} + +fn eat_cmevla<'a, 'b>(rest: &'b [Cluster<'a>]) -> (StrRange<'a>, &'b [Cluster<'a>]) { + let mut temp_buf = StrRange::new(rest[0].s.src(), 
rest[0].s.start(), 0); + let mut new_offset = 0; + for (i, Cluster { s, kind }) in rest.iter().enumerate() { + match kind { + ClusterKind::Consonant | ClusterKind::Vowel | ClusterKind::Huhboo | ClusterKind::Glide => { + temp_buf.increase_length(s.len()) + } + _ => break, + } + new_offset = i + 1; + } + + (temp_buf, &rest[new_offset..]) +} + +fn eat_gismu<'a, 'b>(rest: &'b [Cluster<'a>]) -> (StrRange<'a>, &'b [Cluster<'a>]) { + let mut temp_buf = StrRange::new(rest[0].s.src(), rest[0].s.start(), 0); + let gismu_offset = 4; + (0..gismu_offset).for_each(|i| temp_buf.increase_length(rest[i].len())); + (temp_buf, &rest[gismu_offset..]) +} + +fn eat_lujvo<'a, 'b>(rest: &'b [Cluster<'a>]) -> (StrRange<'a>, &'b [Cluster<'a>]) { + let mut temp_buf = StrRange::new(rest[0].s.src(), rest[0].s.start(), 0); + let mut new_offset = 0; + let mut stressed = false; + for (i, Cluster { s, kind }) in rest.iter().enumerate() { + match kind { + ClusterKind::Consonant | ClusterKind::Vowel => { + temp_buf.increase_length(s.len()); + if s.as_str().to_lowercase() != *s.as_str() { + stressed = true; + new_offset += 1; + continue; + } + } + ClusterKind::Huhboo | ClusterKind::Glide => temp_buf.increase_length(s.len()), + _ => break, + } + if stressed { + break; + } + new_offset = i + 1; + } + (temp_buf, &rest[new_offset..]) +} + +fn eat_cmavo<'a, 'b>(rest: &'b [Cluster<'a>]) -> (StrRange<'a>, &'b [Cluster<'a>]) { + let mut temp_buf = StrRange::new(rest[0].s.src(), rest[0].s.start(), 0); + let mut new_offset = 0; + let mut found_consonant = false; + for (i, Cluster { s, kind }) in rest.iter().enumerate() { + match kind { + ClusterKind::Consonant => { + if found_consonant { + break; + } else { + found_consonant = true; + temp_buf.increase_length(s.len()); + } + } + ClusterKind::Vowel => { + temp_buf.increase_length(s.len()); + found_consonant = true; + } + ClusterKind::Huhboo => temp_buf.increase_length(s.len()), + ClusterKind::Glide => temp_buf.increase_length(s.len()), + _ => break, + } + 
new_offset = i; + } + (temp_buf, &rest[new_offset + 1..]) +} + +fn eat_number<'a, 'b>(rest: &'b [Cluster<'a>]) -> (StrRange<'a>, &'b [Cluster<'a>]) { + let mut temp_buf = StrRange::new(rest[0].s.src(), rest[0].s.start(), 0); + let mut new_offset = 0; + let mut finished = false; + for (i, Cluster { s, kind }) in rest.iter().enumerate() { + new_offset = i; + finished = false; + match kind { + ClusterKind::Number => temp_buf.increase_length(s.len()), + _ => break, + } + finished = true; + } + if finished { + new_offset += 1; + } + (temp_buf, &rest[new_offset..]) +} + +fn eat_non_lojban<'a, 'b>(rest: &'b [Cluster<'a>]) -> (StrRange<'a>, &'b [Cluster<'a>]) { + let mut temp_buf = StrRange::new(rest[0].s.src(), rest[0].s.start(), 0); + let mut new_offset = 0; + for (i, Cluster { s, kind }) in rest.iter().enumerate() { + new_offset = i + 1; + match kind { + ClusterKind::Whitespace => break, + _ => temp_buf.increase_length(s.len()), + } + } + (temp_buf, &rest[new_offset..]) +} + +fn eat_whitespace<'a, 'b>(rest: &'b [Cluster<'a>]) -> (StrRange<'a>, &'b [Cluster<'a>]) { + let mut temp_buf = StrRange::new(rest[0].s.src(), rest[0].s.start(), 0); + let mut new_offset = 0; + let mut finished = false; + for (i, Cluster { s, kind }) in rest.iter().enumerate() { + new_offset = i; + finished = false; + match kind { + ClusterKind::Whitespace => temp_buf.increase_length(s.len()), + _ => break, + } + finished = true; + } + if finished { + new_offset += 1; + } + (temp_buf, &rest[new_offset..]) +} + +pub fn lex(src: &str) -> Vec { + let mut output = Vec::new(); + let clusters = clusterise(src); + let mut rest = clusters.as_slice(); + loop { + output.push(if matches_unknown(rest) { + let (buf, new_rest) = eat_non_lojban(rest); + rest = new_rest; + Token::unknown(buf.as_str()) + } else if matches_cmevla(rest) { + let (buf, new_rest) = eat_cmevla(rest); + rest = new_rest; + Token::cmevla(buf.as_str()) + } else if matches_gismu(rest) { + let (buf, new_rest) = eat_gismu(rest); + rest = 
new_rest; + Token::brivla(buf.as_str()) + } else if matches_lujvo(rest) { + let (buf, new_rest) = eat_lujvo(rest); + rest = new_rest; + Token::brivla(buf.as_str()) + } else if matches_cmavo(rest) { + let (buf, new_rest) = eat_cmavo(rest); + rest = new_rest; + Token::cmavo(buf.as_str()) + } else { + match rest.get(0) { + Some(Cluster { + s: _, + kind: ClusterKind::Number, + }) => { + let (buf, new_rest) = eat_number(rest); + rest = new_rest; + Token::number(buf.as_str()) + } + Some(Cluster { + s: _, + kind: ClusterKind::Unknown, + }) => { + let (buf, new_rest) = eat_non_lojban(rest); + rest = new_rest; + Token::unknown(buf.as_str()) + } + Some(Cluster { + s: _, + kind: ClusterKind::Whitespace, + }) => { + let (buf, new_rest) = eat_whitespace(rest); + rest = new_rest; + Token::whitespace(buf.as_str()) + } + Some(Cluster { s: _, kind: _ }) => { + let (buf, new_rest) = eat_non_lojban(rest); + rest = new_rest; + Token::unknown(buf.as_str()) + } + None => break, + } + }); + } + output +} diff --git a/src/lex/pattern.rs b/src/lex/pattern.rs new file mode 100644 index 0000000..5b952df --- /dev/null +++ b/src/lex/pattern.rs @@ -0,0 +1,156 @@ +use super::{cluster::Cluster, cluster::ClusterKind}; +use Clust::*; + +#[derive(Copy, Clone)] +pub enum Clust { + C, + Cc, + V, +} + +impl From<&'static [Clust]> for Pattern { + fn from(pat: &'static [Clust]) -> Self { + Self { pat } + } +} + +pub struct Pattern { + pub pat: &'static [Clust], +} + +impl Pattern { + pub const CV: Self = Self { pat: &[C, V] }; + pub const V: Self = Self { pat: &[V] }; + pub const CCV: Self = Self { pat: &[Cc, V] }; + pub const CCVCV: Self = Self { + pat: &[Cc, V, C, V], + }; + pub const CCVCCV: Self = Self { + pat: &[Cc, V, Cc, V], + }; + pub const CVCCV: Self = Self { + pat: &[C, V, Cc, V], + }; + pub const CVCCVCCV: Self = Self { + pat: &[C, V, Cc, V, Cc, V], + }; + fn matches_inner(&self, strict: bool, clusters: &[Cluster]) -> bool { + let mut pat = self.pat.iter(); + let mut clusters = 
clusters.iter(); + let mut checking_vowel = false; + let mut previous_punct = false; + loop { + if checking_vowel { + if let Some(Cluster { s, kind }) = clusters.next() { + match kind { + ClusterKind::Consonant => { + if previous_punct { + break false; + } + checking_vowel = false; + if let Some(clust_kind) = pat.next() { + match clust_kind { + Clust::C => { + if s.as_str().chars().count() > 1 { + break false; + } + } + Clust::Cc => { + if s.as_str().chars().count() <= 1 { + break false; + } + } + Clust::V => break false, + } + } else { + break true; + } + } + ClusterKind::Whitespace | ClusterKind::Number | ClusterKind::Unknown => { + if pat.next().is_some() { + break false; + } else { + break true; + } + } + ClusterKind::Huhboo | ClusterKind::Glide => { + if previous_punct { + break false; + } else { + previous_punct = true + } + } + ClusterKind::Vowel => { + if previous_punct { + previous_punct = false; + } else if let Some(clust_kind) = pat.next() { + match clust_kind { + Clust::C | Clust::Cc => break false, + Clust::V => {} + } + } else { + break true; + } + } + } + } else if previous_punct || pat.next().is_some() { + break false; + } else { + break true; + } + } else if let Some(clust_kind) = pat.next() { + if let Some(Cluster { s, kind }) = clusters.next() { + match kind { + ClusterKind::Consonant => match clust_kind { + Clust::C => { + if s.as_str().chars().count() != 1 { + break false; + } + } + Clust::Cc => { + if strict { + if s.as_str().chars().count() <= 1 { + break false; + } + } else if s.as_str().chars().count() != 2 { + break false; + } + } + Clust::V => break false, + }, + ClusterKind::Whitespace + | ClusterKind::Number + | ClusterKind::Huhboo + | ClusterKind::Glide + | ClusterKind::Unknown => break false, + ClusterKind::Vowel => match clust_kind { + Clust::C | Clust::Cc => break false, + Clust::V => { + if !strict { + checking_vowel = true; + } + } + }, + } + } else { + break false; + } + } else { + break true; + } + } + } + pub(crate) fn 
/// A lexed token: a slice of the source text tagged with its [`TokenKind`].
///
/// The token only borrows from the lexed string, so it is cheap to copy.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub struct Token<'src_buf> {
    /// The exact source text covered by this token.
    pub s: &'src_buf str,
    /// The category the lexer assigned to `s`.
    pub kind: TokenKind,
}

impl<'src_buf> Token<'src_buf> {
    /// Shared body of the per-kind constructors below.
    const fn with_kind(s: &'src_buf str, kind: TokenKind) -> Self {
        Self { s, kind }
    }

    /// Creates a [`TokenKind::Brivla`] token over `s`.
    pub const fn brivla(s: &'src_buf str) -> Self {
        Self::with_kind(s, TokenKind::Brivla)
    }

    /// Creates a [`TokenKind::Cmavo`] token over `s`.
    pub const fn cmavo(s: &'src_buf str) -> Self {
        Self::with_kind(s, TokenKind::Cmavo)
    }

    /// Creates a [`TokenKind::Cmevla`] token over `s`.
    pub const fn cmevla(s: &'src_buf str) -> Self {
        Self::with_kind(s, TokenKind::Cmevla)
    }

    /// Creates a [`TokenKind::Number`] token over `s`.
    pub const fn number(s: &'src_buf str) -> Self {
        Self::with_kind(s, TokenKind::Number)
    }

    /// Creates a [`TokenKind::Unknown`] token over `s`.
    pub const fn unknown(s: &'src_buf str) -> Self {
        Self::with_kind(s, TokenKind::Unknown)
    }

    /// Creates a [`TokenKind::Whitespace`] token over `s`.
    pub const fn whitespace(s: &'src_buf str) -> Self {
        Self::with_kind(s, TokenKind::Whitespace)
    }
}

/// The category of a [`Token`].
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub enum TokenKind {
    /// A brivla, e.g. `prami` in `"mi prami do"`.
    Brivla,
    /// A cmavo, e.g. `mi` and `do` in `"mi prami do"`.
    Cmavo,
    /// A cmevla (Lojban name word).
    Cmevla,
    /// A run of numeric characters.
    Number,
    /// Text the lexer could not classify as Lojban.
    Unknown,
    /// A run of whitespace between words.
    Whitespace,
}
"gl", + "gr", "jb", "jd", "jg", "jm", "jv", "kl", "kr", "ml", "mr", "pl", "pr", "sf", "sk", "sl", "sm", + "sn", "sp", "sr", "st", "tc", "tr", "ts", "vl", "vr", "xl", "xr", "zb", "zd", "zg", "zm", "zv", +]; + +pub trait Lojbanic { + fn is_lojbanic(&self) -> bool; + fn is_lojban_consonant(&self) -> bool; + fn is_lojban_voiced(&self) -> bool; + fn is_lojban_unvoiced(&self) -> bool; + fn is_lojban_syllabic_consonant(&self) -> bool; + fn is_lojban_vowel(&self) -> bool; + fn is_lojban_uppercase(&self) -> bool; + fn is_lojban_lowercase(&self) -> bool; + fn is_lojban_apostrophe(&self) -> bool; + fn is_lojban_stop(&self) -> bool; + fn is_lojban_glide(&self) -> bool; + fn is_lojban_a(&self) -> bool; + fn is_lojban_e(&self) -> bool; + fn is_lojban_i(&self) -> bool; + fn is_lojban_o(&self) -> bool; + fn is_lojban_u(&self) -> bool; + fn is_lojban_y(&self) -> bool; +} + +impl Lojbanic for char { + fn is_lojbanic(&self) -> bool { + [ + CONSONANT_LOWER, + CONSONANT_UPPER, + VOWEL_LOWER, + VOWEL_UPPER, + APOSTROPHE_LOWER, + APOSTROPHE_UPPER, + NUMERAL, + GLIDE, + STOP, + ] + .iter() + .any(|s| s.contains(*self)) + } + fn is_lojban_consonant(&self) -> bool { + self.is_lojbanic() + && [CONSONANT_LOWER, CONSONANT_UPPER] + .iter() + .any(|s| s.contains(*self)) + } + fn is_lojban_vowel(&self) -> bool { + self.is_lojbanic() && [VOWEL_LOWER, VOWEL_UPPER].iter().any(|s| s.contains(*self)) + } + fn is_lojban_uppercase(&self) -> bool { + self.is_lojbanic() + && [CONSONANT_UPPER, VOWEL_UPPER, APOSTROPHE_UPPER] + .iter() + .any(|s| s.contains(*self)) + } + fn is_lojban_voiced(&self) -> bool { + self.is_lojbanic() && VOICED.contains(*self) + } + fn is_lojban_unvoiced(&self) -> bool { + self.is_lojbanic() && UNVOICED.contains(*self) + } + fn is_lojban_syllabic_consonant(&self) -> bool { + self.is_lojbanic() && SYLLABIC_CONSONANTS.contains(*self) + } + fn is_lojban_lowercase(&self) -> bool { + self.is_lojbanic() && (CONSONANT_UPPER.contains(*self) || VOWEL_LOWER.contains(*self)) + } + fn 
is_lojban_apostrophe(&self) -> bool { + [APOSTROPHE_LOWER, APOSTROPHE_UPPER] + .iter() + .any(|s| s.contains(*self)) + } + fn is_lojban_stop(&self) -> bool { + STOP.contains(*self) + } + fn is_lojban_glide(&self) -> bool { + GLIDE.contains(*self) + } + fn is_lojban_a(&self) -> bool { + A.contains(*self) + } + fn is_lojban_e(&self) -> bool { + E.contains(*self) + } + fn is_lojban_i(&self) -> bool { + I.contains(*self) + } + fn is_lojban_o(&self) -> bool { + O.contains(*self) + } + fn is_lojban_u(&self) -> bool { + U.contains(*self) + } + fn is_lojban_y(&self) -> bool { + Y.contains(*self) + } +} + +macro_rules! option_passthrough_methods { + ($($nym:ident),*$(,)?) => { + $(fn $nym(&self) -> bool { + self.iter().any(|ch| ch.$nym()) + })* + }; +} + +impl Lojbanic for Option { + // If `None`, false, otherwise `Some(ch).unwrap().{method}()` + option_passthrough_methods! { + is_lojbanic, + is_lojban_consonant, + is_lojban_vowel, + is_lojban_uppercase, + is_lojban_voiced, + is_lojban_unvoiced, + is_lojban_syllabic_consonant, + is_lojban_lowercase, + is_lojban_apostrophe, + is_lojban_stop, + is_lojban_glide, + is_lojban_a, + is_lojban_e, + is_lojban_i, + is_lojban_o, + is_lojban_u, + is_lojban_y, + } +} + +pub fn is_valid_consonant_pair(left: char, right: char) -> bool { + match left { + e if e == right => false, + e if e.is_lojban_voiced() => right.is_lojban_voiced() || right.is_lojban_syllabic_consonant(), + e if e.is_lojban_unvoiced() => { + right.is_lojban_unvoiced() || right.is_lojban_syllabic_consonant() + } + e if e.is_lojban_syllabic_consonant() => true, + _ => false, + } +} + +#[test] +fn consonant_pairs() { + let permissible_pairs = [ + ('b', "dgjvzmnlr"), + ('c', "fkptlrmn"), + ('d', "bgjvlmnzr"), + ('f', "ckpstxmnlr"), + ('g', "bdjvzmnlr"), + ('j', "bdgvmlnr"), + ('k', "cfpstmnlr"), + ('l', "bcdfgjkpstvxzmnr"), + ('m', "bcdfgjkpstvxlrn"), + ('n', "bcdfgjkpstvxzlmr"), + ('p', "cfkstxmnlr"), + ('r', "bcdfgjkpstvxzlmn"), + ('s', "fklprtmnx"), + ('t', 
/// A sub-slice of a source string, stored as the whole source plus a byte
/// range into it.
///
/// Keeping the full source around lets the range be grown in place (see
/// [`StrRange::increase_length`]) without re-slicing.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct StrRange<'src_buf> {
    /// The complete string the range refers to.
    src: &'src_buf str,
    /// Byte offsets into `src`; used directly to index it in `as_str`.
    range: Range<usize>,
}

impl<'src_buf> StrRange<'src_buf> {
    /// Creates a range covering `length` bytes of `src` beginning at byte
    /// offset `start`.
    ///
    /// The bounds are not validated here; an invalid range only surfaces when
    /// [`StrRange::as_str`] is called.
    pub fn new(src: &'src_buf str, start: usize, length: usize) -> Self {
        Self {
            src,
            range: start..(start + length),
        }
    }

    /// Returns the complete source string this range points into.
    pub fn src(&self) -> &'src_buf str {
        self.src
    }

    /// Extends the end of the range by `add_len` bytes; the start is unchanged.
    pub fn increase_length(&mut self, add_len: usize) {
        self.range.end += add_len;
    }

    /// Returns the text covered by the range.
    ///
    /// # Panics
    ///
    /// Panics if the range extends past the end of the source or does not
    /// fall on UTF-8 character boundaries.
    pub fn as_str(&self) -> &'src_buf str {
        &self.src[self.range.clone()]
    }

    /// Returns the length of the range in bytes.
    pub fn len(&self) -> usize {
        self.range.len()
    }

    /// Returns `true` if the range covers no bytes.
    pub fn is_empty(&self) -> bool {
        self.range.is_empty()
    }

    /// Returns the byte offset in the source at which the range begins.
    pub fn start(&self) -> usize {
        self.range.start
    }
}

impl<'src_buf> std::fmt::Display for StrRange<'src_buf> {
    /// Writes the covered text, exactly as returned by `as_str`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write![f, "{}", self.as_str()]
    }
}