261 lines
8.1 KiB
Rust
261 lines
8.1 KiB
Rust
mod cluster;
|
|
mod pattern;
|
|
use crate::{lojbanic::starts_with_permissible_initial_pair, strange::StrRange, Token};
|
|
use cluster::{clusterise, Cluster, ClusterKind};
|
|
use pattern::Pattern;
|
|
|
|
fn matches_gismu(clusters: &[Cluster]) -> bool {
|
|
if Pattern::CVCCV.matches_strict(clusters) || Pattern::CCVCV.matches_strict(clusters) {
|
|
if let Some(Cluster { s: _, kind }) = clusters.get(5) {
|
|
(match kind {
|
|
ClusterKind::Consonant | ClusterKind::Number | ClusterKind::Whitespace => true,
|
|
ClusterKind::Vowel => false,
|
|
// pretty sure these are unreachable at this point
|
|
ClusterKind::Huhboo => false,
|
|
ClusterKind::Glide => false,
|
|
ClusterKind::Unknown => false,
|
|
}) && { clusters.iter().take(4).fold(0, |a, c| c.len() + a) == 5 }
|
|
} else {
|
|
true
|
|
}
|
|
} else {
|
|
false
|
|
}
|
|
}
|
|
|
|
fn matches_lujvo(clusters: &[Cluster]) -> bool {
|
|
Pattern::CCVCCV.matches(clusters)
|
|
|| Pattern::CCV.matches(clusters)
|
|
|| if Pattern::CVCCV.matches(clusters) {
|
|
clusters.iter().take(2).fold(0, |a, c| c.len() + a) <= 3
|
|
} else {
|
|
false
|
|
}
|
|
}
|
|
|
|
fn matches_cmavo(clusters: &[Cluster]) -> bool {
|
|
if Pattern::CVCCV.matches(clusters) {
|
|
starts_with_permissible_initial_pair(&clusters[2].s)
|
|
} else {
|
|
Pattern::CVCCVCCV.matches(clusters)
|
|
|| Pattern::CV.matches(clusters)
|
|
|| Pattern::V.matches(clusters)
|
|
}
|
|
}
|
|
|
|
fn matches_cmevla(clusters: &[Cluster]) -> bool {
|
|
let mut previous_was_consonant = false;
|
|
for Cluster { s: _, kind } in clusters {
|
|
match kind {
|
|
ClusterKind::Consonant => previous_was_consonant = true,
|
|
ClusterKind::Huhboo | ClusterKind::Glide | ClusterKind::Vowel => {
|
|
previous_was_consonant = false
|
|
}
|
|
ClusterKind::Unknown | ClusterKind::Whitespace => return false,
|
|
ClusterKind::Number => break,
|
|
}
|
|
}
|
|
previous_was_consonant
|
|
}
|
|
|
|
fn matches_unknown(clusters: &[Cluster]) -> bool {
|
|
for Cluster { s: _, kind } in clusters {
|
|
if let ClusterKind::Unknown = kind {
|
|
return true;
|
|
}
|
|
if let ClusterKind::Whitespace = kind {
|
|
return false;
|
|
}
|
|
}
|
|
false
|
|
}
|
|
|
|
fn eat_cmevla<'a, 'b>(rest: &'b [Cluster<'a>]) -> (StrRange<'a>, &'b [Cluster<'a>]) {
|
|
let mut temp_buf = StrRange::new(rest[0].s.src(), rest[0].s.start(), 0);
|
|
let mut new_offset = 0;
|
|
for (i, Cluster { s, kind }) in rest.iter().enumerate() {
|
|
match kind {
|
|
ClusterKind::Consonant | ClusterKind::Vowel | ClusterKind::Huhboo | ClusterKind::Glide => {
|
|
temp_buf.increase_length(s.len())
|
|
}
|
|
_ => break,
|
|
}
|
|
new_offset = i + 1;
|
|
}
|
|
|
|
(temp_buf, &rest[new_offset..])
|
|
}
|
|
|
|
fn eat_gismu<'a, 'b>(rest: &'b [Cluster<'a>]) -> (StrRange<'a>, &'b [Cluster<'a>]) {
|
|
let mut temp_buf = StrRange::new(rest[0].s.src(), rest[0].s.start(), 0);
|
|
let gismu_offset = 4;
|
|
(0..gismu_offset).for_each(|i| temp_buf.increase_length(rest[i].len()));
|
|
(temp_buf, &rest[gismu_offset..])
|
|
}
|
|
|
|
fn eat_lujvo<'a, 'b>(rest: &'b [Cluster<'a>]) -> (StrRange<'a>, &'b [Cluster<'a>]) {
|
|
let mut temp_buf = StrRange::new(rest[0].s.src(), rest[0].s.start(), 0);
|
|
let mut new_offset = 0;
|
|
let mut stressed = false;
|
|
for (i, Cluster { s, kind }) in rest.iter().enumerate() {
|
|
match kind {
|
|
ClusterKind::Consonant | ClusterKind::Vowel => {
|
|
temp_buf.increase_length(s.len());
|
|
if s.as_str().to_lowercase() != *s.as_str() {
|
|
stressed = true;
|
|
new_offset += 1;
|
|
continue;
|
|
}
|
|
}
|
|
ClusterKind::Huhboo | ClusterKind::Glide => temp_buf.increase_length(s.len()),
|
|
_ => break,
|
|
}
|
|
if stressed {
|
|
break;
|
|
}
|
|
new_offset = i + 1;
|
|
}
|
|
(temp_buf, &rest[new_offset..])
|
|
}
|
|
|
|
fn eat_cmavo<'a, 'b>(rest: &'b [Cluster<'a>]) -> (StrRange<'a>, &'b [Cluster<'a>]) {
|
|
let mut temp_buf = StrRange::new(rest[0].s.src(), rest[0].s.start(), 0);
|
|
let mut new_offset = 0;
|
|
let mut found_consonant = false;
|
|
for (i, Cluster { s, kind }) in rest.iter().enumerate() {
|
|
match kind {
|
|
ClusterKind::Consonant => {
|
|
if found_consonant {
|
|
break;
|
|
} else {
|
|
found_consonant = true;
|
|
temp_buf.increase_length(s.len());
|
|
}
|
|
}
|
|
ClusterKind::Vowel => {
|
|
temp_buf.increase_length(s.len());
|
|
found_consonant = true;
|
|
}
|
|
ClusterKind::Huhboo => temp_buf.increase_length(s.len()),
|
|
ClusterKind::Glide => temp_buf.increase_length(s.len()),
|
|
_ => break,
|
|
}
|
|
new_offset = i;
|
|
}
|
|
(temp_buf, &rest[new_offset + 1..])
|
|
}
|
|
|
|
fn eat_number<'a, 'b>(rest: &'b [Cluster<'a>]) -> (StrRange<'a>, &'b [Cluster<'a>]) {
|
|
let mut temp_buf = StrRange::new(rest[0].s.src(), rest[0].s.start(), 0);
|
|
let mut new_offset = 0;
|
|
let mut finished = false;
|
|
for (i, Cluster { s, kind }) in rest.iter().enumerate() {
|
|
new_offset = i;
|
|
finished = false;
|
|
match kind {
|
|
ClusterKind::Number => temp_buf.increase_length(s.len()),
|
|
_ => break,
|
|
}
|
|
finished = true;
|
|
}
|
|
if finished {
|
|
new_offset += 1;
|
|
}
|
|
(temp_buf, &rest[new_offset..])
|
|
}
|
|
|
|
fn eat_non_lojban<'a, 'b>(rest: &'b [Cluster<'a>]) -> (StrRange<'a>, &'b [Cluster<'a>]) {
|
|
let mut temp_buf = StrRange::new(rest[0].s.src(), rest[0].s.start(), 0);
|
|
let mut new_offset = 0;
|
|
for (i, Cluster { s, kind }) in rest.iter().enumerate() {
|
|
new_offset = i + 1;
|
|
match kind {
|
|
ClusterKind::Whitespace => break,
|
|
_ => temp_buf.increase_length(s.len()),
|
|
}
|
|
}
|
|
(temp_buf, &rest[new_offset..])
|
|
}
|
|
|
|
fn eat_whitespace<'a, 'b>(rest: &'b [Cluster<'a>]) -> (StrRange<'a>, &'b [Cluster<'a>]) {
|
|
let mut temp_buf = StrRange::new(rest[0].s.src(), rest[0].s.start(), 0);
|
|
let mut new_offset = 0;
|
|
let mut finished = false;
|
|
for (i, Cluster { s, kind }) in rest.iter().enumerate() {
|
|
new_offset = i;
|
|
finished = false;
|
|
match kind {
|
|
ClusterKind::Whitespace => temp_buf.increase_length(s.len()),
|
|
_ => break,
|
|
}
|
|
finished = true;
|
|
}
|
|
if finished {
|
|
new_offset += 1;
|
|
}
|
|
(temp_buf, &rest[new_offset..])
|
|
}
|
|
|
|
pub fn lex(src: &str) -> Vec<Token> {
|
|
let mut output = Vec::new();
|
|
let clusters = clusterise(src);
|
|
let mut rest = clusters.as_slice();
|
|
loop {
|
|
output.push(if matches_unknown(rest) {
|
|
let (buf, new_rest) = eat_non_lojban(rest);
|
|
rest = new_rest;
|
|
Token::unknown(buf.as_str())
|
|
} else if matches_cmevla(rest) {
|
|
let (buf, new_rest) = eat_cmevla(rest);
|
|
rest = new_rest;
|
|
Token::cmevla(buf.as_str())
|
|
} else if matches_gismu(rest) {
|
|
let (buf, new_rest) = eat_gismu(rest);
|
|
rest = new_rest;
|
|
Token::brivla(buf.as_str())
|
|
} else if matches_lujvo(rest) {
|
|
let (buf, new_rest) = eat_lujvo(rest);
|
|
rest = new_rest;
|
|
Token::brivla(buf.as_str())
|
|
} else if matches_cmavo(rest) {
|
|
let (buf, new_rest) = eat_cmavo(rest);
|
|
rest = new_rest;
|
|
Token::cmavo(buf.as_str())
|
|
} else {
|
|
match rest.get(0) {
|
|
Some(Cluster {
|
|
s: _,
|
|
kind: ClusterKind::Number,
|
|
}) => {
|
|
let (buf, new_rest) = eat_number(rest);
|
|
rest = new_rest;
|
|
Token::number(buf.as_str())
|
|
}
|
|
Some(Cluster {
|
|
s: _,
|
|
kind: ClusterKind::Unknown,
|
|
}) => {
|
|
let (buf, new_rest) = eat_non_lojban(rest);
|
|
rest = new_rest;
|
|
Token::unknown(buf.as_str())
|
|
}
|
|
Some(Cluster {
|
|
s: _,
|
|
kind: ClusterKind::Whitespace,
|
|
}) => {
|
|
let (buf, new_rest) = eat_whitespace(rest);
|
|
rest = new_rest;
|
|
Token::whitespace(buf.as_str())
|
|
}
|
|
Some(Cluster { s: _, kind: _ }) => {
|
|
let (buf, new_rest) = eat_non_lojban(rest);
|
|
rest = new_rest;
|
|
Token::unknown(buf.as_str())
|
|
}
|
|
None => break,
|
|
}
|
|
});
|
|
}
|
|
output
|
|
}
|