elf_lojban/src/lex/mod.rs

155 lines
4.6 KiB
Rust

mod cluster;
mod eat;
mod pattern;
use crate::{lojbanic::starts_with_permissible_initial_pair, Token};
use cluster::{clusterise, Cluster, ClusterKind};
use eat::*;
use pattern::Pattern;
/// Reports whether the front of `clusters` looks like a gismu.
///
/// The clusters must satisfy `Pattern::CVCCV` or `Pattern::CCVCV` under
/// `matches_strict`. If a cluster exists at index 5, two further conditions
/// apply: that cluster must be a consonant, number or whitespace cluster, and
/// the first four clusters must have a combined length of exactly 5. When
/// there is no cluster at index 5, the shape match alone decides.
fn matches_gismu(clusters: &[Cluster]) -> bool {
    let gismu_shaped =
        Pattern::CVCCV.matches_strict(clusters) || Pattern::CCVCV.matches_strict(clusters);
    if !gismu_shaped {
        return false;
    }

    // NOTE(review): the length check below sums four clusters, which suggests
    // the cluster directly after the word sits at index 4, not 5 -- confirm
    // that index 5 is the intended look-ahead. Also note that the length
    // check is skipped entirely when index 5 does not exist.
    let lookahead = match clusters.get(5) {
        Some(cluster) => cluster,
        None => return true,
    };

    let lookahead_allowed = match lookahead.kind {
        ClusterKind::Consonant | ClusterKind::Number | ClusterKind::Whitespace => true,
        // Only `Vowel` is expected to be seen here; the other three kinds are
        // probably unreachable at this point.
        ClusterKind::Vowel
        | ClusterKind::Huhboo
        | ClusterKind::Glide
        | ClusterKind::Unknown => false,
    };
    if !lookahead_allowed {
        return false;
    }

    // Combined length of the first four clusters.
    let mut leading_len = 0;
    for cluster in clusters.iter().take(4) {
        leading_len += cluster.len();
    }
    leading_len == 5
}
/// Reports whether the front of `clusters` looks like a lujvo.
///
/// Any one of the following is sufficient, tested in this order:
/// - `Pattern::CCVCCV` matches;
/// - `Pattern::CCV` matches;
/// - `Pattern::VCC` matches and the first cluster has length at most 3;
/// - `Pattern::CVCCV` matches, the first two clusters together have length at
///   most 3, and the third cluster does not start with a permissible initial
///   pair.
fn matches_lujvo(clusters: &[Cluster]) -> bool {
    if Pattern::CCVCCV.matches(clusters) || Pattern::CCV.matches(clusters) {
        return true;
    }

    // Indexing happens only after the corresponding pattern has matched
    // (assumes a match implies the indexed clusters exist -- TODO confirm).
    if Pattern::VCC.matches(clusters) && clusters[0].len() <= 3 {
        return true;
    }

    Pattern::CVCCV.matches(clusters)
        && clusters[0].len() + clusters[1].len() <= 3
        && !starts_with_permissible_initial_pair(&clusters[2].s)
}
/// Reports whether the front of `clusters` looks like a cmavo.
///
/// When `Pattern::CVCCV` matches, the answer is whether the third cluster
/// starts with a permissible initial pair. Otherwise the answer is whether
/// any of `Pattern::CVCCVCCV`, `Pattern::CV` or `Pattern::V` matches.
fn matches_cmavo(clusters: &[Cluster]) -> bool {
    if !Pattern::CVCCV.matches(clusters) {
        return Pattern::CVCCVCCV.matches(clusters)
            || Pattern::CV.matches(clusters)
            || Pattern::V.matches(clusters);
    }

    // `clusters[2]` is only read after CVCCV matched (assumes a match implies
    // at least three clusters -- TODO confirm).
    starts_with_permissible_initial_pair(&clusters[2].s)
}
fn matches_cmevla(clusters: &[Cluster]) -> bool {
let mut previous_was_consonant = false;
for Cluster { s: _, kind } in clusters {
previous_was_consonant = match kind {
ClusterKind::Consonant => true,
ClusterKind::Huhboo | ClusterKind::Glide | ClusterKind::Vowel => false,
ClusterKind::Unknown | ClusterKind::Whitespace => return previous_was_consonant,
ClusterKind::Number => break,
};
}
previous_was_consonant
}
/// Reports whether an unknown cluster occurs before the first whitespace
/// cluster in `clusters`.
///
/// Returns `false` if whitespace comes first, or if the slice ends without
/// containing an unknown cluster.
fn matches_unknown(clusters: &[Cluster]) -> bool {
    clusters
        .iter()
        .take_while(|cluster| !matches!(cluster.kind, ClusterKind::Whitespace))
        .any(|cluster| matches!(cluster.kind, ClusterKind::Unknown))
}
/// Splits `src` into clusters with `clusterise` and converts them to tokens.
///
/// Each iteration classifies the front of the remaining clusters and lets the
/// corresponding `eat_*` helper consume it. The checks run in a fixed
/// priority order: unknown text, cmevla, gismu, lujvo, cmavo. If none of them
/// applies, the kind of the first remaining cluster decides. The loop ends
/// once that fallback finds no first cluster.
///
/// When `stripped` is `true`, whitespace is still consumed but no token is
/// emitted for it.
fn lex_inner(stripped: bool, src: &str) -> Vec<Token> {
    let clusters = clusterise(src);
    let mut tokens = Vec::new();
    let mut remaining = clusters.as_slice();

    loop {
        let token = if matches_unknown(remaining) {
            let (text, tail) = eat_non_lojban(remaining);
            remaining = tail;
            Token::unknown(text.as_str())
        } else if matches_cmevla(remaining) {
            let (text, tail) = eat_cmevla(remaining);
            remaining = tail;
            Token::cmevla(text.as_str())
        } else if matches_gismu(remaining) {
            let (text, tail) = eat_gismu(remaining);
            remaining = tail;
            Token::brivla(text.as_str())
        } else if matches_lujvo(remaining) {
            // Lujvo and gismu both become brivla tokens.
            let (text, tail) = eat_lujvo(remaining);
            remaining = tail;
            Token::brivla(text.as_str())
        } else if matches_cmavo(remaining) {
            let (text, tail) = eat_cmavo(remaining);
            remaining = tail;
            Token::cmavo(text.as_str())
        } else {
            // No word class matched: fall back on the kind of the first cluster.
            match remaining.first().map(|head| &head.kind) {
                None => break,
                Some(ClusterKind::Number) => {
                    let (text, tail) = eat_number(remaining);
                    remaining = tail;
                    Token::number(text.as_str())
                }
                Some(ClusterKind::Whitespace) => {
                    let (text, tail) = eat_whitespace(remaining);
                    remaining = tail;
                    if stripped {
                        // Whitespace was consumed; emit nothing for it.
                        continue;
                    }
                    Token::whitespace(text.as_str())
                }
                // Every other leading cluster is consumed as non-Lojban text.
                // This includes `Unknown`, although `matches_unknown` already
                // claims any input whose first cluster is unknown.
                Some(_) => {
                    let (text, tail) = eat_non_lojban(remaining);
                    remaining = tail;
                    Token::unknown(text.as_str())
                }
            }
        };
        tokens.push(token);
    }

    tokens
}
/// Lexes `src` into tokens, keeping whitespace as tokens in the output.
///
/// Use [`lex_stripped`] to get the same tokens without the whitespace ones.
pub fn lex(src: &str) -> Vec<Token> {
    let strip_whitespace = false;
    lex_inner(strip_whitespace, src)
}
/// Lexes `src` into tokens, omitting whitespace tokens from the output.
///
/// Use [`lex`] to keep the whitespace tokens.
pub fn lex_stripped(src: &str) -> Vec<Token> {
    let strip_whitespace = true;
    lex_inner(strip_whitespace, src)
}