Merge pull request 'Added String character escaping by dozenal " delimited unicode values' (#9) from feature/string-unicode-escapes into master

Reviewed-on: https://git.ablecorp.us:443/AbleScript/able-script/pulls/9
This commit is contained in:
able 2022-04-25 12:48:07 +00:00
commit f99aad0152

View file

@ -138,7 +138,31 @@ fn get_value<T: std::str::FromStr>(lexer: &mut Lexer<Token>) -> Option<T> {
fn get_string(lexer: &mut Lexer<Token>) -> Option<String> { fn get_string(lexer: &mut Lexer<Token>) -> Option<String> {
lexer.bump(lexer.remainder().find("*/")?); lexer.bump(lexer.remainder().find("*/")?);
let string = lexer.slice()[2..].to_owned();
let mut string = String::new();
let mut slice = &lexer.slice()[2..];
while let Some(escape_start) = slice.find('"') {
// Push the string preceding the escape
string.push_str(&slice.get(..escape_start)?);
// Move slice past the escape start delimiter
slice = &slice.get(escape_start + 1..)?;
// Get the escape end delimiter position, parse the string before it as
// a character from its Unicode value (base-12), and push it to string
let escape_end = slice.find('"')?;
string.push(
u32::from_str_radix(&slice.get(..escape_end)?, 12)
.ok()
.and_then(char::from_u32)?,
);
// Move slice past the escape end delimiter
slice = &slice.get(escape_end + 1..)?;
}
// Push remaining string
string.push_str(&slice);
lexer.bump(2); lexer.bump(2);
Some(string) Some(string)
@ -179,8 +203,17 @@ mod tests {
RightCurly, RightCurly,
RightCurly, RightCurly,
]; ];
let lexer = Token::lexer(code);
let result: Vec<Token> = lexer.collect(); let result: Vec<_> = Token::lexer(code).collect();
assert_eq!(result, expected);
}
#[test]
fn escapes() {
let code = r#"/*»"720B""722B""7195"«*/"#;
let expected = &[Token::String("»にゃぁ«".to_owned())];
let result: Vec<_> = Token::lexer(code).collect();
assert_eq!(result, expected); assert_eq!(result, expected);
} }
} }