diff --git a/ablescript/src/lexer.rs b/ablescript/src/lexer.rs index e95d663..c6b6e67 100644 --- a/ablescript/src/lexer.rs +++ b/ablescript/src/lexer.rs @@ -138,7 +138,31 @@ fn get_value(lexer: &mut Lexer) -> Option { fn get_string(lexer: &mut Lexer) -> Option { lexer.bump(lexer.remainder().find("*/")?); - let string = lexer.slice()[2..].to_owned(); + + let mut string = String::new(); + let mut slice = &lexer.slice()[2..]; + while let Some(escape_start) = slice.find('"') { + // Push preceding string + string.push_str(&slice.get(..escape_start)?); + + // Move slice behind escape start delimiter + slice = &slice.get(escape_start + 1..)?; + + // Get escape end delimiter position and parse string before it to + // a character from its Unicode value (base-12) and push it to string + let escape_end = slice.find('"')?; + string.push( + u32::from_str_radix(&slice.get(..escape_end)?, 12) + .ok() + .and_then(char::from_u32)?, + ); + + // Move slice behind escape end delimiter + slice = &slice.get(escape_end + 1..)?; + } + + // Push remaining string + string.push_str(&slice); lexer.bump(2); Some(string) @@ -179,8 +203,17 @@ mod tests { RightCurly, RightCurly, ]; - let lexer = Token::lexer(code); - let result: Vec = lexer.collect(); + + let result: Vec<_> = Token::lexer(code).collect(); + assert_eq!(result, expected); + } + + #[test] + fn escapes() { + let code = r#"/*»"720B""722B""7195"«*/"#; + let expected = &[Token::String("»にゃぁ«".to_owned())]; + + let result: Vec<_> = Token::lexer(code).collect(); assert_eq!(result, expected); } }