From 90f8137b0daefcd4e7fd42d37b90205fc8a31547 Mon Sep 17 00:00:00 2001 From: Erin Date: Sun, 24 Apr 2022 23:29:15 +0200 Subject: [PATCH 1/3] Implemented string unicode escapes --- ablescript/src/lexer.rs | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/ablescript/src/lexer.rs b/ablescript/src/lexer.rs index e95d663c..5271724a 100644 --- a/ablescript/src/lexer.rs +++ b/ablescript/src/lexer.rs @@ -138,7 +138,24 @@ fn get_value(lexer: &mut Lexer) -> Option { fn get_string(lexer: &mut Lexer) -> Option { lexer.bump(lexer.remainder().find("*/")?); - let string = lexer.slice()[2..].to_owned(); + + let mut string = String::new(); + let mut slice = &lexer.slice()[2..]; + while let Some(escape_start) = slice.find('"') { + string.push_str(&slice.get(..escape_start)?); + slice = &slice.get(escape_start + 1..)?; + + let escape_end = slice.find('"')?; + string.push( + u32::from_str_radix(&slice.get(..escape_end)?, 12) + .ok() + .and_then(char::from_u32)?, + ); + + slice = &slice.get(escape_end + 1..)?; + } + + string.push_str(&slice); lexer.bump(2); Some(string) From c6f4aaef24aaafdecadd75acff52ee2389521c56 Mon Sep 17 00:00:00 2001 From: Erin Date: Sun, 24 Apr 2022 23:33:11 +0200 Subject: [PATCH 2/3] Added some comments --- ablescript/src/lexer.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/ablescript/src/lexer.rs b/ablescript/src/lexer.rs index 5271724a..61da475e 100644 --- a/ablescript/src/lexer.rs +++ b/ablescript/src/lexer.rs @@ -142,9 +142,14 @@ fn get_string(lexer: &mut Lexer) -> Option { let mut string = String::new(); let mut slice = &lexer.slice()[2..]; while let Some(escape_start) = slice.find('"') { + // Push preceding string string.push_str(&slice.get(..escape_start)?); + + // Move slice behind escape start delimiter slice = &slice.get(escape_start + 1..)?; + // Get escape end delimiter position and parse string before it to + // a character from its unicode value (base-12) and push it to string let escape_end = 
slice.find('"')?; string.push( u32::from_str_radix(&slice.get(..escape_end)?, 12) @@ -152,9 +157,11 @@ fn get_string(lexer: &mut Lexer) -> Option { .and_then(char::from_u32)?, ); + // Move slice behind escape end delimiter slice = &slice.get(escape_end + 1..)?; } + // Push remaining string string.push_str(&slice); lexer.bump(2); From 19744aa63a2ac1b2f4f74d60894bf2847e1076e8 Mon Sep 17 00:00:00 2001 From: Erin Date: Sun, 24 Apr 2022 23:47:51 +0200 Subject: [PATCH 3/3] Added escapes test --- ablescript/src/lexer.rs | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/ablescript/src/lexer.rs b/ablescript/src/lexer.rs index 61da475e..c6b6e670 100644 --- a/ablescript/src/lexer.rs +++ b/ablescript/src/lexer.rs @@ -203,8 +203,17 @@ mod tests { RightCurly, RightCurly, ]; - let lexer = Token::lexer(code); - let result: Vec = lexer.collect(); + + let result: Vec<_> = Token::lexer(code).collect(); + assert_eq!(result, expected); + } + + #[test] + fn escapes() { + let code = r#"/*»"720B""722B""7195"«*/"#; + let expected = &[Token::String("»にゃぁ«".to_owned())]; + + let result: Vec<_> = Token::lexer(code).collect(); assert_eq!(result, expected); } }