From 96ca73e1304d7539f7bfedd84a7a1eb49e569abe Mon Sep 17 00:00:00 2001 From: Claudio Bley Date: Tue, 8 May 2018 23:02:24 +0200 Subject: [PATCH] Treat unicode hex digits case-insensitively In Rust >= 1.24.0 we could have used `char::is_ascii_hexdigit`, but to keep compatibility with older versions, `char::is_digit(16)` is used. Fixes #240. --- src/tokens.rs | 11 ++++------- test-suite/tests/valid/unicode-escape.json | 5 ++++- test-suite/tests/valid/unicode-escape.toml | 3 +++ 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/src/tokens.rs b/src/tokens.rs index bcabd94..3f47f02 100644 --- a/src/tokens.rs +++ b/src/tokens.rs @@ -2,6 +2,7 @@ use std::borrow::Cow; use std::char; use std::str; use std::string; +use std::string::String as StdString; use self::Token::*; @@ -369,19 +370,15 @@ impl<'a> Tokenizer<'a> { } fn hex(&mut self, start: usize, i: usize, len: usize) -> Result { - let mut val = 0; + let mut buf = StdString::with_capacity(len); for _ in 0..len { match self.one() { - Some((_, ch)) if '0' <= ch && ch <= '9' => { - val = val * 16 + (ch as u32 - '0' as u32); - } - Some((_, ch)) if 'A' <= ch && ch <= 'F' => { - val = val * 16 + (ch as u32 - 'A' as u32) + 10; - } + Some((_, ch)) if ch as u32 <= 0x7F && ch.is_digit(16) => buf.push(ch), Some((i, ch)) => return Err(Error::InvalidHexEscape(i, ch)), None => return Err(Error::UnterminatedString(start)), } } + let val = u32::from_str_radix(&buf, 16).unwrap(); match char::from_u32(val) { Some(ch) => Ok(ch), None => Err(Error::InvalidEscapeValue(i, val)), diff --git a/test-suite/tests/valid/unicode-escape.json b/test-suite/tests/valid/unicode-escape.json index 32948c6..06fae70 100644 --- a/test-suite/tests/valid/unicode-escape.json +++ b/test-suite/tests/valid/unicode-escape.json @@ -1,5 +1,8 @@ { "answer1": {"type": "string", "value": "\u000B"}, "answer4": {"type": "string", "value": "\u03B4α"}, - "answer8": {"type": "string", "value": "\u03B4β"} + "answer8": {"type": "string", "value": "\u03B4β"}, + 
"answer9": {"type": "string", "value": "\uc0de"}, + "answer10": {"type": "string", "value": "\u03B4α"}, + "answer11": {"type": "string", "value": "\uABC1"} } diff --git a/test-suite/tests/valid/unicode-escape.toml b/test-suite/tests/valid/unicode-escape.toml index c0d5a25..6654252 100644 --- a/test-suite/tests/valid/unicode-escape.toml +++ b/test-suite/tests/valid/unicode-escape.toml @@ -1,3 +1,6 @@ answer1 = "\u000B" answer4 = "\u03B4α" answer8 = "\U000003B4β" +answer9 = "\uc0de" +answer10 = "\u03b4α" +answer11 = "\U0000abc1"