From 935d95ce4f14dc989e3c5fd46018170641797ec1 Mon Sep 17 00:00:00 2001 From: rhysd Date: Tue, 25 Sep 2018 16:33:52 +0900 Subject: [PATCH] Fix multi-line strings are not allowed for key In spec https://github.com/toml-lang/toml#keys Quoted keys are clarified as > the exact same rules as either basic strings or literal strings TOML clearly distinguishes basic string and multi-line basic string (and likewise literal string and multi-line literal string). https://github.com/toml-lang/toml#string So table key and quoted key should not allow multi-line basic string and multi-line literal string. ABNF definition also describes that. https://github.com/toml-lang/toml/blob/master/toml.abnf ``` string = ml-basic-string / basic-string / ml-literal-string / literal-string quoted-key = basic-string / literal-string ``` `string` contains `ml-*` but `quoted-key` doesn't. --- src/de.rs | 8 +++++ src/tokens.rs | 70 +++++++++++++++++++++----------------- test-suite/tests/parser.rs | 6 ++++ 3 files changed, 53 insertions(+), 31 deletions(-) diff --git a/src/de.rs b/src/de.rs index 517a62b..491ae47 100644 --- a/src/de.rs +++ b/src/de.rs @@ -152,6 +152,9 @@ enum ErrorKind { /// An empty table key was found. EmptyTableKey, + /// Multiline strings are not allowed for key + MultilineStringKey, + /// A custom error which could be generated when deserializing a particular /// type.
Custom, @@ -1274,6 +1277,9 @@ impl<'a> Deserializer<'a> { TokenError::EmptyTableKey(at) => { self.error(at, ErrorKind::EmptyTableKey) } + TokenError::MultilineStringKey(at) => { + self.error(at, ErrorKind::MultilineStringKey) + } } } @@ -1377,6 +1383,7 @@ impl fmt::Display for Error { } ErrorKind::RedefineAsArray => "table redefined as array".fmt(f)?, ErrorKind::EmptyTableKey => "empty table key found".fmt(f)?, + ErrorKind::MultilineStringKey => "multiline strings are not allowed for key".fmt(f)?, ErrorKind::Custom => self.inner.message.fmt(f)?, ErrorKind::ExpectedString => "expected string".fmt(f)?, ErrorKind::DottedKeyInvalidType => "dotted key attempted to extend non-table type".fmt(f)?, @@ -1421,6 +1428,7 @@ impl error::Error for Error { ErrorKind::DuplicateTable(_) => "duplicate table", ErrorKind::RedefineAsArray => "table redefined as array", ErrorKind::EmptyTableKey => "empty table key found", + ErrorKind::MultilineStringKey => "invalid multiline string for key", ErrorKind::Custom => "a custom error", ErrorKind::ExpectedString => "expected string", ErrorKind::DottedKeyInvalidType => "dotted key invalid type", diff --git a/src/tokens.rs b/src/tokens.rs index 79aeb60..15c3b41 100644 --- a/src/tokens.rs +++ b/src/tokens.rs @@ -38,7 +38,7 @@ pub enum Token<'a> { RightBracket, Keylike(&'a str), - String { src: &'a str, val: Cow<'a, str> }, + String { src: &'a str, val: Cow<'a, str>, multiline: bool }, } #[derive(Eq, PartialEq, Debug)] @@ -51,6 +51,7 @@ pub enum Error { Unexpected(usize, char), UnterminatedString(usize), NewlineInTableKey(usize), + MultilineStringKey(usize), EmptyTableKey(usize), Wanted { at: usize, expected: &'static str, found: &'static str }, } @@ -169,8 +170,11 @@ impl<'a> Tokenizer<'a> { let current = self.current(); match self.next()? 
{ Some((span, Token::Keylike(k))) => Ok((span, k.into())), - Some((span, Token::String { src, val })) => { + Some((span, Token::String { src, val, multiline })) => { let offset = self.substr_offset(src); + if multiline { + return Err(Error::MultilineStringKey(offset)) + } if val == "" { return Err(Error::EmptyTableKey(offset)) } @@ -286,6 +290,7 @@ impl<'a> Tokenizer<'a> { return Ok(String { src: &self.input[start..start+2], val: Cow::Borrowed(""), + multiline: false, }) } } @@ -321,6 +326,7 @@ impl<'a> Tokenizer<'a> { return Ok(String { src: &self.input[start..self.current()], val: val.into_cow(&self.input[..i]), + multiline: multiline, }) } Some((i, c)) => new_ch(self, &mut val, multiline, i, c)?, @@ -497,7 +503,7 @@ impl<'a> Token<'a> { Token::LeftBrace => "a left brace", Token::RightBracket => "a right bracket", Token::LeftBracket => "a left bracket", - Token::String { .. } => "a string", + Token::String { multiline, .. } => if multiline { "a multiline string" } else { "a string" }, Token::Colon => "a colon", Token::Plus => "a plus", } @@ -518,56 +524,58 @@ mod tests { #[test] fn literal_strings() { - fn t(input: &str, val: &str) { + fn t(input: &str, val: &str, multiline: bool) { let mut t = Tokenizer::new(input); let (_, token) = t.next().unwrap().unwrap(); assert_eq!(token, Token::String { src: input, val: Cow::Borrowed(val), + multiline: multiline, }); assert!(t.next().unwrap().is_none()); } - t("''", ""); - t("''''''", ""); - t("'''\n'''", ""); - t("'a'", "a"); - t("'\"a'", "\"a"); - t("''''a'''", "'a"); - t("'''\n'a\n'''", "'a\n"); - t("'''a\n'a\r\n'''", "a\n'a\n"); + t("''", "", false); + t("''''''", "", true); + t("'''\n'''", "", true); + t("'a'", "a", false); + t("'\"a'", "\"a", false); + t("''''a'''", "'a", true); + t("'''\n'a\n'''", "'a\n", true); + t("'''a\n'a\r\n'''", "a\n'a\n", true); } #[test] fn basic_strings() { - fn t(input: &str, val: &str) { + fn t(input: &str, val: &str, multiline: bool) { let mut t = Tokenizer::new(input); let (_, token) = 
t.next().unwrap().unwrap(); assert_eq!(token, Token::String { src: input, val: Cow::Borrowed(val), + multiline: multiline, }); assert!(t.next().unwrap().is_none()); } - t(r#""""#, ""); - t(r#""""""""#, ""); - t(r#""a""#, "a"); - t(r#""""a""""#, "a"); - t(r#""\t""#, "\t"); - t(r#""\u0000""#, "\0"); - t(r#""\U00000000""#, "\0"); - t(r#""\U000A0000""#, "\u{A0000}"); - t(r#""\\t""#, "\\t"); - t("\"\"\"\\\n\"\"\"", ""); - t("\"\"\"\\\n \t \t \\\r\n \t \n \t \r\n\"\"\"", ""); - t(r#""\r""#, "\r"); - t(r#""\n""#, "\n"); - t(r#""\b""#, "\u{8}"); - t(r#""a\fa""#, "a\u{c}a"); - t(r#""\"a""#, "\"a"); - t("\"\"\"\na\"\"\"", "a"); - t("\"\"\"\n\"\"\"", ""); + t(r#""""#, "", false); + t(r#""""""""#, "", true); + t(r#""a""#, "a", false); + t(r#""""a""""#, "a", true); + t(r#""\t""#, "\t", false); + t(r#""\u0000""#, "\0", false); + t(r#""\U00000000""#, "\0", false); + t(r#""\U000A0000""#, "\u{A0000}", false); + t(r#""\\t""#, "\\t", false); + t("\"\"\"\\\n\"\"\"", "", true); + t("\"\"\"\\\n \t \t \\\r\n \t \n \t \r\n\"\"\"", "", true); + t(r#""\r""#, "\r", false); + t(r#""\n""#, "\n", false); + t(r#""\b""#, "\u{8}", false); + t(r#""a\fa""#, "a\u{c}a", false); + t(r#""\"a""#, "\"a", false); + t("\"\"\"\na\"\"\"", "a", true); + t("\"\"\"\n\"\"\"", "", true); err(r#""\a"#, Error::InvalidEscape(2, 'a')); err("\"\\\n", Error::InvalidEscape(2, '\n')); err("\"\\\r\n", Error::InvalidEscape(2, '\n')); diff --git a/test-suite/tests/parser.rs b/test-suite/tests/parser.rs index 2282416..2534ce6 100644 --- a/test-suite/tests/parser.rs +++ b/test-suite/tests/parser.rs @@ -275,6 +275,10 @@ fn bad_keys() { "\"\"|=3".parse::<Value>().unwrap_err(); "\"\n\"|=3".parse::<Value>().unwrap_err(); "\"\r\"|=3".parse::<Value>().unwrap_err(); + "''''''=3".parse::<Value>().unwrap_err(); + "\"\"\"\"\"\"=3".parse::<Value>().unwrap_err(); + "'''key'''=3".parse::<Value>().unwrap_err(); + "\"\"\"key\"\"\"=3".parse::<Value>().unwrap_err(); } #[test] @@ -290,6 +294,8 @@ fn bad_table_names() { "[']".parse::<Value>().unwrap_err(); "[''']".parse::<Value>().unwrap_err();
"['''''']".parse::<Value>().unwrap_err(); + "['''foo''']".parse::<Value>().unwrap_err(); + "[\"\"\"bar\"\"\"]".parse::<Value>().unwrap_err(); "['\n']".parse::<Value>().unwrap_err(); "['\r\n']".parse::<Value>().unwrap_err(); }