Tighten up CRLF parsing

Fix lots of false positives with stray \r characters and also fix CRLF parsing
in raw string literals with escapes.

Closes #49
This commit is contained in:
Alex Crichton 2015-01-15 15:10:07 -08:00
parent 9dd46d6fbf
commit 5f2c7b4986

View file

@ -87,12 +87,17 @@ impl<'a> Parser<'a> {
// Returns true and consumes the next character if it matches `ch`, // Returns true and consumes the next character if it matches `ch`,
// otherwise do nothing and return false // otherwise do nothing and return false
fn eat(&mut self, ch: char) -> bool { fn eat(&mut self, ch: char) -> bool {
match self.cur.clone().next() { match self.peek(0) {
Some((_, c)) if c == ch => { self.cur.next(); true } Some((_, c)) if c == ch => { self.cur.next(); true }
Some(_) | None => false, Some(_) | None => false,
} }
} }
// Looks `n` characters past the current position without consuming input.
// Works on a copy of the cursor, so `self.cur` is left untouched. Returns
// `None` when fewer than `n + 1` items remain.
fn peek(&self, n: usize) -> Option<(usize, char)> {
    let mut ahead = self.cur.clone();
    ahead.nth(n)
}
fn expect(&mut self, ch: char) -> bool { fn expect(&mut self, ch: char) -> bool {
if self.eat(ch) { return true } if self.eat(ch) { return true }
let mut it = self.cur.clone(); let mut it = self.cur.clone();
@ -109,26 +114,38 @@ impl<'a> Parser<'a> {
false false
} }
// Consumes whitespace ('\t' and ' ') until another character (or EOF) is reached // Consumes whitespace ('\t' and ' ') until another character (or EOF) is
fn ws(&mut self) { // reached. Returns if any whitespace was consumed
fn ws(&mut self) -> bool {
let mut ret = false;
loop { loop {
match self.cur.clone().next() { match self.peek(0) {
Some((_, '\t')) | Some((_, '\t')) |
Some((_, ' ')) => { self.cur.next(); } Some((_, ' ')) => { self.cur.next(); ret = true; }
_ => break, _ => break,
} }
} }
ret
} }
// Consumes the rest of the line after a comment character // Consumes the rest of the line after a comment character
fn comment(&mut self) { fn comment(&mut self) -> bool {
match self.cur.clone().next() { if !self.eat('#') { return false }
Some((_, '#')) => {}
_ => return,
}
for (_, ch) in self.cur { for (_, ch) in self.cur {
if ch == '\n' { break } if ch == '\n' { break }
} }
true
}
// Consumes a newline if one is next
fn newline(&mut self) -> bool {
match self.peek(0) {
Some((_, '\n')) => { self.cur.next(); true }
Some((_, '\r')) if self.peek(1).map(|c| c.1) == Some('\n') => {
self.cur.next(); self.cur.next(); true
}
_ => false
}
} }
/// Executes the parser, parsing the string contained within. /// Executes the parser, parsing the string contained within.
@ -143,10 +160,9 @@ impl<'a> Parser<'a> {
let mut ret = BTreeMap::new(); let mut ret = BTreeMap::new();
loop { loop {
self.ws(); self.ws();
match self.cur.clone().next() { if self.newline() { continue }
match self.peek(0) {
Some((_, '#')) => { self.comment(); } Some((_, '#')) => { self.comment(); }
Some((_, '\n')) |
Some((_, '\r')) => { self.cur.next(); }
Some((start, '[')) => { Some((start, '[')) => {
self.cur.next(); self.cur.next();
let array = self.eat('['); let array = self.eat('[');
@ -201,10 +217,9 @@ impl<'a> Parser<'a> {
fn values(&mut self, into: &mut TomlTable) -> bool { fn values(&mut self, into: &mut TomlTable) -> bool {
loop { loop {
self.ws(); self.ws();
if self.newline() { continue }
match self.cur.clone().next() { match self.cur.clone().next() {
Some((_, '#')) => self.comment(), Some((_, '#')) => { self.comment(); }
Some((_, '\n')) |
Some((_, '\r')) => { self.cur.next(); }
Some((_, '[')) => break, Some((_, '[')) => break,
Some((start, _)) => { Some((start, _)) => {
let mut key = String::new(); let mut key = String::new();
@ -282,7 +297,7 @@ impl<'a> Parser<'a> {
if self.eat('"') { if self.eat('"') {
if self.eat('"') { if self.eat('"') {
multiline = true; multiline = true;
self.eat('\n'); self.newline();
} else { } else {
// empty // empty
return Some(Value::String(ret)) return Some(Value::String(ret))
@ -290,6 +305,7 @@ impl<'a> Parser<'a> {
} }
loop { loop {
while self.newline() { ret.push('\n') }
match self.cur.next() { match self.cur.next() {
Some((_, '"')) => { Some((_, '"')) => {
if multiline { if multiline {
@ -304,8 +320,6 @@ impl<'a> Parser<'a> {
None => {} None => {}
} }
} }
Some((_, '\n')) |
Some((_, '\r')) if multiline => ret.push('\n'),
Some((pos, ch)) if ch < '\u{1f}' => { Some((pos, ch)) if ch < '\u{1f}' => {
let mut escaped = String::new(); let mut escaped = String::new();
for c in ch.escape_default() { for c in ch.escape_default() {
@ -333,6 +347,10 @@ impl<'a> Parser<'a> {
return Some(Value::String(ret)); return Some(Value::String(ret));
fn escape(me: &mut Parser, pos: usize, multiline: bool) -> Option<char> { fn escape(me: &mut Parser, pos: usize, multiline: bool) -> Option<char> {
if multiline && me.newline() {
while me.ws() || me.newline() { /* ... */ }
return None
}
match me.cur.next() { match me.cur.next() {
Some((_, 'b')) => Some('\u{8}'), Some((_, 'b')) => Some('\u{8}'),
Some((_, 't')) => Some('\u{9}'), Some((_, 't')) => Some('\u{9}'),
@ -381,17 +399,6 @@ impl<'a> Parser<'a> {
} }
None None
} }
Some((_, '\n')) if multiline => {
loop {
match me.cur.clone().next() {
Some((_, '\t')) |
Some((_, ' ')) |
Some((_, '\n')) => { me.cur.next(); }
_ => break
}
}
None
}
Some((pos, ch)) => { Some((pos, ch)) => {
let mut escaped = String::new(); let mut escaped = String::new();
for c in ch.escape_default() { for c in ch.escape_default() {
@ -427,7 +434,7 @@ impl<'a> Parser<'a> {
if self.eat('\'') { if self.eat('\'') {
multiline = true; multiline = true;
if !self.expect('\'') { return None } if !self.expect('\'') { return None }
self.eat('\n'); self.newline();
} }
loop { loop {
@ -568,12 +575,7 @@ impl<'a> Parser<'a> {
fn consume(me: &mut Parser) { fn consume(me: &mut Parser) {
loop { loop {
me.ws(); me.ws();
match me.cur.clone().next() { if !me.newline() && !me.comment() { break }
Some((_, '#')) => { me.comment(); }
Some((_, '\n')) |
Some((_, '\r')) => { me.cur.next(); }
_ => break,
}
} }
} }
let mut type_str = None; let mut type_str = None;
@ -932,4 +934,34 @@ trimmed in raw strings.
assert_eq!(table.lookup("fruit.1.variety.0.name").and_then(|k| k.as_str()), assert_eq!(table.lookup("fruit.1.variety.0.name").and_then(|k| k.as_str()),
Some("plantain")); Some("plantain"));
} }
#[test]
fn stray_cr() {
    // Each of these inputs contains a '\r' that is not followed by '\n' and
    // is expected to fail to parse.
    let rejected = [
        "\r",
        "a = [ \r ]",
        "a = \"\"\"\r\"\"\"",
        "a = \"\"\"\\ \r \"\"\"",
    ];
    for input in rejected.iter() {
        assert!(Parser::new(*input).parse().is_none());
    }

    // Inside a '''-delimited string the lone '\r' is expected to survive.
    let mut parser = Parser::new("foo = '''\r'''");
    let parsed = Table(parser.parse().unwrap());
    let foo = parsed.lookup("foo").and_then(|v| v.as_str());
    assert_eq!(foo, Some("\r"));
}
#[test]
fn many_blank() {
    // Three newlines between the """ delimiters go in; two are expected in
    // the parsed value.
    let input = "foo = \"\"\"\n\n\n\"\"\"";
    let mut parser = Parser::new(input);
    let parsed = Table(parser.parse().unwrap());
    let foo = parsed.lookup("foo").and_then(|v| v.as_str());
    assert_eq!(foo, Some("\n\n"));
}
// Checks that a backslash directly followed by "\r\n" inside a """-delimited
// string swallows the line ending, and (for `bar`) the whitespace and further
// "\r\n" sequences that follow it, so only the remaining text is kept.
#[test]
fn literal_eats_crlf() {
// NOTE(review): the input below is a multi-line string literal; its exact
// whitespace is part of the test data, so do not reformat it.
let mut p = Parser::new("
foo = \"\"\"\\\r\n\"\"\"
bar = \"\"\"\\\r\n \r\n \r\n a\"\"\"
");
let table = Table(p.parse().unwrap());
// `foo` holds nothing but the escaped line ending, so it parses as empty.
assert_eq!(table.lookup("foo").and_then(|k| k.as_str()), Some(""));
// `bar` keeps only the trailing "a" after the trimmed run.
assert_eq!(table.lookup("bar").and_then(|k| k.as_str()), Some("a"));
}
} }