Tighten up CRLF parsing
Fix lots of false positives with stray \r characters and also fix CRLF parsing in raw string literals with escapes. Closes #49
This commit is contained in:
parent
9dd46d6fbf
commit
5f2c7b4986
106
src/parser.rs
106
src/parser.rs
|
@ -87,12 +87,17 @@ impl<'a> Parser<'a> {
|
||||||
// Returns true and consumes the next character if it matches `ch`,
|
// Returns true and consumes the next character if it matches `ch`,
|
||||||
// otherwise do nothing and return false
|
// otherwise do nothing and return false
|
||||||
fn eat(&mut self, ch: char) -> bool {
|
fn eat(&mut self, ch: char) -> bool {
|
||||||
match self.cur.clone().next() {
|
match self.peek(0) {
|
||||||
Some((_, c)) if c == ch => { self.cur.next(); true }
|
Some((_, c)) if c == ch => { self.cur.next(); true }
|
||||||
Some(_) | None => false,
|
Some(_) | None => false,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Peeks ahead `n` characters
|
||||||
|
fn peek(&self, n: usize) -> Option<(usize, char)> {
|
||||||
|
self.cur.clone().skip(n).next()
|
||||||
|
}
|
||||||
|
|
||||||
fn expect(&mut self, ch: char) -> bool {
|
fn expect(&mut self, ch: char) -> bool {
|
||||||
if self.eat(ch) { return true }
|
if self.eat(ch) { return true }
|
||||||
let mut it = self.cur.clone();
|
let mut it = self.cur.clone();
|
||||||
|
@ -109,26 +114,38 @@ impl<'a> Parser<'a> {
|
||||||
false
|
false
|
||||||
}
|
}
|
||||||
|
|
||||||
// Consumes whitespace ('\t' and ' ') until another character (or EOF) is reached
|
// Consumes whitespace ('\t' and ' ') until another character (or EOF) is
|
||||||
fn ws(&mut self) {
|
// reached. Returns whether any whitespace was consumed
|
||||||
|
fn ws(&mut self) -> bool {
|
||||||
|
let mut ret = false;
|
||||||
loop {
|
loop {
|
||||||
match self.cur.clone().next() {
|
match self.peek(0) {
|
||||||
Some((_, '\t')) |
|
Some((_, '\t')) |
|
||||||
Some((_, ' ')) => { self.cur.next(); }
|
Some((_, ' ')) => { self.cur.next(); ret = true; }
|
||||||
_ => break,
|
_ => break,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
ret
|
||||||
}
|
}
|
||||||
|
|
||||||
// Consumes the rest of the line after a comment character
|
// Consumes the rest of the line after a comment character
|
||||||
fn comment(&mut self) {
|
fn comment(&mut self) -> bool {
|
||||||
match self.cur.clone().next() {
|
if !self.eat('#') { return false }
|
||||||
Some((_, '#')) => {}
|
|
||||||
_ => return,
|
|
||||||
}
|
|
||||||
for (_, ch) in self.cur {
|
for (_, ch) in self.cur {
|
||||||
if ch == '\n' { break }
|
if ch == '\n' { break }
|
||||||
}
|
}
|
||||||
|
true
|
||||||
|
}
|
||||||
|
|
||||||
|
// Consumes a newline if one is next
|
||||||
|
fn newline(&mut self) -> bool {
|
||||||
|
match self.peek(0) {
|
||||||
|
Some((_, '\n')) => { self.cur.next(); true }
|
||||||
|
Some((_, '\r')) if self.peek(1).map(|c| c.1) == Some('\n') => {
|
||||||
|
self.cur.next(); self.cur.next(); true
|
||||||
|
}
|
||||||
|
_ => false
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Executes the parser, parsing the string contained within.
|
/// Executes the parser, parsing the string contained within.
|
||||||
|
@ -143,10 +160,9 @@ impl<'a> Parser<'a> {
|
||||||
let mut ret = BTreeMap::new();
|
let mut ret = BTreeMap::new();
|
||||||
loop {
|
loop {
|
||||||
self.ws();
|
self.ws();
|
||||||
match self.cur.clone().next() {
|
if self.newline() { continue }
|
||||||
|
match self.peek(0) {
|
||||||
Some((_, '#')) => { self.comment(); }
|
Some((_, '#')) => { self.comment(); }
|
||||||
Some((_, '\n')) |
|
|
||||||
Some((_, '\r')) => { self.cur.next(); }
|
|
||||||
Some((start, '[')) => {
|
Some((start, '[')) => {
|
||||||
self.cur.next();
|
self.cur.next();
|
||||||
let array = self.eat('[');
|
let array = self.eat('[');
|
||||||
|
@ -201,10 +217,9 @@ impl<'a> Parser<'a> {
|
||||||
fn values(&mut self, into: &mut TomlTable) -> bool {
|
fn values(&mut self, into: &mut TomlTable) -> bool {
|
||||||
loop {
|
loop {
|
||||||
self.ws();
|
self.ws();
|
||||||
|
if self.newline() { continue }
|
||||||
match self.cur.clone().next() {
|
match self.cur.clone().next() {
|
||||||
Some((_, '#')) => self.comment(),
|
Some((_, '#')) => { self.comment(); }
|
||||||
Some((_, '\n')) |
|
|
||||||
Some((_, '\r')) => { self.cur.next(); }
|
|
||||||
Some((_, '[')) => break,
|
Some((_, '[')) => break,
|
||||||
Some((start, _)) => {
|
Some((start, _)) => {
|
||||||
let mut key = String::new();
|
let mut key = String::new();
|
||||||
|
@ -282,7 +297,7 @@ impl<'a> Parser<'a> {
|
||||||
if self.eat('"') {
|
if self.eat('"') {
|
||||||
if self.eat('"') {
|
if self.eat('"') {
|
||||||
multiline = true;
|
multiline = true;
|
||||||
self.eat('\n');
|
self.newline();
|
||||||
} else {
|
} else {
|
||||||
// empty
|
// empty
|
||||||
return Some(Value::String(ret))
|
return Some(Value::String(ret))
|
||||||
|
@ -290,6 +305,7 @@ impl<'a> Parser<'a> {
|
||||||
}
|
}
|
||||||
|
|
||||||
loop {
|
loop {
|
||||||
|
while self.newline() { ret.push('\n') }
|
||||||
match self.cur.next() {
|
match self.cur.next() {
|
||||||
Some((_, '"')) => {
|
Some((_, '"')) => {
|
||||||
if multiline {
|
if multiline {
|
||||||
|
@ -304,8 +320,6 @@ impl<'a> Parser<'a> {
|
||||||
None => {}
|
None => {}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Some((_, '\n')) |
|
|
||||||
Some((_, '\r')) if multiline => ret.push('\n'),
|
|
||||||
Some((pos, ch)) if ch < '\u{1f}' => {
|
Some((pos, ch)) if ch < '\u{1f}' => {
|
||||||
let mut escaped = String::new();
|
let mut escaped = String::new();
|
||||||
for c in ch.escape_default() {
|
for c in ch.escape_default() {
|
||||||
|
@ -333,6 +347,10 @@ impl<'a> Parser<'a> {
|
||||||
return Some(Value::String(ret));
|
return Some(Value::String(ret));
|
||||||
|
|
||||||
fn escape(me: &mut Parser, pos: usize, multiline: bool) -> Option<char> {
|
fn escape(me: &mut Parser, pos: usize, multiline: bool) -> Option<char> {
|
||||||
|
if multiline && me.newline() {
|
||||||
|
while me.ws() || me.newline() { /* ... */ }
|
||||||
|
return None
|
||||||
|
}
|
||||||
match me.cur.next() {
|
match me.cur.next() {
|
||||||
Some((_, 'b')) => Some('\u{8}'),
|
Some((_, 'b')) => Some('\u{8}'),
|
||||||
Some((_, 't')) => Some('\u{9}'),
|
Some((_, 't')) => Some('\u{9}'),
|
||||||
|
@ -381,17 +399,6 @@ impl<'a> Parser<'a> {
|
||||||
}
|
}
|
||||||
None
|
None
|
||||||
}
|
}
|
||||||
Some((_, '\n')) if multiline => {
|
|
||||||
loop {
|
|
||||||
match me.cur.clone().next() {
|
|
||||||
Some((_, '\t')) |
|
|
||||||
Some((_, ' ')) |
|
|
||||||
Some((_, '\n')) => { me.cur.next(); }
|
|
||||||
_ => break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
None
|
|
||||||
}
|
|
||||||
Some((pos, ch)) => {
|
Some((pos, ch)) => {
|
||||||
let mut escaped = String::new();
|
let mut escaped = String::new();
|
||||||
for c in ch.escape_default() {
|
for c in ch.escape_default() {
|
||||||
|
@ -427,7 +434,7 @@ impl<'a> Parser<'a> {
|
||||||
if self.eat('\'') {
|
if self.eat('\'') {
|
||||||
multiline = true;
|
multiline = true;
|
||||||
if !self.expect('\'') { return None }
|
if !self.expect('\'') { return None }
|
||||||
self.eat('\n');
|
self.newline();
|
||||||
}
|
}
|
||||||
|
|
||||||
loop {
|
loop {
|
||||||
|
@ -568,12 +575,7 @@ impl<'a> Parser<'a> {
|
||||||
fn consume(me: &mut Parser) {
|
fn consume(me: &mut Parser) {
|
||||||
loop {
|
loop {
|
||||||
me.ws();
|
me.ws();
|
||||||
match me.cur.clone().next() {
|
if !me.newline() && !me.comment() { break }
|
||||||
Some((_, '#')) => { me.comment(); }
|
|
||||||
Some((_, '\n')) |
|
|
||||||
Some((_, '\r')) => { me.cur.next(); }
|
|
||||||
_ => break,
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
let mut type_str = None;
|
let mut type_str = None;
|
||||||
|
@ -932,4 +934,34 @@ trimmed in raw strings.
|
||||||
assert_eq!(table.lookup("fruit.1.variety.0.name").and_then(|k| k.as_str()),
|
assert_eq!(table.lookup("fruit.1.variety.0.name").and_then(|k| k.as_str()),
|
||||||
Some("plantain"));
|
Some("plantain"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn stray_cr() {
|
||||||
|
assert!(Parser::new("\r").parse().is_none());
|
||||||
|
assert!(Parser::new("a = [ \r ]").parse().is_none());
|
||||||
|
assert!(Parser::new("a = \"\"\"\r\"\"\"").parse().is_none());
|
||||||
|
assert!(Parser::new("a = \"\"\"\\ \r \"\"\"").parse().is_none());
|
||||||
|
|
||||||
|
let mut p = Parser::new("foo = '''\r'''");
|
||||||
|
let table = Table(p.parse().unwrap());
|
||||||
|
assert_eq!(table.lookup("foo").and_then(|k| k.as_str()), Some("\r"));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn many_blank() {
|
||||||
|
let mut p = Parser::new("foo = \"\"\"\n\n\n\"\"\"");
|
||||||
|
let table = Table(p.parse().unwrap());
|
||||||
|
assert_eq!(table.lookup("foo").and_then(|k| k.as_str()), Some("\n\n"));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn literal_eats_crlf() {
|
||||||
|
let mut p = Parser::new("
|
||||||
|
foo = \"\"\"\\\r\n\"\"\"
|
||||||
|
bar = \"\"\"\\\r\n \r\n \r\n a\"\"\"
|
||||||
|
");
|
||||||
|
let table = Table(p.parse().unwrap());
|
||||||
|
assert_eq!(table.lookup("foo").and_then(|k| k.as_str()), Some(""));
|
||||||
|
assert_eq!(table.lookup("bar").and_then(|k| k.as_str()), Some("a"));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue