From 76cf5e496ce1e9964af442ce5a4f1a16219d7765 Mon Sep 17 00:00:00 2001 From: Alex Bethel Date: Mon, 29 Nov 2021 22:16:57 -0700 Subject: [PATCH] Start work on parser --- src/lib.rs | 117 ++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 107 insertions(+), 10 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index fc1415d..40e6420 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -81,20 +81,117 @@ pub struct List {} pub fn parse(source: &str) -> Document { let lines = source.lines().collect::>(); - let mut document = vec![]; - let mut paragraph = vec![]; + let mut document = Vec::new(); - for line in lines { - if line.starts_with('#') { - todo!() - } else if line.is_empty() { - document.push(paragraph.join(" ")); - } else { - paragraph.push(line); + let mut to_parse = &lines[..]; + while let Some((node, remaining)) = parse_node(to_parse) { + document.push(node); + to_parse = &to_parse[remaining..]; + } + + Document(document) +} + +/// Parse a single node from the list of lines, if one exists. Return +/// the node parsed, and the number of lines of input consumed. +fn parse_node(mut lines: &[&str]) -> Option<(Node, usize)> { + if lines.is_empty() { + return None; + } + + while lines[0].is_empty() { + lines = &lines[1..]; + if lines.is_empty() { + return None; } } - todo!() + None.or_else(|| parse_normal_header(lines)) + .or_else(|| parse_underline_header(lines)) + .or_else(|| parse_hrule(lines)) + .or_else(|| parse_paragraph(lines)) +} + +/// Parse a header as a number of "#"-signs followed by the header +/// text. +fn parse_normal_header(lines: &[&str]) -> Option<(Node, usize)> { + if lines[0].starts_with('#') { + let depth = lines[0].chars().take_while(|&c| c == '#').count(); + let text = lines[0][depth..].trim().to_string(); + Some(( + Node::Header(Header { + label: text, + level: depth as u8, + }), + 1, + )) + } else { + None + } +} + +/// Parse a header as a single line of text followed by an underline, +/// i.e. a line of "-" or "=" characters. +fn parse_underline_header(lines: &[&str]) -> Option<(Node, usize)> { + if lines.len() < 2 { + return None; + } + + let text = lines[0]; + let underline = lines[1]; + if !text.is_empty() && !underline.is_empty() { + if underline.chars().all(|c| c == '-') { + Some(( + Node::Header(Header { + label: text.to_string(), + level: 2, + }), + 2, + )) + } else if underline.chars().all(|c| c == '=') { + Some(( + Node::Header(Header { + label: text.to_string(), + level: 1, + }), + 2, + )) + } else { + None + } + } else { + None + } +} + +/// Parse a horizontal rule. +fn parse_hrule(lines: &[&str]) -> Option<(Node, usize)> { + let line = lines[0]; + if !line.is_empty() && (line.chars().all(|c| c == '-') || line.chars().all(|c| c == '=')) { + Some((Node::HRule, 1)) + } else { + None + } +} + +/// Parse a paragraph of text. +fn parse_paragraph(lines: &[&str]) -> Option<(Node, usize)> { + let len = lines.iter().take_while(|line| !line.is_empty()).count(); + let text = lines[0..len].join(" "); + + // TODO: Parse style components from `text` here, and turn it into + // a Vec. + + Some(( + Node::Paragraph(vec![StyledText { + italic: false, + bold: false, + underline: false, + color: None, + text, + }]), + len, + )) } #[cfg(test)]