pdf_parser/src/lib.rs

122 lines
2.1 KiB
Rust

mod structs;
use anyhow::Result;
use rev_lines::RevLines;
use std::collections::HashMap;
use std::io::BufRead;
use std::io::BufReader;
use std::io::Read;
mod reader;
/// Lexical token; the element type of the vector returned by `Lexer::lex`.
///
/// No variants are defined yet, so no value of this type can be constructed.
enum Token {}
/// Value categories tracked by the lexer.
///
/// `Lexer::state` holds one of these and `Lexer::lex` dispatches on it.
///
/// `Debug`, `Clone` and `PartialEq` are derived so values can be printed in
/// diagnostics, duplicated, and compared in tests.
#[derive(Debug, Clone, PartialEq)]
enum Value {
    /// No value.
    None,
    /// Numeric value; carries no payload at present.
    Num,
    /// Identifier text.
    /// NOTE(review): presumably a PDF name (introduced by `/`) — confirm.
    Ident(String),
    /// Raw bytes of a string value.
    String(Vec<u8>),
    /// Raw bytes of a stream value.
    Stream(Vec<u8>),
    /// Mapping from string keys to nested values.
    Object(HashMap<String, Box<Value>>),
}
/// Byte-oriented lexer over a borrowed input buffer.
struct Lexer<'a> {
    /// Input bytes; `lex` walks these one at a time.
    source: &'a [u8],
    /// NOTE(review): not read anywhere in the visible code; presumably an
    /// upper bound on how much of `source` to consume — confirm.
    limit: Option<usize>,
    /// Current lexer state; `lex` matches on it when it meets a `/` byte.
    state: Value,
}
impl<'a> Lexer<'a> {
    /// Tokenizes `self.source` (currently a stub).
    ///
    /// Returns `Ok` with an empty vector when `source` is empty.
    ///
    /// # Panics
    ///
    /// Panics on the first byte of a non-empty `source`: a `/` reaches
    /// `todo!()` whatever the current state is, and any other byte reaches
    /// `unimplemented!` with that byte rendered as a character.
    fn lex(&mut self) -> Result<Vec<Token>> {
        let tokens = Vec::new();
        for &byte in self.source {
            match char::from(byte) {
                // Every state is listed explicitly (no wildcard) so that adding
                // a `Value` variant forces this match to be revisited.
                '/' => match self.state {
                    Value::None
                    | Value::Num
                    | Value::Ident(_)
                    | Value::String(_)
                    | Value::Stream(_)
                    | Value::Object(_) => todo!(),
                },
                other => unimplemented!("{}", other),
            }
        }
        Ok(tokens)
    }
}
// #[derive(Debug, PartialEq, Eq, Clone, PartialOrd, Ord)]
// struct Object {
// id: u64,
// version: u64,
// header: HashMap<String, >,
// }
/// Progress of the scan that `parse` is meant to perform to locate the
/// cross-reference offset.
///
/// The common traits are derived because this is a plain fieldless enum:
/// cheap to copy, and comparable/printable for assertions and diagnostics.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum State {
    /// Nothing has been recognised yet.
    Start,
    /// A line containing `%%EOF` has been seen.
    FoundEof,
    /// A line containing `startxref` has been seen.
    FoundStartXref,
    /// The numeric offset following `startxref` has been read.
    FoundXrefOffset,
}
/// Entry point for parsing a PDF file (work in progress).
///
/// At present this only wraps `file` in a `BufReader` and hands it to
/// `RevLines::new`; no PDF content is interpreted yet.
///
/// # Errors
///
/// Propagates any error returned by `RevLines::new`.
pub fn parse(file: std::fs::File) -> Result<()> {
    // Underscore-prefixed: the reverse line reader is built (its constructor
    // is fallible) but not consumed until the scan below is implemented.
    let _reader = RevLines::new(BufReader::new(file))?;

    // TODO: locate the cross-reference offset, driven by `State`:
    //   1. `Start`: read lines backwards until one contains "%%EOF".
    //   2. `FoundEof`: keep reading backwards until a line contains
    //      "startxref", then switch to reading forwards.
    //   3. `FoundStartXref`: parse the next line as a `u64` offset
    //      (the offset to the trailer).
    //   4. `FoundXrefOffset`: seek the reader to that offset.
    Ok(())
}
/// Smoke test: opens `1005.1497.pdf` (path relative to the working directory)
/// and runs `parse` on it.
#[test]
fn open_a_file() -> Result<()> {
    let the_pdf = std::fs::File::open("1005.1497.pdf")?;
    // Return the result of `parse` so a parse error fails the test instead of
    // being silently dropped.
    parse(the_pdf)
}
/// Smoke test: opens `PDF32000_2008.pdf` (path relative to the working
/// directory) and runs `parse` on it.
#[test]
fn open_another_file() -> Result<()> {
    let the_pdf = std::fs::File::open("PDF32000_2008.pdf")?;
    // Return the result of `parse` so a parse error fails the test instead of
    // being silently dropped.
    parse(the_pdf)
}