gonna make a reader
commit
384681329a
|
@ -0,0 +1,2 @@
|
|||
/target
|
||||
Cargo.lock
|
Binary file not shown.
|
@ -0,0 +1,10 @@
|
|||
[package]
|
||||
name = "pdf_parser"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0.44"
|
||||
rev_lines = "0.2.1"
|
Binary file not shown.
|
@ -0,0 +1,6 @@
|
|||
%PDF-1.6
|
||||
%<25><><EFBFBD><EFBFBD>
|
||||
124332 0 obj
|
||||
<</Linearized 1/L 8995189/O 124334/E 49521/N 756/T 8983957/H [ 2916 7320]>>
|
||||
endobj
|
||||
|
|
@ -0,0 +1,119 @@
|
|||
mod structs;
|
||||
|
||||
use anyhow::Result;
|
||||
use rev_lines::RevLines;
|
||||
use std::collections::HashMap;
|
||||
use std::io::BufRead;
|
||||
use std::io::BufReader;
|
||||
use std::io::Read;
|
||||
|
||||
/// Lexical token type returned (in a `Vec`) by `Lexer::lex`.
///
/// No variants are declared yet, so no value of this type can be constructed.
enum Token {}
|
||||
|
||||
/// A value held by the lexer; `Lexer` stores one of these as its current state.
///
/// `Debug`, `Clone` and `PartialEq` are derived so values can be logged,
/// duplicated and compared in tests.
#[derive(Debug, Clone, PartialEq)]
enum Value {
    /// No value.
    None,
    /// A number; the variant carries no payload.
    Num,
    /// An identifier, stored as text.
    Ident(String),
    /// A string, stored as raw bytes.
    String(Vec<u8>),
    /// A stream, stored as raw bytes.
    Stream(Vec<u8>),
    /// A mapping from string keys to nested (boxed) values.
    Object(HashMap<String, Box<Value>>),
}
|
||||
struct Lexer<'a> {
|
||||
source: &'a [u8],
|
||||
limit: Option<usize>,
|
||||
state: Value,
|
||||
}
|
||||
|
||||
impl<'a> Lexer<'a> {
|
||||
fn lex(&mut self) -> Result<Vec<Token>> {
|
||||
let mut output = vec![];
|
||||
|
||||
for c in self.source.iter() {
|
||||
match *c as char {
|
||||
'/' => match self.state {
|
||||
Value::None => todo!(),
|
||||
Value::Num => todo!(),
|
||||
Value::Ident(_) => todo!(),
|
||||
Value::String(_) => todo!(),
|
||||
Value::Stream(_) => todo!(),
|
||||
Value::Object(_) => todo!(),
|
||||
},
|
||||
x => unimplemented!["{}", x],
|
||||
}
|
||||
}
|
||||
|
||||
Ok(output)
|
||||
}
|
||||
}
|
||||
|
||||
// Unfinished sketch of an `Object` record, kept for reference; the value type
// of `header` was never filled in.
//
// #[derive(Debug, PartialEq, Eq, Clone, PartialOrd, Ord)]
// struct Object {
//     id: u64,
//     version: u64,
//     header: HashMap<String, >,
// }
|
||||
|
||||
/// Progress of `parse` while it looks for the cross-reference offset in a PDF.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum State {
    /// Initial state: no `%%EOF` line has been seen yet.
    Start,
    /// A line containing `%%EOF` has been seen.
    FoundEof,
    /// A line containing `startxref` has been seen.
    FoundStartXref,
    /// The numeric cross-reference offset has been read.
    FoundXrefOffset,
}
|
||||
|
||||
pub fn parse(file: std::fs::File) -> Result<()> {
|
||||
let mut reader = RevLines::new(BufReader::new(file))?;
|
||||
let mut offset_to_trailer = 0u64;
|
||||
let mut found_eof = false;
|
||||
let mut forwards = false;
|
||||
let mut state = State::Start;
|
||||
|
||||
reader.;
|
||||
|
||||
while let Ok(Some(line)) = if forwards {
|
||||
reader.next_line()
|
||||
} else {
|
||||
reader.prev_line()
|
||||
} {
|
||||
match state {
|
||||
State::Start => {
|
||||
if line.contains("%%EOF") {
|
||||
state = State::FoundEof;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
State::FoundEof => {
|
||||
if line.contains("startxref") {
|
||||
state = State::FoundStartXref;
|
||||
forwards = true;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
State::FoundStartXref => {
|
||||
state = State::FoundXrefOffset;
|
||||
offset_to_trailer = line.parse::<u64>()?;
|
||||
}
|
||||
State::FoundXrefOffset => {
|
||||
reader.seek();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Parses `1005.1497.pdf` from the working directory; fails if the file cannot
/// be opened or if `parse` reports an error.
#[test]
fn open_a_file() -> Result<()> {
    let the_pdf = std::fs::File::open("1005.1497.pdf")?;

    // Return the parse result so a parse failure fails the test instead of
    // being silently dropped.
    parse(the_pdf)
}
|
||||
|
||||
/// Parses `PDF32000_2008.pdf` from the working directory; fails if the file
/// cannot be opened or if `parse` reports an error.
#[test]
fn open_another_file() -> Result<()> {
    let the_pdf = std::fs::File::open("PDF32000_2008.pdf")?;

    // Return the parse result so a parse failure fails the test instead of
    // being silently dropped.
    parse(the_pdf)
}
|
Loading…
Reference in New Issue