gonna make a reader

daddy
elfein727 2021-12-22 13:42:32 -08:00
commit 384681329a
8 changed files with 137 additions and 0 deletions

2
.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
/target
Cargo.lock

BIN
1005.1497.pdf Normal file

Binary file not shown.

10
Cargo.toml Normal file
View File

@ -0,0 +1,10 @@
[package]
name = "pdf_parser"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
anyhow = "1.0.44"
rev_lines = "0.2.1"

BIN
PDF32000_2008.pdf Normal file

Binary file not shown.

6
measure Normal file
View File

@ -0,0 +1,6 @@
%PDF-1.6
%<25><><EFBFBD><EFBFBD>
124332 0 obj
<</Linearized 1/L 8995189/O 124334/E 49521/N 756/T 8983957/H [ 2916 7320]>>
endobj

119
src/lib.rs Normal file
View File

@ -0,0 +1,119 @@
mod structs;
use anyhow::Result;
use rev_lines::RevLines;
use std::collections::HashMap;
use std::io::BufRead;
use std::io::BufReader;
use std::io::Read;
/// Token type returned (inside a `Vec`) by `Lexer::lex`.
///
/// The enum currently has no variants, so no `Token` value can be constructed
/// yet; it only serves as a placeholder in the lexer's signature.
enum Token {}
/// A value tracked by the lexer; `Lexer` stores one of these as its current
/// `state`.
///
/// `Debug`, `Clone` and `PartialEq` are derived so values can be printed,
/// duplicated and compared (e.g. in assertions).
#[derive(Debug, Clone, PartialEq)]
enum Value {
    /// No value.
    None,
    /// A number. NOTE(review): this variant carries no payload, so the numeric
    /// value itself is not stored anywhere yet.
    Num,
    /// An identifier, held as text.
    // NOTE(review): presumably a PDF name such as `/Type` — confirm.
    Ident(String),
    /// A string, held as raw bytes.
    String(Vec<u8>),
    /// A stream, held as raw bytes.
    Stream(Vec<u8>),
    /// A mapping from string keys to nested values.
    Object(HashMap<String, Box<Value>>),
}
/// Lexer over a borrowed byte slice.
struct Lexer<'a> {
    /// The bytes that `lex` iterates over.
    source: &'a [u8],
    /// Optional limit. NOTE(review): not read by any code visible here, so its
    /// unit and meaning are still to be defined.
    limit: Option<usize>,
    /// The value the lexer is currently in the middle of; `lex` inspects it
    /// when it encounters a `/` byte.
    state: Value,
}
impl<'a> Lexer<'a> {
    /// Lexes `self.source` into a list of tokens.
    ///
    /// This is still a stub: an empty source yields an empty token list, and
    /// every input byte currently ends in a panic.
    ///
    /// # Panics
    ///
    /// * `todo!()` when the current byte is `/` (whatever the lexer state).
    /// * `unimplemented!` (reporting the offending character) for any other
    ///   byte.
    fn lex(&mut self) -> Result<Vec<Token>> {
        let tokens = Vec::new();
        for &byte in self.source {
            let ch = byte as char;
            if ch != '/' {
                // Only `/` has a (stubbed) handler so far.
                unimplemented!("{}", ch);
            }
            // One arm per state, kept separate so each can be filled in on
            // its own.
            match self.state {
                Value::None => todo!(),
                Value::Num => todo!(),
                Value::Ident(_) => todo!(),
                Value::String(_) => todo!(),
                Value::Stream(_) => todo!(),
                Value::Object(_) => todo!(),
            }
        }
        Ok(tokens)
    }
}
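// Unfinished sketch of an object record, kept for reference; the value type of
// `header` has not been chosen yet.
// #[derive(Debug, PartialEq, Eq, Clone, PartialOrd, Ord)]
// struct Object {
// id: u64,
// version: u64,
// header: HashMap<String, >,
// }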
/// Progress of the scan that `parse` performs over the lines of a file.
enum State {
    /// Nothing found yet; looking for a line containing `%%EOF`.
    Start,
    /// `%%EOF` was seen; looking for a line containing `startxref`.
    FoundEof,
    /// `startxref` was seen.
    FoundStartXref,
    /// The offset belonging to `startxref` has been read.
    FoundXrefOffset,
}
pub fn parse(file: std::fs::File) -> Result<()> {
let mut reader = RevLines::new(BufReader::new(file))?;
let mut offset_to_trailer = 0u64;
let mut found_eof = false;
let mut forwards = false;
let mut state = State::Start;
reader.;
while let Ok(Some(line)) = if forwards {
reader.next_line()
} else {
reader.prev_line()
} {
match state {
State::Start => {
if line.contains("%%EOF") {
state = State::FoundEof;
continue;
}
}
State::FoundEof => {
if line.contains("startxref") {
state = State::FoundStartXref;
forwards = true;
continue;
}
}
State::FoundStartXref => {
state = State::FoundXrefOffset;
offset_to_trailer = line.parse::<u64>()?;
}
State::FoundXrefOffset => {
reader.seek();
}
}
}
Ok(())
}
/// Runs `parse` on the bundled `1005.1497.pdf`.
///
/// The result of `parse` is returned so that a parse failure fails the test
/// instead of being silently discarded.
#[test]
fn open_a_file() -> Result<()> {
    let the_pdf = std::fs::File::open("1005.1497.pdf")?;
    parse(the_pdf)
}
/// Runs `parse` on the bundled `PDF32000_2008.pdf`.
///
/// The result of `parse` is returned so that a parse failure fails the test
/// instead of being silently discarded.
#[test]
fn open_another_file() -> Result<()> {
    let the_pdf = std::fs::File::open("PDF32000_2008.pdf")?;
    parse(the_pdf)
}

0
src/reader.rs Normal file
View File

0
src/structs/mod.rs Normal file
View File