Working on epub generation. Got to work on setup_html2xhtml() next.

This commit is contained in:
NA 2024-01-26 03:17:50 +00:00
parent 71763b7f07
commit 0bfcfd496f
11 changed files with 472 additions and 45 deletions

View file

@ -2,16 +2,21 @@
"cSpell.words": [ "cSpell.words": [
"archiver", "archiver",
"Audiobook", "Audiobook",
"dogshit",
"epub", "epub",
"stylesheet",
"Webnovel" "Webnovel"
], ],
"rust-analyzer.showUnlinkedFileNotification": false, "rust-analyzer.showUnlinkedFileNotification": false,
"cSpell.ignoreWords": [ "cSpell.ignoreWords": [
"TMPDIR",
"autotools", "autotools",
"chrono", "chrono",
"indicatif", "indicatif",
"reftype",
"reqwest", "reqwest",
"royalroad", "royalroad",
"tempdir",
"ureq" "ureq"
] ]
} }

140
Cargo.lock generated
View file

@ -279,6 +279,21 @@ version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f"
[[package]]
name = "crc32fast"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d"
dependencies = [
"cfg-if",
]
[[package]]
name = "crossbeam-utils"
version = "0.8.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "248e3bacc7dc6baa3b21e405ee045c3047101a49145e7e9eca583ab4c2ca5345"
[[package]] [[package]]
name = "cssparser" name = "cssparser"
version = "0.31.2" version = "0.31.2"
@ -302,6 +317,15 @@ dependencies = [
"syn 2.0.48", "syn 2.0.48",
] ]
[[package]]
name = "deranged"
version = "0.3.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b42b6fa04a440b495c8b04d0e71b707c585f83cb9cb28cf8cd0d976c315e31b4"
dependencies = [
"powerfmt",
]
[[package]] [[package]]
name = "derive_more" name = "derive_more"
version = "0.99.17" version = "0.99.17"
@ -349,6 +373,23 @@ dependencies = [
"cfg-if", "cfg-if",
] ]
[[package]]
name = "epub-builder"
version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a6fcc8fc7b93c7001e0d47c269aa5a30a78a1f44692dc09cc9d0f781378545e1"
dependencies = [
"chrono",
"eyre",
"html-escape",
"log",
"once_cell",
"tempfile",
"upon",
"uuid",
"zip",
]
[[package]] [[package]]
name = "equivalent" name = "equivalent"
version = "1.0.1" version = "1.0.1"
@ -365,12 +406,32 @@ dependencies = [
"windows-sys 0.52.0", "windows-sys 0.52.0",
] ]
[[package]]
name = "eyre"
version = "0.6.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b6267a1fa6f59179ea4afc8e50fd8612a3cc60bc858f786ff877a4a8cb042799"
dependencies = [
"indenter",
"once_cell",
]
[[package]] [[package]]
name = "fastrand" name = "fastrand"
version = "2.0.1" version = "2.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "25cbce373ec4653f1a01a31e8a5e5ec0c622dc27ff9c4e6606eefef5cbbed4a5" checksum = "25cbce373ec4653f1a01a31e8a5e5ec0c622dc27ff9c4e6606eefef5cbbed4a5"
[[package]]
name = "flate2"
version = "1.0.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "46303f565772937ffe1d394a4fac6f411c6013172fadde9dcdb1e147a086940e"
dependencies = [
"crc32fast",
"miniz_oxide",
]
[[package]] [[package]]
name = "fnv" name = "fnv"
version = "1.0.7" version = "1.0.7"
@ -531,6 +592,15 @@ version = "0.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5d3d0e0f38255e7fa3cf31335b3a56f05febd18025f4db5ef7a0cfb4f8da651f" checksum = "5d3d0e0f38255e7fa3cf31335b3a56f05febd18025f4db5ef7a0cfb4f8da651f"
[[package]]
name = "html-escape"
version = "0.2.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d1ad449764d627e22bfd7cd5e8868264fc9236e07c752972b4080cd351cb476"
dependencies = [
"utf8-width",
]
[[package]] [[package]]
name = "html2md" name = "html2md"
version = "0.2.14" version = "0.2.14"
@ -663,6 +733,12 @@ dependencies = [
"unicode-normalization", "unicode-normalization",
] ]
[[package]]
name = "indenter"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ce23b50ad8242c51a442f3ff322d56b02f08852c77e4c0b4d3fd684abc89c683"
[[package]] [[package]]
name = "indexmap" name = "indexmap"
version = "2.1.0" version = "2.1.0"
@ -1081,6 +1157,12 @@ version = "1.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7170ef9988bc169ba16dd36a7fa041e5c4cbeb6a35b76d4c03daded371eae7c0" checksum = "7170ef9988bc169ba16dd36a7fa041e5c4cbeb6a35b76d4c03daded371eae7c0"
[[package]]
name = "powerfmt"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391"
[[package]] [[package]]
name = "ppv-lite86" name = "ppv-lite86"
version = "0.2.17" version = "0.2.17"
@ -1224,6 +1306,7 @@ dependencies = [
"bytes", "bytes",
"chrono", "chrono",
"clap", "clap",
"epub-builder",
"html2md", "html2md",
"indicatif", "indicatif",
"path-slash", "path-slash",
@ -1549,6 +1632,24 @@ dependencies = [
"syn 2.0.48", "syn 2.0.48",
] ]
[[package]]
name = "time"
version = "0.3.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f657ba42c3f86e7680e53c8cd3af8abbe56b5491790b46e22e19c0d57463583e"
dependencies = [
"deranged",
"powerfmt",
"serde",
"time-core",
]
[[package]]
name = "time-core"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3"
[[package]] [[package]]
name = "tinyvec" name = "tinyvec"
version = "1.6.0" version = "1.6.0"
@ -1662,6 +1763,17 @@ version = "0.1.11"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85" checksum = "e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85"
[[package]]
name = "upon"
version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "21a9260fe394dfd8ab204a8eab40f88eb9a331bb852147d24fc0aff6b30daa02"
dependencies = [
"serde",
"unicode-ident",
"unicode-width",
]
[[package]] [[package]]
name = "url" name = "url"
version = "2.5.0" version = "2.5.0"
@ -1679,12 +1791,27 @@ version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9"
[[package]]
name = "utf8-width"
version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "86bd8d4e895da8537e5315b8254664e6b769c4ff3db18321b297a1e7004392e3"
[[package]] [[package]]
name = "utf8parse" name = "utf8parse"
version = "0.2.1" version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a"
[[package]]
name = "uuid"
version = "1.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f00cc9702ca12d3c81455259621e676d0f7251cec66a21e98fe2e9a37db93b2a"
dependencies = [
"getrandom",
]
[[package]] [[package]]
name = "vcpkg" name = "vcpkg"
version = "0.2.15" version = "0.2.15"
@ -2010,3 +2137,16 @@ dependencies = [
"quote", "quote",
"syn 2.0.48", "syn 2.0.48",
] ]
[[package]]
name = "zip"
version = "0.6.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "760394e246e4c28189f19d488c058bf16f564016aefac5d32bb1f3b51d5e9261"
dependencies = [
"byteorder",
"crc32fast",
"crossbeam-utils",
"flate2",
"time",
]

View file

@ -17,6 +17,7 @@ path = "src/binary.rs"
bytes = "1.5.0" bytes = "1.5.0"
chrono = "0.4.33" chrono = "0.4.33"
clap = { version = "4.4.18", features = ["derive"] } clap = { version = "4.4.18", features = ["derive"] }
epub-builder = "0.7.4"
html2md = "0.2.14" html2md = "0.2.14"
indicatif = "0.17.7" indicatif = "0.17.7"
path-slash = "0.2.1" path-slash = "0.2.1"

View file

@ -43,10 +43,10 @@ fn main() {
let output_directory: PathBuf; let output_directory: PathBuf;
match cli_input.output_directory { match cli_input.output_directory {
Some(output_directory_input) => { Some(output_directory_input) => {
output_directory = Path::new(&output_directory_input).to_path_buf(); output_directory = PathBuf::from(&output_directory_input);
}, },
None => { None => {
output_directory = env::current_dir().unwrap().as_path().to_path_buf(); output_directory = env::current_dir().unwrap();
} }
} }
@ -61,7 +61,9 @@ fn main() {
} }
} }
// Check if the directory exists and is writeable. Creates one if not. /// Check if the directory exists and is writeable. Creates one if not.
///
/// Exits the program on failure.
fn valid_directory_check(output_directory: &Path) { fn valid_directory_check(output_directory: &Path) {
// Check if the directory exists, if it does not; attempt to create one. // Check if the directory exists, if it does not; attempt to create one.
if !output_directory.exists() { if !output_directory.exists() {
@ -95,7 +97,7 @@ fn valid_url_check(book_url: &str) -> Url {
} }
}, },
Err(error) => { Err(error) => {
eprintln!("Error! Unable to parse url: {error}"); eprintln!("Error! Unable to parse url: {book_url}\n{error}");
exit(1); exit(1);
} }
} }

View file

@ -1,24 +1,29 @@
use std::collections::HashMap; use std::collections::HashMap;
use indicatif::{ProgressBar, ProgressStyle}; use indicatif::{ProgressBar, ProgressStyle};
use crate::misc::HashMapExt;
use scraper::Html; use scraper::Html;
use url::Url; use url::Url;
use crate::{html, http}; use crate::{file_system_crap::remove_illegal_chars, html, http};
/// A struct representing a book & all the needed data to generate one. /// A struct representing a book & all the needed data to generate one.
pub struct Book { pub struct Book {
/// The RoyalRoad Url for the book. /// The RoyalRoad Url for the book.
book_url: Url, pub book_url: Url,
/// The book's title. /// The book's title.
pub title: String, pub title: String,
/// Book title used for the filename.
/// Should have illegal chars expunged via file_system_crap::remove_illegal_chars.
pub file_name_title: String,
/// The book's author. /// The book's author.
pub author: String, pub author: String,
/// A Url to the book's cover image. /// A Url to the book's cover image.
cover_image_url: Url, pub cover_image_url: Url,
/// The raw html data of the RoyalRoad index page. /// The raw html data of the RoyalRoad index page.
index_html: Html, index_html: Html,
@ -27,7 +32,7 @@ pub struct Book {
pub chapters: Vec<Chapter>, pub chapters: Vec<Chapter>,
/// A hashmap representing the book image urls and their corresponding img html tags. /// A hashmap representing the book image urls and their corresponding img html tags.
image_urls: HashMap<Url, Vec<String>>, pub image_urls_and_tags: HashMap<Url, Vec<String>>,
} }
impl Book { impl Book {
@ -36,10 +41,9 @@ impl Book {
let index_html = html::string_to_html_document(&http::get_response(book_url.clone()).get_text()); let index_html = html::string_to_html_document(&http::get_response(book_url.clone()).get_text());
let chapter_names_and_urls = html::get_chapter_names_and_urls_from_index(&index_html); let chapter_names_and_urls = html::get_chapter_names_and_urls_from_index(&index_html);
let mut chapters: Vec<Chapter> = Vec::with_capacity(chapter_names_and_urls.len()); let mut chapters: Vec<Chapter> = Vec::with_capacity(chapter_names_and_urls.len());
let mut image_urls: HashMap<Url, Vec<String>> = HashMap::new(); let mut image_urls_and_tags: HashMap<Url, Vec<String>> = HashMap::new();
println!("\nDownloading and processing chapters:"); println!("\nDownloading and processing chapters:");
// Spawn a progress bar showing how many chapters have been downloaded & processed. // Spawn a progress bar showing how many chapters have been downloaded & processed.
@ -54,8 +58,8 @@ impl Book {
for i in 0..chapter_names_and_urls.len() { for i in 0..chapter_names_and_urls.len() {
let chapter = Chapter::new(&chapter_names_and_urls[i][0], &chapter_names_and_urls[i][1]); let chapter = Chapter::new(&chapter_names_and_urls[i][0], &chapter_names_and_urls[i][1]);
// extract the image urls and add em to the image_urls hashmap. // extract the image urls and add em to the image_urls_and_tags hashmap.
image_urls_and_tags = image_urls_and_tags.join(html::extract_urls_and_img_tag(&chapter.isolated_chapter_html));
chapters.push(chapter); chapters.push(chapter);
@ -64,14 +68,17 @@ impl Book {
progress_bar.finish(); progress_bar.finish();
let title = html::get_title_from_index(&index_html);
Book { Book {
book_url: book_url, book_url: book_url,
title: html::get_title_from_index(&index_html), title: title.clone(),
file_name_title: remove_illegal_chars(title),
author: html::get_author_from_index(&index_html), author: html::get_author_from_index(&index_html),
cover_image_url: http::string_to_url(&html::get_cover_image_url_from_index(&index_html)), cover_image_url: http::string_to_url(&html::get_cover_image_url_from_index(&index_html)),
index_html: index_html, index_html: index_html,
chapters: chapters, chapters: chapters,
image_urls: image_urls, image_urls_and_tags,
} }
} }
@ -90,7 +97,7 @@ pub struct Chapter {
/// The name of the chapter. /// The name of the chapter.
pub chapter_name: String, pub chapter_name: String,
/// The raw html data of the page. /// The raw html data of the chapter page.
raw_chapter_html: Html, raw_chapter_html: Html,
/// The isolated chapter html. /// The isolated chapter html.
@ -105,13 +112,8 @@ impl Chapter {
Chapter { Chapter {
chapter_url: chapter_url, chapter_url: chapter_url,
chapter_name: chapter_name.to_string(), chapter_name: chapter_name.to_string(),
raw_chapter_html: raw_chapter_html.clone(), isolated_chapter_html: html::isolate_chapter_content(&raw_chapter_html),
isolated_chapter_html: html::isolate_chapter_content(raw_chapter_html) raw_chapter_html: raw_chapter_html,
} }
} }
}
// TODO!
struct BookCss {
} }

110
src/constants.rs Normal file
View file

@ -0,0 +1,110 @@
/// Opening boilerplate for an xhtml document: the xml declaration, the XHTML 1.1 doctype,
/// and the opening `<html>` tag carrying the xhtml namespace.
///
/// The `<html>` element is deliberately left open; append `EPUB_XML_TAIL` after the
/// document's content to close it.
pub const EPUB_XML_HEAD: &str = r#"<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">"#;
/// Closing `</html>` tag that matches the `<html>` element opened by `EPUB_XML_HEAD`.
pub const EPUB_XML_TAIL: &str = r#"</html>"#;
/// The css used as the stylesheet of generated epubs.
///
/// The contents are the html5doctor.com reset stylesheet (v1.6.1); the attribution
/// comment at the top of the css is part of the string and ends up in the epub.
pub const EPUB_CSS: &str = r#"
/*
html5doctor.com Reset Stylesheet
v1.6.1
Last Updated: 2010-09-17
Author: Richard Clark - http://richclarkdesign.com
Twitter: @rich_clark
*/
html, body, div, span, object, iframe,
h1, h2, h3, h4, h5, h6, p, blockquote, pre,
abbr, address, cite, code,
del, dfn, em, img, ins, kbd, q, samp,
small, strong, sub, sup, var,
b, i,
dl, dt, dd, ol, ul, li,
fieldset, form, label, legend,
table, caption, tbody, tfoot, thead, tr, th, td,
article, aside, canvas, details, figcaption, figure,
footer, header, hgroup, menu, nav, section, summary,
time, mark, audio, video {
margin:0;
padding:0;
border:0;
outline:0;
font-size:100%;
vertical-align:baseline;
background:transparent;
}
body {
line-height:1;
}
article,aside,details,figcaption,figure,
footer,header,hgroup,menu,nav,section {
display:block;
}
nav ul {
list-style:none;
}
blockquote, q {
quotes:none;
}
blockquote:before, blockquote:after,
q:before, q:after {
content:'';
content:none;
}
a {
margin:0;
padding:0;
font-size:100%;
vertical-align:baseline;
background:transparent;
}
/* change colours to suit your needs */
ins {
background-color:#ff9;
color:#000;
text-decoration:none;
}
/* change colours to suit your needs */
mark {
background-color:#ff9;
color:#000;
font-style:italic;
font-weight:bold;
}
del {
text-decoration: line-through;
}
abbr[title], dfn[title] {
border-bottom:1px dotted;
cursor:help;
}
table {
border-collapse:collapse;
border-spacing:0;
}
/* change border colour to suit your needs */
hr {
display:block;
height:1px;
border:0;
border-top:1px solid #cccccc;
margin:1em 0;
padding:0;
}
input, select {
vertical-align:middle;
}
"#;

50
src/file_system_crap.rs Normal file
View file

@ -0,0 +1,50 @@
use std::path::PathBuf;
use path_slash::PathBufExt as _;
/// Converts a given path to windows style if needed.
pub fn convert_path_to_os_specific(path: PathBuf) -> PathBuf {
// If target os is windows.
#[cfg(target_os = "windows")] {
return PathBuf::from_slash_lossy(path.into_os_string());
}
// If target os is not windows.
#[cfg(not(target_os = "windows"))] {
return PathBuf::from_backslash_lossy(path.into_os_string());
}
}
/// Swaps every character that is illegal in filenames on either unix or windows for a space.
///
/// Characters are replaced one-for-one rather than dropped, so the returned string
/// has the same number of characters as the input.
pub fn remove_illegal_chars(mut string: String) -> String {
    // The offending characters: / \ < > : " | ? *
    const ILLEGAL_CHARS: [char; 9] = ['/', '\\', '<', '>', ':', '"', '|', '?', '*'];

    string = string
        .chars()
        .map(|character| if ILLEGAL_CHARS.contains(&character) { ' ' } else { character })
        .collect();

    string
}
/// Setup html2xhtml in the operating system's temp directory.
///
/// Not implemented yet: each platform branch below only holds a TODO note, so calling
/// this function currently does nothing. Targets other than windows, linux & macos
/// have no branch at all.
pub fn setup_html2xhtml() {
#[cfg(target_os = "windows")] {
//TODO!
// Thinking of using C:\Users\<username>\AppData\Local\Temp\html2xhtml-windows
}
#[cfg(target_os = "linux")] {
// TODO!
// Thinking of using /tmp/html2xhtml-linux
}
#[cfg(target_os = "macos")] {
// TODO!
// You can find the macos tempdir by doing: echo $TMPDIR
}
}
/// Delete html2xhtml from the operating system's temp directory.
///
/// Not implemented yet: the body is only a TODO, so calling this function currently does nothing.
pub fn delete_html2xhtml() {
// TODO!
}

View file

@ -4,6 +4,8 @@ use regex::Regex;
use scraper::{Html, Selector}; use scraper::{Html, Selector};
use url::Url; use url::Url;
use crate::misc::HashMapExt;
/// Convert a string to an html document. /// Convert a string to an html document.
pub fn string_to_html_document(document_string: &str) -> Html { pub fn string_to_html_document(document_string: &str) -> Html {
Html::parse_document(document_string) Html::parse_document(document_string)
@ -123,7 +125,7 @@ pub fn get_chapter_names_and_urls_from_index(index_html: &Html) -> Vec<[String;
} }
/// Isolate chapter content from the rest of the shit on the page. /// Isolate chapter content from the rest of the shit on the page.
pub fn isolate_chapter_content(raw_chapter_html: Html) -> Html { pub fn isolate_chapter_content(raw_chapter_html: &Html) -> Html {
let page_html = Html::parse_document(&raw_chapter_html.html()); let page_html = Html::parse_document(&raw_chapter_html.html());
let selector = Selector::parse("div").unwrap(); let selector = Selector::parse("div").unwrap();
@ -142,7 +144,7 @@ pub fn isolate_chapter_content(raw_chapter_html: Html) -> Html {
} }
/// Remove all img tags from the html fragment. /// Remove all img tags from the html fragment.
pub fn remove_image_tags(html_fragment: Html) -> String { pub fn remove_image_tags(html_fragment: &Html) -> String {
let mut image_tags: Vec<String> = Vec::new(); let mut image_tags: Vec<String> = Vec::new();
let selector = Selector::parse("img").unwrap(); let selector = Selector::parse("img").unwrap();
@ -161,10 +163,29 @@ pub fn remove_image_tags(html_fragment: Html) -> String {
return html_fragment; return html_fragment;
} }
pub fn extract_urls_and_imgs_tag(chapter_html: Html) -> HashMap<Url, Vec<String>> { /// Extract the urls and image tags from a chapter and put them in the hashmap:
/// ``Hashmap<Url, Vec<String>>``
pub fn extract_urls_and_img_tag(chapter_html: &Html) -> HashMap<Url, Vec<String>> {
let mut chapter_image_urls: HashMap<Url, Vec<String>> = HashMap::new(); let mut chapter_image_urls: HashMap<Url, Vec<String>> = HashMap::new();
let selector = Selector::parse("img").unwrap();
for element in chapter_html.select(&selector) {
let url = element.attr("src");
let image_tag = element.html();
if url.is_none() { continue; }
let url = match Url::parse(url.unwrap()) {
Ok(url) => url,
Err(error) => {
eprintln!("Warning! Unable to parse url on image tag: {image_tag}\n{error}");
continue;
},
};
let temp_map: HashMap<Url, Vec<String>> = HashMap::from([(url, vec![image_tag])]);
chapter_image_urls = chapter_image_urls.join(temp_map);
}
return chapter_image_urls; return chapter_image_urls;
} }

View file

@ -56,7 +56,7 @@ pub fn string_to_url(url: &str) -> Url {
match Url::parse(url) { match Url::parse(url) {
Ok(url) => url, Ok(url) => url,
Err(error) => { Err(error) => {
eprintln!("Error! Unable to parse: {url} into a valid url."); eprintln!("Error! Unable to parse: {url} into a valid url.\n{error}");
exit(1); exit(1);
} }
} }

View file

@ -2,12 +2,16 @@ use std::{fs::OpenOptions, io::Write, path::PathBuf, process::exit};
use chrono::prelude::Local; use chrono::prelude::Local;
use clap::Args; use clap::Args;
use epub_builder::{EpubBuilder, EpubContent, ReferenceType, ZipLibrary};
use file_system_crap::convert_path_to_os_specific;
use url::Url; use url::Url;
mod book; mod book;
mod constants;
mod file_system_crap;
mod html; mod html;
mod http; mod http;
mod misc;
/// struct that corresponds to arguments for Audiobook generation. /// struct that corresponds to arguments for Audiobook generation.
#[derive(Args, Debug)] #[derive(Args, Debug)]
@ -61,7 +65,80 @@ pub fn generate_audiobook(audiobook_args: AudiobookArgs, book_url: Url, output_d
/// This function DOES NOT do any error checking on the Url or output directory & WILL panic if they are wrong. /// This function DOES NOT do any error checking on the Url or output directory & WILL panic if they are wrong.
/// Make sure the Url is valid and the output directory is writable BEFORE passing them to this. /// Make sure the Url is valid and the output directory is writable BEFORE passing them to this.
///
/// The finished epub is written to `<output_directory>/<book.file_name_title>.epub`.
/// Exits the process with code 1 if that file can not be created, which includes the case
/// where it already exists. Panics if any of the epub building steps or the final write fail.
pub fn generate_epub(epub_args: EpubArgs, book_url: Url, output_directory: PathBuf) {
    // Until xhtml is working on MacOS this notice & exit code will remain.
    // See file_system_crap::setup_html2xhtml() for current status on MacOS support for this mode.
    #[cfg(target_os = "macos")] {
        eprintln!("Error! This mode does not currently support MacOS. Try either html mode or markdown mode.");
        exit(1);
    }

    let book = book::Book::new(book_url);

    // Initialize the epub builder.
    let mut epub_builder = EpubBuilder::new(ZipLibrary::new().unwrap()).unwrap();

    // Use the epub_css in the constants.rs file as the stylesheet.
    epub_builder.stylesheet(constants::EPUB_CSS.as_bytes()).unwrap();

    // Add author and title metadata.
    epub_builder
        .metadata("author", &book.author)
        .expect("Unable to add author metadata");
    epub_builder
        .metadata("title", &book.title)
        .expect("Unable to add title metadata");

    // Download the cover image & add it to the epub.
    let cover_image = http::get_response(book.cover_image_url).get_bytes().to_vec();
    epub_builder.add_cover_image("cover.jpeg", cover_image.as_slice(), "image/jpeg").expect("Unable to add cover image.");

    // Generate the cover xhtml.
    // The url, title & author come from the scraped page, so they are escaped before being
    // placed in the markup; a bare `&` or `<` would make the xhtml malformed.
    // NOTE(review): assumes Book::title & Book::author hold plain text, not already-escaped html — confirm.
    let cover_xhtml = format!(
        r#"<head></head><body><div style="text-align: center;">
<h1><a href="{0}">{1}</a></h1>
<img src="cover.jpeg"/>
<h2>by: {2}</h2>
<h3>Archived on: {3}</h3></div></body>"#,
        escape_xhtml(book.book_url.as_str()),
        escape_xhtml(&book.title),
        escape_xhtml(&book.author),
        chrono::Local::now().to_rfc3339_opts(chrono::SecondsFormat::Secs, false)
    );
    let cover_xhtml = format!("{0}{cover_xhtml}{1}", constants::EPUB_XML_HEAD, constants::EPUB_XML_TAIL);

    // Add the cover xhtml to the epub.
    epub_builder.add_content(
        EpubContent::new("title.xhtml", cover_xhtml.as_bytes())
            .title("Cover")
            .reftype(ReferenceType::Cover),
    ).expect("Unable to add cover");

    // Add a table of contents after the cover page.
    epub_builder.inline_toc();

    // Setup html2xhtml on the operating system.
    file_system_crap::setup_html2xhtml();

    // TODO! Generate the epub body, deal with images etc etc. You know pickup from last night etc etc.
    // Finish setup_html2xhtml() first though dummy.

    // Generate the finished epub data as a byte vector.
    let mut finished_epub: Vec<u8> = vec![];
    epub_builder.generate(&mut finished_epub).expect("Unable to generate epub data");

    // Create the epub file and write the finished epub data to it.
    // create_new(true) refuses to overwrite, so an existing file is reported as an error.
    let output_path = convert_path_to_os_specific(output_directory.join(format!("{0}.epub", book.file_name_title)));
    let mut output_file = match OpenOptions::new().write(true).create_new(true).open(&output_path) {
        Ok(output_file) => output_file,
        Err(error) => {
            eprintln!("Error! Unable to create: {0}\n{error}", output_path.to_string_lossy());
            exit(1);
        }
    };

    // The panic message is only built if the write actually fails.
    output_file.write_all(finished_epub.as_slice())
        .unwrap_or_else(|error| panic!("Unable to write finished epub data to {0}: {error:?}", output_path.to_string_lossy()));

    // Delete html2xhtml from the temp directory. It's good to clean up after yourself.
    file_system_crap::delete_html2xhtml();
}

/// Escape the characters that have special meaning in xml (`&`, `<`, `>`, `"`, `'`) so the
/// text can be placed inside xhtml element content or a quoted attribute value.
fn escape_xhtml(text: &str) -> String {
    let mut escaped = String::with_capacity(text.len());
    for character in text.chars() {
        match character {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            '\'' => escaped.push_str("&#39;"),
            other => escaped.push(other),
        }
    }
    escaped
}
/// Generate an html archive from the given arguments, url, & outputs it to the output directory. /// Generate an html archive from the given arguments, url, & outputs it to the output directory.
@ -79,7 +156,7 @@ pub fn generate_html(html_args: HtmlArgs, book_url: Url, output_directory: PathB
pub fn generate_markdown(markdown_args: MarkdownArgs, book_url: Url, output_directory: PathBuf) { pub fn generate_markdown(markdown_args: MarkdownArgs, book_url: Url, output_directory: PathBuf) {
let book = book::Book::new(book_url); let book = book::Book::new(book_url);
let output_path = convert_path_to_windows(output_directory.join(format!("{0}.md", book.title))); let output_path = convert_path_to_os_specific(output_directory.join(format!("{0}.md", book.file_name_title)));
// Create the md file. This will crash if it already exists or can not be created. // Create the md file. This will crash if it already exists or can not be created.
let mut output_file = match OpenOptions::new().write(true).create_new(true).open(&output_path) { let mut output_file = match OpenOptions::new().write(true).create_new(true).open(&output_path) {
@ -110,7 +187,7 @@ pub fn generate_markdown(markdown_args: MarkdownArgs, book_url: Url, output_dire
if markdown_args.no_image_tags { if markdown_args.no_image_tags {
// Remove image tags or not depending on args. // Remove image tags or not depending on args.
buf = format!("\n\n{}\n\n", html2md::parse_html(&html::remove_image_tags(chapter.isolated_chapter_html))); buf = format!("\n\n{}\n\n", html2md::parse_html(&html::remove_image_tags(&chapter.isolated_chapter_html)));
} else { } else {
buf = format!("\n\n{}\n\n", html2md::parse_html(&chapter.isolated_chapter_html.html())); buf = format!("\n\n{}\n\n", html2md::parse_html(&chapter.isolated_chapter_html.html()));
@ -118,19 +195,4 @@ pub fn generate_markdown(markdown_args: MarkdownArgs, book_url: Url, output_dire
output_file.write_all(buf.as_bytes()).unwrap(); output_file.write_all(buf.as_bytes()).unwrap();
} }
}
/// Converts a given path to windows style if needed.
fn convert_path_to_windows(path: PathBuf) -> PathBuf {
// If target os is windows.
#[cfg(target_os = "windows")] {
use path_slash::PathBufExt as _;
return PathBuf::from_slash(path.into_os_string().into_string().unwrap());
}
// If target os is not windows.
#[cfg(not(target_os = "windows"))] {
return path;
}
} }

34
src/misc.rs Normal file
View file

@ -0,0 +1,34 @@
use std::collections::HashMap;
/// An extension to ``std::collections::HashMap<K, Vec<String>>``
pub trait HashMapExt<K> {
    /// Merges two ``Hashmap<K, Vec<String>>`` returning the merged hashmap.
    ///
    /// Keys that only exist in `new_hashmap` are moved over together with their vector as is.
    /// For keys that exist in both, the strings from `new_hashmap` are appended to the
    /// existing vector in order, skipping any string the vector already contains.
    fn join(self, new_hashmap: HashMap<K, Vec<String>>) -> HashMap<K, Vec<String>>;
}

impl<K: std::cmp::Eq + std::hash::Hash + std::clone::Clone> HashMapExt<K> for HashMap<K, Vec<String>> {
    fn join(mut self, other_hashmap: HashMap<K, Vec<String>>) -> HashMap<K, Vec<String>> {
        // other_hashmap is owned, so its keys and vectors are moved instead of cloned.
        for (key, other_vector) in other_hashmap {
            match self.entry(key) {
                std::collections::hash_map::Entry::Occupied(mut occupied) => {
                    let self_vector = occupied.get_mut();
                    for string in other_vector {
                        // Avoid repeating strings in the vectors.
                        if !self_vector.contains(&string) {
                            self_vector.push(string);
                        }
                    }
                }
                std::collections::hash_map::Entry::Vacant(vacant) => {
                    vacant.insert(other_vector);
                }
            }
        }

        self
    }
}