Formatted through rustfmt

This commit is contained in:
NA 2024-02-10 22:32:27 +00:00
parent 3df3d05c34
commit 50b1b93436
9 changed files with 353 additions and 193 deletions

5
rustfmt.toml Normal file
View file

@ -0,0 +1,5 @@
match_block_trailing_comma = true
merge_imports = true
normalize_comments = true
normalize_doc_attributes = true
use_try_shorthand = true

View file

@ -1,4 +1,8 @@
use std::{env, fs, path::{Path, PathBuf}, process::exit}; use std::{
env, fs,
path::{Path, PathBuf},
process::exit,
};
use clap::{Parser, Subcommand}; use clap::{Parser, Subcommand};
use royal_road_archiver_lib::GenerationError; use royal_road_archiver_lib::GenerationError;
@ -48,23 +52,33 @@ fn main() {
}, },
None => { None => {
output_directory = env::current_dir().unwrap(); output_directory = env::current_dir().unwrap();
} },
} }
valid_directory_check(&output_directory); valid_directory_check(&output_directory);
let book_url = valid_url_check(&cli_input.book_url.to_lowercase()); let book_url = valid_url_check(&cli_input.book_url.to_lowercase());
let result: Result<std::sync::MutexGuard<'_, royal_road_archiver_lib::GenerationWarnings>, GenerationError> = match cli_input.subcommand { let result: Result<
Subcommands::Audiobook(audiobook_args) => royal_road_archiver_lib::generate_audiobook(audiobook_args, book_url, output_directory), std::sync::MutexGuard<'_, royal_road_archiver_lib::GenerationWarnings>,
Subcommands::Epub(epub_args) => royal_road_archiver_lib::generate_epub(epub_args, book_url, output_directory), GenerationError,
Subcommands::Html(html_args) => royal_road_archiver_lib::generate_html(html_args, book_url, output_directory), > = match cli_input.subcommand {
Subcommands::Markdown(markdown_args) => royal_road_archiver_lib::generate_markdown(markdown_args, book_url, output_directory), Subcommands::Audiobook(audiobook_args) => {
royal_road_archiver_lib::generate_audiobook(audiobook_args, book_url, output_directory)
},
Subcommands::Epub(epub_args) => {
royal_road_archiver_lib::generate_epub(epub_args, book_url, output_directory)
},
Subcommands::Html(html_args) => {
royal_road_archiver_lib::generate_html(html_args, book_url, output_directory)
},
Subcommands::Markdown(markdown_args) => {
royal_road_archiver_lib::generate_markdown(markdown_args, book_url, output_directory)
},
}; };
match result { match result {
Ok(generation_warnings) => { Ok(generation_warnings) => {
if !&generation_warnings.warnings_count() == 0 { if !&generation_warnings.warnings_count() == 0 {
println!("The following warnings were generated:"); println!("The following warnings were generated:");
for warning in generation_warnings.get_warnings() { for warning in generation_warnings.get_warnings() {
println!("\n{warning}"); println!("\n{warning}");
@ -73,7 +87,7 @@ fn main() {
}, },
Err(generation_error) => { Err(generation_error) => {
eprintln!("{}", generation_error); eprintln!("{}", generation_error);
} },
} }
} }
@ -88,7 +102,7 @@ fn valid_directory_check(output_directory: &Path) {
Err(error) => { Err(error) => {
eprintln!("Error! Unable to create directory: {error}"); eprintln!("Error! Unable to create directory: {error}");
exit(1); exit(1);
} },
} }
} }
} }
@ -97,11 +111,9 @@ fn valid_directory_check(output_directory: &Path) {
fn valid_url_check(book_url: &str) -> Url { fn valid_url_check(book_url: &str) -> Url {
match Url::parse(book_url) { match Url::parse(book_url) {
Ok(book_url) => { Ok(book_url) => {
if book_url.host_str() == Some("www.royalroad.com") { if book_url.host_str() == Some("www.royalroad.com") {
return book_url; return book_url;
} } else {
else {
eprintln!("Error! Please enter a RoyalRoad URL."); eprintln!("Error! Please enter a RoyalRoad URL.");
exit(1); exit(1);
} }
@ -109,6 +121,6 @@ fn valid_url_check(book_url: &str) -> Url {
Err(error) => { Err(error) => {
eprintln!("Error! Unable to parse url: {book_url}\n{error}"); eprintln!("Error! Unable to parse url: {book_url}\n{error}");
exit(1); exit(1);
} },
} }
} }

View file

@ -1,7 +1,7 @@
use std::collections::HashMap; use std::collections::HashMap;
use indicatif::{ProgressBar, ProgressStyle};
use crate::{misc::HashMapExt, GenerationError}; use crate::{misc::HashMapExt, GenerationError};
use indicatif::{ProgressBar, ProgressStyle};
use scraper::Html; use scraper::Html;
use url::Url; use url::Url;
@ -38,9 +38,11 @@ pub struct Book {
impl Book { impl Book {
/// Generate a new book instance with all the needed data from a given url. /// Generate a new book instance with all the needed data from a given url.
pub fn new(book_url: Url) -> Result<Book, GenerationError> { pub fn new(book_url: Url) -> Result<Book, GenerationError> {
let index_html = html::string_to_html_document(&http::get_response(book_url.clone())?.get_text()?); let index_html =
html::string_to_html_document(&http::get_response(book_url.clone())?.get_text()?);
let chapter_names_and_urls = html::get_chapter_names_and_urls_from_index(&index_html, &book_url)?; let chapter_names_and_urls =
html::get_chapter_names_and_urls_from_index(&index_html, &book_url)?;
let mut chapters: Vec<Chapter> = Vec::with_capacity(chapter_names_and_urls.len()); let mut chapters: Vec<Chapter> = Vec::with_capacity(chapter_names_and_urls.len());
let mut image_urls_and_tags: HashMap<Url, Vec<String>> = HashMap::new(); let mut image_urls_and_tags: HashMap<Url, Vec<String>> = HashMap::new();
@ -49,7 +51,9 @@ impl Book {
// Spawn a progress bar showing how many chapters have been downloaded & processed. // Spawn a progress bar showing how many chapters have been downloaded & processed.
let progress_bar = ProgressBar::new(chapter_names_and_urls.len().try_into().unwrap()); let progress_bar = ProgressBar::new(chapter_names_and_urls.len().try_into().unwrap());
progress_bar.set_style( progress_bar.set_style(
ProgressStyle::with_template("[{elapsed_precise}] [{wide_bar:.cyan/blue}] {percent}% ") ProgressStyle::with_template(
"[{elapsed_precise}] [{wide_bar:.cyan/blue}] {percent}% ",
)
.unwrap() .unwrap()
.progress_chars("#>-"), .progress_chars("#>-"),
); );
@ -59,7 +63,9 @@ impl Book {
let chapter = Chapter::new(&chapter_names_and_urls[i].0, &chapter_names_and_urls[i].1)?; let chapter = Chapter::new(&chapter_names_and_urls[i].0, &chapter_names_and_urls[i].1)?;
// extract the image urls and add em to the image_urls_and_tags hashmap. // extract the image urls and add em to the image_urls_and_tags hashmap.
image_urls_and_tags = image_urls_and_tags.join(html::extract_urls_and_img_tag(&chapter.isolated_chapter_html)); image_urls_and_tags = image_urls_and_tags.join(html::extract_urls_and_img_tag(
&chapter.isolated_chapter_html,
));
chapters.push(chapter); chapters.push(chapter);
@ -109,7 +115,8 @@ pub struct Chapter {
impl Chapter { impl Chapter {
fn new(chapter_name: &str, chapter_url: &str) -> Result<Self, GenerationError> { fn new(chapter_name: &str, chapter_url: &str) -> Result<Self, GenerationError> {
let chapter_url = http::string_to_url(&chapter_url)?; let chapter_url = http::string_to_url(&chapter_url)?;
let raw_chapter_html = html::string_to_html_document(&http::get_response(chapter_url.clone())?.get_text()?); let raw_chapter_html =
html::string_to_html_document(&http::get_response(chapter_url.clone())?.get_text()?);
let chapter = Chapter { let chapter = Chapter {
isolated_chapter_html: html::isolate_chapter_content(&raw_chapter_html, &chapter_url)?, isolated_chapter_html: html::isolate_chapter_content(&raw_chapter_html, &chapter_url)?,

View file

@ -8,12 +8,14 @@ use crate::{misc, GenerationError, Warning, WARNINGS};
/// Converts a given path to windows style if needed. /// Converts a given path to windows style if needed.
pub fn convert_path_to_os_specific(path: PathBuf) -> PathBuf { pub fn convert_path_to_os_specific(path: PathBuf) -> PathBuf {
// If target os is windows. // If target os is windows.
#[cfg(target_os = "windows")] { #[cfg(target_os = "windows")]
{
return PathBuf::from_slash_lossy(path.into_os_string()); return PathBuf::from_slash_lossy(path.into_os_string());
} }
// If target os is not windows. // If target os is not windows.
#[cfg(not(target_os = "windows"))] { #[cfg(not(target_os = "windows"))]
{
return PathBuf::from_backslash_lossy(path.into_os_string()); return PathBuf::from_backslash_lossy(path.into_os_string());
} }
} }
@ -31,7 +33,8 @@ pub fn remove_illegal_chars(mut string: String) -> String {
/// Setup html2xhtml in the operating system's temp directory. /// Setup html2xhtml in the operating system's temp directory.
pub fn setup_html2xhtml() -> Result<TempDir, GenerationError> { pub fn setup_html2xhtml() -> Result<TempDir, GenerationError> {
#[cfg(target_os = "windows")] { #[cfg(target_os = "windows")]
{
const HTML2XHTML: &[u8; 245025] = include_bytes!("../html2xhtml-windows.zip"); // This will not compile on windows due to this and no I don't give a shit. const HTML2XHTML: &[u8; 245025] = include_bytes!("../html2xhtml-windows.zip"); // This will not compile on windows due to this and no I don't give a shit.
// Compile it on linux for windows like a sane person. // Compile it on linux for windows like a sane person.
let html2xhtml_temp_dir = create_temp_dir()?; let html2xhtml_temp_dir = create_temp_dir()?;
@ -44,7 +47,8 @@ pub fn setup_html2xhtml() -> Result<TempDir, GenerationError> {
return Ok(html2xhtml_temp_dir); return Ok(html2xhtml_temp_dir);
} }
#[cfg(target_os = "linux")] { #[cfg(target_os = "linux")]
{
const HTML2XHTML: &[u8; 186938] = include_bytes!("../html2xhtml-linux.zip"); const HTML2XHTML: &[u8; 186938] = include_bytes!("../html2xhtml-linux.zip");
let html2xhtml_temp_dir = create_temp_dir()?; let html2xhtml_temp_dir = create_temp_dir()?;
@ -56,13 +60,18 @@ pub fn setup_html2xhtml() -> Result<TempDir, GenerationError> {
return Ok(html2xhtml_temp_dir); return Ok(html2xhtml_temp_dir);
} }
#[cfg(target_os = "macos")] { #[cfg(target_os = "macos")]
Err(GenerationError::OsUnsupportedError {os: misc::Oses::MacOs}) {
Err(GenerationError::OsUnsupportedError {
os: misc::Oses::MacOs,
})
} }
// In the event the OS is unknown. // In the event the OS is unknown.
#[allow(unreachable_code)] #[allow(unreachable_code)]
Err(GenerationError::OsUnsupportedError {os: misc::Oses::OtherUnknownOs}) Err(GenerationError::OsUnsupportedError {
os: misc::Oses::OtherUnknownOs,
})
} }
/// Function to create a temporary directory. /// Function to create a temporary directory.
@ -83,9 +92,9 @@ pub fn delete_temp_dir(temp_dir: TempDir) {
let warning = Warning::TempDirDeletionError { let warning = Warning::TempDirDeletionError {
warning_msg: "Unable to close and delete temp directory".to_string(), warning_msg: "Unable to close and delete temp directory".to_string(),
temp_directory_path: temp_dir_path, temp_directory_path: temp_dir_path,
error: warning error: warning,
}; };
WARNINGS.lock().unwrap().add_warning(warning); WARNINGS.lock().unwrap().add_warning(warning);
} },
} }
} }

View file

@ -1,4 +1,8 @@
use std::{collections::HashMap, io::Write, process::{Command, Stdio}}; use std::{
collections::HashMap,
io::Write,
process::{Command, Stdio},
};
use regex::Regex; use regex::Regex;
use scraper::{Html, Selector}; use scraper::{Html, Selector};
@ -31,10 +35,12 @@ pub fn get_title_from_index(index_html: &Html, book_url: &Url) -> Result<String,
return Ok(element.value().attr("content").unwrap().to_owned()); return Ok(element.value().attr("content").unwrap().to_owned());
// If it is, extract the data from the content attribute. // If it is, extract the data from the content attribute.
} }
},
} }
} }
} Err(GenerationError::BookTitleFetchError {
Err(GenerationError::BookTitleFetchError{url: book_url.clone()}) url: book_url.clone(),
})
} }
/// Get the book's author from index /// Get the book's author from index
@ -47,14 +53,19 @@ pub fn get_author_from_index(index_html: &Html, book_url: &Url) -> Result<String
if x == "books:author" { if x == "books:author" {
return Ok(element.value().attr("content").unwrap().to_owned()); return Ok(element.value().attr("content").unwrap().to_owned());
} }
},
} }
} }
} Err(GenerationError::BookAuthorFetchError {
Err(GenerationError::BookAuthorFetchError{url: book_url.clone()}) url: book_url.clone(),
})
} }
/// Get the book's cover image url from the index /// Get the book's cover image url from the index
pub fn get_cover_image_url_from_index(index_html: &Html, book_url: &Url) -> Result<Url, GenerationError> { pub fn get_cover_image_url_from_index(
index_html: &Html,
book_url: &Url,
) -> Result<Url, GenerationError> {
let selector = Selector::parse("meta").unwrap(); let selector = Selector::parse("meta").unwrap();
for element in index_html.select(&selector) { for element in index_html.select(&selector) {
match element.value().attr("property") { match element.value().attr("property") {
@ -63,16 +74,21 @@ pub fn get_cover_image_url_from_index(index_html: &Html, book_url: &Url) -> Resu
if x == "og:image" { if x == "og:image" {
return http::string_to_url(element.value().attr("content").unwrap()); return http::string_to_url(element.value().attr("content").unwrap());
} }
},
} }
} }
} Err(GenerationError::BookCoverImageUrlFetchError {
Err(GenerationError::BookCoverImageUrlFetchError{url: book_url.clone()}) url: book_url.clone(),
})
} }
/// Gets the chapter names and urls from the index. /// Gets the chapter names and urls from the index.
/// ///
/// This gets stored in a vector of tuples, where element 0 of each tuple is the chapter name and element 1 is the url. /// This gets stored in a vector of tuples, where element 0 of each tuple is the chapter name and element 1 is the url.
pub fn get_chapter_names_and_urls_from_index(index_html: &Html, book_url: &Url) -> Result<Vec<(String, String)>, GenerationError> { pub fn get_chapter_names_and_urls_from_index(
index_html: &Html,
book_url: &Url,
) -> Result<Vec<(String, String)>, GenerationError> {
// I won't lie. I have almost 0 idea what a bunch of this shit does since it's highly specific to RoyalRoad. // I won't lie. I have almost 0 idea what a bunch of this shit does since it's highly specific to RoyalRoad.
// I've commented in the gist of it, but we have no memory of actually writing this function. // I've commented in the gist of it, but we have no memory of actually writing this function.
@ -89,7 +105,9 @@ pub fn get_chapter_names_and_urls_from_index(index_html: &Html, book_url: &Url)
} }
// Exit if unable to find the needed json data. That probably means royal road has changed their code. // Exit if unable to find the needed json data. That probably means royal road has changed their code.
if raw_json_data.is_empty() { if raw_json_data.is_empty() {
return Err(GenerationError::BookChapterNameAndUrlFetchError { url: book_url.clone()}); return Err(GenerationError::BookChapterNameAndUrlFetchError {
url: book_url.clone(),
});
} }
// I have absolutely no idea what this regex does; but it's probably important. // I have absolutely no idea what this regex does; but it's probably important.
@ -122,7 +140,10 @@ pub fn get_chapter_names_and_urls_from_index(index_html: &Html, book_url: &Url)
} }
/// Isolate chapter content from the rest of the shit on the page. /// Isolate chapter content from the rest of the shit on the page.
pub fn isolate_chapter_content(raw_chapter_html: &Html, chapter_url: &Url) -> Result<Html, GenerationError> { pub fn isolate_chapter_content(
raw_chapter_html: &Html,
chapter_url: &Url,
) -> Result<Html, GenerationError> {
let page_html = Html::parse_document(&raw_chapter_html.html()); let page_html = Html::parse_document(&raw_chapter_html.html());
let selector = Selector::parse("div").unwrap(); let selector = Selector::parse("div").unwrap();
@ -133,10 +154,12 @@ pub fn isolate_chapter_content(raw_chapter_html: &Html, chapter_url: &Url) -> Re
if x == "chapter-inner chapter-content" { if x == "chapter-inner chapter-content" {
return Ok(string_to_html_fragment(&element.inner_html())); return Ok(string_to_html_fragment(&element.inner_html()));
} }
},
} }
} }
} Err(GenerationError::ChapterContentIsolationError {
Err(GenerationError::ChapterContentIsolationError{url: chapter_url.clone()}) url: chapter_url.clone(),
})
} }
/// Remove all img tags from the html fragment. /// Remove all img tags from the html fragment.
@ -169,7 +192,9 @@ pub fn extract_urls_and_img_tag(chapter_html: &Html) -> HashMap<Url, Vec<String>
let url = element.attr("src"); let url = element.attr("src");
let image_tag = element.html(); let image_tag = element.html();
if url.is_none() { continue; } if url.is_none() {
continue;
}
let url = match Url::parse(url.unwrap()) { let url = match Url::parse(url.unwrap()) {
Ok(url) => url, Ok(url) => url,
Err(warning) => { Err(warning) => {
@ -199,16 +224,19 @@ pub fn replace_img_src(img_tag: String, new_src: String) -> String {
let selector = Selector::parse("img").unwrap(); let selector = Selector::parse("img").unwrap();
let element = img_tag.select(&selector).next().unwrap(); let element = img_tag.select(&selector).next().unwrap();
if element.attr("src").is_some() { if element.attr("src").is_some() {
let image_tag = element.html(); let image_tag = element.html();
let src_match_regex = Regex::new(r#"(src=["'].*["'])"#).unwrap(); let src_match_regex = Regex::new(r#"(src=["'].*["'])"#).unwrap();
let src_attr = src_match_regex.captures(&image_tag).unwrap().get(0).map(|m| m.as_str()).unwrap(); let src_attr = src_match_regex
.captures(&image_tag)
.unwrap()
.get(0)
.map(|m| m.as_str())
.unwrap();
return image_tag.replace(src_attr, &format!(r#"src="{new_src}""#)); return image_tag.replace(src_attr, &format!(r#"src="{new_src}""#));
} } else {
else {
return element.html(); return element.html();
} }
} }
@ -238,7 +266,12 @@ pub fn html_to_xhtml(html: Html, html2xhtml_dir: &TempDir) -> Result<String, Gen
}; };
// Write the html to the stdin, then wait for xhtml to be outputted to the stdout. // Write the html to the stdin, then wait for xhtml to be outputted to the stdout.
html2xhtml.stdin.as_mut().unwrap().write_all(html.as_bytes()).unwrap(); html2xhtml
.stdin
.as_mut()
.unwrap()
.write_all(html.as_bytes())
.unwrap();
let html2xhtml_output = html2xhtml.wait_with_output().unwrap(); let html2xhtml_output = html2xhtml.wait_with_output().unwrap();
// Generate a lossy string from the stdout. // Generate a lossy string from the stdout.

View file

@ -56,13 +56,15 @@ impl HttpResponse {
WARNINGS.lock().unwrap().add_warning(warning); WARNINGS.lock().unwrap().add_warning(warning);
return (String::with_capacity(0), String::with_capacity(0)); return (String::with_capacity(0), String::with_capacity(0));
} },
}; };
if mime_to_file_extension.contains_key(content_type) { if mime_to_file_extension.contains_key(content_type) {
return (content_type.to_string(), mime_to_file_extension[content_type].to_string()); return (
} content_type.to_string(),
else { mime_to_file_extension[content_type].to_string(),
);
} else {
return (content_type.to_string(), String::with_capacity(0)); return (content_type.to_string(), String::with_capacity(0));
} }
} }
@ -82,6 +84,9 @@ pub fn get_response(url: Url) -> Result<HttpResponse, GenerationError> {
pub fn string_to_url(url: &str) -> Result<Url, GenerationError> { pub fn string_to_url(url: &str) -> Result<Url, GenerationError> {
match Url::parse(url) { match Url::parse(url) {
Ok(url) => Ok(url), Ok(url) => Ok(url),
Err(error) => Err(GenerationError::UrlParseError {error, string_url: url.to_string()}), Err(error) => Err(GenerationError::UrlParseError {
error,
string_url: url.to_string(),
}),
} }
} }

View file

@ -1,4 +1,11 @@
use std::{collections::HashMap, fs::OpenOptions, io::Write, path::PathBuf, process::exit, sync::{Mutex, MutexGuard}}; use std::{
collections::HashMap,
fs::OpenOptions,
io::Write,
path::PathBuf,
process::exit,
sync::{Mutex, MutexGuard},
};
use bytes::Buf; use bytes::Buf;
use chrono::prelude::Local; use chrono::prelude::Local;
@ -6,8 +13,8 @@ use clap::Args;
use epub_builder::{EpubBuilder, EpubContent, ReferenceType, ZipLibrary}; use epub_builder::{EpubBuilder, EpubContent, ReferenceType, ZipLibrary};
use file_system_crap::convert_path_to_os_specific; use file_system_crap::convert_path_to_os_specific;
use html::{html_to_xhtml, remove_image_tags, string_to_html_fragment}; use html::{html_to_xhtml, remove_image_tags, string_to_html_fragment};
use lazy_static::lazy_static;
use indicatif::{ProgressBar, ProgressStyle}; use indicatif::{ProgressBar, ProgressStyle};
use lazy_static::lazy_static;
use misc::Oses; use misc::Oses;
use reqwest::header::ToStrError; use reqwest::header::ToStrError;
use thiserror::Error; use thiserror::Error;
@ -43,9 +50,7 @@ pub struct EpubArgs {
/// struct that corresponds to arguments for Html generation. /// struct that corresponds to arguments for Html generation.
#[derive(Args, Debug)] #[derive(Args, Debug)]
pub struct HtmlArgs { pub struct HtmlArgs {}
}
/// struct that corresponds to arguments for Markdown generation. /// struct that corresponds to arguments for Markdown generation.
#[derive(Args, Debug)] #[derive(Args, Debug)]
@ -67,7 +72,11 @@ lazy_static! {
/// ///
/// This function DOES NOT do any error checking on the Url or output directory & WILL panic if they are wrong. /// This function DOES NOT do any error checking on the Url or output directory & WILL panic if they are wrong.
/// Make sure the Url is valid and the output directory is writable BEFORE passing them to this. /// Make sure the Url is valid and the output directory is writable BEFORE passing them to this.
pub fn generate_audiobook(audiobook_args: AudiobookArgs, book_url: Url, output_directory: PathBuf) -> Result<MutexGuard<'static, GenerationWarnings>, GenerationError> { pub fn generate_audiobook(
audiobook_args: AudiobookArgs,
book_url: Url,
output_directory: PathBuf,
) -> Result<MutexGuard<'static, GenerationWarnings>, GenerationError> {
return Err(GenerationError::GenerationUnsupportedError); return Err(GenerationError::GenerationUnsupportedError);
} }
@ -75,14 +84,20 @@ pub fn generate_audiobook(audiobook_args: AudiobookArgs, book_url: Url, output_d
/// ///
/// This function DOES NOT do any error checking on the Url or output directory & WILL panic if they are wrong. /// This function DOES NOT do any error checking on the Url or output directory & WILL panic if they are wrong.
/// Make sure the Url is valid and the output directory is writable BEFORE passing them to this. /// Make sure the Url is valid and the output directory is writable BEFORE passing them to this.
pub fn generate_epub(epub_args: EpubArgs, book_url: Url, output_directory: PathBuf) -> Result<MutexGuard<'static, GenerationWarnings>, GenerationError> { pub fn generate_epub(
epub_args: EpubArgs,
book_url: Url,
output_directory: PathBuf,
) -> Result<MutexGuard<'static, GenerationWarnings>, GenerationError> {
let book = book::Book::new(book_url)?; let book = book::Book::new(book_url)?;
// Initialize the epub builder. // Initialize the epub builder.
let mut epub_builder = EpubBuilder::new(ZipLibrary::new().unwrap()).unwrap(); let mut epub_builder = EpubBuilder::new(ZipLibrary::new().unwrap()).unwrap();
// Add author and title metadata. // Add author and title metadata.
epub_builder.stylesheet(constants::EPUB_CSS.as_bytes()).unwrap(); // Use the epub_css in the constants.rs file. epub_builder
.stylesheet(constants::EPUB_CSS.as_bytes())
.unwrap(); // Use the epub_css in the constants.rs file.
epub_builder epub_builder
.metadata("author", &book.author) .metadata("author", &book.author)
.expect("Unable to add author metadata"); .expect("Unable to add author metadata");
@ -93,10 +108,13 @@ pub fn generate_epub(epub_args: EpubArgs, book_url: Url, output_directory: PathB
// Download the cover image & add it to the epub. // Download the cover image & add it to the epub.
let cover_image = http::get_response(book.cover_image_url)?; let cover_image = http::get_response(book.cover_image_url)?;
let (cover_mime_type, cover_file_extension) = cover_image.get_content_type_and_file_extension(); let (cover_mime_type, cover_file_extension) = cover_image.get_content_type_and_file_extension();
epub_builder.add_cover_image( epub_builder
.add_cover_image(
format!("cover.{cover_file_extension}"), format!("cover.{cover_file_extension}"),
cover_image.get_bytes()?.to_vec().as_slice(), cover_image.get_bytes()?.to_vec().as_slice(),
cover_mime_type).expect("Error! Unable to add cover image."); cover_mime_type,
)
.expect("Error! Unable to add cover image.");
// Generate the cover xhtml. // Generate the cover xhtml.
let cover_xhtml = format!( let cover_xhtml = format!(
@ -111,14 +129,20 @@ pub fn generate_epub(epub_args: EpubArgs, book_url: Url, output_directory: PathB
book.author, book.author,
chrono::Local::now().to_rfc3339_opts(chrono::SecondsFormat::Secs, false) chrono::Local::now().to_rfc3339_opts(chrono::SecondsFormat::Secs, false)
); );
let cover_xhtml = format!("{0}{cover_xhtml}{1}", constants::EPUB_XML_HEAD, constants::EPUB_XML_TAIL); let cover_xhtml = format!(
"{0}{cover_xhtml}{1}",
constants::EPUB_XML_HEAD,
constants::EPUB_XML_TAIL
);
// Add the cover xhtml to the epub. // Add the cover xhtml to the epub.
epub_builder.add_content( epub_builder
.add_content(
EpubContent::new("title.xhtml", cover_xhtml.as_bytes()) EpubContent::new("title.xhtml", cover_xhtml.as_bytes())
.title("Cover") .title("Cover")
.reftype(ReferenceType::Cover), .reftype(ReferenceType::Cover),
).expect("Error! Unable to add cover"); )
.expect("Error! Unable to add cover");
// Add a table of contents after the cover page. // Add a table of contents after the cover page.
epub_builder.inline_toc(); epub_builder.inline_toc();
@ -133,9 +157,12 @@ pub fn generate_epub(epub_args: EpubArgs, book_url: Url, output_directory: PathB
println!("\nDownloading and processing images:"); println!("\nDownloading and processing images:");
// Spawn a progress bar showing how many images have been downloaded & processed. // Spawn a progress bar showing how many images have been downloaded & processed.
let progress_bar = ProgressBar::new(book.image_urls_and_tags.keys().len().try_into().unwrap()); let progress_bar =
ProgressBar::new(book.image_urls_and_tags.keys().len().try_into().unwrap());
progress_bar.set_style( progress_bar.set_style(
ProgressStyle::with_template("[{elapsed_precise}] [{wide_bar:.cyan/blue}] {percent}% ") ProgressStyle::with_template(
"[{elapsed_precise}] [{wide_bar:.cyan/blue}] {percent}% ",
)
.unwrap() .unwrap()
.progress_chars("#>-"), .progress_chars("#>-"),
); );
@ -143,14 +170,21 @@ pub fn generate_epub(epub_args: EpubArgs, book_url: Url, output_directory: PathB
let mut i: usize = 0; let mut i: usize = 0;
for image_url in book.image_urls_and_tags.keys() { for image_url in book.image_urls_and_tags.keys() {
let image = http::get_response(image_url.clone())?; let image = http::get_response(image_url.clone())?;
let (image_mime_type, image_file_extension) = image.get_content_type_and_file_extension(); let (image_mime_type, image_file_extension) =
epub_builder.add_resource( image.get_content_type_and_file_extension();
epub_builder
.add_resource(
format!("image_{i}.{image_file_extension}"), format!("image_{i}.{image_file_extension}"),
image.get_bytes()?.to_vec().reader(), image.get_bytes()?.to_vec().reader(),
image_mime_type).expect("Error! Unable to add content image"); image_mime_type,
)
.expect("Error! Unable to add content image");
for image_tag in book.image_urls_and_tags[image_url].clone() { for image_tag in book.image_urls_and_tags[image_url].clone() {
old_tags_new_tags.insert(image_tag.clone(), html::replace_img_src(image_tag, format!("image_{i}.{image_file_extension}"))); old_tags_new_tags.insert(
image_tag.clone(),
html::replace_img_src(image_tag, format!("image_{i}.{image_file_extension}")),
);
} }
i += 1; i += 1;
@ -162,41 +196,66 @@ pub fn generate_epub(epub_args: EpubArgs, book_url: Url, output_directory: PathB
// Convert the html to xhtml and add the xhtml to the epub for each chapter. // Convert the html to xhtml and add the xhtml to the epub for each chapter.
for (i, chapter) in book.chapters.iter().enumerate() { for (i, chapter) in book.chapters.iter().enumerate() {
let xhtml: String; let xhtml: String;
if epub_args.no_images { if epub_args.no_images {
xhtml = html_to_xhtml(string_to_html_fragment(&remove_image_tags(&chapter.isolated_chapter_html)), &html2xhtml_temp_dir)? xhtml = html_to_xhtml(
} string_to_html_fragment(&remove_image_tags(&chapter.isolated_chapter_html)),
else { &html2xhtml_temp_dir,
)?
} else {
let mut replaced_html = chapter.isolated_chapter_html.html(); let mut replaced_html = chapter.isolated_chapter_html.html();
for old_img_tag in old_tags_new_tags.keys() { for old_img_tag in old_tags_new_tags.keys() {
replaced_html = replaced_html.replace(&old_img_tag.clone(), &old_tags_new_tags[old_img_tag]); replaced_html =
replaced_html.replace(&old_img_tag.clone(), &old_tags_new_tags[old_img_tag]);
} }
xhtml = html_to_xhtml(string_to_html_fragment(&replaced_html), &html2xhtml_temp_dir)?; xhtml = html_to_xhtml(
string_to_html_fragment(&replaced_html),
&html2xhtml_temp_dir,
)?;
} }
epub_builder.add_content(EpubContent::new(format!("chapter_{}.xhtml", i+1), xhtml.as_bytes()) epub_builder
.add_content(
EpubContent::new(format!("chapter_{}.xhtml", i + 1), xhtml.as_bytes())
.title(chapter.chapter_name.clone()) .title(chapter.chapter_name.clone())
.reftype(ReferenceType::Text)).expect("Error! Unable to add chapter"); .reftype(ReferenceType::Text),
)
.expect("Error! Unable to add chapter");
} }
// Generate the finished epub data as a byte vector. // Generate the finished epub data as a byte vector.
let mut finished_epub: Vec<u8> = vec![]; let mut finished_epub: Vec<u8> = vec![];
epub_builder.generate(&mut finished_epub).expect("Unable to generate epub data"); epub_builder
.generate(&mut finished_epub)
.expect("Unable to generate epub data");
// Create the epub file and write the finished epub data to it. // Create the epub file and write the finished epub data to it.
let output_path = convert_path_to_os_specific(output_directory.join(format!("{0}.epub", book.file_name_title))); let output_path = convert_path_to_os_specific(
let mut output_file = match OpenOptions::new().write(true).create_new(true).open(&output_path) { output_directory.join(format!("{0}.epub", book.file_name_title)),
);
let mut output_file = match OpenOptions::new()
.write(true)
.create_new(true)
.open(&output_path)
{
Ok(output_file) => output_file, Ok(output_file) => output_file,
Err(error) => { Err(error) => {
eprintln!("Error! Unable to create: {0}\n{error}", output_path.to_string_lossy()); eprintln!(
"Error! Unable to create: {0}\n{error}",
output_path.to_string_lossy()
);
exit(1); exit(1);
} },
}; };
output_file.write_all(finished_epub.as_slice()) output_file.write_all(finished_epub.as_slice()).expect(
.expect(format!("Unable to write finished epub data to {0}", output_path.to_string_lossy()).as_str()); format!(
"Unable to write finished epub data to {0}",
output_path.to_string_lossy()
)
.as_str(),
);
// Delete the html2xhtml temp directory. It's good to clean up after yourself. // Delete the html2xhtml temp directory. It's good to clean up after yourself.
file_system_crap::delete_temp_dir(html2xhtml_temp_dir); file_system_crap::delete_temp_dir(html2xhtml_temp_dir);
@ -208,7 +267,11 @@ pub fn generate_epub(epub_args: EpubArgs, book_url: Url, output_directory: PathB
/// ///
/// This function DOES NOT do any error checking on the Url or output directory & WILL panic if they are wrong. /// This function DOES NOT do any error checking on the Url or output directory & WILL panic if they are wrong.
/// Make sure the Url is valid and the output directory is writable BEFORE passing them to this. /// Make sure the Url is valid and the output directory is writable BEFORE passing them to this.
pub fn generate_html(html_args: HtmlArgs, book_url: Url, output_directory: PathBuf) -> Result<MutexGuard<'static, GenerationWarnings>, GenerationError> { pub fn generate_html(
html_args: HtmlArgs,
book_url: Url,
output_directory: PathBuf,
) -> Result<MutexGuard<'static, GenerationWarnings>, GenerationError> {
return Err(GenerationError::GenerationUnsupportedError); return Err(GenerationError::GenerationUnsupportedError);
} }
@ -216,17 +279,29 @@ pub fn generate_html(html_args: HtmlArgs, book_url: Url, output_directory: PathB
/// ///
/// This function DOES NOT do any error checking on the Url or output directory & WILL panic if they are wrong. /// This function DOES NOT do any error checking on the Url or output directory & WILL panic if they are wrong.
/// Make sure the Url is valid and the output directory is writable BEFORE passing them to this. /// Make sure the Url is valid and the output directory is writable BEFORE passing them to this.
pub fn generate_markdown(markdown_args: MarkdownArgs, book_url: Url, output_directory: PathBuf) -> Result<MutexGuard<'static, GenerationWarnings>, GenerationError> { pub fn generate_markdown(
markdown_args: MarkdownArgs,
book_url: Url,
output_directory: PathBuf,
) -> Result<MutexGuard<'static, GenerationWarnings>, GenerationError> {
let book = book::Book::new(book_url)?; let book = book::Book::new(book_url)?;
let output_path = convert_path_to_os_specific(output_directory.join(format!("{0}.md", book.file_name_title))); let output_path =
convert_path_to_os_specific(output_directory.join(format!("{0}.md", book.file_name_title)));
// Create the md file. This will crash if it already exists or can not be created. // Create the md file. This will crash if it already exists or can not be created.
let mut output_file = match OpenOptions::new().write(true).create_new(true).open(&output_path) { let mut output_file = match OpenOptions::new()
.write(true)
.create_new(true)
.open(&output_path)
{
Ok(output_file) => output_file, Ok(output_file) => output_file,
Err(error) => { Err(error) => {
return Err(GenerationError::FileCreationError{error, file_path: output_path}); return Err(GenerationError::FileCreationError {
} error,
file_path: output_path,
});
},
}; };
// Append the book title & author. // Append the book title & author.
@ -249,10 +324,15 @@ pub fn generate_markdown(markdown_args: MarkdownArgs, book_url: Url, output_dire
if markdown_args.no_image_tags { if markdown_args.no_image_tags {
// Remove image tags or not depending on args. // Remove image tags or not depending on args.
buf = format!("\n\n{}\n\n", html2md::parse_html(&html::remove_image_tags(&chapter.isolated_chapter_html))); buf = format!(
"\n\n{}\n\n",
html2md::parse_html(&html::remove_image_tags(&chapter.isolated_chapter_html))
);
} else { } else {
buf = format!("\n\n{}\n\n", html2md::parse_html(&chapter.isolated_chapter_html.html())); buf = format!(
"\n\n{}\n\n",
html2md::parse_html(&chapter.isolated_chapter_html.html())
);
} }
output_file.write_all(buf.as_bytes()).unwrap(); output_file.write_all(buf.as_bytes()).unwrap();
@ -266,7 +346,10 @@ pub fn generate_markdown(markdown_args: MarkdownArgs, book_url: Url, output_dire
pub enum GenerationError { pub enum GenerationError {
/// Represents errors during file creation. /// Represents errors during file creation.
#[error("Unable to create file: {file_path}\n{error}")] #[error("Unable to create file: {file_path}\n{error}")]
FileCreationError{error: std::io::Error, file_path: PathBuf}, FileCreationError {
error: std::io::Error,
file_path: PathBuf,
},
/// Represents errors when getting a Response from a Url. /// Represents errors when getting a Response from a Url.
#[error("Unable to get response for: {url}\n{error}")] #[error("Unable to get response for: {url}\n{error}")]
@ -282,7 +365,10 @@ pub enum GenerationError {
/// Represents errors when trying to parse a String to a Url. /// Represents errors when trying to parse a String to a Url.
#[error("Unable to parse a valid Url from: {string_url}\n{error}")] #[error("Unable to parse a valid Url from: {string_url}\n{error}")]
UrlParseError{error: url::ParseError, string_url: String}, UrlParseError {
error: url::ParseError,
string_url: String,
},
/// Represents io errors when trying to create a temporary directory. /// Represents io errors when trying to create a temporary directory.
#[error("Unable to create temporary directory: {error}")] #[error("Unable to create temporary directory: {error}")]
@ -327,9 +413,10 @@ pub enum GenerationError {
GenerationUnsupportedError, GenerationUnsupportedError,
} }
/// A struct that contains a vector of warnings. /// A struct that contains a vector of warnings.
pub struct GenerationWarnings{warnings: Vec<Warning>} pub struct GenerationWarnings {
warnings: Vec<Warning>,
}
impl GenerationWarnings { impl GenerationWarnings {
fn new() -> Self { fn new() -> Self {
@ -378,5 +465,5 @@ pub enum Warning {
warning_msg: String, warning_msg: String,
raw_image_tag: String, raw_image_tag: String,
error: url::ParseError, error: url::ParseError,
} },
} }

View file

@ -6,15 +6,18 @@ pub trait HashMapExt<K> {
fn join(self, new_hashmap: HashMap<K, Vec<String>>) -> HashMap<K, Vec<String>>; fn join(self, new_hashmap: HashMap<K, Vec<String>>) -> HashMap<K, Vec<String>>;
} }
impl<K: std::cmp::Eq + std::hash::Hash + std::clone::Clone> HashMapExt<K>
impl<K: std::cmp::Eq + std::hash::Hash + std::clone::Clone> HashMapExt<K> for HashMap<K, Vec<String>> { for HashMap<K, Vec<String>>
{
fn join(mut self, other_hashmap: HashMap<K, Vec<String>>) -> HashMap<K, Vec<String>> { fn join(mut self, other_hashmap: HashMap<K, Vec<String>>) -> HashMap<K, Vec<String>> {
// I am well aware that this function is dogshit for performance; but tbh I don't give enough of a shit to do anything about it. // I am well aware that this function is dogshit for performance; but tbh I don't give enough of a shit to do anything about it.
for key in other_hashmap.keys() { for key in other_hashmap.keys() {
if self.contains_key(key) { if self.contains_key(key) {
for string in &other_hashmap[key] { for string in &other_hashmap[key] {
if self[key].contains(string) { continue; } // Avoid repeating strings in the vectors. if self[key].contains(string) {
continue;
} // Avoid repeating strings in the vectors.
} }
let mut self_vector = self[key].clone(); let mut self_vector = self[key].clone();
@ -23,8 +26,7 @@ impl<K: std::cmp::Eq + std::hash::Hash + std::clone::Clone> HashMapExt<K> for Ha
self_vector.append(&mut other_vector); self_vector.append(&mut other_vector);
self.insert(key.clone(), self_vector); self.insert(key.clone(), self_vector);
} } else {
else {
self.insert(key.clone(), other_hashmap[key].clone()); self.insert(key.clone(), other_hashmap[key].clone());
} }
} }