From f6725fafcf10a819bba9f8aff1293c905724b14c Mon Sep 17 00:00:00 2001 From: NA Date: Sun, 28 Jan 2024 18:18:40 +0000 Subject: [PATCH] Updated to v1.0.1, redid all the error handling, made the library better. --- .vscode/settings.json | 1 + Cargo.lock | 4 +- Cargo.toml | 7 +- TODO | 1 + src/binary.rs | 18 ++++- src/book.rs | 36 +++++---- src/file_system_crap.rs | 57 +++++++------- src/html.rs | 60 +++++++-------- src/http.rs | 48 ++++++------ src/library.rs | 167 +++++++++++++++++++++++++++++++++++----- src/misc.rs | 18 ++++- 11 files changed, 291 insertions(+), 126 deletions(-) create mode 100644 TODO diff --git a/.vscode/settings.json b/.vscode/settings.json index 74666fd..10da6ac 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -20,6 +20,7 @@ "reqwest", "royalroad", "tempdir", + "thiserror", "ureq" ] } \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index 2516172..9418506 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1522,7 +1522,7 @@ dependencies = [ [[package]] name = "royal_road_archiver" -version = "0.1.1" +version = "1.0.1" dependencies = [ "bytes", "chrono", @@ -1530,12 +1530,14 @@ dependencies = [ "epub-builder", "html2md", "indicatif", + "lazy_static", "path-slash", "regex", "reqwest", "scraper", "serde_json", "tempdir", + "thiserror", "url", "zip-extract", ] diff --git a/Cargo.toml b/Cargo.toml index 5797578..0ab148e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,9 +1,10 @@ [package] name = "royal_road_archiver" -version = "0.1.1" +version = "1.0.1" edition = "2021" description = "An archival program and library for the webnovel site RoyalRoad." -license = "EUPL-1.2 " +repository = "https://github.com/Raine-gay/royal_road_archiver" +license = "EUPL-1.2" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html @@ -29,11 +30,13 @@ clap = { version = "4.4.18", features = ["derive"] } epub-builder = "0.7.4" html2md = "0.2.14" indicatif = "0.17.7" +lazy_static = "1.4.0" path-slash = "0.2.1" regex = "1.10.3" reqwest = { version = "0.11.23", features = ["blocking", "rustls"] } scraper = "0.18.1" serde_json = "1.0.111" tempdir = "0.3.7" +thiserror = "1.0.56" url = "2.5.0" zip-extract = "0.1.3" diff --git a/TODO b/TODO new file mode 100644 index 0000000..6632d59 --- /dev/null +++ b/TODO @@ -0,0 +1 @@ +Make images download to disk then read from there to avoid high ram usage when processing books that are image heavy. \ No newline at end of file diff --git a/src/binary.rs b/src/binary.rs index 4ffde8a..7bd6425 100644 --- a/src/binary.rs +++ b/src/binary.rs @@ -1,6 +1,7 @@ use std::{env, fs, path::{Path, PathBuf}, process::exit}; use clap::{Parser, Subcommand}; +use royal_road_archiver_lib::GenerationError; use url::Url; #[derive(clap::Parser, Debug)] @@ -53,11 +54,26 @@ fn main() { valid_directory_check(&output_directory); let book_url = valid_url_check(&cli_input.book_url.to_lowercase()); - match cli_input.subcommand { + let result: Result, GenerationError> = match cli_input.subcommand { Subcommands::Audiobook(audiobook_args) => royal_road_archiver_lib::generate_audiobook(audiobook_args, book_url, output_directory), Subcommands::Epub(epub_args) => royal_road_archiver_lib::generate_epub(epub_args, book_url, output_directory), Subcommands::Html(html_args) => royal_road_archiver_lib::generate_html(html_args, book_url, output_directory), Subcommands::Markdown(markdown_args) => royal_road_archiver_lib::generate_markdown(markdown_args, book_url, output_directory), + }; + + match result { + Ok(generation_warnings) => { + if !&generation_warnings.warnings_count() == 0 { + + println!("The following warnings were generated:"); + for warning in generation_warnings.get_warnings() { + println!("\n{warning}"); + } + } + }, + Err(generation_error) => { + eprintln!("{}", generation_error); + } } } diff --git a/src/book.rs b/src/book.rs index bed16cb..94e7bfe 100644 --- a/src/book.rs +++ b/src/book.rs @@ -1,7 +1,7 @@ use std::collections::HashMap; use indicatif::{ProgressBar, ProgressStyle}; -use crate::misc::HashMapExt; +use crate::{misc::HashMapExt, GenerationError}; use scraper::Html; use url::Url; @@ -37,10 +37,10 @@ pub struct Book { impl Book { /// Generate a new book instance with all the needed data from a given url. - pub fn new(book_url: Url) -> Book { - let index_html = html::string_to_html_document(&http::get_response(book_url.clone()).get_text()); + pub fn new(book_url: Url) -> Result { + let index_html = html::string_to_html_document(&http::get_response(book_url.clone())?.get_text()?); - let chapter_names_and_urls = html::get_chapter_names_and_urls_from_index(&index_html); + let chapter_names_and_urls = html::get_chapter_names_and_urls_from_index(&index_html, &book_url)?; let mut chapters: Vec = Vec::with_capacity(chapter_names_and_urls.len()); let mut image_urls_and_tags: HashMap> = HashMap::new(); @@ -56,7 +56,7 @@ impl Book { // Generate the chapters and add em to the book. for i in 0..chapter_names_and_urls.len() { - let chapter = Chapter::new(&chapter_names_and_urls[i][0], &chapter_names_and_urls[i][1]); + let chapter = Chapter::new(&chapter_names_and_urls[i].0, &chapter_names_and_urls[i].1)?; // extract the image urls and add em to the image_urls_and_tags hashmap. image_urls_and_tags = image_urls_and_tags.join(html::extract_urls_and_img_tag(&chapter.isolated_chapter_html)); @@ -68,18 +68,20 @@ impl Book { progress_bar.finish(); - let title = html::get_title_from_index(&index_html); + let title = html::get_title_from_index(&index_html, &book_url)?; - Book { + let book = Book { + author: html::get_author_from_index(&index_html, &book_url)?, + cover_image_url: html::get_cover_image_url_from_index(&index_html, &book_url)?, book_url: book_url, title: title.clone(), file_name_title: remove_illegal_chars(title), - author: html::get_author_from_index(&index_html), - cover_image_url: http::string_to_url(&html::get_cover_image_url_from_index(&index_html)), index_html: index_html, chapters: chapters, image_urls_and_tags: image_urls_and_tags, - } + }; + + return Ok(book); } /// Count how many paragraphs are in the book. @@ -105,15 +107,17 @@ pub struct Chapter { } impl Chapter { - fn new(chapter_name: &str, chapter_url: &str) -> Self { - let chapter_url = http::string_to_url(&chapter_url); - let raw_chapter_html = html::string_to_html_document(&http::get_response(chapter_url.clone()).get_text()); + fn new(chapter_name: &str, chapter_url: &str) -> Result { + let chapter_url = http::string_to_url(&chapter_url)?; + let raw_chapter_html = html::string_to_html_document(&http::get_response(chapter_url.clone())?.get_text()?); - Chapter { + let chapter = Chapter { + isolated_chapter_html: html::isolate_chapter_content(&raw_chapter_html, &chapter_url)?, chapter_url: chapter_url, chapter_name: chapter_name.to_string(), - isolated_chapter_html: html::isolate_chapter_content(&raw_chapter_html), raw_chapter_html: raw_chapter_html, - } + }; + + return Ok(chapter); } } \ No newline at end of file diff --git a/src/file_system_crap.rs b/src/file_system_crap.rs index 36aaaf0..54be4ad 100644 --- a/src/file_system_crap.rs +++ b/src/file_system_crap.rs @@ -1,8 +1,10 @@ -use std::{io::Cursor, path::PathBuf, process::exit}; +use std::{io::Cursor, path::PathBuf}; use path_slash::PathBufExt as _; use tempdir::TempDir; +use crate::{misc, GenerationError, Warning, WARNINGS}; + /// Converts a given path to windows style if needed. pub fn convert_path_to_os_specific(path: PathBuf) -> PathBuf { // If target os is windows. @@ -28,65 +30,60 @@ pub fn remove_illegal_chars(mut string: String) -> String { } /// Setup html2xhtml in the operating system's temp directory. -pub fn setup_html2xhtml() -> TempDir { +pub fn setup_html2xhtml() -> Result { #[cfg(target_os = "windows")] { const HTML2XHTML: &[u8; 245025] = include_bytes!("../html2xhtml-windows.zip"); // This will not compile on windows due to this and no I don't give a shit. // Compile it on linux for windows like a sane person. - let html2xhtml_dir = match TempDir::new("html2xhtml-windows") { + let html2xhtml_temp_dir = match TempDir::new("html2xhtml-windows") { Ok(temp_dir) => temp_dir, - Err(error) => { - eprintln!("Error! Unable to create temp directory: {error}"); - exit(1); - } + Err(error) => return Err(GenerationError::TempDirCreationError {error}), }; - match zip_extract::extract(Cursor::new(HTML2XHTML), html2xhtml_dir.path(), true) { + match zip_extract::extract(Cursor::new(HTML2XHTML), html2xhtml_temp_dir.path(), true) { Ok(_) => (), - Err(error) => { - eprintln!("Error! Unable to extract html2xhtml into into the temp directory\n{error}"); - exit(1); - } + Err(error) => return Err(GenerationError::Html2XhtmlExtractionError {error}), } - return html2xhtml_dir; + return Ok(html2xhtml_temp_dir); } #[cfg(target_os = "linux")] { const HTML2XHTML: &[u8; 186938] = include_bytes!("../html2xhtml-linux.zip"); - let html2xhtml_dir = match TempDir::new("html2xhtml-linux") { + let html2xhtml_temp_dir = match TempDir::new("html2xhtml-linux") { Ok(temp_dir) => temp_dir, - Err(error) => { - eprintln!("Error! Unable to create temp directory: {error}"); - exit(1); - } + Err(error) => return Err(GenerationError::TempDirCreationError {error}), }; - match zip_extract::extract(Cursor::new(HTML2XHTML), html2xhtml_dir.path(), true) { + match zip_extract::extract(Cursor::new(HTML2XHTML), html2xhtml_temp_dir.path(), true) { Ok(_) => (), - Err(error) => { - eprintln!("Error! Unable to extract html2xhtml into the temp directory\n{error}"); - exit(1); - } + Err(error) => return Err(GenerationError::Html2XhtmlExtractionError {error}), } - return html2xhtml_dir; + return Ok(html2xhtml_temp_dir); } #[cfg(target_os = "macos")] { - // TODO! - // You can find the macos tempdir by doing: echo $TMPDIR - - eprint!("Error! This mode does not currently support MacOS. Try either html mode or markdown mode."); - exit(1); + Err(GenerationError::OsUnsupportedError {os: misc::Oses::MacOs}) } + + // In the event the OS is unknown. + #[allow(unreachable_code)] + Err(GenerationError::OsUnsupportedError {os: misc::Oses::OtherUnknownOs}) } /// Delete html2xhtml from the operating system's temp directory. pub fn delete_html2xhtml(html2xhtml_dir: TempDir) { + let temp_dir_path = html2xhtml_dir.path().to_path_buf(); + match html2xhtml_dir.close() { Ok(_) => (), Err(warning) => { - eprintln!("Warning! Unable to close & delete temp directory: {warning}"); + let warning = Warning::TempDirDeletionError { + warning_msg: "Unable to close and delete temp directory".to_string(), + temp_directory_path: temp_dir_path, + error: warning + }; + WARNINGS.lock().unwrap().add_warning(warning); } } } \ No newline at end of file diff --git a/src/html.rs b/src/html.rs index cb0fc08..5c5c185 100644 --- a/src/html.rs +++ b/src/html.rs @@ -1,11 +1,11 @@ -use std::{collections::HashMap, io::Write, process::{exit, Command, Stdio}}; +use std::{collections::HashMap, io::Write, process::{Command, Stdio}}; use regex::Regex; use scraper::{Html, Selector}; use tempdir::TempDir; use url::Url; -use crate::misc::HashMapExt; +use crate::{http, misc::HashMapExt, GenerationError, Warning, WARNINGS}; /// Convert a string to an html document. pub fn string_to_html_document(document_string: &str) -> Html { @@ -18,7 +18,7 @@ pub fn string_to_html_fragment(fragment_string: &str) -> Html { } /// Get the book's title from the index. -pub fn get_title_from_index(index_html: &Html) -> String { +pub fn get_title_from_index(index_html: &Html, book_url: &Url) -> Result { let selector = Selector::parse("meta").unwrap(); // Build a selector that finds the 'meta' html tag for element in index_html.select(&selector) { // Loop through all meta tags in the html document. @@ -28,58 +28,55 @@ pub fn get_title_from_index(index_html: &Html) -> String { Some(x) => { if x == "twitter:title" { // If it does contain attribute "name", check if the content of that attribute is "twitter:title" - return element.value().attr("content").unwrap().to_owned(); + return Ok(element.value().attr("content").unwrap().to_owned()); // If it is, extract the data from the content attribute. } } } } - eprintln!("Error! Unable to find book title. Royal road have probably changed their front-end code. Please report this to me on:\nhttps://github.com/Raine-gay/royal_road_archiver"); - exit(1); + Err(GenerationError::BookTitleFetchError{url: book_url.clone()}) } /// Get the book's author from index -pub fn get_author_from_index(index_html: &Html) -> String { +pub fn get_author_from_index(index_html: &Html, book_url: &Url) -> Result { let selector = Selector::parse("meta").unwrap(); for element in index_html.select(&selector) { match element.value().attr("property") { None => continue, Some(x) => { if x == "books:author" { - return element.value().attr("content").unwrap().to_owned(); + return Ok(element.value().attr("content").unwrap().to_owned()); } } } } - eprintln!("Error! Unable to find book author. Royal road have probably changed their front-end code. Please report this to me on:\nhttps://github.com/Raine-gay/royal_road_archiver"); - exit(1); + Err(GenerationError::BookAuthorFetchError{url: book_url.clone()}) } /// Get the book's cover image url from the index -pub fn get_cover_image_url_from_index(index_html: &Html) -> String { +pub fn get_cover_image_url_from_index(index_html: &Html, book_url: &Url) -> Result { let selector = Selector::parse("meta").unwrap(); for element in index_html.select(&selector) { match element.value().attr("property") { None => continue, Some(x) => { if x == "og:image" { - return element.value().attr("content").unwrap().to_owned(); + return http::string_to_url(element.value().attr("content").unwrap()); } } } } - eprintln!("Error! Unable to find cover image url. Royal road have probably changed their front-end code. Please report this to me on:\nhttps://github.com/Raine-gay/royal_road_archiver"); - exit(1); + Err(GenerationError::BookCoverImageUrlFetchError{url: book_url.clone()}) } /// Gets the chapter names and urls from the index. /// /// This gets stored in a vector where index 0 is the chapter name, and index 1 is the url. -pub fn get_chapter_names_and_urls_from_index(index_html: &Html) -> Vec<[String; 2]> { +pub fn get_chapter_names_and_urls_from_index(index_html: &Html, book_url: &Url) -> Result, GenerationError> { // I wont lie. I have almost 0 idea what a bunch of this shit does since it's highly specific to RoyalRoad. // I've commented in the gist of it, but we have no memory actually writing this function. - let mut chapters: Vec<[String; 2]> = Vec::new(); + let mut chapters: Vec<(String, String)> = Vec::new(); let mut raw_json_data = String::new(); // Find a script tag that has "window.chapters" inside the inner html. This is all in json format. @@ -92,8 +89,7 @@ pub fn get_chapter_names_and_urls_from_index(index_html: &Html) -> Vec<[String; } // Exit it if unable to find the needed json data. That probably means royal road has changed their code. if raw_json_data.is_empty() { - eprintln!("Error! Unable to find json chapter data. Royal road have probably changed their front-end code. Please report this to me on:\nhttps://github.com/Raine-gay/royal_road_archiver"); - exit(1); + return Err(GenerationError::BookChapterNameAndUrlFetchError { url: book_url.clone()}); } // I have absolutely no idea what this regex does; but it's probably important. @@ -118,15 +114,15 @@ pub fn get_chapter_names_and_urls_from_index(index_html: &Html) -> Vec<[String; chapter["url"].to_string().replace('"', "") ); - chapters.push([chapter_name, url]); + chapters.push((chapter_name, url)); } // Return that wanker. - return chapters; + return Ok(chapters); } /// Isolate chapter content from the rest of the shit on the page. -pub fn isolate_chapter_content(raw_chapter_html: &Html) -> Html { +pub fn isolate_chapter_content(raw_chapter_html: &Html, chapter_url: &Url) -> Result { let page_html = Html::parse_document(&raw_chapter_html.html()); let selector = Selector::parse("div").unwrap(); @@ -135,13 +131,12 @@ pub fn isolate_chapter_content(raw_chapter_html: &Html) -> Html { None => continue, Some(x) => { if x == "chapter-inner chapter-content" { - return string_to_html_fragment(&element.inner_html()); + return Ok(string_to_html_fragment(&element.inner_html())); } } } } - eprintln!("Error! Unable to isolate chapter content"); - exit(1); + Err(GenerationError::ChapterContentIsolationError{url: chapter_url.clone()}) } /// Remove all img tags from the html fragment. @@ -178,7 +173,13 @@ pub fn extract_urls_and_img_tag(chapter_html: &Html) -> HashMap let url = match Url::parse(url.unwrap()) { Ok(url) => url, Err(warning) => { - eprintln!("Warning! Unable to parse url on image tag: {image_tag}\n{warning}"); + let warning = Warning::ImageTagParseError { + warning_msg: "Unable to parse url in image tag".to_string(), + raw_image_tag: image_tag, + error: warning, + }; + WARNINGS.lock().unwrap().add_warning(warning); + continue; }, }; @@ -213,7 +214,7 @@ pub fn replace_img_src(img_tag: String, new_src: String) -> String { } /// Convert a given html dom into xhtml. -pub fn html_to_xhtml(html: Html, html2xhtml_dir: &TempDir) -> String { +pub fn html_to_xhtml(html: Html, html2xhtml_dir: &TempDir) -> Result { #[cfg(target_os = "windows")] const HTML2XHTML_ENTRY: &str = "html2xhtml.exe"; @@ -233,10 +234,7 @@ pub fn html_to_xhtml(html: Html, html2xhtml_dir: &TempDir) -> String { .spawn() { Ok(child) => child, - Err(error) => { - eprintln!("Error! Unable to start html2xhtml: {error}"); - exit(1); - }, + Err(error) => return Err(GenerationError::Html2XhtmlStartError{error}), }; // Write the html to the stdin, then wait for xhtml to be outputted to the stdout. @@ -246,5 +244,5 @@ pub fn html_to_xhtml(html: Html, html2xhtml_dir: &TempDir) -> String { // Generate a lossy string from the stdout. let xhtml = String::from_utf8_lossy(&html2xhtml_output.stdout).to_string(); - return xhtml; + return Ok(xhtml); } \ No newline at end of file diff --git a/src/http.rs b/src/http.rs index 317424d..54d347d 100644 --- a/src/http.rs +++ b/src/http.rs @@ -1,8 +1,10 @@ -use std::{collections::HashMap, process::exit}; +use std::collections::HashMap; use reqwest::{blocking::Response, header::HeaderMap}; use url::Url; +use crate::{GenerationError, Warning, WARNINGS}; + // A struct representing an HttpResponse and the Url it originated from. pub struct HttpResponse { url: Url, @@ -16,24 +18,18 @@ impl HttpResponse { } /// Attempt to convert the response to text. Exits the program if it fails. - pub fn get_text(self) -> String { + pub fn get_text(self) -> Result { match self.response.text() { - Ok(response_text) => response_text, - Err(error) => { - eprintln!("Error! Unable to convert response from {0} into text\n{error}", self.url); - exit(1); - } + Ok(response_text) => Ok(response_text), + Err(error) => Err(GenerationError::ResponseConvertToTextError {error}), } } /// Attempt to convert the response to bytes. Used for images. Exits the program if it fails. - pub fn get_bytes(self) -> bytes::Bytes{ + pub fn get_bytes(self) -> Result{ match self.response.bytes() { - Ok(response_bytes) => response_bytes, - Err(error) => { - eprintln!("Error! Unable to convert response from {0} into bytes\n{error}", self.url); - exit(1); - } + Ok(response_bytes) => Ok(response_bytes), + Err(error) => Err(GenerationError::ResponseConvertToBytesError {error}), } } @@ -52,7 +48,13 @@ impl HttpResponse { let content_type = match self.get_headers()["content-type"].to_str() { Ok(content_type) => content_type, Err(warning) => { - eprintln!("Warning! Unable to get content type from the http-header: {warning}"); + let warning = Warning::MissingContentType { + warning_msg: "Unable to find or parse the content-type header".to_string(), + url: self.url.clone(), + error: warning, + }; + WARNINGS.lock().unwrap().add_warning(warning); + return (String::with_capacity(0), String::with_capacity(0)); } }; @@ -67,25 +69,19 @@ impl HttpResponse { } /// Get an http response for a given url. Exits the program if it fails. -pub fn get_response(url: Url) -> HttpResponse { +pub fn get_response(url: Url) -> Result { let response_result = reqwest::blocking::get(url.clone()); match response_result { - Ok(response) => HttpResponse { url, response }, - Err(error) => { - eprintln!("Error! Unable to get a response from: {url}\n{error}"); - exit(1); - }, + Ok(response) => Ok(HttpResponse { url, response }), + Err(error) => return Err(GenerationError::ResponseGetError {error, url}), } } /// A function to convert a string to a url. Exits the program if it fails. -pub fn string_to_url(url: &str) -> Url { +pub fn string_to_url(url: &str) -> Result { match Url::parse(url) { - Ok(url) => url, - Err(error) => { - eprintln!("Error! Unable to parse: {url} into a valid url.\n{error}"); - exit(1); - } + Ok(url) => Ok(url), + Err(error) => Err(GenerationError::UrlParseError {error, string_url: url.to_string()}), } } \ No newline at end of file diff --git a/src/library.rs b/src/library.rs index aab8a6d..fa8482e 100644 --- a/src/library.rs +++ b/src/library.rs @@ -1,4 +1,4 @@ -use std::{collections::HashMap, fs::OpenOptions, io::Write, path::PathBuf, process::exit}; +use std::{collections::HashMap, fs::OpenOptions, io::Write, path::PathBuf, process::exit, sync::{Mutex, MutexGuard}}; use bytes::Buf; use chrono::prelude::Local; @@ -6,7 +6,11 @@ use clap::Args; use epub_builder::{EpubBuilder, EpubContent, ReferenceType, ZipLibrary}; use file_system_crap::convert_path_to_os_specific; use html::{html_to_xhtml, remove_image_tags, string_to_html_fragment}; +use lazy_static::lazy_static; use indicatif::{ProgressBar, ProgressStyle}; +use misc::Oses; +use reqwest::header::ToStrError; +use thiserror::Error; use url::Url; mod book; @@ -55,20 +59,24 @@ pub struct MarkdownArgs { pub no_image_tags: bool, } +lazy_static! { + static ref WARNINGS: Mutex = Mutex::new(GenerationWarnings::new()); +} + /// Generate an audiobook from the given arguments, url, & outputs it to the output directory. /// /// This function DOES NOT do any error checking on the Url or output directory & WILL panic if they are wrong. /// Make sure the Url is valid and the output directory is writable BEFORE passing them to this. -pub fn generate_audiobook(audiobook_args: AudiobookArgs, book_url: Url, output_directory: PathBuf) { - eprintln!("This is not implemented yet."); +pub fn generate_audiobook(audiobook_args: AudiobookArgs, book_url: Url, output_directory: PathBuf) -> Result, GenerationError> { + return Err(GenerationError::GenerationUnsupportedError); } /// Generate an epub file from the given arguments, url, & outputs it to the output directory. /// /// This function DOES NOT do any error checking on the Url or output directory & WILL panic if they are wrong. /// Make sure the Url is valid and the output directory is writable BEFORE passing them to this. -pub fn generate_epub(epub_args: EpubArgs, book_url: Url, output_directory: PathBuf) { - let book = book::Book::new(book_url); +pub fn generate_epub(epub_args: EpubArgs, book_url: Url, output_directory: PathBuf) -> Result, GenerationError> { + let book = book::Book::new(book_url)?; // Initialize the epub builder. let mut epub_builder = EpubBuilder::new(ZipLibrary::new().unwrap()).unwrap(); @@ -83,11 +91,11 @@ pub fn generate_epub(epub_args: EpubArgs, book_url: Url, output_directory: PathB .expect("Unable to add title metadata"); // Download the cover image & add it to the epub. - let cover_image = http::get_response(book.cover_image_url); + let cover_image = http::get_response(book.cover_image_url)?; let (cover_mime_type, cover_file_extension) = cover_image.get_content_type_and_file_extension(); epub_builder.add_cover_image( format!("cover.{cover_file_extension}"), - cover_image.get_bytes().to_vec().as_slice(), + cover_image.get_bytes()?.to_vec().as_slice(), cover_mime_type).expect("Error! Unable to add cover image."); // Generate the cover xhtml. @@ -116,7 +124,7 @@ pub fn generate_epub(epub_args: EpubArgs, book_url: Url, output_directory: PathB epub_builder.inline_toc(); // Setup html2xhtml on the operating system. - let html2xhtml_dir = file_system_crap::setup_html2xhtml(); + let html2xhtml_dir = file_system_crap::setup_html2xhtml()?; let mut old_tags_new_tags: HashMap = HashMap::new(); @@ -134,11 +142,11 @@ pub fn generate_epub(epub_args: EpubArgs, book_url: Url, output_directory: PathB let mut i: usize = 0; for image_url in book.image_urls_and_tags.keys() { - let image = http::get_response(image_url.clone()); + let image = http::get_response(image_url.clone())?; let (image_mime_type, image_file_extension) = image.get_content_type_and_file_extension(); epub_builder.add_resource( format!("image_{i}.{image_file_extension}"), - image.get_bytes().to_vec().reader(), + image.get_bytes()?.to_vec().reader(), image_mime_type).expect("Error! Unable to add content image"); for image_tag in book.image_urls_and_tags[image_url].clone() { @@ -157,7 +165,7 @@ pub fn generate_epub(epub_args: EpubArgs, book_url: Url, output_directory: PathB let xhtml: String; if epub_args.no_images { - xhtml = html_to_xhtml(string_to_html_fragment(&remove_image_tags(&chapter.isolated_chapter_html)), &html2xhtml_dir) + xhtml = html_to_xhtml(string_to_html_fragment(&remove_image_tags(&chapter.isolated_chapter_html)), &html2xhtml_dir)? } else { let mut replaced_html = chapter.isolated_chapter_html.html(); @@ -165,7 +173,7 @@ pub fn generate_epub(epub_args: EpubArgs, book_url: Url, output_directory: PathB replaced_html = replaced_html.replace(&old_img_tag.clone(), &old_tags_new_tags[old_img_tag]); } - xhtml = html_to_xhtml(string_to_html_fragment(&replaced_html), &html2xhtml_dir); + xhtml = html_to_xhtml(string_to_html_fragment(&replaced_html), &html2xhtml_dir)?; } epub_builder.add_content(EpubContent::new(format!("chapter_{}.xhtml", i+1), xhtml.as_bytes()) @@ -192,22 +200,24 @@ pub fn generate_epub(epub_args: EpubArgs, book_url: Url, output_directory: PathB // Delete the html2xhtml temp directory. It's good to clean up after yourself. file_system_crap::delete_html2xhtml(html2xhtml_dir); + + Ok(WARNINGS.lock().unwrap()) } /// Generate an html archive from the given arguments, url, & outputs it to the output directory. /// /// This function DOES NOT do any error checking on the Url or output directory & WILL panic if they are wrong. /// Make sure the Url is valid and the output directory is writable BEFORE passing them to this. -pub fn generate_html(html_args: HtmlArgs, book_url: Url, output_directory: PathBuf) { - eprintln!("This is not implemented yet."); +pub fn generate_html(html_args: HtmlArgs, book_url: Url, output_directory: PathBuf) -> Result, GenerationError> { + return Err(GenerationError::GenerationUnsupportedError); } /// Generate a markdown file from the given arguments, url, & outputs it to the output directory. /// /// This function DOES NOT do any error checking on the Url or output directory & WILL panic if they are wrong. /// Make sure the Url is valid and the output directory is writable BEFORE passing them to this. -pub fn generate_markdown(markdown_args: MarkdownArgs, book_url: Url, output_directory: PathBuf) { - let book = book::Book::new(book_url); +pub fn generate_markdown(markdown_args: MarkdownArgs, book_url: Url, output_directory: PathBuf) -> Result, GenerationError> { + let book = book::Book::new(book_url)?; let output_path = convert_path_to_os_specific(output_directory.join(format!("{0}.md", book.file_name_title))); @@ -215,8 +225,7 @@ pub fn generate_markdown(markdown_args: MarkdownArgs, book_url: Url, output_dire let mut output_file = match OpenOptions::new().write(true).create_new(true).open(&output_path) { Ok(output_file) => output_file, Err(error) => { - eprintln!("Error! Unable to create: {0}\n{error}", output_path.to_string_lossy()); - exit(1); + return Err(GenerationError::FileCreationError{error, file_path: output_path}); } }; @@ -248,4 +257,126 @@ pub fn generate_markdown(markdown_args: MarkdownArgs, book_url: Url, output_dire output_file.write_all(buf.as_bytes()).unwrap(); } + + Ok(WARNINGS.lock().unwrap()) +} + +/// An error struct representing all the documented errors that can occur while archiving a RoyalRoad webnovel. +#[derive(Error, Debug)] +pub enum GenerationError { + /// Represents errors during file creation. + #[error("Unable to create file: {file_path}\n{error}")] + FileCreationError{error: std::io::Error, file_path: PathBuf}, + + /// Represents errors when getting a Response from a Url. + #[error("Unable to get response for: {url}\n{error}")] + ResponseGetError{error: reqwest::Error, url: Url}, + + /// Represents errors when converting a Response to a String. + #[error("Unable to convert response to text: {error}")] + ResponseConvertToTextError{error: reqwest::Error}, + + /// Represents errors when converting a Response to Bytes. + #[error("Unable to convert response to bytes: {error}")] + ResponseConvertToBytesError{error: reqwest::Error}, + + /// Represents errors when trying to parse a String to a Url. + #[error("Unable to parse a valid Url from: {string_url}\n{error}")] + UrlParseError{error: url::ParseError, string_url: String}, + + /// Represents io errors when trying to create a temporary directory. + #[error("Unable to create temporary directory: {error}")] + TempDirCreationError{error: std::io::Error}, + + /// Represents an error when trying to extract the html2xhtml binaries into the temporary directory. + #[error("Unable to extract html2xhtml into the temporary directory: {error}")] + Html2XhtmlExtractionError{error: zip_extract::ZipExtractError}, + + /// Represents an error when trying to start html2xhtml. + #[error("Unable to start html2xhtml: {error}")] + Html2XhtmlStartError{error: std::io::Error}, + + /// Represents an error when trying to find the book title. + #[error("Unable to fetch the book title for: {url}")] + BookTitleFetchError{url: Url}, + + /// Represents an error when trying to find the book author. + #[error("Unable to fetch the book author for: {url}")] + BookAuthorFetchError{url: Url}, + + /// Represents an error when trying to find the book cover image url. + #[error("Unable to fetch the book cover image url: {url}")] + BookCoverImageUrlFetchError{url: Url}, + + /// Represents an error when trying to find the chapter names and urls. + /// + /// This typically occurs due to RoyalRoad changing their json scheme. + #[error("Unable to fetch the chapter names and urls for: {url}")] + BookChapterNameAndUrlFetchError{url: Url}, + + /// Represents an error when trying to isolate the chapter content. + #[error("Unable to isolate chapter content for: {url}")] + ChapterContentIsolationError{url: Url}, + + /// Represents an error for when the target os is unsupported. + #[error("{os} is unsupported")] + OsUnsupportedError{os: Oses}, + + /// Represents an error that shows the generation method is unsupported. + #[error("This generation mode is currently unsupported")] + GenerationUnsupportedError, +} + + +/// A struct that contains a vector of warnings. +pub struct GenerationWarnings{warnings: Vec} + +impl GenerationWarnings { + fn new() -> Self { + GenerationWarnings { + warnings: Vec::new(), + } + } + + /// Push a warning into this struct. + pub fn add_warning(&mut self, warning: Warning) { + self.warnings.push(warning); + } + + pub fn get_warnings(&self) -> &Vec { + &self.warnings + } + + /// Returns how many warnings have been accumulated. + pub fn warnings_count(&self) -> usize { + self.warnings.len() + } +} + +/// An enum to represent a warning. +#[derive(Error, Debug)] +pub enum Warning { + /// Warning for when no ``content-type`` header can be found in the Response headers. + #[error("{warning_msg}")] + MissingContentType { + warning_msg: String, + url: Url, + error: ToStrError, + }, + + /// Warning for when a temporary directory is unable to be deleted. + #[error("{warning_msg}")] + TempDirDeletionError { + warning_msg: String, + temp_directory_path: PathBuf, + error: std::io::Error, + }, + + /// Warning for when the program can not parse a url in an image tag. + #[error("{warning_msg}")] + ImageTagParseError { + warning_msg: String, + raw_image_tag: String, + error: url::ParseError, + } } \ No newline at end of file diff --git a/src/misc.rs b/src/misc.rs index 7b414e1..a0b9f15 100644 --- a/src/misc.rs +++ b/src/misc.rs @@ -1,4 +1,4 @@ -use std::collections::HashMap; +use std::{collections::HashMap, fmt::Display}; /// An extension to ``std::collections::HashMap>`` pub trait HashMapExt { @@ -31,4 +31,20 @@ impl HashMapExt for Ha return self; } +} + +/// A list of Oses for error handling purposes. +#[derive(Debug)] +pub enum Oses { + Windows, + Linux, + MacOs, + OtherUnknownOs, +} + +/// Implement display for Oses. +impl Display for Oses { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{:?}", self) + } } \ No newline at end of file