mirror of
https://github.com/Raine-gay/royal_road_archiver.git
synced 2025-01-05 10:28:19 -06:00
Updated to v1.0.1, redid all the error handling, made the library better.
This commit is contained in:
parent
34c7564008
commit
f6725fafcf
1
.vscode/settings.json
vendored
1
.vscode/settings.json
vendored
|
@ -20,6 +20,7 @@
|
|||
"reqwest",
|
||||
"royalroad",
|
||||
"tempdir",
|
||||
"thiserror",
|
||||
"ureq"
|
||||
]
|
||||
}
|
4
Cargo.lock
generated
4
Cargo.lock
generated
|
@ -1522,7 +1522,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "royal_road_archiver"
|
||||
version = "0.1.1"
|
||||
version = "1.0.1"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"chrono",
|
||||
|
@ -1530,12 +1530,14 @@ dependencies = [
|
|||
"epub-builder",
|
||||
"html2md",
|
||||
"indicatif",
|
||||
"lazy_static",
|
||||
"path-slash",
|
||||
"regex",
|
||||
"reqwest",
|
||||
"scraper",
|
||||
"serde_json",
|
||||
"tempdir",
|
||||
"thiserror",
|
||||
"url",
|
||||
"zip-extract",
|
||||
]
|
||||
|
|
|
@ -1,9 +1,10 @@
|
|||
[package]
|
||||
name = "royal_road_archiver"
|
||||
version = "0.1.1"
|
||||
version = "1.0.1"
|
||||
edition = "2021"
|
||||
description = "An archival program and library for the webnovel site RoyalRoad."
|
||||
license = "EUPL-1.2 "
|
||||
repository = "https://github.com/Raine-gay/royal_road_archiver"
|
||||
license = "EUPL-1.2"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
|
@ -29,11 +30,13 @@ clap = { version = "4.4.18", features = ["derive"] }
|
|||
epub-builder = "0.7.4"
|
||||
html2md = "0.2.14"
|
||||
indicatif = "0.17.7"
|
||||
lazy_static = "1.4.0"
|
||||
path-slash = "0.2.1"
|
||||
regex = "1.10.3"
|
||||
reqwest = { version = "0.11.23", features = ["blocking", "rustls"] }
|
||||
scraper = "0.18.1"
|
||||
serde_json = "1.0.111"
|
||||
tempdir = "0.3.7"
|
||||
thiserror = "1.0.56"
|
||||
url = "2.5.0"
|
||||
zip-extract = "0.1.3"
|
||||
|
|
1
TODO
Normal file
1
TODO
Normal file
|
@ -0,0 +1 @@
|
|||
Make images download to disk then read from there to avoid high ram usage when processing books that are image heavy.
|
|
@ -1,6 +1,7 @@
|
|||
use std::{env, fs, path::{Path, PathBuf}, process::exit};
|
||||
|
||||
use clap::{Parser, Subcommand};
|
||||
use royal_road_archiver_lib::GenerationError;
|
||||
use url::Url;
|
||||
|
||||
#[derive(clap::Parser, Debug)]
|
||||
|
@ -53,11 +54,26 @@ fn main() {
|
|||
valid_directory_check(&output_directory);
|
||||
let book_url = valid_url_check(&cli_input.book_url.to_lowercase());
|
||||
|
||||
match cli_input.subcommand {
|
||||
let result: Result<std::sync::MutexGuard<'_, royal_road_archiver_lib::GenerationWarnings>, GenerationError> = match cli_input.subcommand {
|
||||
Subcommands::Audiobook(audiobook_args) => royal_road_archiver_lib::generate_audiobook(audiobook_args, book_url, output_directory),
|
||||
Subcommands::Epub(epub_args) => royal_road_archiver_lib::generate_epub(epub_args, book_url, output_directory),
|
||||
Subcommands::Html(html_args) => royal_road_archiver_lib::generate_html(html_args, book_url, output_directory),
|
||||
Subcommands::Markdown(markdown_args) => royal_road_archiver_lib::generate_markdown(markdown_args, book_url, output_directory),
|
||||
};
|
||||
|
||||
match result {
|
||||
Ok(generation_warnings) => {
|
||||
// Print every warning collected during generation, if there are any.
//
// The previous condition, `!&generation_warnings.warnings_count() == 0`, applied the unary
// `!` operator to the count before the comparison. On an integer that is a bitwise NOT, so the
// test only succeeded when the count was the integer's maximum value and the warnings were
// never shown. Compare against zero directly instead.
if generation_warnings.warnings_count() != 0 {
    println!("The following warnings were generated:");

    for warning in generation_warnings.get_warnings() {
        println!("\n{warning}");
    }
}
|
||||
},
|
||||
Err(generation_error) => {
|
||||
eprintln!("{}", generation_error);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
36
src/book.rs
36
src/book.rs
|
@ -1,7 +1,7 @@
|
|||
use std::collections::HashMap;
|
||||
|
||||
use indicatif::{ProgressBar, ProgressStyle};
|
||||
use crate::misc::HashMapExt;
|
||||
use crate::{misc::HashMapExt, GenerationError};
|
||||
use scraper::Html;
|
||||
use url::Url;
|
||||
|
||||
|
@ -37,10 +37,10 @@ pub struct Book {
|
|||
|
||||
impl Book {
|
||||
/// Generate a new book instance with all the needed data from a given url.
|
||||
pub fn new(book_url: Url) -> Book {
|
||||
let index_html = html::string_to_html_document(&http::get_response(book_url.clone()).get_text());
|
||||
pub fn new(book_url: Url) -> Result<Book, GenerationError> {
|
||||
let index_html = html::string_to_html_document(&http::get_response(book_url.clone())?.get_text()?);
|
||||
|
||||
let chapter_names_and_urls = html::get_chapter_names_and_urls_from_index(&index_html);
|
||||
let chapter_names_and_urls = html::get_chapter_names_and_urls_from_index(&index_html, &book_url)?;
|
||||
let mut chapters: Vec<Chapter> = Vec::with_capacity(chapter_names_and_urls.len());
|
||||
|
||||
let mut image_urls_and_tags: HashMap<Url, Vec<String>> = HashMap::new();
|
||||
|
@ -56,7 +56,7 @@ impl Book {
|
|||
|
||||
// Generate the chapters and add em to the book.
|
||||
for i in 0..chapter_names_and_urls.len() {
|
||||
let chapter = Chapter::new(&chapter_names_and_urls[i][0], &chapter_names_and_urls[i][1]);
|
||||
let chapter = Chapter::new(&chapter_names_and_urls[i].0, &chapter_names_and_urls[i].1)?;
|
||||
|
||||
// extract the image urls and add em to the image_urls_and_tags hashmap.
|
||||
image_urls_and_tags = image_urls_and_tags.join(html::extract_urls_and_img_tag(&chapter.isolated_chapter_html));
|
||||
|
@ -68,18 +68,20 @@ impl Book {
|
|||
|
||||
progress_bar.finish();
|
||||
|
||||
let title = html::get_title_from_index(&index_html);
|
||||
let title = html::get_title_from_index(&index_html, &book_url)?;
|
||||
|
||||
Book {
|
||||
let book = Book {
|
||||
author: html::get_author_from_index(&index_html, &book_url)?,
|
||||
cover_image_url: html::get_cover_image_url_from_index(&index_html, &book_url)?,
|
||||
book_url: book_url,
|
||||
title: title.clone(),
|
||||
file_name_title: remove_illegal_chars(title),
|
||||
author: html::get_author_from_index(&index_html),
|
||||
cover_image_url: http::string_to_url(&html::get_cover_image_url_from_index(&index_html)),
|
||||
index_html: index_html,
|
||||
chapters: chapters,
|
||||
image_urls_and_tags: image_urls_and_tags,
|
||||
}
|
||||
};
|
||||
|
||||
return Ok(book);
|
||||
}
|
||||
|
||||
/// Count how many paragraphs are in the book.
|
||||
|
@ -105,15 +107,17 @@ pub struct Chapter {
|
|||
}
|
||||
|
||||
impl Chapter {
|
||||
fn new(chapter_name: &str, chapter_url: &str) -> Self {
|
||||
let chapter_url = http::string_to_url(&chapter_url);
|
||||
let raw_chapter_html = html::string_to_html_document(&http::get_response(chapter_url.clone()).get_text());
|
||||
fn new(chapter_name: &str, chapter_url: &str) -> Result<Self, GenerationError> {
|
||||
let chapter_url = http::string_to_url(&chapter_url)?;
|
||||
let raw_chapter_html = html::string_to_html_document(&http::get_response(chapter_url.clone())?.get_text()?);
|
||||
|
||||
Chapter {
|
||||
let chapter = Chapter {
|
||||
isolated_chapter_html: html::isolate_chapter_content(&raw_chapter_html, &chapter_url)?,
|
||||
chapter_url: chapter_url,
|
||||
chapter_name: chapter_name.to_string(),
|
||||
isolated_chapter_html: html::isolate_chapter_content(&raw_chapter_html),
|
||||
raw_chapter_html: raw_chapter_html,
|
||||
}
|
||||
};
|
||||
|
||||
return Ok(chapter);
|
||||
}
|
||||
}
|
|
@ -1,8 +1,10 @@
|
|||
use std::{io::Cursor, path::PathBuf, process::exit};
|
||||
use std::{io::Cursor, path::PathBuf};
|
||||
|
||||
use path_slash::PathBufExt as _;
|
||||
use tempdir::TempDir;
|
||||
|
||||
use crate::{misc, GenerationError, Warning, WARNINGS};
|
||||
|
||||
/// Converts a given path to windows style if needed.
|
||||
pub fn convert_path_to_os_specific(path: PathBuf) -> PathBuf {
|
||||
// If target os is windows.
|
||||
|
@ -28,65 +30,60 @@ pub fn remove_illegal_chars(mut string: String) -> String {
|
|||
}
|
||||
|
||||
/// Setup html2xhtml in the operating system's temp directory.
|
||||
pub fn setup_html2xhtml() -> TempDir {
|
||||
pub fn setup_html2xhtml() -> Result<TempDir, GenerationError> {
|
||||
#[cfg(target_os = "windows")] {
|
||||
const HTML2XHTML: &[u8; 245025] = include_bytes!("../html2xhtml-windows.zip"); // This will not compile on windows due to this and no I don't give a shit.
|
||||
// Compile it on linux for windows like a sane person.
|
||||
let html2xhtml_dir = match TempDir::new("html2xhtml-windows") {
|
||||
let html2xhtml_temp_dir = match TempDir::new("html2xhtml-windows") {
|
||||
Ok(temp_dir) => temp_dir,
|
||||
Err(error) => {
|
||||
eprintln!("Error! Unable to create temp directory: {error}");
|
||||
exit(1);
|
||||
}
|
||||
Err(error) => return Err(GenerationError::TempDirCreationError {error}),
|
||||
};
|
||||
|
||||
match zip_extract::extract(Cursor::new(HTML2XHTML), html2xhtml_dir.path(), true) {
|
||||
match zip_extract::extract(Cursor::new(HTML2XHTML), html2xhtml_temp_dir.path(), true) {
|
||||
Ok(_) => (),
|
||||
Err(error) => {
|
||||
eprintln!("Error! Unable to extract html2xhtml into into the temp directory\n{error}");
|
||||
exit(1);
|
||||
}
|
||||
Err(error) => return Err(GenerationError::Html2XhtmlExtractionError {error}),
|
||||
}
|
||||
|
||||
return html2xhtml_dir;
|
||||
return Ok(html2xhtml_temp_dir);
|
||||
}
|
||||
|
||||
#[cfg(target_os = "linux")] {
|
||||
const HTML2XHTML: &[u8; 186938] = include_bytes!("../html2xhtml-linux.zip");
|
||||
let html2xhtml_dir = match TempDir::new("html2xhtml-linux") {
|
||||
let html2xhtml_temp_dir = match TempDir::new("html2xhtml-linux") {
|
||||
Ok(temp_dir) => temp_dir,
|
||||
Err(error) => {
|
||||
eprintln!("Error! Unable to create temp directory: {error}");
|
||||
exit(1);
|
||||
}
|
||||
Err(error) => return Err(GenerationError::TempDirCreationError {error}),
|
||||
};
|
||||
|
||||
match zip_extract::extract(Cursor::new(HTML2XHTML), html2xhtml_dir.path(), true) {
|
||||
match zip_extract::extract(Cursor::new(HTML2XHTML), html2xhtml_temp_dir.path(), true) {
|
||||
Ok(_) => (),
|
||||
Err(error) => {
|
||||
eprintln!("Error! Unable to extract html2xhtml into the temp directory\n{error}");
|
||||
exit(1);
|
||||
}
|
||||
Err(error) => return Err(GenerationError::Html2XhtmlExtractionError {error}),
|
||||
}
|
||||
|
||||
return html2xhtml_dir;
|
||||
return Ok(html2xhtml_temp_dir);
|
||||
}
|
||||
|
||||
#[cfg(target_os = "macos")] {
|
||||
// TODO!
|
||||
// You can find the macos tempdir by doing: echo $TMPDIR
|
||||
|
||||
eprint!("Error! This mode does not currently support MacOS. Try either html mode or markdown mode.");
|
||||
exit(1);
|
||||
Err(GenerationError::OsUnsupportedError {os: misc::Oses::MacOs})
|
||||
}
|
||||
|
||||
// In the event the OS is unknown.
|
||||
#[allow(unreachable_code)]
|
||||
Err(GenerationError::OsUnsupportedError {os: misc::Oses::OtherUnknownOs})
|
||||
}
|
||||
|
||||
/// Delete html2xhtml from the operating system's temp directory.
|
||||
pub fn delete_html2xhtml(html2xhtml_dir: TempDir) {
|
||||
let temp_dir_path = html2xhtml_dir.path().to_path_buf();
|
||||
|
||||
match html2xhtml_dir.close() {
|
||||
Ok(_) => (),
|
||||
Err(warning) => {
|
||||
eprintln!("Warning! Unable to close & delete temp directory: {warning}");
|
||||
let warning = Warning::TempDirDeletionError {
|
||||
warning_msg: "Unable to close and delete temp directory".to_string(),
|
||||
temp_directory_path: temp_dir_path,
|
||||
error: warning
|
||||
};
|
||||
WARNINGS.lock().unwrap().add_warning(warning);
|
||||
}
|
||||
}
|
||||
}
|
60
src/html.rs
60
src/html.rs
|
@ -1,11 +1,11 @@
|
|||
use std::{collections::HashMap, io::Write, process::{exit, Command, Stdio}};
|
||||
use std::{collections::HashMap, io::Write, process::{Command, Stdio}};
|
||||
|
||||
use regex::Regex;
|
||||
use scraper::{Html, Selector};
|
||||
use tempdir::TempDir;
|
||||
use url::Url;
|
||||
|
||||
use crate::misc::HashMapExt;
|
||||
use crate::{http, misc::HashMapExt, GenerationError, Warning, WARNINGS};
|
||||
|
||||
/// Convert a string to an html document.
|
||||
pub fn string_to_html_document(document_string: &str) -> Html {
|
||||
|
@ -18,7 +18,7 @@ pub fn string_to_html_fragment(fragment_string: &str) -> Html {
|
|||
}
|
||||
|
||||
/// Get the book's title from the index.
|
||||
pub fn get_title_from_index(index_html: &Html) -> String {
|
||||
pub fn get_title_from_index(index_html: &Html, book_url: &Url) -> Result<String, GenerationError> {
|
||||
let selector = Selector::parse("meta").unwrap(); // Build a selector that finds the 'meta' html tag
|
||||
for element in index_html.select(&selector) {
|
||||
// Loop through all meta tags in the html document.
|
||||
|
@ -28,58 +28,55 @@ pub fn get_title_from_index(index_html: &Html) -> String {
|
|||
Some(x) => {
|
||||
if x == "twitter:title" {
|
||||
// If it does contain attribute "name", check if the content of that attribute is "twitter:title"
|
||||
return element.value().attr("content").unwrap().to_owned();
|
||||
return Ok(element.value().attr("content").unwrap().to_owned());
|
||||
// If it is, extract the data from the content attribute.
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
eprintln!("Error! Unable to find book title. Royal road have probably changed their front-end code. Please report this to me on:\nhttps://github.com/Raine-gay/royal_road_archiver");
|
||||
exit(1);
|
||||
Err(GenerationError::BookTitleFetchError{url: book_url.clone()})
|
||||
}
|
||||
|
||||
/// Get the book's author from index
|
||||
pub fn get_author_from_index(index_html: &Html) -> String {
|
||||
pub fn get_author_from_index(index_html: &Html, book_url: &Url) -> Result<String, GenerationError> {
|
||||
let selector = Selector::parse("meta").unwrap();
|
||||
for element in index_html.select(&selector) {
|
||||
match element.value().attr("property") {
|
||||
None => continue,
|
||||
Some(x) => {
|
||||
if x == "books:author" {
|
||||
return element.value().attr("content").unwrap().to_owned();
|
||||
return Ok(element.value().attr("content").unwrap().to_owned());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
eprintln!("Error! Unable to find book author. Royal road have probably changed their front-end code. Please report this to me on:\nhttps://github.com/Raine-gay/royal_road_archiver");
|
||||
exit(1);
|
||||
Err(GenerationError::BookAuthorFetchError{url: book_url.clone()})
|
||||
}
|
||||
|
||||
/// Get the book's cover image url from the index
|
||||
pub fn get_cover_image_url_from_index(index_html: &Html) -> String {
|
||||
pub fn get_cover_image_url_from_index(index_html: &Html, book_url: &Url) -> Result<Url, GenerationError> {
|
||||
let selector = Selector::parse("meta").unwrap();
|
||||
for element in index_html.select(&selector) {
|
||||
match element.value().attr("property") {
|
||||
None => continue,
|
||||
Some(x) => {
|
||||
if x == "og:image" {
|
||||
return element.value().attr("content").unwrap().to_owned();
|
||||
return http::string_to_url(element.value().attr("content").unwrap());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
eprintln!("Error! Unable to find cover image url. Royal road have probably changed their front-end code. Please report this to me on:\nhttps://github.com/Raine-gay/royal_road_archiver");
|
||||
exit(1);
|
||||
Err(GenerationError::BookCoverImageUrlFetchError{url: book_url.clone()})
|
||||
}
|
||||
|
||||
/// Gets the chapter names and urls from the index.
|
||||
///
|
||||
/// This gets stored in a vector of `(chapter name, url)` tuples: element `.0` is the chapter name, and element `.1` is the url.
|
||||
pub fn get_chapter_names_and_urls_from_index(index_html: &Html) -> Vec<[String; 2]> {
|
||||
pub fn get_chapter_names_and_urls_from_index(index_html: &Html, book_url: &Url) -> Result<Vec<(String, String)>, GenerationError> {
|
||||
// I won't lie. I have almost 0 idea what a bunch of this shit does since it's highly specific to RoyalRoad.
|
||||
// I've commented in the gist of it, but we have no memory of actually writing this function.
|
||||
|
||||
let mut chapters: Vec<[String; 2]> = Vec::new();
|
||||
let mut chapters: Vec<(String, String)> = Vec::new();
|
||||
let mut raw_json_data = String::new();
|
||||
|
||||
// Find a script tag that has "window.chapters" inside the inner html. This is all in json format.
|
||||
|
@ -92,8 +89,7 @@ pub fn get_chapter_names_and_urls_from_index(index_html: &Html) -> Vec<[String;
|
|||
}
|
||||
// Return an error if unable to find the needed json data. That probably means royal road has changed their code.
|
||||
if raw_json_data.is_empty() {
|
||||
eprintln!("Error! Unable to find json chapter data. Royal road have probably changed their front-end code. Please report this to me on:\nhttps://github.com/Raine-gay/royal_road_archiver");
|
||||
exit(1);
|
||||
return Err(GenerationError::BookChapterNameAndUrlFetchError { url: book_url.clone()});
|
||||
}
|
||||
|
||||
// I have absolutely no idea what this regex does; but it's probably important.
|
||||
|
@ -118,15 +114,15 @@ pub fn get_chapter_names_and_urls_from_index(index_html: &Html) -> Vec<[String;
|
|||
chapter["url"].to_string().replace('"', "")
|
||||
);
|
||||
|
||||
chapters.push([chapter_name, url]);
|
||||
chapters.push((chapter_name, url));
|
||||
}
|
||||
|
||||
// Return that wanker.
|
||||
return chapters;
|
||||
return Ok(chapters);
|
||||
}
|
||||
|
||||
/// Isolate chapter content from the rest of the shit on the page.
|
||||
pub fn isolate_chapter_content(raw_chapter_html: &Html) -> Html {
|
||||
pub fn isolate_chapter_content(raw_chapter_html: &Html, chapter_url: &Url) -> Result<Html, GenerationError> {
|
||||
let page_html = Html::parse_document(&raw_chapter_html.html());
|
||||
|
||||
let selector = Selector::parse("div").unwrap();
|
||||
|
@ -135,13 +131,12 @@ pub fn isolate_chapter_content(raw_chapter_html: &Html) -> Html {
|
|||
None => continue,
|
||||
Some(x) => {
|
||||
if x == "chapter-inner chapter-content" {
|
||||
return string_to_html_fragment(&element.inner_html());
|
||||
return Ok(string_to_html_fragment(&element.inner_html()));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
eprintln!("Error! Unable to isolate chapter content");
|
||||
exit(1);
|
||||
Err(GenerationError::ChapterContentIsolationError{url: chapter_url.clone()})
|
||||
}
|
||||
|
||||
/// Remove all img tags from the html fragment.
|
||||
|
@ -178,7 +173,13 @@ pub fn extract_urls_and_img_tag(chapter_html: &Html) -> HashMap<Url, Vec<String>
|
|||
let url = match Url::parse(url.unwrap()) {
|
||||
Ok(url) => url,
|
||||
Err(warning) => {
|
||||
eprintln!("Warning! Unable to parse url on image tag: {image_tag}\n{warning}");
|
||||
let warning = Warning::ImageTagParseError {
|
||||
warning_msg: "Unable to parse url in image tag".to_string(),
|
||||
raw_image_tag: image_tag,
|
||||
error: warning,
|
||||
};
|
||||
WARNINGS.lock().unwrap().add_warning(warning);
|
||||
|
||||
continue;
|
||||
},
|
||||
};
|
||||
|
@ -213,7 +214,7 @@ pub fn replace_img_src(img_tag: String, new_src: String) -> String {
|
|||
}
|
||||
|
||||
/// Convert a given html dom into xhtml.
|
||||
pub fn html_to_xhtml(html: Html, html2xhtml_dir: &TempDir) -> String {
|
||||
pub fn html_to_xhtml(html: Html, html2xhtml_dir: &TempDir) -> Result<String, GenerationError> {
|
||||
#[cfg(target_os = "windows")]
|
||||
const HTML2XHTML_ENTRY: &str = "html2xhtml.exe";
|
||||
|
||||
|
@ -233,10 +234,7 @@ pub fn html_to_xhtml(html: Html, html2xhtml_dir: &TempDir) -> String {
|
|||
.spawn()
|
||||
{
|
||||
Ok(child) => child,
|
||||
Err(error) => {
|
||||
eprintln!("Error! Unable to start html2xhtml: {error}");
|
||||
exit(1);
|
||||
},
|
||||
Err(error) => return Err(GenerationError::Html2XhtmlStartError{error}),
|
||||
};
|
||||
|
||||
// Write the html to the stdin, then wait for xhtml to be outputted to the stdout.
|
||||
|
@ -246,5 +244,5 @@ pub fn html_to_xhtml(html: Html, html2xhtml_dir: &TempDir) -> String {
|
|||
// Generate a lossy string from the stdout.
|
||||
let xhtml = String::from_utf8_lossy(&html2xhtml_output.stdout).to_string();
|
||||
|
||||
return xhtml;
|
||||
return Ok(xhtml);
|
||||
}
|
48
src/http.rs
48
src/http.rs
|
@ -1,8 +1,10 @@
|
|||
use std::{collections::HashMap, process::exit};
|
||||
use std::collections::HashMap;
|
||||
|
||||
use reqwest::{blocking::Response, header::HeaderMap};
|
||||
use url::Url;
|
||||
|
||||
use crate::{GenerationError, Warning, WARNINGS};
|
||||
|
||||
// A struct representing an HttpResponse and the Url it originated from.
|
||||
pub struct HttpResponse {
|
||||
url: Url,
|
||||
|
@ -16,24 +18,18 @@ impl HttpResponse {
|
|||
}
|
||||
|
||||
/// Attempt to convert the response to text. Returns a `GenerationError::ResponseConvertToTextError` if it fails.
|
||||
pub fn get_text(self) -> String {
|
||||
pub fn get_text(self) -> Result<String, GenerationError> {
|
||||
match self.response.text() {
|
||||
Ok(response_text) => response_text,
|
||||
Err(error) => {
|
||||
eprintln!("Error! Unable to convert response from {0} into text\n{error}", self.url);
|
||||
exit(1);
|
||||
}
|
||||
Ok(response_text) => Ok(response_text),
|
||||
Err(error) => Err(GenerationError::ResponseConvertToTextError {error}),
|
||||
}
|
||||
}
|
||||
|
||||
/// Attempt to convert the response to bytes. Used for images. Returns a `GenerationError::ResponseConvertToBytesError` if it fails.
|
||||
pub fn get_bytes(self) -> bytes::Bytes{
|
||||
pub fn get_bytes(self) -> Result<bytes::Bytes, GenerationError>{
|
||||
match self.response.bytes() {
|
||||
Ok(response_bytes) => response_bytes,
|
||||
Err(error) => {
|
||||
eprintln!("Error! Unable to convert response from {0} into bytes\n{error}", self.url);
|
||||
exit(1);
|
||||
}
|
||||
Ok(response_bytes) => Ok(response_bytes),
|
||||
Err(error) => Err(GenerationError::ResponseConvertToBytesError {error}),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -52,7 +48,13 @@ impl HttpResponse {
|
|||
let content_type = match self.get_headers()["content-type"].to_str() {
|
||||
Ok(content_type) => content_type,
|
||||
Err(warning) => {
|
||||
eprintln!("Warning! Unable to get content type from the http-header: {warning}");
|
||||
let warning = Warning::MissingContentType {
|
||||
warning_msg: "Unable to find or parse the content-type header".to_string(),
|
||||
url: self.url.clone(),
|
||||
error: warning,
|
||||
};
|
||||
WARNINGS.lock().unwrap().add_warning(warning);
|
||||
|
||||
return (String::with_capacity(0), String::with_capacity(0));
|
||||
}
|
||||
};
|
||||
|
@ -67,25 +69,19 @@ impl HttpResponse {
|
|||
}
|
||||
|
||||
/// Get an http response for a given url. Returns a `GenerationError::ResponseGetError` if it fails.
|
||||
pub fn get_response(url: Url) -> HttpResponse {
|
||||
pub fn get_response(url: Url) -> Result<HttpResponse, GenerationError> {
|
||||
let response_result = reqwest::blocking::get(url.clone());
|
||||
|
||||
match response_result {
|
||||
Ok(response) => HttpResponse { url, response },
|
||||
Err(error) => {
|
||||
eprintln!("Error! Unable to get a response from: {url}\n{error}");
|
||||
exit(1);
|
||||
},
|
||||
Ok(response) => Ok(HttpResponse { url, response }),
|
||||
Err(error) => return Err(GenerationError::ResponseGetError {error, url}),
|
||||
}
|
||||
}
|
||||
|
||||
/// A function to convert a string to a url. Returns a `GenerationError::UrlParseError` if it fails.
|
||||
pub fn string_to_url(url: &str) -> Url {
|
||||
pub fn string_to_url(url: &str) -> Result<Url, GenerationError> {
|
||||
match Url::parse(url) {
|
||||
Ok(url) => url,
|
||||
Err(error) => {
|
||||
eprintln!("Error! Unable to parse: {url} into a valid url.\n{error}");
|
||||
exit(1);
|
||||
}
|
||||
Ok(url) => Ok(url),
|
||||
Err(error) => Err(GenerationError::UrlParseError {error, string_url: url.to_string()}),
|
||||
}
|
||||
}
|
167
src/library.rs
167
src/library.rs
|
@ -1,4 +1,4 @@
|
|||
use std::{collections::HashMap, fs::OpenOptions, io::Write, path::PathBuf, process::exit};
|
||||
use std::{collections::HashMap, fs::OpenOptions, io::Write, path::PathBuf, process::exit, sync::{Mutex, MutexGuard}};
|
||||
|
||||
use bytes::Buf;
|
||||
use chrono::prelude::Local;
|
||||
|
@ -6,7 +6,11 @@ use clap::Args;
|
|||
use epub_builder::{EpubBuilder, EpubContent, ReferenceType, ZipLibrary};
|
||||
use file_system_crap::convert_path_to_os_specific;
|
||||
use html::{html_to_xhtml, remove_image_tags, string_to_html_fragment};
|
||||
use lazy_static::lazy_static;
|
||||
use indicatif::{ProgressBar, ProgressStyle};
|
||||
use misc::Oses;
|
||||
use reqwest::header::ToStrError;
|
||||
use thiserror::Error;
|
||||
use url::Url;
|
||||
|
||||
mod book;
|
||||
|
@ -55,20 +59,24 @@ pub struct MarkdownArgs {
|
|||
pub no_image_tags: bool,
|
||||
}
|
||||
|
||||
lazy_static! {
|
||||
static ref WARNINGS: Mutex<GenerationWarnings> = Mutex::new(GenerationWarnings::new());
|
||||
}
|
||||
|
||||
/// Generate an audiobook from the given arguments, url, & outputs it to the output directory.
|
||||
///
|
||||
/// This function DOES NOT do any error checking on the Url or output directory & WILL panic if they are wrong.
|
||||
/// Make sure the Url is valid and the output directory is writable BEFORE passing them to this.
|
||||
pub fn generate_audiobook(audiobook_args: AudiobookArgs, book_url: Url, output_directory: PathBuf) {
|
||||
eprintln!("This is not implemented yet.");
|
||||
pub fn generate_audiobook(audiobook_args: AudiobookArgs, book_url: Url, output_directory: PathBuf) -> Result<MutexGuard<'static, GenerationWarnings>, GenerationError> {
|
||||
return Err(GenerationError::GenerationUnsupportedError);
|
||||
}
|
||||
|
||||
/// Generate an epub file from the given arguments, url, & outputs it to the output directory.
|
||||
///
|
||||
/// This function DOES NOT do any error checking on the Url or output directory & WILL panic if they are wrong.
|
||||
/// Make sure the Url is valid and the output directory is writable BEFORE passing them to this.
|
||||
pub fn generate_epub(epub_args: EpubArgs, book_url: Url, output_directory: PathBuf) {
|
||||
let book = book::Book::new(book_url);
|
||||
pub fn generate_epub(epub_args: EpubArgs, book_url: Url, output_directory: PathBuf) -> Result<MutexGuard<'static, GenerationWarnings>, GenerationError> {
|
||||
let book = book::Book::new(book_url)?;
|
||||
|
||||
// Initialize the epub builder.
|
||||
let mut epub_builder = EpubBuilder::new(ZipLibrary::new().unwrap()).unwrap();
|
||||
|
@ -83,11 +91,11 @@ pub fn generate_epub(epub_args: EpubArgs, book_url: Url, output_directory: PathB
|
|||
.expect("Unable to add title metadata");
|
||||
|
||||
// Download the cover image & add it to the epub.
|
||||
let cover_image = http::get_response(book.cover_image_url);
|
||||
let cover_image = http::get_response(book.cover_image_url)?;
|
||||
let (cover_mime_type, cover_file_extension) = cover_image.get_content_type_and_file_extension();
|
||||
epub_builder.add_cover_image(
|
||||
format!("cover.{cover_file_extension}"),
|
||||
cover_image.get_bytes().to_vec().as_slice(),
|
||||
cover_image.get_bytes()?.to_vec().as_slice(),
|
||||
cover_mime_type).expect("Error! Unable to add cover image.");
|
||||
|
||||
// Generate the cover xhtml.
|
||||
|
@ -116,7 +124,7 @@ pub fn generate_epub(epub_args: EpubArgs, book_url: Url, output_directory: PathB
|
|||
epub_builder.inline_toc();
|
||||
|
||||
// Setup html2xhtml on the operating system.
|
||||
let html2xhtml_dir = file_system_crap::setup_html2xhtml();
|
||||
let html2xhtml_dir = file_system_crap::setup_html2xhtml()?;
|
||||
|
||||
let mut old_tags_new_tags: HashMap<String, String> = HashMap::new();
|
||||
|
||||
|
@ -134,11 +142,11 @@ pub fn generate_epub(epub_args: EpubArgs, book_url: Url, output_directory: PathB
|
|||
|
||||
let mut i: usize = 0;
|
||||
for image_url in book.image_urls_and_tags.keys() {
|
||||
let image = http::get_response(image_url.clone());
|
||||
let image = http::get_response(image_url.clone())?;
|
||||
let (image_mime_type, image_file_extension) = image.get_content_type_and_file_extension();
|
||||
epub_builder.add_resource(
|
||||
format!("image_{i}.{image_file_extension}"),
|
||||
image.get_bytes().to_vec().reader(),
|
||||
image.get_bytes()?.to_vec().reader(),
|
||||
image_mime_type).expect("Error! Unable to add content image");
|
||||
|
||||
for image_tag in book.image_urls_and_tags[image_url].clone() {
|
||||
|
@ -157,7 +165,7 @@ pub fn generate_epub(epub_args: EpubArgs, book_url: Url, output_directory: PathB
|
|||
|
||||
let xhtml: String;
|
||||
if epub_args.no_images {
|
||||
xhtml = html_to_xhtml(string_to_html_fragment(&remove_image_tags(&chapter.isolated_chapter_html)), &html2xhtml_dir)
|
||||
xhtml = html_to_xhtml(string_to_html_fragment(&remove_image_tags(&chapter.isolated_chapter_html)), &html2xhtml_dir)?
|
||||
}
|
||||
else {
|
||||
let mut replaced_html = chapter.isolated_chapter_html.html();
|
||||
|
@ -165,7 +173,7 @@ pub fn generate_epub(epub_args: EpubArgs, book_url: Url, output_directory: PathB
|
|||
replaced_html = replaced_html.replace(&old_img_tag.clone(), &old_tags_new_tags[old_img_tag]);
|
||||
}
|
||||
|
||||
xhtml = html_to_xhtml(string_to_html_fragment(&replaced_html), &html2xhtml_dir);
|
||||
xhtml = html_to_xhtml(string_to_html_fragment(&replaced_html), &html2xhtml_dir)?;
|
||||
}
|
||||
|
||||
epub_builder.add_content(EpubContent::new(format!("chapter_{}.xhtml", i+1), xhtml.as_bytes())
|
||||
|
@ -192,22 +200,24 @@ pub fn generate_epub(epub_args: EpubArgs, book_url: Url, output_directory: PathB
|
|||
|
||||
// Delete the html2xhtml temp directory. It's good to clean up after yourself.
|
||||
file_system_crap::delete_html2xhtml(html2xhtml_dir);
|
||||
|
||||
Ok(WARNINGS.lock().unwrap())
|
||||
}
|
||||
|
||||
/// Generate an html archive from the given arguments, url, & outputs it to the output directory.
|
||||
///
|
||||
/// This function DOES NOT do any error checking on the Url or output directory & WILL panic if they are wrong.
|
||||
/// Make sure the Url is valid and the output directory is writable BEFORE passing them to this.
|
||||
pub fn generate_html(html_args: HtmlArgs, book_url: Url, output_directory: PathBuf) {
|
||||
eprintln!("This is not implemented yet.");
|
||||
pub fn generate_html(html_args: HtmlArgs, book_url: Url, output_directory: PathBuf) -> Result<MutexGuard<'static, GenerationWarnings>, GenerationError> {
|
||||
return Err(GenerationError::GenerationUnsupportedError);
|
||||
}
|
||||
|
||||
/// Generate a markdown file from the given arguments, url, & outputs it to the output directory.
|
||||
///
|
||||
/// This function DOES NOT do any error checking on the Url or output directory & WILL panic if they are wrong.
|
||||
/// Make sure the Url is valid and the output directory is writable BEFORE passing them to this.
|
||||
pub fn generate_markdown(markdown_args: MarkdownArgs, book_url: Url, output_directory: PathBuf) {
|
||||
let book = book::Book::new(book_url);
|
||||
pub fn generate_markdown(markdown_args: MarkdownArgs, book_url: Url, output_directory: PathBuf) -> Result<MutexGuard<'static, GenerationWarnings>, GenerationError> {
|
||||
let book = book::Book::new(book_url)?;
|
||||
|
||||
let output_path = convert_path_to_os_specific(output_directory.join(format!("{0}.md", book.file_name_title)));
|
||||
|
||||
|
@ -215,8 +225,7 @@ pub fn generate_markdown(markdown_args: MarkdownArgs, book_url: Url, output_dire
|
|||
let mut output_file = match OpenOptions::new().write(true).create_new(true).open(&output_path) {
|
||||
Ok(output_file) => output_file,
|
||||
Err(error) => {
|
||||
eprintln!("Error! Unable to create: {0}\n{error}", output_path.to_string_lossy());
|
||||
exit(1);
|
||||
return Err(GenerationError::FileCreationError{error, file_path: output_path});
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -248,4 +257,126 @@ pub fn generate_markdown(markdown_args: MarkdownArgs, book_url: Url, output_dire
|
|||
|
||||
output_file.write_all(buf.as_bytes()).unwrap();
|
||||
}
|
||||
|
||||
Ok(WARNINGS.lock().unwrap())
|
||||
}
|
||||
|
||||
/// An error enum representing all the documented errors that can occur while archiving a RoyalRoad webnovel.
|
||||
#[derive(Error, Debug)]
|
||||
pub enum GenerationError {
|
||||
/// Represents errors during file creation.
|
||||
#[error("Unable to create file: {file_path}\n{error}")]
|
||||
FileCreationError{error: std::io::Error, file_path: PathBuf},
|
||||
|
||||
/// Represents errors when getting a Response from a Url.
|
||||
#[error("Unable to get response for: {url}\n{error}")]
|
||||
ResponseGetError{error: reqwest::Error, url: Url},
|
||||
|
||||
/// Represents errors when converting a Response to a String.
|
||||
#[error("Unable to convert response to text: {error}")]
|
||||
ResponseConvertToTextError{error: reqwest::Error},
|
||||
|
||||
/// Represents errors when converting a Response to Bytes.
|
||||
#[error("Unable to convert response to bytes: {error}")]
|
||||
ResponseConvertToBytesError{error: reqwest::Error},
|
||||
|
||||
/// Represents errors when trying to parse a String to a Url.
|
||||
#[error("Unable to parse a valid Url from: {string_url}\n{error}")]
|
||||
UrlParseError{error: url::ParseError, string_url: String},
|
||||
|
||||
/// Represents io errors when trying to create a temporary directory.
|
||||
#[error("Unable to create temporary directory: {error}")]
|
||||
TempDirCreationError{error: std::io::Error},
|
||||
|
||||
/// Represents an error when trying to extract the html2xhtml binaries into the temporary directory.
|
||||
#[error("Unable to extract html2xhtml into the temporary directory: {error}")]
|
||||
Html2XhtmlExtractionError{error: zip_extract::ZipExtractError},
|
||||
|
||||
/// Represents an error when trying to start html2xhtml.
|
||||
#[error("Unable to start html2xhtml: {error}")]
|
||||
Html2XhtmlStartError{error: std::io::Error},
|
||||
|
||||
/// Represents an error when trying to find the book title.
|
||||
#[error("Unable to fetch the book title for: {url}")]
|
||||
BookTitleFetchError{url: Url},
|
||||
|
||||
/// Represents an error when trying to find the book author.
|
||||
#[error("Unable to fetch the book author for: {url}")]
|
||||
BookAuthorFetchError{url: Url},
|
||||
|
||||
/// Represents an error when trying to find the book cover image url.
|
||||
#[error("Unable to fetch the book cover image url: {url}")]
|
||||
BookCoverImageUrlFetchError{url: Url},
|
||||
|
||||
/// Represents an error when trying to find the chapter names and urls.
|
||||
///
|
||||
/// This typically occurs due to RoyalRoad changing their json scheme.
|
||||
#[error("Unable to fetch the chapter names and urls for: {url}")]
|
||||
BookChapterNameAndUrlFetchError{url: Url},
|
||||
|
||||
/// Represents an error when trying to isolate the chapter content.
|
||||
#[error("Unable to isolate chapter content for: {url}")]
|
||||
ChapterContentIsolationError{url: Url},
|
||||
|
||||
/// Represents an error for when the target os is unsupported.
|
||||
#[error("{os} is unsupported")]
|
||||
OsUnsupportedError{os: Oses},
|
||||
|
||||
/// Represents an error that shows the generation method is unsupported.
|
||||
#[error("This generation mode is currently unsupported")]
|
||||
GenerationUnsupportedError,
|
||||
}
|
||||
|
||||
|
||||
/// A struct that contains a vector of warnings.
|
||||
pub struct GenerationWarnings{warnings: Vec<Warning>}
|
||||
|
||||
impl GenerationWarnings {
|
||||
fn new() -> Self {
|
||||
GenerationWarnings {
|
||||
warnings: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Push a warning into this struct.
|
||||
pub fn add_warning(&mut self, warning: Warning) {
|
||||
self.warnings.push(warning);
|
||||
}
|
||||
|
||||
pub fn get_warnings(&self) -> &Vec<Warning> {
|
||||
&self.warnings
|
||||
}
|
||||
|
||||
/// Returns how many warnings have been accumulated.
|
||||
pub fn warnings_count(&self) -> usize {
|
||||
self.warnings.len()
|
||||
}
|
||||
}
|
||||
|
||||
/// An enum to represent a warning.
|
||||
#[derive(Error, Debug)]
|
||||
pub enum Warning {
|
||||
/// Warning for when no ``content-type`` header can be found in the Response headers.
|
||||
#[error("{warning_msg}")]
|
||||
MissingContentType {
|
||||
warning_msg: String,
|
||||
url: Url,
|
||||
error: ToStrError,
|
||||
},
|
||||
|
||||
/// Warning for when a temporary directory is unable to be deleted.
|
||||
#[error("{warning_msg}")]
|
||||
TempDirDeletionError {
|
||||
warning_msg: String,
|
||||
temp_directory_path: PathBuf,
|
||||
error: std::io::Error,
|
||||
},
|
||||
|
||||
/// Warning for when the program can not parse a url in an image tag.
|
||||
#[error("{warning_msg}")]
|
||||
ImageTagParseError {
|
||||
warning_msg: String,
|
||||
raw_image_tag: String,
|
||||
error: url::ParseError,
|
||||
}
|
||||
}
|
18
src/misc.rs
18
src/misc.rs
|
@ -1,4 +1,4 @@
|
|||
use std::collections::HashMap;
|
||||
use std::{collections::HashMap, fmt::Display};
|
||||
|
||||
/// An extension to ``std::collections::HashMap<K, Vec<String>>``
|
||||
pub trait HashMapExt<K> {
|
||||
|
@ -31,4 +31,20 @@ impl<K: std::cmp::Eq + std::hash::Hash + std::clone::Clone> HashMapExt<K> for Ha
|
|||
|
||||
return self;
|
||||
}
|
||||
}
|
||||
|
||||
/// A list of Oses for error handling purposes.
///
/// The enum is fieldless, so it derives `Clone`, `Copy`, `PartialEq` and `Eq` in addition to
/// `Debug`; this lets callers copy and compare values freely.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Oses {
    /// The Windows operating system.
    Windows,
    /// The Linux operating system.
    Linux,
    /// The macOS operating system.
    MacOs,
    /// Any other operating system.
    OtherUnknownOs,
}

/// Implement display for Oses.
///
/// Each variant is written as its own name (for example `Linux`), which is the same text the
/// derived `Debug` implementation produces. The names are spelled out explicitly so the
/// user-facing text does not depend on `Debug`, and `Formatter::pad` is used so width and
/// alignment flags such as `{:>10}` are honoured.
impl Display for Oses {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Exhaustive on purpose: adding a variant must force a decision about its display name.
        let name = match self {
            Oses::Windows => "Windows",
            Oses::Linux => "Linux",
            Oses::MacOs => "MacOs",
            Oses::OtherUnknownOs => "OtherUnknownOs",
        };
        f.pad(name)
    }
}
|
Loading…
Reference in a new issue