mirror of
https://github.com/Raine-gay/royal_road_archiver.git
synced 2024-11-23 15:48:41 -06:00
Formatted through rustfmt
This commit is contained in:
parent
3df3d05c34
commit
50b1b93436
5
rustfmt.toml
Normal file
5
rustfmt.toml
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
match_block_trailing_comma = true
|
||||||
|
merge_imports = true
|
||||||
|
normalize_comments = true
|
||||||
|
normalize_doc_attributes = true
|
||||||
|
use_try_shorthand = true
|
|
@ -1,4 +1,8 @@
|
||||||
use std::{env, fs, path::{Path, PathBuf}, process::exit};
|
use std::{
|
||||||
|
env, fs,
|
||||||
|
path::{Path, PathBuf},
|
||||||
|
process::exit,
|
||||||
|
};
|
||||||
|
|
||||||
use clap::{Parser, Subcommand};
|
use clap::{Parser, Subcommand};
|
||||||
use royal_road_archiver_lib::GenerationError;
|
use royal_road_archiver_lib::GenerationError;
|
||||||
|
@ -48,23 +52,33 @@ fn main() {
|
||||||
},
|
},
|
||||||
None => {
|
None => {
|
||||||
output_directory = env::current_dir().unwrap();
|
output_directory = env::current_dir().unwrap();
|
||||||
}
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
valid_directory_check(&output_directory);
|
valid_directory_check(&output_directory);
|
||||||
let book_url = valid_url_check(&cli_input.book_url.to_lowercase());
|
let book_url = valid_url_check(&cli_input.book_url.to_lowercase());
|
||||||
|
|
||||||
let result: Result<std::sync::MutexGuard<'_, royal_road_archiver_lib::GenerationWarnings>, GenerationError> = match cli_input.subcommand {
|
let result: Result<
|
||||||
Subcommands::Audiobook(audiobook_args) => royal_road_archiver_lib::generate_audiobook(audiobook_args, book_url, output_directory),
|
std::sync::MutexGuard<'_, royal_road_archiver_lib::GenerationWarnings>,
|
||||||
Subcommands::Epub(epub_args) => royal_road_archiver_lib::generate_epub(epub_args, book_url, output_directory),
|
GenerationError,
|
||||||
Subcommands::Html(html_args) => royal_road_archiver_lib::generate_html(html_args, book_url, output_directory),
|
> = match cli_input.subcommand {
|
||||||
Subcommands::Markdown(markdown_args) => royal_road_archiver_lib::generate_markdown(markdown_args, book_url, output_directory),
|
Subcommands::Audiobook(audiobook_args) => {
|
||||||
|
royal_road_archiver_lib::generate_audiobook(audiobook_args, book_url, output_directory)
|
||||||
|
},
|
||||||
|
Subcommands::Epub(epub_args) => {
|
||||||
|
royal_road_archiver_lib::generate_epub(epub_args, book_url, output_directory)
|
||||||
|
},
|
||||||
|
Subcommands::Html(html_args) => {
|
||||||
|
royal_road_archiver_lib::generate_html(html_args, book_url, output_directory)
|
||||||
|
},
|
||||||
|
Subcommands::Markdown(markdown_args) => {
|
||||||
|
royal_road_archiver_lib::generate_markdown(markdown_args, book_url, output_directory)
|
||||||
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
match result {
|
match result {
|
||||||
Ok(generation_warnings) => {
|
Ok(generation_warnings) => {
|
||||||
if !&generation_warnings.warnings_count() == 0 {
|
if !&generation_warnings.warnings_count() == 0 {
|
||||||
|
|
||||||
println!("The following warnings were generated:");
|
println!("The following warnings were generated:");
|
||||||
for warning in generation_warnings.get_warnings() {
|
for warning in generation_warnings.get_warnings() {
|
||||||
println!("\n{warning}");
|
println!("\n{warning}");
|
||||||
|
@ -73,7 +87,7 @@ fn main() {
|
||||||
},
|
},
|
||||||
Err(generation_error) => {
|
Err(generation_error) => {
|
||||||
eprintln!("{}", generation_error);
|
eprintln!("{}", generation_error);
|
||||||
}
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -88,7 +102,7 @@ fn valid_directory_check(output_directory: &Path) {
|
||||||
Err(error) => {
|
Err(error) => {
|
||||||
eprintln!("Error! Unable to create directory: {error}");
|
eprintln!("Error! Unable to create directory: {error}");
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -97,11 +111,9 @@ fn valid_directory_check(output_directory: &Path) {
|
||||||
fn valid_url_check(book_url: &str) -> Url {
|
fn valid_url_check(book_url: &str) -> Url {
|
||||||
match Url::parse(book_url) {
|
match Url::parse(book_url) {
|
||||||
Ok(book_url) => {
|
Ok(book_url) => {
|
||||||
|
|
||||||
if book_url.host_str() == Some("www.royalroad.com") {
|
if book_url.host_str() == Some("www.royalroad.com") {
|
||||||
return book_url;
|
return book_url;
|
||||||
}
|
} else {
|
||||||
else {
|
|
||||||
eprintln!("Error! Please enter a RoyalRoad URL.");
|
eprintln!("Error! Please enter a RoyalRoad URL.");
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
@ -109,6 +121,6 @@ fn valid_url_check(book_url: &str) -> Url {
|
||||||
Err(error) => {
|
Err(error) => {
|
||||||
eprintln!("Error! Unable to parse url: {book_url}\n{error}");
|
eprintln!("Error! Unable to parse url: {book_url}\n{error}");
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
23
src/book.rs
23
src/book.rs
|
@ -1,7 +1,7 @@
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
|
|
||||||
use indicatif::{ProgressBar, ProgressStyle};
|
|
||||||
use crate::{misc::HashMapExt, GenerationError};
|
use crate::{misc::HashMapExt, GenerationError};
|
||||||
|
use indicatif::{ProgressBar, ProgressStyle};
|
||||||
use scraper::Html;
|
use scraper::Html;
|
||||||
use url::Url;
|
use url::Url;
|
||||||
|
|
||||||
|
@ -38,9 +38,11 @@ pub struct Book {
|
||||||
impl Book {
|
impl Book {
|
||||||
/// Generate a new book instance with all the needed data from a given url.
|
/// Generate a new book instance with all the needed data from a given url.
|
||||||
pub fn new(book_url: Url) -> Result<Book, GenerationError> {
|
pub fn new(book_url: Url) -> Result<Book, GenerationError> {
|
||||||
let index_html = html::string_to_html_document(&http::get_response(book_url.clone())?.get_text()?);
|
let index_html =
|
||||||
|
html::string_to_html_document(&http::get_response(book_url.clone())?.get_text()?);
|
||||||
|
|
||||||
let chapter_names_and_urls = html::get_chapter_names_and_urls_from_index(&index_html, &book_url)?;
|
let chapter_names_and_urls =
|
||||||
|
html::get_chapter_names_and_urls_from_index(&index_html, &book_url)?;
|
||||||
let mut chapters: Vec<Chapter> = Vec::with_capacity(chapter_names_and_urls.len());
|
let mut chapters: Vec<Chapter> = Vec::with_capacity(chapter_names_and_urls.len());
|
||||||
|
|
||||||
let mut image_urls_and_tags: HashMap<Url, Vec<String>> = HashMap::new();
|
let mut image_urls_and_tags: HashMap<Url, Vec<String>> = HashMap::new();
|
||||||
|
@ -49,9 +51,11 @@ impl Book {
|
||||||
// Spawn a progress bar showing how many chapters have been downloaded & processed.
|
// Spawn a progress bar showing how many chapters have been downloaded & processed.
|
||||||
let progress_bar = ProgressBar::new(chapter_names_and_urls.len().try_into().unwrap());
|
let progress_bar = ProgressBar::new(chapter_names_and_urls.len().try_into().unwrap());
|
||||||
progress_bar.set_style(
|
progress_bar.set_style(
|
||||||
ProgressStyle::with_template("[{elapsed_precise}] [{wide_bar:.cyan/blue}] {percent}% ")
|
ProgressStyle::with_template(
|
||||||
.unwrap()
|
"[{elapsed_precise}] [{wide_bar:.cyan/blue}] {percent}% ",
|
||||||
.progress_chars("#>-"),
|
)
|
||||||
|
.unwrap()
|
||||||
|
.progress_chars("#>-"),
|
||||||
);
|
);
|
||||||
|
|
||||||
// Generate the chapters and add em to the book.
|
// Generate the chapters and add em to the book.
|
||||||
|
@ -59,7 +63,9 @@ impl Book {
|
||||||
let chapter = Chapter::new(&chapter_names_and_urls[i].0, &chapter_names_and_urls[i].1)?;
|
let chapter = Chapter::new(&chapter_names_and_urls[i].0, &chapter_names_and_urls[i].1)?;
|
||||||
|
|
||||||
// extract the image urls and add em to the image_urls_and_tags hashmap.
|
// extract the image urls and add em to the image_urls_and_tags hashmap.
|
||||||
image_urls_and_tags = image_urls_and_tags.join(html::extract_urls_and_img_tag(&chapter.isolated_chapter_html));
|
image_urls_and_tags = image_urls_and_tags.join(html::extract_urls_and_img_tag(
|
||||||
|
&chapter.isolated_chapter_html,
|
||||||
|
));
|
||||||
|
|
||||||
chapters.push(chapter);
|
chapters.push(chapter);
|
||||||
|
|
||||||
|
@ -109,7 +115,8 @@ pub struct Chapter {
|
||||||
impl Chapter {
|
impl Chapter {
|
||||||
fn new(chapter_name: &str, chapter_url: &str) -> Result<Self, GenerationError> {
|
fn new(chapter_name: &str, chapter_url: &str) -> Result<Self, GenerationError> {
|
||||||
let chapter_url = http::string_to_url(&chapter_url)?;
|
let chapter_url = http::string_to_url(&chapter_url)?;
|
||||||
let raw_chapter_html = html::string_to_html_document(&http::get_response(chapter_url.clone())?.get_text()?);
|
let raw_chapter_html =
|
||||||
|
html::string_to_html_document(&http::get_response(chapter_url.clone())?.get_text()?);
|
||||||
|
|
||||||
let chapter = Chapter {
|
let chapter = Chapter {
|
||||||
isolated_chapter_html: html::isolate_chapter_content(&raw_chapter_html, &chapter_url)?,
|
isolated_chapter_html: html::isolate_chapter_content(&raw_chapter_html, &chapter_url)?,
|
||||||
|
|
|
@ -8,12 +8,14 @@ use crate::{misc, GenerationError, Warning, WARNINGS};
|
||||||
/// Converts a given path to the style of the target operating system (Windows or non-Windows).
|
/// Converts a given path to the style of the target operating system (Windows or non-Windows).
|
||||||
pub fn convert_path_to_os_specific(path: PathBuf) -> PathBuf {
|
pub fn convert_path_to_os_specific(path: PathBuf) -> PathBuf {
|
||||||
// If target os is windows.
|
// If target os is windows.
|
||||||
#[cfg(target_os = "windows")] {
|
#[cfg(target_os = "windows")]
|
||||||
|
{
|
||||||
return PathBuf::from_slash_lossy(path.into_os_string());
|
return PathBuf::from_slash_lossy(path.into_os_string());
|
||||||
}
|
}
|
||||||
|
|
||||||
// If target os is not windows.
|
// If target os is not windows.
|
||||||
#[cfg(not(target_os = "windows"))] {
|
#[cfg(not(target_os = "windows"))]
|
||||||
|
{
|
||||||
return PathBuf::from_backslash_lossy(path.into_os_string());
|
return PathBuf::from_backslash_lossy(path.into_os_string());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -31,45 +33,52 @@ pub fn remove_illegal_chars(mut string: String) -> String {
|
||||||
|
|
||||||
/// Setup html2xhtml in the operating system's temp directory.
|
/// Setup html2xhtml in the operating system's temp directory.
|
||||||
pub fn setup_html2xhtml() -> Result<TempDir, GenerationError> {
|
pub fn setup_html2xhtml() -> Result<TempDir, GenerationError> {
|
||||||
#[cfg(target_os = "windows")] {
|
#[cfg(target_os = "windows")]
|
||||||
|
{
|
||||||
const HTML2XHTML: &[u8; 245025] = include_bytes!("../html2xhtml-windows.zip"); // This will not compile on windows due to this and no I don't give a shit.
|
const HTML2XHTML: &[u8; 245025] = include_bytes!("../html2xhtml-windows.zip"); // This will not compile on windows due to this and no I don't give a shit.
|
||||||
// Compile it on linux for windows like a sane person.
|
// Compile it on linux for windows like a sane person.
|
||||||
let html2xhtml_temp_dir = create_temp_dir()?;
|
let html2xhtml_temp_dir = create_temp_dir()?;
|
||||||
|
|
||||||
match zip_extract::extract(Cursor::new(HTML2XHTML), html2xhtml_temp_dir.path(), true) {
|
match zip_extract::extract(Cursor::new(HTML2XHTML), html2xhtml_temp_dir.path(), true) {
|
||||||
Ok(_) => (),
|
Ok(_) => (),
|
||||||
Err(error) => return Err(GenerationError::Html2XhtmlExtractionError {error}),
|
Err(error) => return Err(GenerationError::Html2XhtmlExtractionError { error }),
|
||||||
}
|
}
|
||||||
|
|
||||||
return Ok(html2xhtml_temp_dir);
|
return Ok(html2xhtml_temp_dir);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(target_os = "linux")] {
|
#[cfg(target_os = "linux")]
|
||||||
|
{
|
||||||
const HTML2XHTML: &[u8; 186938] = include_bytes!("../html2xhtml-linux.zip");
|
const HTML2XHTML: &[u8; 186938] = include_bytes!("../html2xhtml-linux.zip");
|
||||||
let html2xhtml_temp_dir = create_temp_dir()?;
|
let html2xhtml_temp_dir = create_temp_dir()?;
|
||||||
|
|
||||||
match zip_extract::extract(Cursor::new(HTML2XHTML), html2xhtml_temp_dir.path(), true) {
|
match zip_extract::extract(Cursor::new(HTML2XHTML), html2xhtml_temp_dir.path(), true) {
|
||||||
Ok(_) => (),
|
Ok(_) => (),
|
||||||
Err(error) => return Err(GenerationError::Html2XhtmlExtractionError {error}),
|
Err(error) => return Err(GenerationError::Html2XhtmlExtractionError { error }),
|
||||||
}
|
}
|
||||||
|
|
||||||
return Ok(html2xhtml_temp_dir);
|
return Ok(html2xhtml_temp_dir);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(target_os = "macos")] {
|
#[cfg(target_os = "macos")]
|
||||||
Err(GenerationError::OsUnsupportedError {os: misc::Oses::MacOs})
|
{
|
||||||
|
Err(GenerationError::OsUnsupportedError {
|
||||||
|
os: misc::Oses::MacOs,
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
// In the event the OS is unknown.
|
// In the event the OS is unknown.
|
||||||
#[allow(unreachable_code)]
|
#[allow(unreachable_code)]
|
||||||
Err(GenerationError::OsUnsupportedError {os: misc::Oses::OtherUnknownOs})
|
Err(GenerationError::OsUnsupportedError {
|
||||||
|
os: misc::Oses::OtherUnknownOs,
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Function to create a temporary directory.
|
/// Function to create a temporary directory.
|
||||||
fn create_temp_dir() -> Result<TempDir, GenerationError> {
|
fn create_temp_dir() -> Result<TempDir, GenerationError> {
|
||||||
match TempDir::new() {
|
match TempDir::new() {
|
||||||
Ok(temp_dir) => return Ok(temp_dir),
|
Ok(temp_dir) => return Ok(temp_dir),
|
||||||
Err(error) => return Err(GenerationError::TempDirCreationError {error}),
|
Err(error) => return Err(GenerationError::TempDirCreationError { error }),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -83,9 +92,9 @@ pub fn delete_temp_dir(temp_dir: TempDir) {
|
||||||
let warning = Warning::TempDirDeletionError {
|
let warning = Warning::TempDirDeletionError {
|
||||||
warning_msg: "Unable to close and delete temp directory".to_string(),
|
warning_msg: "Unable to close and delete temp directory".to_string(),
|
||||||
temp_directory_path: temp_dir_path,
|
temp_directory_path: temp_dir_path,
|
||||||
error: warning
|
error: warning,
|
||||||
};
|
};
|
||||||
WARNINGS.lock().unwrap().add_warning(warning);
|
WARNINGS.lock().unwrap().add_warning(warning);
|
||||||
}
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
95
src/html.rs
95
src/html.rs
|
@ -1,4 +1,8 @@
|
||||||
use std::{collections::HashMap, io::Write, process::{Command, Stdio}};
|
use std::{
|
||||||
|
collections::HashMap,
|
||||||
|
io::Write,
|
||||||
|
process::{Command, Stdio},
|
||||||
|
};
|
||||||
|
|
||||||
use regex::Regex;
|
use regex::Regex;
|
||||||
use scraper::{Html, Selector};
|
use scraper::{Html, Selector};
|
||||||
|
@ -20,21 +24,23 @@ pub fn string_to_html_fragment(fragment_string: &str) -> Html {
|
||||||
/// Get the book's title from the index.
|
/// Get the book's title from the index.
|
||||||
pub fn get_title_from_index(index_html: &Html, book_url: &Url) -> Result<String, GenerationError> {
|
pub fn get_title_from_index(index_html: &Html, book_url: &Url) -> Result<String, GenerationError> {
|
||||||
let selector = Selector::parse("meta").unwrap(); // Build a selector that finds the 'meta' html tag
|
let selector = Selector::parse("meta").unwrap(); // Build a selector that finds the 'meta' html tag
|
||||||
for element in index_html.select(&selector) {
|
for element in index_html.select(&selector) {
|
||||||
// Loop through all meta tags in the html document.
|
// Loop through all meta tags in the html document.
|
||||||
match element.value().attr("name") {
|
match element.value().attr("name") {
|
||||||
// Check if the meta tag contains attribute: "name"
|
// Check if the meta tag contains attribute: "name"
|
||||||
None => continue,
|
None => continue,
|
||||||
Some(x) => {
|
Some(x) => {
|
||||||
if x == "twitter:title" {
|
if x == "twitter:title" {
|
||||||
// If it does contain attribute "name", check if the content of that attribute is "twitter:title"
|
// If it does contain attribute "name", check if the content of that attribute is "twitter:title"
|
||||||
return Ok(element.value().attr("content").unwrap().to_owned());
|
return Ok(element.value().attr("content").unwrap().to_owned());
|
||||||
// If it is, extract the data from the content attribute.
|
// If it is, extract the data from the content attribute.
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
},
|
||||||
}
|
}
|
||||||
Err(GenerationError::BookTitleFetchError{url: book_url.clone()})
|
}
|
||||||
|
Err(GenerationError::BookTitleFetchError {
|
||||||
|
url: book_url.clone(),
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get the book's author from index
|
/// Get the book's author from index
|
||||||
|
@ -47,14 +53,19 @@ pub fn get_author_from_index(index_html: &Html, book_url: &Url) -> Result<String
|
||||||
if x == "books:author" {
|
if x == "books:author" {
|
||||||
return Ok(element.value().attr("content").unwrap().to_owned());
|
return Ok(element.value().attr("content").unwrap().to_owned());
|
||||||
}
|
}
|
||||||
}
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Err(GenerationError::BookAuthorFetchError{url: book_url.clone()})
|
Err(GenerationError::BookAuthorFetchError {
|
||||||
|
url: book_url.clone(),
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get the book's cover image url from the index
|
/// Get the book's cover image url from the index
|
||||||
pub fn get_cover_image_url_from_index(index_html: &Html, book_url: &Url) -> Result<Url, GenerationError> {
|
pub fn get_cover_image_url_from_index(
|
||||||
|
index_html: &Html,
|
||||||
|
book_url: &Url,
|
||||||
|
) -> Result<Url, GenerationError> {
|
||||||
let selector = Selector::parse("meta").unwrap();
|
let selector = Selector::parse("meta").unwrap();
|
||||||
for element in index_html.select(&selector) {
|
for element in index_html.select(&selector) {
|
||||||
match element.value().attr("property") {
|
match element.value().attr("property") {
|
||||||
|
@ -63,16 +74,21 @@ pub fn get_cover_image_url_from_index(index_html: &Html, book_url: &Url) -> Resu
|
||||||
if x == "og:image" {
|
if x == "og:image" {
|
||||||
return http::string_to_url(element.value().attr("content").unwrap());
|
return http::string_to_url(element.value().attr("content").unwrap());
|
||||||
}
|
}
|
||||||
}
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Err(GenerationError::BookCoverImageUrlFetchError{url: book_url.clone()})
|
Err(GenerationError::BookCoverImageUrlFetchError {
|
||||||
|
url: book_url.clone(),
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Gets the chapter names and urls from the index.
|
/// Gets the chapter names and urls from the index.
|
||||||
///
|
///
|
||||||
/// This gets stored in a vector of tuples, where element 0 of each tuple is the chapter name and element 1 is the url.
|
/// This gets stored in a vector of tuples, where element 0 of each tuple is the chapter name and element 1 is the url.
|
||||||
pub fn get_chapter_names_and_urls_from_index(index_html: &Html, book_url: &Url) -> Result<Vec<(String, String)>, GenerationError> {
|
pub fn get_chapter_names_and_urls_from_index(
|
||||||
|
index_html: &Html,
|
||||||
|
book_url: &Url,
|
||||||
|
) -> Result<Vec<(String, String)>, GenerationError> {
|
||||||
// I won't lie. I have almost 0 idea what a bunch of this shit does since it's highly specific to RoyalRoad.
|
// I won't lie. I have almost 0 idea what a bunch of this shit does since it's highly specific to RoyalRoad.
|
||||||
// I've commented in the gist of it, but we have no memory of actually writing this function.
|
// I've commented in the gist of it, but we have no memory of actually writing this function.
|
||||||
|
|
||||||
|
@ -89,7 +105,9 @@ pub fn get_chapter_names_and_urls_from_index(index_html: &Html, book_url: &Url)
|
||||||
}
|
}
|
||||||
// Return an error if unable to find the needed json data. That probably means Royal Road has changed their code.
|
// Return an error if unable to find the needed json data. That probably means Royal Road has changed their code.
|
||||||
if raw_json_data.is_empty() {
|
if raw_json_data.is_empty() {
|
||||||
return Err(GenerationError::BookChapterNameAndUrlFetchError { url: book_url.clone()});
|
return Err(GenerationError::BookChapterNameAndUrlFetchError {
|
||||||
|
url: book_url.clone(),
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
// I have absolutely no idea what this regex does; but it's probably important.
|
// I have absolutely no idea what this regex does; but it's probably important.
|
||||||
|
@ -122,7 +140,10 @@ pub fn get_chapter_names_and_urls_from_index(index_html: &Html, book_url: &Url)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Isolate chapter content from the rest of the shit on the page.
|
/// Isolate chapter content from the rest of the shit on the page.
|
||||||
pub fn isolate_chapter_content(raw_chapter_html: &Html, chapter_url: &Url) -> Result<Html, GenerationError> {
|
pub fn isolate_chapter_content(
|
||||||
|
raw_chapter_html: &Html,
|
||||||
|
chapter_url: &Url,
|
||||||
|
) -> Result<Html, GenerationError> {
|
||||||
let page_html = Html::parse_document(&raw_chapter_html.html());
|
let page_html = Html::parse_document(&raw_chapter_html.html());
|
||||||
|
|
||||||
let selector = Selector::parse("div").unwrap();
|
let selector = Selector::parse("div").unwrap();
|
||||||
|
@ -133,10 +154,12 @@ pub fn isolate_chapter_content(raw_chapter_html: &Html, chapter_url: &Url) -> Re
|
||||||
if x == "chapter-inner chapter-content" {
|
if x == "chapter-inner chapter-content" {
|
||||||
return Ok(string_to_html_fragment(&element.inner_html()));
|
return Ok(string_to_html_fragment(&element.inner_html()));
|
||||||
}
|
}
|
||||||
}
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Err(GenerationError::ChapterContentIsolationError{url: chapter_url.clone()})
|
Err(GenerationError::ChapterContentIsolationError {
|
||||||
|
url: chapter_url.clone(),
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Remove all img tags from the html fragment.
|
/// Remove all img tags from the html fragment.
|
||||||
|
@ -169,7 +192,9 @@ pub fn extract_urls_and_img_tag(chapter_html: &Html) -> HashMap<Url, Vec<String>
|
||||||
let url = element.attr("src");
|
let url = element.attr("src");
|
||||||
let image_tag = element.html();
|
let image_tag = element.html();
|
||||||
|
|
||||||
if url.is_none() { continue; }
|
if url.is_none() {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
let url = match Url::parse(url.unwrap()) {
|
let url = match Url::parse(url.unwrap()) {
|
||||||
Ok(url) => url,
|
Ok(url) => url,
|
||||||
Err(warning) => {
|
Err(warning) => {
|
||||||
|
@ -199,16 +224,19 @@ pub fn replace_img_src(img_tag: String, new_src: String) -> String {
|
||||||
let selector = Selector::parse("img").unwrap();
|
let selector = Selector::parse("img").unwrap();
|
||||||
let element = img_tag.select(&selector).next().unwrap();
|
let element = img_tag.select(&selector).next().unwrap();
|
||||||
|
|
||||||
|
|
||||||
if element.attr("src").is_some() {
|
if element.attr("src").is_some() {
|
||||||
let image_tag = element.html();
|
let image_tag = element.html();
|
||||||
|
|
||||||
let src_match_regex = Regex::new(r#"(src=["'].*["'])"#).unwrap();
|
let src_match_regex = Regex::new(r#"(src=["'].*["'])"#).unwrap();
|
||||||
let src_attr = src_match_regex.captures(&image_tag).unwrap().get(0).map(|m| m.as_str()).unwrap();
|
let src_attr = src_match_regex
|
||||||
|
.captures(&image_tag)
|
||||||
|
.unwrap()
|
||||||
|
.get(0)
|
||||||
|
.map(|m| m.as_str())
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
return image_tag.replace(src_attr, &format!(r#"src="{new_src}""#));
|
return image_tag.replace(src_attr, &format!(r#"src="{new_src}""#));
|
||||||
}
|
} else {
|
||||||
else {
|
|
||||||
return element.html();
|
return element.html();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -234,11 +262,16 @@ pub fn html_to_xhtml(html: Html, html2xhtml_dir: &TempDir) -> Result<String, Gen
|
||||||
.spawn()
|
.spawn()
|
||||||
{
|
{
|
||||||
Ok(child) => child,
|
Ok(child) => child,
|
||||||
Err(error) => return Err(GenerationError::Html2XhtmlStartError{error}),
|
Err(error) => return Err(GenerationError::Html2XhtmlStartError { error }),
|
||||||
};
|
};
|
||||||
|
|
||||||
// Write the html to the stdin, then wait for xhtml to be outputted to the stdout.
|
// Write the html to the stdin, then wait for xhtml to be outputted to the stdout.
|
||||||
html2xhtml.stdin.as_mut().unwrap().write_all(html.as_bytes()).unwrap();
|
html2xhtml
|
||||||
|
.stdin
|
||||||
|
.as_mut()
|
||||||
|
.unwrap()
|
||||||
|
.write_all(html.as_bytes())
|
||||||
|
.unwrap();
|
||||||
let html2xhtml_output = html2xhtml.wait_with_output().unwrap();
|
let html2xhtml_output = html2xhtml.wait_with_output().unwrap();
|
||||||
|
|
||||||
// Generate a lossy string from the stdout.
|
// Generate a lossy string from the stdout.
|
||||||
|
|
27
src/http.rs
27
src/http.rs
|
@ -21,15 +21,15 @@ impl HttpResponse {
|
||||||
pub fn get_text(self) -> Result<String, GenerationError> {
|
pub fn get_text(self) -> Result<String, GenerationError> {
|
||||||
match self.response.text() {
|
match self.response.text() {
|
||||||
Ok(response_text) => Ok(response_text),
|
Ok(response_text) => Ok(response_text),
|
||||||
Err(error) => Err(GenerationError::ResponseConvertToTextError {error}),
|
Err(error) => Err(GenerationError::ResponseConvertToTextError { error }),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Attempt to convert the response to bytes. Used for images. Returns an error if it fails.
|
/// Attempt to convert the response to bytes. Used for images. Returns an error if it fails.
|
||||||
pub fn get_bytes(self) -> Result<bytes::Bytes, GenerationError>{
|
pub fn get_bytes(self) -> Result<bytes::Bytes, GenerationError> {
|
||||||
match self.response.bytes() {
|
match self.response.bytes() {
|
||||||
Ok(response_bytes) => Ok(response_bytes),
|
Ok(response_bytes) => Ok(response_bytes),
|
||||||
Err(error) => Err(GenerationError::ResponseConvertToBytesError {error}),
|
Err(error) => Err(GenerationError::ResponseConvertToBytesError { error }),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -39,10 +39,10 @@ impl HttpResponse {
|
||||||
pub fn get_content_type_and_file_extension(&self) -> (String, String) {
|
pub fn get_content_type_and_file_extension(&self) -> (String, String) {
|
||||||
// A hashmap to convert mime-types to file extensions.
|
// A hashmap to convert mime-types to file extensions.
|
||||||
let mime_to_file_extension: HashMap<&str, &str> = HashMap::from([
|
let mime_to_file_extension: HashMap<&str, &str> = HashMap::from([
|
||||||
("image/png", "png"),
|
("image/png", "png"),
|
||||||
("image/webp", "webp"),
|
("image/webp", "webp"),
|
||||||
("image/jpeg", "jpeg"),
|
("image/jpeg", "jpeg"),
|
||||||
("image/jpg", "jpg"),
|
("image/jpg", "jpg"),
|
||||||
]);
|
]);
|
||||||
|
|
||||||
let content_type = match self.get_headers()["content-type"].to_str() {
|
let content_type = match self.get_headers()["content-type"].to_str() {
|
||||||
|
@ -56,13 +56,15 @@ impl HttpResponse {
|
||||||
WARNINGS.lock().unwrap().add_warning(warning);
|
WARNINGS.lock().unwrap().add_warning(warning);
|
||||||
|
|
||||||
return (String::with_capacity(0), String::with_capacity(0));
|
return (String::with_capacity(0), String::with_capacity(0));
|
||||||
}
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
if mime_to_file_extension.contains_key(content_type) {
|
if mime_to_file_extension.contains_key(content_type) {
|
||||||
return (content_type.to_string(), mime_to_file_extension[content_type].to_string());
|
return (
|
||||||
}
|
content_type.to_string(),
|
||||||
else {
|
mime_to_file_extension[content_type].to_string(),
|
||||||
|
);
|
||||||
|
} else {
|
||||||
return (content_type.to_string(), String::with_capacity(0));
|
return (content_type.to_string(), String::with_capacity(0));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -74,7 +76,7 @@ pub fn get_response(url: Url) -> Result<HttpResponse, GenerationError> {
|
||||||
|
|
||||||
match response_result {
|
match response_result {
|
||||||
Ok(response) => Ok(HttpResponse { url, response }),
|
Ok(response) => Ok(HttpResponse { url, response }),
|
||||||
Err(error) => return Err(GenerationError::ResponseGetError {error, url}),
|
Err(error) => return Err(GenerationError::ResponseGetError { error, url }),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -82,6 +84,9 @@ pub fn get_response(url: Url) -> Result<HttpResponse, GenerationError> {
|
||||||
pub fn string_to_url(url: &str) -> Result<Url, GenerationError> {
|
pub fn string_to_url(url: &str) -> Result<Url, GenerationError> {
|
||||||
match Url::parse(url) {
|
match Url::parse(url) {
|
||||||
Ok(url) => Ok(url),
|
Ok(url) => Ok(url),
|
||||||
Err(error) => Err(GenerationError::UrlParseError {error, string_url: url.to_string()}),
|
Err(error) => Err(GenerationError::UrlParseError {
|
||||||
|
error,
|
||||||
|
string_url: url.to_string(),
|
||||||
|
}),
|
||||||
}
|
}
|
||||||
}
|
}
|
233
src/library.rs
233
src/library.rs
|
@ -1,4 +1,11 @@
|
||||||
use std::{collections::HashMap, fs::OpenOptions, io::Write, path::PathBuf, process::exit, sync::{Mutex, MutexGuard}};
|
use std::{
|
||||||
|
collections::HashMap,
|
||||||
|
fs::OpenOptions,
|
||||||
|
io::Write,
|
||||||
|
path::PathBuf,
|
||||||
|
process::exit,
|
||||||
|
sync::{Mutex, MutexGuard},
|
||||||
|
};
|
||||||
|
|
||||||
use bytes::Buf;
|
use bytes::Buf;
|
||||||
use chrono::prelude::Local;
|
use chrono::prelude::Local;
|
||||||
|
@ -6,8 +13,8 @@ use clap::Args;
|
||||||
use epub_builder::{EpubBuilder, EpubContent, ReferenceType, ZipLibrary};
|
use epub_builder::{EpubBuilder, EpubContent, ReferenceType, ZipLibrary};
|
||||||
use file_system_crap::convert_path_to_os_specific;
|
use file_system_crap::convert_path_to_os_specific;
|
||||||
use html::{html_to_xhtml, remove_image_tags, string_to_html_fragment};
|
use html::{html_to_xhtml, remove_image_tags, string_to_html_fragment};
|
||||||
use lazy_static::lazy_static;
|
|
||||||
use indicatif::{ProgressBar, ProgressStyle};
|
use indicatif::{ProgressBar, ProgressStyle};
|
||||||
|
use lazy_static::lazy_static;
|
||||||
use misc::Oses;
|
use misc::Oses;
|
||||||
use reqwest::header::ToStrError;
|
use reqwest::header::ToStrError;
|
||||||
use thiserror::Error;
|
use thiserror::Error;
|
||||||
|
@ -24,7 +31,7 @@ mod misc;
|
||||||
#[derive(Args, Debug)]
|
#[derive(Args, Debug)]
|
||||||
pub struct AudiobookArgs {
|
pub struct AudiobookArgs {
|
||||||
/// Disable the generation of chapter titles in the audio file. Useful to avoid chapter titles appearing twice.
|
/// Disable the generation of chapter titles in the audio file. Useful to avoid chapter titles appearing twice.
|
||||||
#[arg(short='c', long)]
|
#[arg(short = 'c', long)]
|
||||||
pub no_chapter_titles: bool,
|
pub no_chapter_titles: bool,
|
||||||
|
|
||||||
/// Split the novel into multiple audio files by chapter.
|
/// Split the novel into multiple audio files by chapter.
|
||||||
|
@ -43,15 +50,13 @@ pub struct EpubArgs {
|
||||||
|
|
||||||
/// struct that corresponds to arguments for Html generation.
|
/// struct that corresponds to arguments for Html generation.
|
||||||
#[derive(Args, Debug)]
|
#[derive(Args, Debug)]
|
||||||
pub struct HtmlArgs {
|
pub struct HtmlArgs {}
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/// struct that corresponds to arguments for Markdown generation.
|
/// struct that corresponds to arguments for Markdown generation.
|
||||||
#[derive(Args, Debug)]
|
#[derive(Args, Debug)]
|
||||||
pub struct MarkdownArgs {
|
pub struct MarkdownArgs {
|
||||||
/// Disable the generation of chapter titles. Useful to avoid chapter titles appearing twice.
|
/// Disable the generation of chapter titles. Useful to avoid chapter titles appearing twice.
|
||||||
#[arg(short='c', long)]
|
#[arg(short = 'c', long)]
|
||||||
pub no_chapter_titles: bool,
|
pub no_chapter_titles: bool,
|
||||||
|
|
||||||
/// Disables the inclusion of html image tags in the markdown.
|
/// Disables the inclusion of html image tags in the markdown.
|
||||||
|
@ -67,7 +72,11 @@ lazy_static! {
|
||||||
///
|
///
|
||||||
/// This function DOES NOT do any error checking on the Url or output directory & WILL panic if they are wrong.
|
/// This function DOES NOT do any error checking on the Url or output directory & WILL panic if they are wrong.
|
||||||
/// Make sure the Url is valid and the output directory is writable BEFORE passing them to this.
|
/// Make sure the Url is valid and the output directory is writable BEFORE passing them to this.
|
||||||
pub fn generate_audiobook(audiobook_args: AudiobookArgs, book_url: Url, output_directory: PathBuf) -> Result<MutexGuard<'static, GenerationWarnings>, GenerationError> {
|
pub fn generate_audiobook(
|
||||||
|
audiobook_args: AudiobookArgs,
|
||||||
|
book_url: Url,
|
||||||
|
output_directory: PathBuf,
|
||||||
|
) -> Result<MutexGuard<'static, GenerationWarnings>, GenerationError> {
|
||||||
return Err(GenerationError::GenerationUnsupportedError);
|
return Err(GenerationError::GenerationUnsupportedError);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -75,14 +84,20 @@ pub fn generate_audiobook(audiobook_args: AudiobookArgs, book_url: Url, output_d
|
||||||
///
|
///
|
||||||
/// This function DOES NOT do any error checking on the Url or output directory & WILL panic if they are wrong.
|
/// This function DOES NOT do any error checking on the Url or output directory & WILL panic if they are wrong.
|
||||||
/// Make sure the Url is valid and the output directory is writable BEFORE passing them to this.
|
/// Make sure the Url is valid and the output directory is writable BEFORE passing them to this.
|
||||||
pub fn generate_epub(epub_args: EpubArgs, book_url: Url, output_directory: PathBuf) -> Result<MutexGuard<'static, GenerationWarnings>, GenerationError> {
|
pub fn generate_epub(
|
||||||
|
epub_args: EpubArgs,
|
||||||
|
book_url: Url,
|
||||||
|
output_directory: PathBuf,
|
||||||
|
) -> Result<MutexGuard<'static, GenerationWarnings>, GenerationError> {
|
||||||
let book = book::Book::new(book_url)?;
|
let book = book::Book::new(book_url)?;
|
||||||
|
|
||||||
// Initialize the epub builder.
|
// Initialize the epub builder.
|
||||||
let mut epub_builder = EpubBuilder::new(ZipLibrary::new().unwrap()).unwrap();
|
let mut epub_builder = EpubBuilder::new(ZipLibrary::new().unwrap()).unwrap();
|
||||||
|
|
||||||
// Add author and title metadata.
|
// Add author and title metadata.
|
||||||
epub_builder.stylesheet(constants::EPUB_CSS.as_bytes()).unwrap(); // Use the epub_css in the constants.rs file.
|
epub_builder
|
||||||
|
.stylesheet(constants::EPUB_CSS.as_bytes())
|
||||||
|
.unwrap(); // Use the epub_css in the constants.rs file.
|
||||||
epub_builder
|
epub_builder
|
||||||
.metadata("author", &book.author)
|
.metadata("author", &book.author)
|
||||||
.expect("Unable to add author metadata");
|
.expect("Unable to add author metadata");
|
||||||
|
@ -93,10 +108,13 @@ pub fn generate_epub(epub_args: EpubArgs, book_url: Url, output_directory: PathB
|
||||||
// Download the cover image & add it to the epub.
|
// Download the cover image & add it to the epub.
|
||||||
let cover_image = http::get_response(book.cover_image_url)?;
|
let cover_image = http::get_response(book.cover_image_url)?;
|
||||||
let (cover_mime_type, cover_file_extension) = cover_image.get_content_type_and_file_extension();
|
let (cover_mime_type, cover_file_extension) = cover_image.get_content_type_and_file_extension();
|
||||||
epub_builder.add_cover_image(
|
epub_builder
|
||||||
format!("cover.{cover_file_extension}"),
|
.add_cover_image(
|
||||||
cover_image.get_bytes()?.to_vec().as_slice(),
|
format!("cover.{cover_file_extension}"),
|
||||||
cover_mime_type).expect("Error! Unable to add cover image.");
|
cover_image.get_bytes()?.to_vec().as_slice(),
|
||||||
|
cover_mime_type,
|
||||||
|
)
|
||||||
|
.expect("Error! Unable to add cover image.");
|
||||||
|
|
||||||
// Generate the cover xhtml.
|
// Generate the cover xhtml.
|
||||||
let cover_xhtml = format!(
|
let cover_xhtml = format!(
|
||||||
|
@ -111,14 +129,20 @@ pub fn generate_epub(epub_args: EpubArgs, book_url: Url, output_directory: PathB
|
||||||
book.author,
|
book.author,
|
||||||
chrono::Local::now().to_rfc3339_opts(chrono::SecondsFormat::Secs, false)
|
chrono::Local::now().to_rfc3339_opts(chrono::SecondsFormat::Secs, false)
|
||||||
);
|
);
|
||||||
let cover_xhtml = format!("{0}{cover_xhtml}{1}", constants::EPUB_XML_HEAD, constants::EPUB_XML_TAIL);
|
let cover_xhtml = format!(
|
||||||
|
"{0}{cover_xhtml}{1}",
|
||||||
|
constants::EPUB_XML_HEAD,
|
||||||
|
constants::EPUB_XML_TAIL
|
||||||
|
);
|
||||||
|
|
||||||
// Add the cover xhtml to the epub.
|
// Add the cover xhtml to the epub.
|
||||||
epub_builder.add_content(
|
epub_builder
|
||||||
EpubContent::new("title.xhtml", cover_xhtml.as_bytes())
|
.add_content(
|
||||||
.title("Cover")
|
EpubContent::new("title.xhtml", cover_xhtml.as_bytes())
|
||||||
.reftype(ReferenceType::Cover),
|
.title("Cover")
|
||||||
).expect("Error! Unable to add cover");
|
.reftype(ReferenceType::Cover),
|
||||||
|
)
|
||||||
|
.expect("Error! Unable to add cover");
|
||||||
|
|
||||||
// Add a table of contents after the cover page.
|
// Add a table of contents after the cover page.
|
||||||
epub_builder.inline_toc();
|
epub_builder.inline_toc();
|
||||||
|
@ -133,27 +157,37 @@ pub fn generate_epub(epub_args: EpubArgs, book_url: Url, output_directory: PathB
|
||||||
|
|
||||||
println!("\nDownloading and processing images:");
|
println!("\nDownloading and processing images:");
|
||||||
// Spawn a progress bar showing how many images have been downloaded & processed.
|
// Spawn a progress bar showing how many images have been downloaded & processed.
|
||||||
let progress_bar = ProgressBar::new(book.image_urls_and_tags.keys().len().try_into().unwrap());
|
let progress_bar =
|
||||||
|
ProgressBar::new(book.image_urls_and_tags.keys().len().try_into().unwrap());
|
||||||
progress_bar.set_style(
|
progress_bar.set_style(
|
||||||
ProgressStyle::with_template("[{elapsed_precise}] [{wide_bar:.cyan/blue}] {percent}% ")
|
ProgressStyle::with_template(
|
||||||
.unwrap()
|
"[{elapsed_precise}] [{wide_bar:.cyan/blue}] {percent}% ",
|
||||||
.progress_chars("#>-"),
|
)
|
||||||
|
.unwrap()
|
||||||
|
.progress_chars("#>-"),
|
||||||
);
|
);
|
||||||
|
|
||||||
let mut i: usize = 0;
|
let mut i: usize = 0;
|
||||||
for image_url in book.image_urls_and_tags.keys() {
|
for image_url in book.image_urls_and_tags.keys() {
|
||||||
let image = http::get_response(image_url.clone())?;
|
let image = http::get_response(image_url.clone())?;
|
||||||
let (image_mime_type, image_file_extension) = image.get_content_type_and_file_extension();
|
let (image_mime_type, image_file_extension) =
|
||||||
epub_builder.add_resource(
|
image.get_content_type_and_file_extension();
|
||||||
format!("image_{i}.{image_file_extension}"),
|
epub_builder
|
||||||
image.get_bytes()?.to_vec().reader(),
|
.add_resource(
|
||||||
image_mime_type).expect("Error! Unable to add content image");
|
format!("image_{i}.{image_file_extension}"),
|
||||||
|
image.get_bytes()?.to_vec().reader(),
|
||||||
|
image_mime_type,
|
||||||
|
)
|
||||||
|
.expect("Error! Unable to add content image");
|
||||||
|
|
||||||
for image_tag in book.image_urls_and_tags[image_url].clone() {
|
for image_tag in book.image_urls_and_tags[image_url].clone() {
|
||||||
old_tags_new_tags.insert(image_tag.clone(), html::replace_img_src(image_tag, format!("image_{i}.{image_file_extension}")));
|
old_tags_new_tags.insert(
|
||||||
|
image_tag.clone(),
|
||||||
|
html::replace_img_src(image_tag, format!("image_{i}.{image_file_extension}")),
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
i+=1;
|
i += 1;
|
||||||
progress_bar.inc(1);
|
progress_bar.inc(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -162,41 +196,66 @@ pub fn generate_epub(epub_args: EpubArgs, book_url: Url, output_directory: PathB
|
||||||
|
|
||||||
// Convert the html to xhtml and add the xhtml to the epub for each chapter.
|
// Convert the html to xhtml and add the xhtml to the epub for each chapter.
|
||||||
for (i, chapter) in book.chapters.iter().enumerate() {
|
for (i, chapter) in book.chapters.iter().enumerate() {
|
||||||
|
|
||||||
let xhtml: String;
|
let xhtml: String;
|
||||||
if epub_args.no_images {
|
if epub_args.no_images {
|
||||||
xhtml = html_to_xhtml(string_to_html_fragment(&remove_image_tags(&chapter.isolated_chapter_html)), &html2xhtml_temp_dir)?
|
xhtml = html_to_xhtml(
|
||||||
}
|
string_to_html_fragment(&remove_image_tags(&chapter.isolated_chapter_html)),
|
||||||
else {
|
&html2xhtml_temp_dir,
|
||||||
|
)?
|
||||||
|
} else {
|
||||||
let mut replaced_html = chapter.isolated_chapter_html.html();
|
let mut replaced_html = chapter.isolated_chapter_html.html();
|
||||||
for old_img_tag in old_tags_new_tags.keys() {
|
for old_img_tag in old_tags_new_tags.keys() {
|
||||||
replaced_html = replaced_html.replace(&old_img_tag.clone(), &old_tags_new_tags[old_img_tag]);
|
replaced_html =
|
||||||
|
replaced_html.replace(&old_img_tag.clone(), &old_tags_new_tags[old_img_tag]);
|
||||||
}
|
}
|
||||||
|
|
||||||
xhtml = html_to_xhtml(string_to_html_fragment(&replaced_html), &html2xhtml_temp_dir)?;
|
xhtml = html_to_xhtml(
|
||||||
|
string_to_html_fragment(&replaced_html),
|
||||||
|
&html2xhtml_temp_dir,
|
||||||
|
)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
epub_builder.add_content(EpubContent::new(format!("chapter_{}.xhtml", i+1), xhtml.as_bytes())
|
epub_builder
|
||||||
.title(chapter.chapter_name.clone())
|
.add_content(
|
||||||
.reftype(ReferenceType::Text)).expect("Error! Unable to add chapter");
|
EpubContent::new(format!("chapter_{}.xhtml", i + 1), xhtml.as_bytes())
|
||||||
|
.title(chapter.chapter_name.clone())
|
||||||
|
.reftype(ReferenceType::Text),
|
||||||
|
)
|
||||||
|
.expect("Error! Unable to add chapter");
|
||||||
}
|
}
|
||||||
|
|
||||||
// Generate the finished epub data as a byte vector.
|
// Generate the finished epub data as a byte vector.
|
||||||
let mut finished_epub: Vec<u8> = vec![];
|
let mut finished_epub: Vec<u8> = vec![];
|
||||||
epub_builder.generate(&mut finished_epub).expect("Unable to generate epub data");
|
epub_builder
|
||||||
|
.generate(&mut finished_epub)
|
||||||
|
.expect("Unable to generate epub data");
|
||||||
|
|
||||||
// Create the epub file and write the finished epub data to it.
|
// Create the epub file and write the finished epub data to it.
|
||||||
let output_path = convert_path_to_os_specific(output_directory.join(format!("{0}.epub", book.file_name_title)));
|
let output_path = convert_path_to_os_specific(
|
||||||
let mut output_file = match OpenOptions::new().write(true).create_new(true).open(&output_path) {
|
output_directory.join(format!("{0}.epub", book.file_name_title)),
|
||||||
|
);
|
||||||
|
let mut output_file = match OpenOptions::new()
|
||||||
|
.write(true)
|
||||||
|
.create_new(true)
|
||||||
|
.open(&output_path)
|
||||||
|
{
|
||||||
Ok(output_file) => output_file,
|
Ok(output_file) => output_file,
|
||||||
Err(error) => {
|
Err(error) => {
|
||||||
eprintln!("Error! Unable to create: {0}\n{error}", output_path.to_string_lossy());
|
eprintln!(
|
||||||
|
"Error! Unable to create: {0}\n{error}",
|
||||||
|
output_path.to_string_lossy()
|
||||||
|
);
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
output_file.write_all(finished_epub.as_slice())
|
output_file.write_all(finished_epub.as_slice()).expect(
|
||||||
.expect(format!("Unable to write finished epub data to {0}", output_path.to_string_lossy()).as_str());
|
format!(
|
||||||
|
"Unable to write finished epub data to {0}",
|
||||||
|
output_path.to_string_lossy()
|
||||||
|
)
|
||||||
|
.as_str(),
|
||||||
|
);
|
||||||
|
|
||||||
// Delete the html2xhtml temp directory. It's good to clean up after yourself.
|
// Delete the html2xhtml temp directory. It's good to clean up after yourself.
|
||||||
file_system_crap::delete_temp_dir(html2xhtml_temp_dir);
|
file_system_crap::delete_temp_dir(html2xhtml_temp_dir);
|
||||||
|
@ -208,7 +267,11 @@ pub fn generate_epub(epub_args: EpubArgs, book_url: Url, output_directory: PathB
|
||||||
///
|
///
|
||||||
/// This function DOES NOT do any error checking on the Url or output directory & WILL panic if they are wrong.
|
/// This function DOES NOT do any error checking on the Url or output directory & WILL panic if they are wrong.
|
||||||
/// Make sure the Url is valid and the output directory is writable BEFORE passing them to this.
|
/// Make sure the Url is valid and the output directory is writable BEFORE passing them to this.
|
||||||
pub fn generate_html(html_args: HtmlArgs, book_url: Url, output_directory: PathBuf) -> Result<MutexGuard<'static, GenerationWarnings>, GenerationError> {
|
pub fn generate_html(
|
||||||
|
html_args: HtmlArgs,
|
||||||
|
book_url: Url,
|
||||||
|
output_directory: PathBuf,
|
||||||
|
) -> Result<MutexGuard<'static, GenerationWarnings>, GenerationError> {
|
||||||
return Err(GenerationError::GenerationUnsupportedError);
|
return Err(GenerationError::GenerationUnsupportedError);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -216,17 +279,29 @@ pub fn generate_html(html_args: HtmlArgs, book_url: Url, output_directory: PathB
|
||||||
///
|
///
|
||||||
/// This function DOES NOT do any error checking on the Url or output directory & WILL panic if they are wrong.
|
/// This function DOES NOT do any error checking on the Url or output directory & WILL panic if they are wrong.
|
||||||
/// Make sure the Url is valid and the output directory is writable BEFORE passing them to this.
|
/// Make sure the Url is valid and the output directory is writable BEFORE passing them to this.
|
||||||
pub fn generate_markdown(markdown_args: MarkdownArgs, book_url: Url, output_directory: PathBuf) -> Result<MutexGuard<'static, GenerationWarnings>, GenerationError> {
|
pub fn generate_markdown(
|
||||||
|
markdown_args: MarkdownArgs,
|
||||||
|
book_url: Url,
|
||||||
|
output_directory: PathBuf,
|
||||||
|
) -> Result<MutexGuard<'static, GenerationWarnings>, GenerationError> {
|
||||||
let book = book::Book::new(book_url)?;
|
let book = book::Book::new(book_url)?;
|
||||||
|
|
||||||
let output_path = convert_path_to_os_specific(output_directory.join(format!("{0}.md", book.file_name_title)));
|
let output_path =
|
||||||
|
convert_path_to_os_specific(output_directory.join(format!("{0}.md", book.file_name_title)));
|
||||||
|
|
||||||
// Create the md file. This will crash if it already exists or can not be created.
|
// Create the md file. This will crash if it already exists or can not be created.
|
||||||
let mut output_file = match OpenOptions::new().write(true).create_new(true).open(&output_path) {
|
let mut output_file = match OpenOptions::new()
|
||||||
|
.write(true)
|
||||||
|
.create_new(true)
|
||||||
|
.open(&output_path)
|
||||||
|
{
|
||||||
Ok(output_file) => output_file,
|
Ok(output_file) => output_file,
|
||||||
Err(error) => {
|
Err(error) => {
|
||||||
return Err(GenerationError::FileCreationError{error, file_path: output_path});
|
return Err(GenerationError::FileCreationError {
|
||||||
}
|
error,
|
||||||
|
file_path: output_path,
|
||||||
|
});
|
||||||
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
// Append the book title & author.
|
// Append the book title & author.
|
||||||
|
@ -249,10 +324,15 @@ pub fn generate_markdown(markdown_args: MarkdownArgs, book_url: Url, output_dire
|
||||||
|
|
||||||
if markdown_args.no_image_tags {
|
if markdown_args.no_image_tags {
|
||||||
// Remove image tags or not depending on args.
|
// Remove image tags or not depending on args.
|
||||||
buf = format!("\n\n{}\n\n", html2md::parse_html(&html::remove_image_tags(&chapter.isolated_chapter_html)));
|
buf = format!(
|
||||||
|
"\n\n{}\n\n",
|
||||||
|
html2md::parse_html(&html::remove_image_tags(&chapter.isolated_chapter_html))
|
||||||
|
);
|
||||||
} else {
|
} else {
|
||||||
buf = format!("\n\n{}\n\n", html2md::parse_html(&chapter.isolated_chapter_html.html()));
|
buf = format!(
|
||||||
|
"\n\n{}\n\n",
|
||||||
|
html2md::parse_html(&chapter.isolated_chapter_html.html())
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
output_file.write_all(buf.as_bytes()).unwrap();
|
output_file.write_all(buf.as_bytes()).unwrap();
|
||||||
|
@ -266,70 +346,77 @@ pub fn generate_markdown(markdown_args: MarkdownArgs, book_url: Url, output_dire
|
||||||
pub enum GenerationError {
|
pub enum GenerationError {
|
||||||
/// Represents errors during file creation.
|
/// Represents errors during file creation.
|
||||||
#[error("Unable to create file: {file_path}\n{error}")]
|
#[error("Unable to create file: {file_path}\n{error}")]
|
||||||
FileCreationError{error: std::io::Error, file_path: PathBuf},
|
FileCreationError {
|
||||||
|
error: std::io::Error,
|
||||||
|
file_path: PathBuf,
|
||||||
|
},
|
||||||
|
|
||||||
/// Represents errors when getting a Response from a Url.
|
/// Represents errors when getting a Response from a Url.
|
||||||
#[error("Unable to get response for: {url}\n{error}")]
|
#[error("Unable to get response for: {url}\n{error}")]
|
||||||
ResponseGetError{error: reqwest::Error, url: Url},
|
ResponseGetError { error: reqwest::Error, url: Url },
|
||||||
|
|
||||||
/// Represents errors when converting a Response to a String.
|
/// Represents errors when converting a Response to a String.
|
||||||
#[error("Unable to convert response to text: {error}")]
|
#[error("Unable to convert response to text: {error}")]
|
||||||
ResponseConvertToTextError{error: reqwest::Error},
|
ResponseConvertToTextError { error: reqwest::Error },
|
||||||
|
|
||||||
/// Represents errors when converting a Response to Bytes.
|
/// Represents errors when converting a Response to Bytes.
|
||||||
#[error("Unable to convert response to bytes: {error}")]
|
#[error("Unable to convert response to bytes: {error}")]
|
||||||
ResponseConvertToBytesError{error: reqwest::Error},
|
ResponseConvertToBytesError { error: reqwest::Error },
|
||||||
|
|
||||||
/// Represents errors when trying to parse a String to a Url.
|
/// Represents errors when trying to parse a String to a Url.
|
||||||
#[error("Unable to parse a valid Url from: {string_url}\n{error}")]
|
#[error("Unable to parse a valid Url from: {string_url}\n{error}")]
|
||||||
UrlParseError{error: url::ParseError, string_url: String},
|
UrlParseError {
|
||||||
|
error: url::ParseError,
|
||||||
|
string_url: String,
|
||||||
|
},
|
||||||
|
|
||||||
/// Represents io errors when trying to create a temporary directory.
|
/// Represents io errors when trying to create a temporary directory.
|
||||||
#[error("Unable to create temporary directory: {error}")]
|
#[error("Unable to create temporary directory: {error}")]
|
||||||
TempDirCreationError{error: std::io::Error},
|
TempDirCreationError { error: std::io::Error },
|
||||||
|
|
||||||
/// Represents an error when trying to extract the html2xhtml binaries into the temporary directory.
|
/// Represents an error when trying to extract the html2xhtml binaries into the temporary directory.
|
||||||
#[error("Unable to extract html2xhtml into the temporary directory: {error}")]
|
#[error("Unable to extract html2xhtml into the temporary directory: {error}")]
|
||||||
Html2XhtmlExtractionError{error: zip_extract::ZipExtractError},
|
Html2XhtmlExtractionError { error: zip_extract::ZipExtractError },
|
||||||
|
|
||||||
/// Represents an error when trying to start html2xhtml.
|
/// Represents an error when trying to start html2xhtml.
|
||||||
#[error("Unable to start html2xhtml: {error}")]
|
#[error("Unable to start html2xhtml: {error}")]
|
||||||
Html2XhtmlStartError{error: std::io::Error},
|
Html2XhtmlStartError { error: std::io::Error },
|
||||||
|
|
||||||
/// Represents an error when trying to find the book title.
|
/// Represents an error when trying to find the book title.
|
||||||
#[error("Unable to fetch the book title for: {url}")]
|
#[error("Unable to fetch the book title for: {url}")]
|
||||||
BookTitleFetchError{url: Url},
|
BookTitleFetchError { url: Url },
|
||||||
|
|
||||||
/// Represents an error when trying to find the book author.
|
/// Represents an error when trying to find the book author.
|
||||||
#[error("Unable to fetch the book author for: {url}")]
|
#[error("Unable to fetch the book author for: {url}")]
|
||||||
BookAuthorFetchError{url: Url},
|
BookAuthorFetchError { url: Url },
|
||||||
|
|
||||||
/// Represents an error when trying to find the book cover image url.
|
/// Represents an error when trying to find the book cover image url.
|
||||||
#[error("Unable to fetch the book cover image url: {url}")]
|
#[error("Unable to fetch the book cover image url: {url}")]
|
||||||
BookCoverImageUrlFetchError{url: Url},
|
BookCoverImageUrlFetchError { url: Url },
|
||||||
|
|
||||||
/// Represents an error when trying to find the chapter names and urls.
|
/// Represents an error when trying to find the chapter names and urls.
|
||||||
///
|
///
|
||||||
/// This typically occurs due to RoyalRoad changing their json scheme.
|
/// This typically occurs due to RoyalRoad changing their json scheme.
|
||||||
#[error("Unable to fetch the chapter names and urls for: {url}")]
|
#[error("Unable to fetch the chapter names and urls for: {url}")]
|
||||||
BookChapterNameAndUrlFetchError{url: Url},
|
BookChapterNameAndUrlFetchError { url: Url },
|
||||||
|
|
||||||
/// Represents an error when trying to isolate the chapter content.
|
/// Represents an error when trying to isolate the chapter content.
|
||||||
#[error("Unable to isolate chapter content for: {url}")]
|
#[error("Unable to isolate chapter content for: {url}")]
|
||||||
ChapterContentIsolationError{url: Url},
|
ChapterContentIsolationError { url: Url },
|
||||||
|
|
||||||
/// Represents an error for when the target os is unsupported.
|
/// Represents an error for when the target os is unsupported.
|
||||||
#[error("{os} is unsupported")]
|
#[error("{os} is unsupported")]
|
||||||
OsUnsupportedError{os: Oses},
|
OsUnsupportedError { os: Oses },
|
||||||
|
|
||||||
/// Represents an error that shows the generation method is unsupported.
|
/// Represents an error that shows the generation method is unsupported.
|
||||||
#[error("This generation mode is currently unsupported")]
|
#[error("This generation mode is currently unsupported")]
|
||||||
GenerationUnsupportedError,
|
GenerationUnsupportedError,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/// A struct that contains a vector of warnings.
|
/// A struct that contains a vector of warnings.
|
||||||
pub struct GenerationWarnings{warnings: Vec<Warning>}
|
pub struct GenerationWarnings {
|
||||||
|
warnings: Vec<Warning>,
|
||||||
|
}
|
||||||
|
|
||||||
impl GenerationWarnings {
|
impl GenerationWarnings {
|
||||||
fn new() -> Self {
|
fn new() -> Self {
|
||||||
|
@ -378,5 +465,5 @@ pub enum Warning {
|
||||||
warning_msg: String,
|
warning_msg: String,
|
||||||
raw_image_tag: String,
|
raw_image_tag: String,
|
||||||
error: url::ParseError,
|
error: url::ParseError,
|
||||||
}
|
},
|
||||||
}
|
}
|
12
src/misc.rs
12
src/misc.rs
|
@ -6,15 +6,18 @@ pub trait HashMapExt<K> {
|
||||||
fn join(self, new_hashmap: HashMap<K, Vec<String>>) -> HashMap<K, Vec<String>>;
|
fn join(self, new_hashmap: HashMap<K, Vec<String>>) -> HashMap<K, Vec<String>>;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl<K: std::cmp::Eq + std::hash::Hash + std::clone::Clone> HashMapExt<K>
|
||||||
impl<K: std::cmp::Eq + std::hash::Hash + std::clone::Clone> HashMapExt<K> for HashMap<K, Vec<String>> {
|
for HashMap<K, Vec<String>>
|
||||||
|
{
|
||||||
fn join(mut self, other_hashmap: HashMap<K, Vec<String>>) -> HashMap<K, Vec<String>> {
|
fn join(mut self, other_hashmap: HashMap<K, Vec<String>>) -> HashMap<K, Vec<String>> {
|
||||||
// I am well aware that this function is dogshit for performance; but tbh I don't give enough of a shit to do anything about it.
|
// I am well aware that this function is dogshit for performance; but tbh I don't give enough of a shit to do anything about it.
|
||||||
|
|
||||||
for key in other_hashmap.keys() {
|
for key in other_hashmap.keys() {
|
||||||
if self.contains_key(key) {
|
if self.contains_key(key) {
|
||||||
for string in &other_hashmap[key] {
|
for string in &other_hashmap[key] {
|
||||||
if self[key].contains(string) { continue; } // Avoid repeating strings in the vectors.
|
if self[key].contains(string) {
|
||||||
|
continue;
|
||||||
|
} // Avoid repeating strings in the vectors.
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut self_vector = self[key].clone();
|
let mut self_vector = self[key].clone();
|
||||||
|
@ -23,8 +26,7 @@ impl<K: std::cmp::Eq + std::hash::Hash + std::clone::Clone> HashMapExt<K> for Ha
|
||||||
self_vector.append(&mut other_vector);
|
self_vector.append(&mut other_vector);
|
||||||
|
|
||||||
self.insert(key.clone(), self_vector);
|
self.insert(key.clone(), self_vector);
|
||||||
}
|
} else {
|
||||||
else {
|
|
||||||
self.insert(key.clone(), other_hashmap[key].clone());
|
self.insert(key.clone(), other_hashmap[key].clone());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue