mirror of
https://github.com/Raine-gay/royal_road_archiver.git
synced 2025-01-05 10:28:19 -06:00
Epub generation looks finished.
Thinking of making it use CSS files from RoyalRoad.
This commit is contained in:
parent
d432b60541
commit
d448b3ba42
61
src/html.rs
61
src/html.rs
|
@ -1,7 +1,8 @@
|
|||
use std::{collections::HashMap, process::exit};
|
||||
use std::{collections::HashMap, io::Write, process::{exit, Command, Stdio}};
|
||||
|
||||
use regex::Regex;
|
||||
use scraper::{Html, Selector};
|
||||
use tempdir::TempDir;
|
||||
use url::Url;
|
||||
|
||||
use crate::misc::HashMapExt;
|
||||
|
@ -188,4 +189,62 @@ pub fn extract_urls_and_img_tag(chapter_html: &Html) -> HashMap<Url, Vec<String>
|
|||
}
|
||||
|
||||
return chapter_image_urls;
|
||||
}
|
||||
|
||||
/// Replace the image tag with new one that contains the new src attribute.
|
||||
pub fn replace_img_src(img_tag: String, new_src: String) -> String {
|
||||
let img_tag = string_to_html_fragment(&img_tag);
|
||||
|
||||
let selector = Selector::parse("img").unwrap();
|
||||
let element = img_tag.select(&selector).next().unwrap();
|
||||
|
||||
|
||||
if element.attr("src").is_some() {
|
||||
let image_tag = element.html();
|
||||
|
||||
let src_match_regex = Regex::new(r#"(src=["'].*["'])"#).unwrap();
|
||||
let src_attr = src_match_regex.captures(&image_tag).unwrap().get(0).map(|m| m.as_str()).unwrap();
|
||||
|
||||
return image_tag.replace(src_attr, &format!(r#"src="{new_src}""#));
|
||||
}
|
||||
else {
|
||||
return element.html();
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert a given html dom into xhtml.
|
||||
pub fn html_to_xhtml(html: Html, html2xhtml_dir: &TempDir) -> String {
|
||||
#[cfg(target_os = "windows")]
|
||||
const HTML2XHTML_ENTRY: &str = "html2xhtml.exe";
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
const HTML2XHTML_ENTRY: &str = "html2xhtml";
|
||||
|
||||
#[cfg(target_os = "macos")]
|
||||
const HTML2XHTML_ENTRY: &str = "html2xhtml";
|
||||
|
||||
// Remove nbsp, They can cause certain e-readers to crash.
|
||||
let html = html.html().replace(" ", " ");
|
||||
|
||||
// Start html2xhtml.
|
||||
let mut html2xhtml = match Command::new(html2xhtml_dir.path().join(HTML2XHTML_ENTRY))
|
||||
.stdin(Stdio::piped())
|
||||
.stdout(Stdio::piped())
|
||||
.spawn()
|
||||
{
|
||||
Ok(child) => child,
|
||||
Err(error) => {
|
||||
eprintln!("Error! Unable to start html2xhtml: {error}");
|
||||
exit(1);
|
||||
},
|
||||
};
|
||||
|
||||
// Write the html to the stdin, then wait for xhtml to be outputted to the stdout.
|
||||
html2xhtml.stdin.as_mut().unwrap().write_all(html.as_bytes()).unwrap();
|
||||
let html2xhtml_output = html2xhtml.wait_with_output().unwrap();
|
||||
|
||||
// Generate a lossy string from the stdout.
|
||||
let xhtml = String::from_utf8_lossy(&html2xhtml_output.stdout).to_string();
|
||||
|
||||
return xhtml;
|
||||
}
|
30
src/http.rs
30
src/http.rs
|
@ -1,4 +1,4 @@
|
|||
use std::process::exit;
|
||||
use std::{collections::HashMap, process::exit};
|
||||
|
||||
use reqwest::{blocking::Response, header::HeaderMap};
|
||||
use url::Url;
|
||||
|
@ -36,6 +36,34 @@ impl HttpResponse {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Attempt to get the content(mime)-type and file extension from the http-header.
|
||||
///
|
||||
/// If the content-type header value can not be found it will warn the use and return empty strings.
|
||||
pub fn get_content_type_and_file_extension(&self) -> (String, String) {
|
||||
// A hashmap to convert mime-types to file extensions.
|
||||
let mime_to_file_extension: HashMap<&str, &str> = HashMap::from([
|
||||
("image/png", "png"),
|
||||
("image/webp", "webp"),
|
||||
("image/jpeg", "jpeg"),
|
||||
("image/jpg", "jpg"),
|
||||
]);
|
||||
|
||||
let content_type = match self.get_headers()["content-type"].to_str() {
|
||||
Ok(content_type) => content_type,
|
||||
Err(warning) => {
|
||||
eprintln!("Warning! Unable to get content type from the http-header: {warning}");
|
||||
return (String::with_capacity(0), String::with_capacity(0));
|
||||
}
|
||||
};
|
||||
|
||||
if mime_to_file_extension.contains_key(content_type) {
|
||||
return (content_type.to_string(), mime_to_file_extension[content_type].to_string());
|
||||
}
|
||||
else {
|
||||
return (content_type.to_string(), String::with_capacity(0));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Get an http response for a given url. Exits the program if it fails.
|
||||
|
|
|
@ -1,9 +1,12 @@
|
|||
use std::{fs::OpenOptions, io::Write, path::PathBuf, process::exit};
|
||||
use std::{collections::HashMap, fs::OpenOptions, io::Write, path::PathBuf, process::exit};
|
||||
|
||||
use bytes::Buf;
|
||||
use chrono::prelude::Local;
|
||||
use clap::Args;
|
||||
use epub_builder::{EpubBuilder, EpubContent, ReferenceType, ZipLibrary};
|
||||
use file_system_crap::convert_path_to_os_specific;
|
||||
use html::{html_to_xhtml, remove_image_tags, string_to_html_fragment};
|
||||
use indicatif::{ProgressBar, ProgressStyle};
|
||||
use url::Url;
|
||||
|
||||
mod book;
|
||||
|
@ -80,18 +83,23 @@ pub fn generate_epub(epub_args: EpubArgs, book_url: Url, output_directory: PathB
|
|||
.expect("Unable to add title metadata");
|
||||
|
||||
// Download the cover image & add it to the epub.
|
||||
let cover_image = http::get_response(book.cover_image_url).get_bytes().to_vec();
|
||||
epub_builder.add_cover_image("cover.jpeg", cover_image.as_slice(), "image/jpeg").expect("Unable to add cover image.");
|
||||
let cover_image = http::get_response(book.cover_image_url);
|
||||
let (cover_mime_type, cover_file_extension) = cover_image.get_content_type_and_file_extension();
|
||||
epub_builder.add_cover_image(
|
||||
format!("cover.{cover_file_extension}"),
|
||||
cover_image.get_bytes().to_vec().as_slice(),
|
||||
cover_mime_type).expect("Error! Unable to add cover image.");
|
||||
|
||||
// Generate the cover xhtml.
|
||||
let cover_xhtml = format!(
|
||||
r#"<head></head><body><div style="text-align: center;">
|
||||
<h1><a href="{0}">{1}</a></h1>
|
||||
<img src="cover.jpeg"/>
|
||||
<h2>by: {2}</h2>
|
||||
<h3>Archived on: {3}</h3></div></body>"#,
|
||||
<img src="cover.{2}"/>
|
||||
<h2>by: {3}</h2>
|
||||
<h3>Archived on: {4}</h3></div></body>"#,
|
||||
book.book_url,
|
||||
book.title,
|
||||
cover_file_extension,
|
||||
book.author,
|
||||
chrono::Local::now().to_rfc3339_opts(chrono::SecondsFormat::Secs, false)
|
||||
);
|
||||
|
@ -102,7 +110,7 @@ pub fn generate_epub(epub_args: EpubArgs, book_url: Url, output_directory: PathB
|
|||
EpubContent::new("title.xhtml", cover_xhtml.as_bytes())
|
||||
.title("Cover")
|
||||
.reftype(ReferenceType::Cover),
|
||||
).expect("Unable to add cover");
|
||||
).expect("Error! Unable to add cover");
|
||||
|
||||
// Add a table of contents after the cover page.
|
||||
epub_builder.inline_toc();
|
||||
|
@ -110,8 +118,60 @@ pub fn generate_epub(epub_args: EpubArgs, book_url: Url, output_directory: PathB
|
|||
// Setup html2xhtml on the operating system.
|
||||
let html2xhtml_dir = file_system_crap::setup_html2xhtml();
|
||||
|
||||
// TODO! Generate the epub body, deal with images etc etc. You know pickup from last night etc etc.
|
||||
// Finish setup_html2xhtml() first though dummy.
|
||||
let mut old_tags_new_tags: HashMap<String, String> = HashMap::new();
|
||||
|
||||
if !epub_args.no_images {
|
||||
// Download the images and add em to the epub.
|
||||
|
||||
println!("\nDownloading and processing images:");
|
||||
// Spawn a progress bar showing how many images have been downloaded & processed.
|
||||
let progress_bar = ProgressBar::new(book.image_urls_and_tags.keys().len().try_into().unwrap());
|
||||
progress_bar.set_style(
|
||||
ProgressStyle::with_template("[{elapsed_precise}] [{wide_bar:.cyan/blue}] {percent}% ")
|
||||
.unwrap()
|
||||
.progress_chars("#>-"),
|
||||
);
|
||||
|
||||
let mut i: usize = 0;
|
||||
for image_url in book.image_urls_and_tags.keys() {
|
||||
let image = http::get_response(image_url.clone());
|
||||
let (image_mime_type, image_file_extension) = image.get_content_type_and_file_extension();
|
||||
epub_builder.add_resource(
|
||||
format!("image_{i}.{image_file_extension}"),
|
||||
image.get_bytes().to_vec().reader(),
|
||||
image_mime_type).expect("Error! Unable to add content image");
|
||||
|
||||
for image_tag in book.image_urls_and_tags[image_url].clone() {
|
||||
old_tags_new_tags.insert(image_tag.clone(), html::replace_img_src(image_tag, format!("image_{i}.{image_file_extension}")));
|
||||
}
|
||||
|
||||
i+=1;
|
||||
progress_bar.inc(1);
|
||||
}
|
||||
|
||||
progress_bar.finish();
|
||||
}
|
||||
|
||||
// Convert the html to xhtml and add the xhtml to the epub for each chapter.
|
||||
for (i, chapter) in book.chapters.iter().enumerate() {
|
||||
|
||||
let xhtml: String;
|
||||
if epub_args.no_images {
|
||||
xhtml = html_to_xhtml(string_to_html_fragment(&remove_image_tags(&chapter.isolated_chapter_html)), &html2xhtml_dir)
|
||||
}
|
||||
else {
|
||||
let mut replaced_html = chapter.isolated_chapter_html.html();
|
||||
for old_img_tag in old_tags_new_tags.keys() {
|
||||
replaced_html = replaced_html.replace(&old_img_tag.clone(), &old_tags_new_tags[old_img_tag]);
|
||||
}
|
||||
|
||||
xhtml = html_to_xhtml(string_to_html_fragment(&replaced_html), &html2xhtml_dir);
|
||||
}
|
||||
|
||||
epub_builder.add_content(EpubContent::new(format!("chapter_{}.xhtml", i+1), xhtml.as_bytes())
|
||||
.title(chapter.chapter_name.clone())
|
||||
.reftype(ReferenceType::Text)).expect("Error! Unable to add chapter");
|
||||
}
|
||||
|
||||
// Generate the finished epub data as a byte vector.
|
||||
let mut finished_epub: Vec<u8> = vec![];
|
||||
|
|
Loading…
Reference in a new issue