diff --git a/src/html.rs b/src/html.rs
index 2d32be6..cb0fc08 100644
--- a/src/html.rs
+++ b/src/html.rs
@@ -1,7 +1,8 @@
-use std::{collections::HashMap, process::exit};
+use std::{collections::HashMap, io::Write, process::{exit, Command, Stdio}};
use regex::Regex;
use scraper::{Html, Selector};
+use tempdir::TempDir;
use url::Url;
use crate::misc::HashMapExt;
@@ -188,4 +189,62 @@ pub fn extract_urls_and_img_tag(chapter_html: &Html) -> HashMap
}
return chapter_image_urls;
+}
+
+/// Replace the `src` attribute of an image tag and return the re-serialized tag.
+///
+/// `img_tag` is parsed with `string_to_html_fragment` and the first `<img>` element found is
+/// serialized again. If that element has a `src` attribute, its value is swapped for `new_src`
+/// (inserted verbatim, without escaping); every other attribute is left untouched. If the
+/// element has no `src` attribute the serialized tag is returned unchanged.
+///
+/// # Panics
+///
+/// Panics if `img_tag` does not contain an `<img>` element.
+pub fn replace_img_src(img_tag: String, new_src: String) -> String {
+    let fragment = string_to_html_fragment(&img_tag);
+
+    let selector = Selector::parse("img").unwrap();
+    let element = fragment
+        .select(&selector)
+        .next()
+        .expect("img_tag must contain an <img> element");
+    let image_tag = element.html();
+
+    if element.attr("src").is_none() {
+        return image_tag;
+    }
+
+    // Match exactly one `src="..."` attribute:
+    // - the leading whitespace keeps `data-src=` and similar names from matching,
+    // - `[^"]*` stops at the closing quote, so attributes that follow `src` (alt, width, ...)
+    //   are not swallowed the way a greedy `.*` would swallow them.
+    let src_match_regex = Regex::new(r#"\ssrc="[^"]*""#).unwrap();
+
+    match src_match_regex.find(&image_tag) {
+        // Splice the new attribute over the matched range only, instead of replacing every
+        // occurrence of the matched text.
+        Some(src_attr) => format!(
+            r#"{} src="{new_src}"{}"#,
+            &image_tag[..src_attr.start()],
+            &image_tag[src_attr.end()..],
+        ),
+        None => image_tag,
+    }
+}
+
+/// Convert a given html dom into xhtml by piping it through the external `html2xhtml` program.
+///
+/// The document is serialized, non-breaking spaces are replaced with regular spaces, and the
+/// result is written to the stdin of the `html2xhtml` executable found in `html2xhtml_dir`.
+/// Everything the program prints to its stdout is returned, decoded lossily as UTF-8.
+///
+/// Exits the program with an error message if `html2xhtml` can not be started, can not be
+/// written to, or its output can not be collected.
+pub fn html_to_xhtml(html: Html, html2xhtml_dir: &TempDir) -> String {
+    // `EXE_SUFFIX` is ".exe" on windows and empty on the other platforms, so this yields the
+    // same names as before while still compiling on targets other than windows/linux/macos.
+    let html2xhtml_entry = format!("html2xhtml{}", std::env::consts::EXE_SUFFIX);
+
+    // Remove nbsp, they can cause certain e-readers to crash. Both the entity and the raw
+    // U+00A0 character are handled, since the serialized html may contain either form.
+    let html = html.html().replace("&nbsp;", " ").replace('\u{a0}', " ");
+
+    // Start html2xhtml.
+    let mut html2xhtml = match Command::new(html2xhtml_dir.path().join(html2xhtml_entry))
+        .stdin(Stdio::piped())
+        .stdout(Stdio::piped())
+        .spawn()
+    {
+        Ok(child) => child,
+        Err(error) => {
+            eprintln!("Error! Unable to start html2xhtml: {error}");
+            exit(1);
+        },
+    };
+
+    // Feed the stdin from a separate thread while this thread drains the stdout. Writing the
+    // whole document first and only reading afterwards can deadlock on large inputs: the child
+    // blocks on a full stdout pipe while we block on a full stdin pipe.
+    // The stdin handle is dropped when the thread finishes, which signals end-of-input.
+    let mut stdin = html2xhtml.stdin.take().expect("stdin was configured as piped");
+    let writer = std::thread::spawn(move || stdin.write_all(html.as_bytes()));
+
+    // Wait for the xhtml to be outputted to the stdout.
+    let html2xhtml_output = match html2xhtml.wait_with_output() {
+        Ok(output) => output,
+        Err(error) => {
+            eprintln!("Error! Unable to read the output of html2xhtml: {error}");
+            exit(1);
+        },
+    };
+
+    match writer.join() {
+        Ok(Ok(())) => {},
+        Ok(Err(error)) => {
+            eprintln!("Error! Unable to write to html2xhtml: {error}");
+            exit(1);
+        },
+        Err(_) => {
+            eprintln!("Error! The thread writing to html2xhtml panicked.");
+            exit(1);
+        },
+    }
+
+    // Generate a lossy string from the stdout.
+    String::from_utf8_lossy(&html2xhtml_output.stdout).into_owned()
}
\ No newline at end of file
diff --git a/src/http.rs b/src/http.rs
index e01aa87..317424d 100644
--- a/src/http.rs
+++ b/src/http.rs
@@ -1,4 +1,4 @@
-use std::process::exit;
+use std::{collections::HashMap, process::exit};
use reqwest::{blocking::Response, header::HeaderMap};
use url::Url;
@@ -36,6 +36,34 @@ impl HttpResponse {
}
}
}
+
+    /// Attempt to get the content (mime) type and file extension from the http-header.
+    ///
+    /// Returns `(content_type, file_extension)`. The content type is the header value exactly
+    /// as received. The file extension is looked up from the mime-type part of that value
+    /// (parameters such as `; charset=...` and letter case are ignored) and is an empty string
+    /// when the mime-type is not one of the known image types.
+    ///
+    /// If the content-type header is missing or is not valid text, it will warn the user and
+    /// return empty strings.
+    pub fn get_content_type_and_file_extension(&self) -> (String, String) {
+        // `get` returns `None` for a missing header, whereas indexing the header map panics.
+        let content_type = match self.get_headers().get("content-type").map(|value| value.to_str()) {
+            Some(Ok(content_type)) => content_type,
+            Some(Err(warning)) => {
+                eprintln!("Warning! Unable to get content type from the http-header: {warning}");
+                return (String::new(), String::new());
+            },
+            None => {
+                eprintln!("Warning! Unable to get content type from the http-header: the header is missing");
+                return (String::new(), String::new());
+            },
+        };
+
+        // Only the part before the first `;` names the mime-type; compare it case-insensitively.
+        let mime_type = content_type
+            .split(';')
+            .next()
+            .unwrap_or(content_type)
+            .trim()
+            .to_ascii_lowercase();
+
+        // Convert the mime-type to a file extension.
+        let file_extension = match mime_type.as_str() {
+            "image/png" => "png",
+            "image/webp" => "webp",
+            "image/jpeg" => "jpeg",
+            "image/jpg" => "jpg",
+            _ => "",
+        };
+
+        (content_type.to_string(), file_extension.to_string())
+    }
}
/// Get an http response for a given url. Exits the program if it fails.
diff --git a/src/library.rs b/src/library.rs
index 7d3328e..eea17ea 100644
--- a/src/library.rs
+++ b/src/library.rs
@@ -1,9 +1,12 @@
-use std::{fs::OpenOptions, io::Write, path::PathBuf, process::exit};
+use std::{collections::HashMap, fs::OpenOptions, io::Write, path::PathBuf, process::exit};
+use bytes::Buf;
use chrono::prelude::Local;
use clap::Args;
use epub_builder::{EpubBuilder, EpubContent, ReferenceType, ZipLibrary};
use file_system_crap::convert_path_to_os_specific;
+use html::{html_to_xhtml, remove_image_tags, string_to_html_fragment};
+use indicatif::{ProgressBar, ProgressStyle};
use url::Url;
mod book;
@@ -80,18 +83,23 @@ pub fn generate_epub(epub_args: EpubArgs, book_url: Url, output_directory: PathB
.expect("Unable to add title metadata");
// Download the cover image & add it to the epub.
- let cover_image = http::get_response(book.cover_image_url).get_bytes().to_vec();
- epub_builder.add_cover_image("cover.jpeg", cover_image.as_slice(), "image/jpeg").expect("Unable to add cover image.");
+ let cover_image = http::get_response(book.cover_image_url);
+ let (cover_mime_type, cover_file_extension) = cover_image.get_content_type_and_file_extension();
+ epub_builder.add_cover_image(
+ format!("cover.{cover_file_extension}"),
+ cover_image.get_bytes().to_vec().as_slice(),
+ cover_mime_type).expect("Error! Unable to add cover image.");
// Generate the cover xhtml.
let cover_xhtml = format!(
r#"
-
-
by: {2}
-
Archived on: {3}
"#,
+
+ by: {3}
+ Archived on: {4}