From 0bfcfd496fb47d949d694cc71a962f5686b89d8d Mon Sep 17 00:00:00 2001 From: NA Date: Fri, 26 Jan 2024 03:17:50 +0000 Subject: [PATCH] Working on epub generation. Got to work on setup_html2xhtml() next. --- .vscode/settings.json | 5 ++ Cargo.lock | 140 ++++++++++++++++++++++++++++++++++++++++ Cargo.toml | 1 + src/binary.rs | 10 +-- src/book.rs | 38 +++++------ src/constants.rs | 110 +++++++++++++++++++++++++++++++ src/file_system_crap.rs | 50 ++++++++++++++ src/html.rs | 29 +++++++-- src/http.rs | 2 +- src/library.rs | 98 ++++++++++++++++++++++------ src/misc.rs | 34 ++++++++++ 11 files changed, 472 insertions(+), 45 deletions(-) create mode 100644 src/constants.rs create mode 100644 src/file_system_crap.rs create mode 100644 src/misc.rs diff --git a/.vscode/settings.json b/.vscode/settings.json index d163322..805ce55 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -2,16 +2,21 @@ "cSpell.words": [ "archiver", "Audiobook", + "dogshit", "epub", + "stylesheet", "Webnovel" ], "rust-analyzer.showUnlinkedFileNotification": false, "cSpell.ignoreWords": [ + "TMPDIR", "autotools", "chrono", "indicatif", + "reftype", "reqwest", "royalroad", + "tempdir", "ureq" ] } \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index 721d8c1..b343c1b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -279,6 +279,21 @@ version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" +[[package]] +name = "crc32fast" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "248e3bacc7dc6baa3b21e405ee045c3047101a49145e7e9eca583ab4c2ca5345" + [[package]] name = "cssparser" version = "0.31.2" @@ -302,6 +317,15 @@ dependencies = [ "syn 2.0.48", ] +[[package]] +name = "deranged" +version = "0.3.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b42b6fa04a440b495c8b04d0e71b707c585f83cb9cb28cf8cd0d976c315e31b4" +dependencies = [ + "powerfmt", +] + [[package]] name = "derive_more" version = "0.99.17" @@ -349,6 +373,23 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "epub-builder" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6fcc8fc7b93c7001e0d47c269aa5a30a78a1f44692dc09cc9d0f781378545e1" +dependencies = [ + "chrono", + "eyre", + "html-escape", + "log", + "once_cell", + "tempfile", + "upon", + "uuid", + "zip", +] + [[package]] name = "equivalent" version = "1.0.1" @@ -365,12 +406,32 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "eyre" +version = "0.6.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6267a1fa6f59179ea4afc8e50fd8612a3cc60bc858f786ff877a4a8cb042799" +dependencies = [ + "indenter", + "once_cell", +] + [[package]] name = "fastrand" version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "25cbce373ec4653f1a01a31e8a5e5ec0c622dc27ff9c4e6606eefef5cbbed4a5" +[[package]] +name = "flate2" +version = "1.0.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46303f565772937ffe1d394a4fac6f411c6013172fadde9dcdb1e147a086940e" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + [[package]] name = "fnv" version = "1.0.7" @@ -531,6 +592,15 @@ version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5d3d0e0f38255e7fa3cf31335b3a56f05febd18025f4db5ef7a0cfb4f8da651f" +[[package]] +name = "html-escape" +version = "0.2.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d1ad449764d627e22bfd7cd5e8868264fc9236e07c752972b4080cd351cb476" +dependencies = [ + "utf8-width", +] + [[package]] name = "html2md" version = "0.2.14" @@ -663,6 +733,12 @@ dependencies = [ "unicode-normalization", ] +[[package]] +name = "indenter" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce23b50ad8242c51a442f3ff322d56b02f08852c77e4c0b4d3fd684abc89c683" + [[package]] name = "indexmap" version = "2.1.0" @@ -1081,6 +1157,12 @@ version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7170ef9988bc169ba16dd36a7fa041e5c4cbeb6a35b76d4c03daded371eae7c0" +[[package]] +name = "powerfmt" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + [[package]] name = "ppv-lite86" version = "0.2.17" @@ -1224,6 +1306,7 @@ dependencies = [ "bytes", "chrono", "clap", + "epub-builder", "html2md", "indicatif", "path-slash", @@ -1549,6 +1632,24 @@ dependencies = [ "syn 2.0.48", ] +[[package]] +name = "time" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f657ba42c3f86e7680e53c8cd3af8abbe56b5491790b46e22e19c0d57463583e" +dependencies = [ + "deranged", + "powerfmt", + "serde", + "time-core", +] + +[[package]] +name = "time-core" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" + [[package]] name = "tinyvec" version = "1.6.0" @@ -1662,6 +1763,17 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85" +[[package]] +name = "upon" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21a9260fe394dfd8ab204a8eab40f88eb9a331bb852147d24fc0aff6b30daa02" +dependencies = [ + "serde", + "unicode-ident", + "unicode-width", +] + [[package]] name = "url" version = "2.5.0" @@ -1679,12 +1791,27 @@ version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" +[[package]] +name = "utf8-width" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86bd8d4e895da8537e5315b8254664e6b769c4ff3db18321b297a1e7004392e3" + [[package]] name = "utf8parse" version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" +[[package]] +name = "uuid" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f00cc9702ca12d3c81455259621e676d0f7251cec66a21e98fe2e9a37db93b2a" +dependencies = [ + "getrandom", +] + [[package]] name = "vcpkg" version = "0.2.15" @@ -2010,3 +2137,16 @@ dependencies = [ "quote", "syn 2.0.48", ] + +[[package]] +name = "zip" +version = "0.6.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "760394e246e4c28189f19d488c058bf16f564016aefac5d32bb1f3b51d5e9261" +dependencies = [ + "byteorder", + "crc32fast", + "crossbeam-utils", + "flate2", + "time", +] diff --git a/Cargo.toml b/Cargo.toml index 8841c09..f3d856e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,6 +17,7 @@ path = "src/binary.rs" bytes = "1.5.0" chrono = "0.4.33" clap = { version = "4.4.18", features = ["derive"] } +epub-builder = "0.7.4" html2md = "0.2.14" indicatif = "0.17.7" path-slash = "0.2.1" diff --git a/src/binary.rs b/src/binary.rs index 68c90fc..454f4cc 100644 --- a/src/binary.rs +++ b/src/binary.rs @@ -43,10 +43,10 @@ fn main() { let output_directory: PathBuf; match cli_input.output_directory { Some(output_directory_input) => { - output_directory = Path::new(&output_directory_input).to_path_buf(); + output_directory = PathBuf::from(&output_directory_input); }, None => { - output_directory = env::current_dir().unwrap().as_path().to_path_buf(); + output_directory = env::current_dir().unwrap(); } } @@ -61,7 +61,9 @@ fn main() { } } -// Check if the directory exists and is writeable. Creates one if not. +/// Check if the directory exists and is writeable. Creates one if not. +/// +/// Exits the program of failure. fn valid_directory_check(output_directory: &Path) { // Check if the directory exists, if it does not; attempt to create one. if !output_directory.exists() { @@ -95,7 +97,7 @@ fn valid_url_check(book_url: &str) -> Url { } }, Err(error) => { - eprintln!("Error! Unable to parse url: {error}"); + eprintln!("Error! Unable to parse url: {book_url}\n{error}"); exit(1); } } diff --git a/src/book.rs b/src/book.rs index 5d6d63b..42cdfb4 100644 --- a/src/book.rs +++ b/src/book.rs @@ -1,24 +1,29 @@ use std::collections::HashMap; use indicatif::{ProgressBar, ProgressStyle}; +use crate::misc::HashMapExt; use scraper::Html; use url::Url; -use crate::{html, http}; +use crate::{file_system_crap::remove_illegal_chars, html, http}; /// A struct representing a book & all the needed data to generate one. pub struct Book { /// The RoyalRoad Url for the book. - book_url: Url, + pub book_url: Url, /// The book's title. pub title: String, + /// Book title used for the filename. + /// Should have illegal chars expunged via file_system_crap::remove_illegal_chars. + pub file_name_title: String, + /// The book's author. pub author: String, /// A Url to the book's cover image. - cover_image_url: Url, + pub cover_image_url: Url, /// The raw html data of the RoyalRoad index page. index_html: Html, @@ -27,7 +32,7 @@ pub struct Book { pub chapters: Vec, /// A hashmap representing the book image urls and their corresponding img html tags. - image_urls: HashMap>, + pub image_urls_and_tags: HashMap>, } impl Book { @@ -36,10 +41,9 @@ impl Book { let index_html = html::string_to_html_document(&http::get_response(book_url.clone()).get_text()); let chapter_names_and_urls = html::get_chapter_names_and_urls_from_index(&index_html); - let mut chapters: Vec = Vec::with_capacity(chapter_names_and_urls.len()); - let mut image_urls: HashMap> = HashMap::new(); + let mut image_urls_and_tags: HashMap> = HashMap::new(); println!("\nDownloading and processing chapters:"); // Spawn a progress bar showing how many chapters have been downloaded & processed. @@ -54,8 +58,8 @@ impl Book { for i in 0..chapter_names_and_urls.len() { let chapter = Chapter::new(&chapter_names_and_urls[i][0], &chapter_names_and_urls[i][1]); - // extract the image urls and add em to the image_urls hashmap. - + // extract the image urls and add em to the image_urls_and_tags hashmap. + image_urls_and_tags = image_urls_and_tags.join(html::extract_urls_and_img_tag(&chapter.isolated_chapter_html)); chapters.push(chapter); @@ -64,14 +68,17 @@ impl Book { progress_bar.finish(); + let title = html::get_title_from_index(&index_html); + Book { book_url: book_url, - title: html::get_title_from_index(&index_html), + title: title.clone(), + file_name_title: remove_illegal_chars(title), author: html::get_author_from_index(&index_html), cover_image_url: http::string_to_url(&html::get_cover_image_url_from_index(&index_html)), index_html: index_html, chapters: chapters, - image_urls: image_urls, + image_urls_and_tags, } } @@ -90,7 +97,7 @@ pub struct Chapter { /// The name of the chapter. pub chapter_name: String, - /// The raw html data of the page. + /// The raw html data of the chapter page. raw_chapter_html: Html, /// The isolated chapter html. @@ -105,13 +112,8 @@ impl Chapter { Chapter { chapter_url: chapter_url, chapter_name: chapter_name.to_string(), - raw_chapter_html: raw_chapter_html.clone(), - isolated_chapter_html: html::isolate_chapter_content(raw_chapter_html) + isolated_chapter_html: html::isolate_chapter_content(&raw_chapter_html), + raw_chapter_html: raw_chapter_html, } } -} - -// TODO! -struct BookCss { - } \ No newline at end of file diff --git a/src/constants.rs b/src/constants.rs new file mode 100644 index 0000000..e30bf9c --- /dev/null +++ b/src/constants.rs @@ -0,0 +1,110 @@ +pub const EPUB_XML_HEAD: &str = r#" + +"#; + +pub const EPUB_XML_TAIL: &str = r#""#; + +pub const EPUB_CSS: &str = r#" +/* +html5doctor.com Reset Stylesheet +v1.6.1 +Last Updated: 2010-09-17 +Author: Richard Clark - http://richclarkdesign.com +Twitter: @rich_clark +*/ + +html, body, div, span, object, iframe, +h1, h2, h3, h4, h5, h6, p, blockquote, pre, +abbr, address, cite, code, +del, dfn, em, img, ins, kbd, q, samp, +small, strong, sub, sup, var, +b, i, +dl, dt, dd, ol, ul, li, +fieldset, form, label, legend, +table, caption, tbody, tfoot, thead, tr, th, td, +article, aside, canvas, details, figcaption, figure, +footer, header, hgroup, menu, nav, section, summary, +time, mark, audio, video { + margin:0; + padding:0; + border:0; + outline:0; + font-size:100%; + vertical-align:baseline; + background:transparent; +} + +body { + line-height:1; +} + +article,aside,details,figcaption,figure, +footer,header,hgroup,menu,nav,section { + display:block; +} + +nav ul { + list-style:none; +} + +blockquote, q { + quotes:none; +} + +blockquote:before, blockquote:after, +q:before, q:after { + content:''; + content:none; +} + +a { + margin:0; + padding:0; + font-size:100%; + vertical-align:baseline; + background:transparent; +} + +/* change colours to suit your needs */ +ins { + background-color:#ff9; + color:#000; + text-decoration:none; +} + +/* change colours to suit your needs */ +mark { + background-color:#ff9; + color:#000; + font-style:italic; + font-weight:bold; +} + +del { + text-decoration: line-through; +} + +abbr[title], dfn[title] { + border-bottom:1px dotted; + cursor:help; +} + +table { + border-collapse:collapse; + border-spacing:0; +} + +/* change border colour to suit your needs */ +hr { + display:block; + height:1px; + border:0; + border-top:1px solid #cccccc; + margin:1em 0; + padding:0; +} + +input, select { + vertical-align:middle; +} +"#; \ No newline at end of file diff --git a/src/file_system_crap.rs b/src/file_system_crap.rs new file mode 100644 index 0000000..d7fe02a --- /dev/null +++ b/src/file_system_crap.rs @@ -0,0 +1,50 @@ +use std::path::PathBuf; + +use path_slash::PathBufExt as _; + +/// Converts a given path to windows style if needed. +pub fn convert_path_to_os_specific(path: PathBuf) -> PathBuf { + // If target os is windows. + #[cfg(target_os = "windows")] { + return PathBuf::from_slash_lossy(path.into_os_string()); + } + + // If target os is not windows. + #[cfg(not(target_os = "windows"))] { + return PathBuf::from_backslash_lossy(path.into_os_string()); + } +} + +/// Remove chars that are illegal to be used in filenames on both unix & windows. +pub fn remove_illegal_chars(mut string: String) -> String { + const ILLEGAL_CHARS: [char; 9] = ['/', '\u{005C}', '<', '>', ':', '\u{0022}', '|', '?', '*']; + + for char in ILLEGAL_CHARS { + string = string.replace(char, " "); + } + + return string; +} + +/// Setup html2xhtml in the operating system's temp directory. +pub fn setup_html2xhtml() { + #[cfg(target_os = "windows")] { + //TODO! + // Thinking of using C:\Users\\AppData\Local\Temp\html2xhtml-windows + } + + #[cfg(target_os = "linux")] { + // TODO! + // Thinking of using /tmp/html2xhtml-linux + } + + #[cfg(target_os = "macos")] { + // TODO! + // You can find the macos tempdir by doing: echo $TMPDIR + } +} + +/// Delete html2xhtml from the operating system's temp directory. +pub fn delete_html2xhtml() { + // TODO! +} \ No newline at end of file diff --git a/src/html.rs b/src/html.rs index f8db6fc..e18c74e 100644 --- a/src/html.rs +++ b/src/html.rs @@ -4,6 +4,8 @@ use regex::Regex; use scraper::{Html, Selector}; use url::Url; +use crate::misc::HashMapExt; + /// Convert a string to an html document. pub fn string_to_html_document(document_string: &str) -> Html { Html::parse_document(document_string) @@ -123,7 +125,7 @@ pub fn get_chapter_names_and_urls_from_index(index_html: &Html) -> Vec<[String; } /// Isolate chapter content from the rest of the shit on the page. -pub fn isolate_chapter_content(raw_chapter_html: Html) -> Html { +pub fn isolate_chapter_content(raw_chapter_html: &Html) -> Html { let page_html = Html::parse_document(&raw_chapter_html.html()); let selector = Selector::parse("div").unwrap(); @@ -142,7 +144,7 @@ pub fn isolate_chapter_content(raw_chapter_html: Html) -> Html { } /// Remove all img tags from the html fragment. -pub fn remove_image_tags(html_fragment: Html) -> String { +pub fn remove_image_tags(html_fragment: &Html) -> String { let mut image_tags: Vec = Vec::new(); let selector = Selector::parse("img").unwrap(); @@ -161,10 +163,29 @@ pub fn remove_image_tags(html_fragment: Html) -> String { return html_fragment; } -pub fn extract_urls_and_imgs_tag(chapter_html: Html) -> HashMap> { +/// Extract the urls and image tags from a chapter and put them in the hashmap: +/// ``Hashmap>`` +pub fn extract_urls_and_img_tag(chapter_html: &Html) -> HashMap> { let mut chapter_image_urls: HashMap> = HashMap::new(); - + let selector = Selector::parse("img").unwrap(); + for element in chapter_html.select(&selector) { + let url = element.attr("src"); + let image_tag = element.html(); + + if url.is_none() { continue; } + let url = match Url::parse(url.unwrap()) { + Ok(url) => url, + Err(error) => { + eprintln!("Warning! Unable to parse url on image tag: {image_tag}\n{error}"); + continue; + }, + }; + + let temp_map: HashMap> = HashMap::from([(url, vec![image_tag])]); + + chapter_image_urls = chapter_image_urls.join(temp_map); + } return chapter_image_urls; } \ No newline at end of file diff --git a/src/http.rs b/src/http.rs index 441905f..e01aa87 100644 --- a/src/http.rs +++ b/src/http.rs @@ -56,7 +56,7 @@ pub fn string_to_url(url: &str) -> Url { match Url::parse(url) { Ok(url) => url, Err(error) => { - eprintln!("Error! Unable to parse: {url} into a valid url."); + eprintln!("Error! Unable to parse: {url} into a valid url.\n{error}"); exit(1); } } diff --git a/src/library.rs b/src/library.rs index 4be36c1..8cc1e4c 100644 --- a/src/library.rs +++ b/src/library.rs @@ -2,12 +2,16 @@ use std::{fs::OpenOptions, io::Write, path::PathBuf, process::exit}; use chrono::prelude::Local; use clap::Args; +use epub_builder::{EpubBuilder, EpubContent, ReferenceType, ZipLibrary}; +use file_system_crap::convert_path_to_os_specific; use url::Url; - mod book; +mod constants; +mod file_system_crap; mod html; mod http; +mod misc; /// struct that corresponds to arguments for Audiobook generation. #[derive(Args, Debug)] @@ -61,7 +65,80 @@ pub fn generate_audiobook(audiobook_args: AudiobookArgs, book_url: Url, output_d /// This function DOES NOT do any error checking on the Url or output directory & WILL panic if they are wrong. /// Make sure the Url is valid and the output directory is writable BEFORE passing them to this. pub fn generate_epub(epub_args: EpubArgs, book_url: Url, output_directory: PathBuf) { + // Until xhtml is working on MacOS this notice & exit code will remain. + // See file_system_crap::setup_html2xhtml() for current status on MacOS support for this mode. + #[cfg(target_os = "macos")] { + eprint!("Error! This mode does not currently support MacOS. Try either html mode or markdown mode."); + exit(1); + } + let book = book::Book::new(book_url); + + // Initialize the epub builder. + let mut epub_builder = EpubBuilder::new(ZipLibrary::new().unwrap()).unwrap(); + + // Add author and title metadata. + epub_builder.stylesheet(constants::EPUB_CSS.as_bytes()).unwrap(); // Use the epub_css in the constants.rs file. + epub_builder + .metadata("author", &book.author) + .expect("Unable to add author metadata"); + epub_builder + .metadata("title", &book.title) + .expect("Unable to add title metadata"); + + // Download the cover image & add it to the epub. + let cover_image = http::get_response(book.cover_image_url).get_bytes().to_vec(); + epub_builder.add_cover_image("cover.jpeg", cover_image.as_slice(), "image/jpeg").expect("Unable to add cover image."); + + // Generate the cover xhtml. + let cover_xhtml = format!( + r#"
+

{1}

+ +

by: {2}

+

Archived on: {3}

"#, + book.book_url, + book.title, + book.author, + chrono::Local::now().to_rfc3339_opts(chrono::SecondsFormat::Secs, false) + ); + let cover_xhtml = format!("{0}{cover_xhtml}{1}", constants::EPUB_XML_HEAD, constants::EPUB_XML_TAIL); + + // Add the cover xhtml to the epub. + epub_builder.add_content( + EpubContent::new("title.xhtml", cover_xhtml.as_bytes()) + .title("Cover") + .reftype(ReferenceType::Cover), + ).expect("Unable to add cover"); + + // Add a table of contents after the cover page. + epub_builder.inline_toc(); + + // Setup html2xhtml on the operating system. + file_system_crap::setup_html2xhtml(); + + // TODO! Generate the epub body, deal with images etc etc. You know pickup from last night etc etc. + // Finish setup_html2xhtml() first though dummy. + + // Generate the finished epub data as a byte vector. + let mut finished_epub: Vec = vec![]; + epub_builder.generate(&mut finished_epub).expect("Unable to generate epub data"); + + // Create the epub file and write the finished epub data to it. + let output_path = convert_path_to_os_specific(output_directory.join(format!("{0}.epub", book.file_name_title))); + let mut output_file = match OpenOptions::new().write(true).create_new(true).open(&output_path) { + Ok(output_file) => output_file, + Err(error) => { + eprintln!("Error! Unable to create: {0}\n{error}", output_path.to_string_lossy()); + exit(1); + } + }; + + output_file.write_all(finished_epub.as_slice()) + .expect(format!("Unable to write finished epub data to {0}", output_path.to_string_lossy()).as_str()); + + // Delete html2xhtml from the temp directory. It's good to clean up after yourself. + file_system_crap::delete_html2xhtml(); } /// Generate an html archive from the given arguments, url, & outputs it to the output directory. @@ -79,7 +156,7 @@ pub fn generate_html(html_args: HtmlArgs, book_url: Url, output_directory: PathB pub fn generate_markdown(markdown_args: MarkdownArgs, book_url: Url, output_directory: PathBuf) { let book = book::Book::new(book_url); - let output_path = convert_path_to_windows(output_directory.join(format!("{0}.md", book.title))); + let output_path = convert_path_to_os_specific(output_directory.join(format!("{0}.md", book.file_name_title))); // Create the md file. This will crash if it already exists or can not be created. let mut output_file = match OpenOptions::new().write(true).create_new(true).open(&output_path) { @@ -110,7 +187,7 @@ pub fn generate_markdown(markdown_args: MarkdownArgs, book_url: Url, output_dire if markdown_args.no_image_tags { // Remove image tags or not depending on args. - buf = format!("\n\n{}\n\n", html2md::parse_html(&html::remove_image_tags(chapter.isolated_chapter_html))); + buf = format!("\n\n{}\n\n", html2md::parse_html(&html::remove_image_tags(&chapter.isolated_chapter_html))); } else { buf = format!("\n\n{}\n\n", html2md::parse_html(&chapter.isolated_chapter_html.html())); @@ -118,19 +195,4 @@ pub fn generate_markdown(markdown_args: MarkdownArgs, book_url: Url, output_dire output_file.write_all(buf.as_bytes()).unwrap(); } -} - -/// Converts a given path to windows style if needed. -fn convert_path_to_windows(path: PathBuf) -> PathBuf { - // If target os is windows. - #[cfg(target_os = "windows")] { - use path_slash::PathBufExt as _; - - return PathBuf::from_slash(path.into_os_string().into_string().unwrap()); - } - - // If target os is not windows. - #[cfg(not(target_os = "windows"))] { - return path; - } } \ No newline at end of file diff --git a/src/misc.rs b/src/misc.rs new file mode 100644 index 0000000..7b414e1 --- /dev/null +++ b/src/misc.rs @@ -0,0 +1,34 @@ +use std::collections::HashMap; + +/// An extension to ``std::collections::HashMap>`` +pub trait HashMapExt { + /// Merges two ``Hashmap>`` returning the merged hashmap. + fn join(self, new_hashmap: HashMap>) -> HashMap>; +} + + +impl HashMapExt for HashMap> { + fn join(mut self, other_hashmap: HashMap>) -> HashMap> { + // I am well aware that this function is dogshit for performance; but tbh I don't give enough of a shit to do anything about it. + + for key in other_hashmap.keys() { + if self.contains_key(key) { + for string in &other_hashmap[key] { + if self[key].contains(string) { continue; } // Avoid repeating strings in the vectors. + } + + let mut self_vector = self[key].clone(); + let mut other_vector = other_hashmap[key].clone(); + + self_vector.append(&mut other_vector); + + self.insert(key.clone(), self_vector); + } + else { + self.insert(key.clone(), other_hashmap[key].clone()); + } + } + + return self; + } +} \ No newline at end of file