royal_road_archiver/src/html.rs

use std::{
    collections::HashMap,
    io::Write,
    process::{Command, Stdio},
};

use regex::Regex;
use scraper::{Html, Selector};
use tempfile::TempDir;
use url::Url;

use crate::{http, misc::HashMapExt, GenerationError, Warning, WARNINGS};

/// Convert a string to an HTML document.
pub fn string_to_html_document(document_string: &str) -> Html {
    Html::parse_document(document_string)
}

/// Convert a string to an HTML fragment.
pub fn string_to_html_fragment(fragment_string: &str) -> Html {
    Html::parse_fragment(fragment_string)
}
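
// A quick sketch of the difference between the two helpers (input strings are
// hypothetical): `parse_document` wraps the input in <html>/<head>/<body> as
// needed, while `parse_fragment` keeps it as a bare fragment, which is what the
// chapter-content helpers below expect.
//
//     let doc = string_to_html_document("<p>a full page</p>");
//     let frag = string_to_html_fragment("<p>just a fragment</p>");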

/// Get the book's title from the index.
pub fn get_title_from_index(index_html: &Html, book_url: &Url) -> Result<String, GenerationError> {
    let selector = Selector::parse("meta").unwrap(); // Selector matching every <meta> tag.

    // Loop through the meta tags in the document looking for
    // <meta name="twitter:title" content="...">; RoyalRoad stores the book title
    // in that tag's content attribute. (The unwrap panics if a matching tag has
    // no content attribute.)
    for element in index_html.select(&selector) {
        match element.value().attr("name") {
            None => continue,
            Some(name) => {
                if name == "twitter:title" {
                    return Ok(element.value().attr("content").unwrap().to_owned());
                }
            }
        }
    }

    Err(GenerationError::BookTitleFetchError {
        url: book_url.clone(),
    })
}
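
// A minimal usage sketch (hypothetical values, not part of the crate); the
// markup mirrors the <meta> tag this function scans for:
//
//     let index_html = string_to_html_document(
//         r#"<head><meta name="twitter:title" content="Example Book"></head>"#,
//     );
//     let book_url = Url::parse("https://www.royalroad.com/fiction/0")?;
//     assert_eq!(get_title_from_index(&index_html, &book_url)?, "Example Book");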

/// Get the book's author from the index.
pub fn get_author_from_index(index_html: &Html, book_url: &Url) -> Result<String, GenerationError> {
    let selector = Selector::parse("meta").unwrap();

    // Same scan as the title, but the author is stored in
    // <meta property="books:author" content="...">.
    for element in index_html.select(&selector) {
        match element.value().attr("property") {
            None => continue,
            Some(property) => {
                if property == "books:author" {
                    return Ok(element.value().attr("content").unwrap().to_owned());
                }
            }
        }
    }

    Err(GenerationError::BookAuthorFetchError {
        url: book_url.clone(),
    })
}

/// Get the book's cover image URL from the index.
pub fn get_cover_image_url_from_index(
    index_html: &Html,
    book_url: &Url,
) -> Result<Url, GenerationError> {
    let selector = Selector::parse("meta").unwrap();

    // The cover image URL is stored in <meta property="og:image" content="...">.
    for element in index_html.select(&selector) {
        match element.value().attr("property") {
            None => continue,
            Some(property) => {
                if property == "og:image" {
                    return http::string_to_url(element.value().attr("content").unwrap());
                }
            }
        }
    }

    Err(GenerationError::BookCoverImageUrlFetchError {
        url: book_url.clone(),
    })
}
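
// The sketch above extends to the other two scanners the same way (again with
// hypothetical values):
//
//     let author = get_author_from_index(&index_html, &book_url)?;
//     let cover_url = get_cover_image_url_from_index(&index_html, &book_url)?;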

/// Gets the chapter names and URLs from the index.
///
/// These are returned as a vector of tuples, where `.0` is the chapter name and
/// `.1` is the URL.
pub fn get_chapter_names_and_urls_from_index(
    index_html: &Html,
    book_url: &Url,
) -> Result<Vec<(String, String)>, GenerationError> {
    // I won't lie: much of this is highly specific to RoyalRoad, and I have no
    // memory of actually writing this function. The comments give the gist of it.
    let mut chapters: Vec<(String, String)> = Vec::new();
    let mut raw_json_data = String::new();

    // Find the <script> tag whose inner html contains "window.chapters".
    // The chapter list is embedded there as JSON.
    let selector = Selector::parse("script").unwrap();
    for element in index_html.select(&selector) {
        if element.inner_html().contains("window.chapters") {
            raw_json_data = element.inner_html();
            break;
        }
    }

    // Exit if unable to find the needed JSON data. That probably means RoyalRoad
    // has changed their page layout.
    if raw_json_data.is_empty() {
        return Err(GenerationError::BookChapterNameAndUrlFetchError {
            url: book_url.clone(),
        });
    }

    // Capture the JSON array assigned to window.chapters, i.e. the `[...]` in
    // `window.chapters = [...];`. The lazy `.*?` stops the capture at the first
    // `];`.
    const REGEX: &str = r#"window.chapters = (\[.*?]);"#;
    let regex = Regex::new(REGEX).unwrap();

    // Pull out capture group 1 (the array itself), falling back to an empty
    // array if the group is missing.
    let chapter_raw_json = regex
        .captures(&raw_json_data)
        .unwrap()
        .get(1)
        .map_or("[]", |m| m.as_str());

    // The captured text parses directly as JSON.
    let chapter_json: serde_json::Value = serde_json::from_str(chapter_raw_json).unwrap();

    // For each chapter in the JSON, strip the quotes that `to_string` leaves on
    // the values, prepend the site root to the relative url, and push the pair.
    for chapter in chapter_json.as_array().unwrap() {
        let chapter_name = chapter["title"].to_string().replace('"', "");
        let url = format!(
            "https://www.royalroad.com{}",
            chapter["url"].to_string().replace('"', "")
        );
        chapters.push((chapter_name, url));
    }

    Ok(chapters)
}
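
// A minimal sketch of the input this function expects (hand-written, not a real
// RoyalRoad page):
//
//     let index_html = string_to_html_document(
//         r#"<script>window.chapters = [{"title":"Chapter 1","url":"/fiction/0/c/1"}];</script>"#,
//     );
//     let chapters = get_chapter_names_and_urls_from_index(&index_html, &book_url)?;
//     // chapters[0] == ("Chapter 1".into(),
//     //                 "https://www.royalroad.com/fiction/0/c/1".into())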

/// Isolate the chapter content from everything else on the page.
pub fn isolate_chapter_content(
    raw_chapter_html: &Html,
    chapter_url: &Url,
) -> Result<Html, GenerationError> {
    let page_html = Html::parse_document(&raw_chapter_html.html());

    // The chapter text lives in <div class="chapter-inner chapter-content">.
    let selector = Selector::parse("div").unwrap();
    for element in page_html.select(&selector) {
        match element.value().attr("class") {
            None => continue,
            Some(class) => {
                if class == "chapter-inner chapter-content" {
                    return Ok(string_to_html_fragment(&element.inner_html()));
                }
            }
        }
    }

    Err(GenerationError::ChapterContentIsolationError {
        url: chapter_url.clone(),
    })
}
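
// Sketch (hypothetical page string): only the inner div's contents survive.
//
//     let page = string_to_html_document(
//         r#"<nav>menu</nav><div class="chapter-inner chapter-content"><p>Text</p></div>"#,
//     );
//     let content = isolate_chapter_content(&page, &chapter_url)?;
//     // content is now the fragment <p>Text</p>.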

/// Remove all img tags from the html fragment.
pub fn remove_image_tags(html_fragment: &Html) -> String {
    // Collect the serialized form of every unique <img> tag...
    let mut image_tags: Vec<String> = Vec::new();
    let selector = Selector::parse("img").unwrap();
    for element in html_fragment.select(&selector) {
        if !image_tags.contains(&element.html()) {
            image_tags.push(element.html());
        }
    }

    // ...then delete each of them from the raw html string.
    let mut html_fragment = html_fragment.html();
    for image_tag in image_tags {
        html_fragment = html_fragment.replace(&image_tag, "");
    }

    html_fragment
}
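
// Sketch (hypothetical fragment); this relies on `element.html()` serializing
// each tag exactly as it appears in `html_fragment.html()`, so the string
// replace lines up:
//
//     let frag = string_to_html_fragment(r#"<p>before<img src="x.png">after</p>"#);
//     let stripped = remove_image_tags(&frag);
//     // stripped contains <p>beforeafter</p>.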

/// Extract the URLs and image tags from a chapter and collect them into a
/// ``HashMap<Url, Vec<String>>`` keyed by image URL.
pub fn extract_urls_and_img_tag(chapter_html: &Html) -> HashMap<Url, Vec<String>> {
    let mut chapter_image_urls: HashMap<Url, Vec<String>> = HashMap::new();
    let selector = Selector::parse("img").unwrap();

    for element in chapter_html.select(&selector) {
        let url = element.attr("src");
        let image_tag = element.html();

        // Skip img tags that have no src attribute.
        if url.is_none() {
            continue;
        }

        // A src that isn't a valid url only earns a warning, not a hard error.
        let url = match Url::parse(url.unwrap()) {
            Ok(url) => url,
            Err(error) => {
                let warning = Warning::ImageTagParseError {
                    warning_msg: "Unable to parse url in image tag".to_string(),
                    raw_image_tag: image_tag,
                    error,
                };
                WARNINGS.lock().unwrap().add_warning(warning);
                continue;
            }
        };

        // Merge this (url, tag) pair into the map so tags that share a url end
        // up in the same Vec.
        let temp_map: HashMap<Url, Vec<String>> = HashMap::from([(url, vec![image_tag])]);
        chapter_image_urls = chapter_image_urls.join(temp_map);
    }

    chapter_image_urls
}
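
// Sketch (hypothetical fragment; assumes the crate's `HashMapExt::join` merges
// the Vecs of duplicate keys):
//
//     let frag = string_to_html_fragment(
//         r#"<img src="https://example.com/a.png"><img src="https://example.com/a.png" alt="x">"#,
//     );
//     let map = extract_urls_and_img_tag(&frag);
//     // One key (https://example.com/a.png) mapping to both serialized tags.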

/// Replace the image tag's src attribute with a new one.
pub fn replace_img_src(img_tag: String, new_src: String) -> String {
    let img_tag = string_to_html_fragment(&img_tag);
    let selector = Selector::parse("img").unwrap();
    let element = img_tag.select(&selector).next().unwrap();

    if element.attr("src").is_some() {
        let image_tag = element.html();

        // Match the whole src attribute, e.g. `src="https://..."`. The lazy `.*?`
        // stops at the first closing quote so any attributes after src survive.
        let src_match_regex = Regex::new(r#"(src=["'].*?["'])"#).unwrap();
        let src_attr = src_match_regex
            .captures(&image_tag)
            .unwrap()
            .get(0)
            .map(|m| m.as_str())
            .unwrap();

        // Swap the old attribute for one pointing at new_src.
        image_tag.replace(src_attr, &format!(r#"src="{new_src}""#))
    } else {
        // No src attribute to replace; return the tag unchanged.
        element.html()
    }
}
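
// Sketch: rewriting a remote src to a local path (names hypothetical):
//
//     let tag = r#"<img src="https://example.com/a.png">"#.to_string();
//     let rewritten = replace_img_src(tag, "images/a.png".to_string());
//     // rewritten == r#"<img src="images/a.png">"#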

/// Convert a given html dom into xhtml.
pub fn html_to_xhtml(html: Html, html2xhtml_dir: &TempDir) -> Result<String, GenerationError> {
    #[cfg(target_os = "windows")]
    const HTML2XHTML_ENTRY: &str = "html2xhtml.exe";
    #[cfg(any(target_os = "linux", target_os = "macos"))]
    const HTML2XHTML_ENTRY: &str = "html2xhtml";

    // Remove non-breaking spaces; they can cause certain e-readers to crash.
    let html = html.html().replace("&nbsp;", " ");

    // Start html2xhtml.
    let mut html2xhtml = match Command::new(html2xhtml_dir.path().join(HTML2XHTML_ENTRY))
        .stdin(Stdio::piped())
        .stdout(Stdio::piped())
        .spawn()
    {
        Ok(child) => child,
        Err(error) => return Err(GenerationError::Html2XhtmlStartError { error }),
    };

    // Write the html to stdin, then wait for the xhtml to be output on stdout.
    html2xhtml
        .stdin
        .as_mut()
        .unwrap()
        .write_all(html.as_bytes())
        .unwrap();
    let html2xhtml_output = html2xhtml.wait_with_output().unwrap();

    // Build a lossy string from the stdout, since the output may not be strictly
    // valid UTF-8.
    let xhtml = String::from_utf8_lossy(&html2xhtml_output.stdout).to_string();

    Ok(xhtml)
}
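
// Sketch of the call site (hypothetical; assumes the html2xhtml binary was
// unpacked into `html2xhtml_dir` elsewhere in the crate):
//
//     let xhtml = html_to_xhtml(chapter_html, &html2xhtml_dir)?;
//     // xhtml holds the converter's stdout as a lossily decoded String.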