html2xhtml should now extract on both windows & linux.

This commit is contained in:
NA 2024-01-26 15:44:19 +00:00
parent 054b2e48f1
commit 61036cf037
8 changed files with 357 additions and 17 deletions

View file

@ -12,6 +12,7 @@
"TMPDIR",
"autotools",
"chrono",
"colour",
"indicatif",
"reftype",
"reqwest",

307
Cargo.lock generated
View file

@ -17,6 +17,17 @@ version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
[[package]]
name = "aes"
version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac1f845298e95f983ff1944b728ae08b8cebab80d684f0a832ed0fc74dfa27e2"
dependencies = [
"cfg-if",
"cipher",
"cpufeatures",
]
[[package]]
name = "ahash"
version = "0.8.7"
@ -129,6 +140,12 @@ version = "0.21.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567"
[[package]]
name = "base64ct"
version = "1.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b"
[[package]]
name = "bitflags"
version = "1.3.2"
@ -141,6 +158,15 @@ version = "2.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed570934406eb16438a4e976b1b4500774099c13b8cb96eec99f620f05090ddf"
[[package]]
name = "block-buffer"
version = "0.10.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71"
dependencies = [
"generic-array",
]
[[package]]
name = "bumpalo"
version = "3.14.0"
@ -159,12 +185,34 @@ version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a2bd12c1caf447e69cd4528f47f94d203fd2582878ecb9e9465484c4148a8223"
[[package]]
name = "bzip2"
version = "0.4.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bdb116a6ef3f6c3698828873ad02c3014b3c85cadb88496095628e3ef1e347f8"
dependencies = [
"bzip2-sys",
"libc",
]
[[package]]
name = "bzip2-sys"
version = "0.1.11+1.0.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "736a955f3fa7875102d57c82b8cac37ec45224a07fd32d58f9f7a186b6cd4cdc"
dependencies = [
"cc",
"libc",
"pkg-config",
]
[[package]]
name = "cc"
version = "1.0.83"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0"
dependencies = [
"jobserver",
"libc",
]
@ -194,6 +242,16 @@ dependencies = [
"windows-targets 0.52.0",
]
[[package]]
name = "cipher"
version = "0.4.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "773f3b9af64447d2ce9850330c473515014aa235e6a783b02db81ff39e4a3dad"
dependencies = [
"crypto-common",
"inout",
]
[[package]]
name = "clap"
version = "4.4.18"
@ -263,6 +321,12 @@ dependencies = [
"windows-sys 0.52.0",
]
[[package]]
name = "constant_time_eq"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "245097e9a4535ee1e3e3931fcfcd55a796a44c643e8596ff6566d68f09b87bbc"
[[package]]
name = "core-foundation"
version = "0.9.4"
@ -279,6 +343,15 @@ version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f"
[[package]]
name = "cpufeatures"
version = "0.2.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "53fe5e26ff1b7aef8bca9c6080520cfb8d9333c7568e1829cef191a9723e5504"
dependencies = [
"libc",
]
[[package]]
name = "crc32fast"
version = "1.3.2"
@ -294,6 +367,16 @@ version = "0.8.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "248e3bacc7dc6baa3b21e405ee045c3047101a49145e7e9eca583ab4c2ca5345"
[[package]]
name = "crypto-common"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3"
dependencies = [
"generic-array",
"typenum",
]
[[package]]
name = "cssparser"
version = "0.31.2"
@ -337,6 +420,17 @@ dependencies = [
"syn 1.0.109",
]
[[package]]
name = "digest"
version = "0.10.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292"
dependencies = [
"block-buffer",
"crypto-common",
"subtle",
]
[[package]]
name = "dtoa"
version = "1.0.9"
@ -462,6 +556,12 @@ dependencies = [
"percent-encoding",
]
[[package]]
name = "fuchsia-cprng"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba"
[[package]]
name = "futf"
version = "0.1.5"
@ -529,6 +629,16 @@ dependencies = [
"byteorder",
]
[[package]]
name = "generic-array"
version = "0.14.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a"
dependencies = [
"typenum",
"version_check",
]
[[package]]
name = "getopts"
version = "0.2.21"
@ -592,6 +702,15 @@ version = "0.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5d3d0e0f38255e7fa3cf31335b3a56f05febd18025f4db5ef7a0cfb4f8da651f"
[[package]]
name = "hmac"
version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e"
dependencies = [
"digest",
]
[[package]]
name = "html-escape"
version = "0.2.13"
@ -762,6 +881,15 @@ dependencies = [
"unicode-width",
]
[[package]]
name = "inout"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a0c10553d664a4d0bcff9f4215d0aac67a639cc68ef660840afe309b807bc9f5"
dependencies = [
"generic-array",
]
[[package]]
name = "instant"
version = "0.1.12"
@ -803,6 +931,15 @@ version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8eaf4bc02d17cbdd7ff4c7438cafcdf7fb9a4613313ad11b4f8fefe7d3fa0130"
[[package]]
name = "jobserver"
version = "0.1.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8c37f63953c4c63420ed5fd3d6d398c719489b9f872b9fa683262f8edd363c7d"
dependencies = [
"libc",
]
[[package]]
name = "js-sys"
version = "0.3.67"
@ -1041,12 +1178,35 @@ dependencies = [
"windows-targets 0.48.5",
]
[[package]]
name = "password-hash"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7676374caaee8a325c9e7a2ae557f216c5563a171d6997b0ef8a65af35147700"
dependencies = [
"base64ct",
"rand_core 0.6.4",
"subtle",
]
[[package]]
name = "path-slash"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e91099d4268b0e11973f036e885d652fb0b21fedcf69738c627f94db6a44f42"
[[package]]
name = "pbkdf2"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "83a0692ec44e4cf1ef28ca317f14f8f07da2d95ec3fa01f86e4467b725e60917"
dependencies = [
"digest",
"hmac",
"password-hash",
"sha2",
]
[[package]]
name = "percent-encoding"
version = "2.3.1"
@ -1089,7 +1249,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5d5285893bb5eb82e6aaf5d59ee909a06a16737a8970984dd7746ba9283498d6"
dependencies = [
"phf_shared 0.10.0",
"rand",
"rand 0.8.5",
]
[[package]]
@ -1099,7 +1259,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "48e4cc64c2ad9ebe670cb8fd69dd50ae301650392e81c05f9bfcb2d5bdbc24b0"
dependencies = [
"phf_shared 0.11.2",
"rand",
"rand 0.8.5",
]
[[package]]
@ -1193,6 +1353,19 @@ dependencies = [
"proc-macro2",
]
[[package]]
name = "rand"
version = "0.4.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "552840b97013b1a26992c11eac34bdd778e464601a4c2054b5f0bff7c6761293"
dependencies = [
"fuchsia-cprng",
"libc",
"rand_core 0.3.1",
"rdrand",
"winapi",
]
[[package]]
name = "rand"
version = "0.8.5"
@ -1201,7 +1374,7 @@ checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
dependencies = [
"libc",
"rand_chacha",
"rand_core",
"rand_core 0.6.4",
]
[[package]]
@ -1211,9 +1384,24 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
dependencies = [
"ppv-lite86",
"rand_core",
"rand_core 0.6.4",
]
[[package]]
name = "rand_core"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a6fdeb83b075e8266dcc8762c22776f6877a63111121f5f8c7411e5be7eed4b"
dependencies = [
"rand_core 0.4.2",
]
[[package]]
name = "rand_core"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c33a3c44ca05fa6f1807d8e6743f3824e8509beca625669633be0acbdf509dc"
[[package]]
name = "rand_core"
version = "0.6.4"
@ -1223,6 +1411,15 @@ dependencies = [
"getrandom",
]
[[package]]
name = "rdrand"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "678054eb77286b51581ba43620cc911abf02758c91f93f479767aed0f90458b2"
dependencies = [
"rand_core 0.3.1",
]
[[package]]
name = "redox_syscall"
version = "0.4.1"
@ -1261,6 +1458,15 @@ version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f"
[[package]]
name = "remove_dir_all"
version = "0.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7"
dependencies = [
"winapi",
]
[[package]]
name = "reqwest"
version = "0.11.23"
@ -1314,7 +1520,9 @@ dependencies = [
"reqwest",
"scraper",
"serde_json",
"tempdir",
"url",
"zip-extract",
]
[[package]]
@ -1476,6 +1684,28 @@ dependencies = [
"stable_deref_trait",
]
[[package]]
name = "sha1"
version = "0.10.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba"
dependencies = [
"cfg-if",
"cpufeatures",
"digest",
]
[[package]]
name = "sha2"
version = "0.10.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8"
dependencies = [
"cfg-if",
"cpufeatures",
"digest",
]
[[package]]
name = "siphasher"
version = "0.3.11"
@ -1545,6 +1775,12 @@ version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
[[package]]
name = "subtle"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc"
[[package]]
name = "syn"
version = "1.0.109"
@ -1588,6 +1824,16 @@ dependencies = [
"libc",
]
[[package]]
name = "tempdir"
version = "0.3.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "15f2b5fb00ccdf689e0149d1b1b3c03fead81c2b37735d812fa8bddbbf41b6d8"
dependencies = [
"rand 0.4.6",
"remove_dir_all",
]
[[package]]
name = "tempfile"
version = "3.9.0"
@ -1736,6 +1982,12 @@ version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b"
[[package]]
name = "typenum"
version = "1.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825"
[[package]]
name = "unicode-bidi"
version = "0.3.15"
@ -2144,9 +2396,56 @@ version = "0.6.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "760394e246e4c28189f19d488c058bf16f564016aefac5d32bb1f3b51d5e9261"
dependencies = [
"aes",
"byteorder",
"bzip2",
"constant_time_eq",
"crc32fast",
"crossbeam-utils",
"flate2",
"hmac",
"pbkdf2",
"sha1",
"time",
"zstd",
]
[[package]]
name = "zip-extract"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e109e5a291403b4c1e514d39f8a22d3f98d257e691a52bb1f16051bb1ffed63e"
dependencies = [
"log",
"thiserror",
"zip",
]
[[package]]
name = "zstd"
version = "0.11.2+zstd.1.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "20cc960326ece64f010d2d2107537f26dc589a6573a316bd5b1dba685fa5fde4"
dependencies = [
"zstd-safe",
]
[[package]]
name = "zstd-safe"
version = "5.0.2+zstd.1.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d2a5585e04f9eea4b2a3d1eca508c4dee9592a89ef6f450c11719da0726f4db"
dependencies = [
"libc",
"zstd-sys",
]
[[package]]
name = "zstd-sys"
version = "2.0.9+zstd.1.5.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9e16efa8a874a0481a574084d34cc26fdb3b99627480f785888deb6386506656"
dependencies = [
"cc",
"pkg-config",
]

View file

@ -25,4 +25,6 @@ regex = "1.10.3"
reqwest = { version = "0.11.23", features = ["blocking"] }
scraper = "0.18.1"
serde_json = "1.0.111"
tempdir = "0.3.7"
url = "2.5.0"
zip-extract = "0.1.3"

BIN
html2xhtml-linux.zip Normal file

Binary file not shown.

BIN
html2xhtml-windows.zip Normal file

Binary file not shown.

View file

@ -1,6 +1,7 @@
use std::path::PathBuf;
use std::{io::Cursor, path::PathBuf, process::exit};
use path_slash::PathBufExt as _;
use tempdir::TempDir;
/// Converts a given path to windows style if needed.
pub fn convert_path_to_os_specific(path: PathBuf) -> PathBuf {
@ -27,15 +28,47 @@ pub fn remove_illegal_chars(mut string: String) -> String {
}
/// Setup html2xhtml in the operating system's temp directory.
pub fn setup_html2xhtml() {
pub fn setup_html2xhtml() -> TempDir {
#[cfg(target_os = "windows")] {
//TODO!
// Thinking of using C:\Users\<username>\AppData\Local\Temp\html2xhtml-windows
const HTML2XHTML: &[u8; 268299] = include_bytes!("../html2xhtml-windows.zip");
let html2xhtml_dir = match TempDir::new("html2xhtml-windows") {
Ok(temp_dir) => temp_dir,
Err(error) => {
eprintln!("Error! Unable to create temp directory: {error}");
exit(1);
}
};
match zip_extract::extract(Cursor::new(HTML2XHTML), html2xhtml_dir.path(), true) {
Ok(_) => (),
Err(error) => {
eprintln!("Error! Unable to extract html2xhtml into into the temp directory\n{error}");
exit(1);
}
}
return html2xhtml_dir;
}
#[cfg(target_os = "linux")] {
// TODO!
// Thinking of using /tmp/html2xhtml-linux
const HTML2XHTML: &[u8; 186938] = include_bytes!("../html2xhtml-linux.zip");
let html2xhtml_dir = match TempDir::new("html2xhtml-linux") {
Ok(temp_dir) => temp_dir,
Err(error) => {
eprintln!("Error! Unable to create temp directory: {error}");
exit(1);
}
};
match zip_extract::extract(Cursor::new(HTML2XHTML), html2xhtml_dir.path(), true) {
Ok(_) => (),
Err(error) => {
eprintln!("Error! Unable to extract html2xhtml into the temp directory\n{error}");
exit(1);
}
}
return html2xhtml_dir;
}
#[cfg(target_os = "macos")] {
@ -45,6 +78,11 @@ pub fn setup_html2xhtml() {
}
/// Delete html2xhtml from the operating system's temp directory.
pub fn delete_html2xhtml() {
// TODO!
pub fn delete_html2xhtml(html2xhtml_dir: TempDir) {
match html2xhtml_dir.close() {
Ok(_) => (),
Err(warning) => {
eprintln!("Warning! Unable to close & delete temp directory: {warning}");
}
}
}

View file

@ -176,8 +176,8 @@ pub fn extract_urls_and_img_tag(chapter_html: &Html) -> HashMap<Url, Vec<String>
if url.is_none() { continue; }
let url = match Url::parse(url.unwrap()) {
Ok(url) => url,
Err(error) => {
eprintln!("Warning! Unable to parse url on image tag: {image_tag}\n{error}");
Err(warning) => {
eprintln!("Warning! Unable to parse url on image tag: {image_tag}\n{warning}");
continue;
},
};

View file

@ -115,7 +115,7 @@ pub fn generate_epub(epub_args: EpubArgs, book_url: Url, output_directory: PathB
epub_builder.inline_toc();
// Setup html2xhtml on the operating system.
file_system_crap::setup_html2xhtml();
let html2xhtml_dir = file_system_crap::setup_html2xhtml();
// TODO! Generate the epub body, deal with images etc etc. You know pickup from last night etc etc.
// Finish setup_html2xhtml() first though dummy.
@ -137,8 +137,8 @@ pub fn generate_epub(epub_args: EpubArgs, book_url: Url, output_directory: PathB
output_file.write_all(finished_epub.as_slice())
.expect(format!("Unable to write finished epub data to {0}", output_path.to_string_lossy()).as_str());
// Delete html2xhtml from the temp directory. It's good to clean up after yourself.
file_system_crap::delete_html2xhtml();
// Delete the html2xhtml temp directory. It's good to clean up after yourself.
file_system_crap::delete_html2xhtml(html2xhtml_dir);
}
/// Generate an html archive from the given arguments, url, & outputs it to the output directory.