Bundled both windows and linux builds of html2xhtml and just generally made a bunch of work

This commit is contained in:
NA 2024-01-25 14:49:55 +00:00
parent 778b1adf6a
commit 80f3d5b423
18 changed files with 4371 additions and 14 deletions

64
.vscode/launch.json vendored Normal file
View file

@ -0,0 +1,64 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"type": "lldb",
"request": "launch",
"name": "Debug unit tests in library 'royal_road_archiver_lib'",
"cargo": {
"args": [
"test",
"--no-run",
"--lib",
"--package=royal_road_archiver"
],
"filter": {
"name": "royal_road_archiver_lib",
"kind": "lib"
}
},
"args": [],
"cwd": "${workspaceFolder}"
},
{
"type": "lldb",
"request": "launch",
"name": "Debug executable 'royal_road_archiver_bin'",
"cargo": {
"args": [
"build",
"--bin=royal_road_archiver_bin",
"--package=royal_road_archiver"
],
"filter": {
"name": "royal_road_archiver_bin",
"kind": "bin"
}
},
"args": ["https://www.royalroad.com/fiction/22848/post-human", "markdown"],
"cwd": "${workspaceFolder}"
},
{
"type": "lldb",
"request": "launch",
"name": "Debug unit tests in executable 'royal_road_archiver_bin'",
"cargo": {
"args": [
"test",
"--no-run",
"--bin=royal_road_archiver_bin",
"--package=royal_road_archiver"
],
"filter": {
"name": "royal_road_archiver_bin",
"kind": "bin"
}
},
"args": [],
"cwd": "${workspaceFolder}"
}
]
}

View file

@ -7,6 +7,9 @@
], ],
"rust-analyzer.showUnlinkedFileNotification": false, "rust-analyzer.showUnlinkedFileNotification": false,
"cSpell.ignoreWords": [ "cSpell.ignoreWords": [
"royalroad" "autotools",
"reqwest",
"royalroad",
"ureq"
] ]
} }

1527
Cargo.lock generated

File diff suppressed because it is too large Load diff

View file

@ -14,5 +14,10 @@ name = "royal_road_archiver_bin"
path = "src/binary.rs" path = "src/binary.rs"
[dependencies] [dependencies]
bytes = "1.5.0"
clap = { version = "4.4.18", features = ["derive"] } clap = { version = "4.4.18", features = ["derive"] }
regex = "1.10.3"
reqwest = { version = "0.11.23", features = ["rustls", "blocking"] }
scraper = "0.18.1"
serde_json = "1.0.111"
url = "2.5.0" url = "2.5.0"

BIN
html2xhtml-linux/dtdquery Executable file

Binary file not shown.

BIN
html2xhtml-linux/html2xhtml Executable file

Binary file not shown.

Binary file not shown.

View file

@ -0,0 +1,201 @@
#! /bin/sh
# dtdquery - temporary wrapper script for .libs/dtdquery.exe
# Generated by libtool (GNU libtool) 2.4.2 Debian-2.4.2-1.11
#
# The dtdquery program cannot be directly executed until all the libtool
# libraries that it depends on are installed.
#
# This wrapper script should never be moved out of the build directory.
# If it is, it will not operate correctly.
# Sed substitution that helps us do robust quoting. It backslashifies
# metacharacters that are still active within double-quoted strings.
sed_quote_subst='s/\([`"$\\]\)/\\\1/g'
# Be Bourne compatible
if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then
emulate sh
NULLCMD=:
# Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which
# is contrary to our usage. Disable this feature.
alias -g '${1+"$@"}'='"$@"'
setopt NO_GLOB_SUBST
else
case `(set -o) 2>/dev/null` in *posix*) set -o posix;; esac
fi
BIN_SH=xpg4; export BIN_SH # for Tru64
DUALCASE=1; export DUALCASE # for MKS sh
# The HP-UX ksh and POSIX shell print the target directory to stdout
# if CDPATH is set.
(unset CDPATH) >/dev/null 2>&1 && unset CDPATH
relink_command=""
# This environment variable determines our operation mode.
if test "$libtool_install_magic" = "%%%MAGIC variable%%%"; then
# install mode needs the following variables:
generated_by_libtool_version='2.4.2'
notinst_deplibs=''
else
# When we are sourced in execute mode, $file and $ECHO are already set.
if test "$libtool_execute_magic" != "%%%MAGIC variable%%%"; then
file="$0"
# A function that is used when there is no print builtin or printf.
# It writes its first argument, followed by a newline, by expanding $1
# inside a here-document that is fed to cat (the eval defers that expansion
# until the function is called).
# NOTE(review): nothing in the visible part of this wrapper calls it, since
# ECHO is set to printf right after the definition.
func_fallback_echo ()
{
eval 'cat <<_LTECHO_EOF
$1
_LTECHO_EOF'
}
ECHO="printf %s\\n"
fi
# Very basic option parsing. These options are (a) specific to
# the libtool wrapper, (b) are identical between the wrapper
# /script/ and the wrapper /executable/ which is used only on
# windows platforms, and (c) all begin with the string --lt-
# (application programs are unlikely to have options which match
# this pattern).
#
# There are only two supported options: --lt-debug and
# --lt-dump-script. There is, deliberately, no --lt-help.
#
# The first argument to this parsing function should be the
# script's ../libtool value, followed by yes.
lt_option_debug=
# Scans the arguments for wrapper-specific --lt-* options.
#   --lt-debug        sets lt_option_debug=1
#   --lt-dump-script  prints this wrapper script to stdout and exits 0
#   any other --lt-*  prints an error to stderr and exits 1
# Arguments that do not start with --lt- are ignored here.
func_parse_lt_options ()
{
lt_script_arg0=$0
# Discard the first argument (the caller passes "$0" there) so that the
# loop below only sees the user's arguments.
shift
for lt_opt
do
case "$lt_opt" in
--lt-debug) lt_option_debug=1 ;;
--lt-dump-script)
# Split the script path into its directory (lt_dump_D) and file name
# (lt_dump_F); the X prefix protects paths that begin with a dash.
lt_dump_D=`$ECHO "X$lt_script_arg0" | /bin/sed -e 's/^X//' -e 's%/[^/]*$%%'`
# No slash in the path means the script is in the current directory.
test "X$lt_dump_D" = "X$lt_script_arg0" && lt_dump_D=.
lt_dump_F=`$ECHO "X$lt_script_arg0" | /bin/sed -e 's/^X//' -e 's%^.*/%%'`
cat "$lt_dump_D/$lt_dump_F"
exit 0
;;
--lt-*)
$ECHO "Unrecognized --lt- option: '$lt_opt'" 1>&2
exit 1
;;
esac
done
# Print the debug banner immediately:
if test -n "$lt_option_debug"; then
echo "dtdquery.exe:dtdquery:${LINENO}: libtool wrapper (GNU libtool) 2.4.2 Debian-2.4.2-1.11" 1>&2
fi
}
# Used when --lt-debug. Prints its arguments to stdout
# (redirection is the responsibility of the caller)
# Each argument is printed on its own line as newargv[N], with N counting
# up from 1.
func_lt_dump_args ()
{
lt_dump_args_N=1;
for lt_arg
do
$ECHO "dtdquery.exe:dtdquery:${LINENO}: newargv[$lt_dump_args_N]: $lt_arg"
# Increment the counter with expr rather than shell arithmetic.
lt_dump_args_N=`expr $lt_dump_args_N + 1`
done
}
# Core function for launching the target application
# Replaces this shell with $progdir/$program, passing all arguments through.
# When --lt-debug was given, the command line is first dumped to stderr.
func_exec_program_core ()
{
if test -n "$lt_option_debug"; then
$ECHO "dtdquery.exe:dtdquery:${LINENO}: newargv[0]: $progdir/$program" 1>&2
func_lt_dump_args ${1+"$@"} 1>&2
fi
exec "$progdir/$program" ${1+"$@"}
# Only reached when exec itself failed.
$ECHO "$0: cannot exec $program $*" 1>&2
exit 1
}
# A function to encapsulate launching the target application
# Strips options in the --lt-* namespace from $@ and
# launches target application with the remaining arguments.
func_exec_program ()
{
# The argument list is only rewritten when some argument starts with --lt-.
case " $* " in
*\ --lt-*)
for lt_wr_arg
do
case $lt_wr_arg in
--lt-*) ;;
# Keep this argument by appending it to the end of the positional
# parameters; the leading x is a placeholder removed by the shift.
*) set x "$@" "$lt_wr_arg"; shift;;
esac
# Drop the original copy from the front of the list. Once every original
# argument has been visited only the kept ones remain, in order.
shift
done ;;
esac
func_exec_program_core ${1+"$@"}
}
# Parse options
func_parse_lt_options "$0" ${1+"$@"}
# Find the directory that this script lives in.
thisdir=`$ECHO "$file" | /bin/sed 's%/[^/]*$%%'`
test "x$thisdir" = "x$file" && thisdir=.
# Follow symbolic links until we get to the real thisdir.
file=`ls -ld "$file" | /bin/sed -n 's/.*-> //p'`
while test -n "$file"; do
destdir=`$ECHO "$file" | /bin/sed 's%/[^/]*$%%'`
# If there was a directory component, then change thisdir.
if test "x$destdir" != "x$file"; then
case "$destdir" in
[\\/]* | [A-Za-z]:[\\/]*) thisdir="$destdir" ;;
*) thisdir="$thisdir/$destdir" ;;
esac
fi
file=`$ECHO "$file" | /bin/sed 's%^.*/%%'`
file=`ls -ld "$thisdir/$file" | /bin/sed -n 's/.*-> //p'`
done
# Usually 'no', except on cygwin/mingw when embedded into
# the cwrapper.
WRAPPER_SCRIPT_BELONGS_IN_OBJDIR=yes
if test "$WRAPPER_SCRIPT_BELONGS_IN_OBJDIR" = "yes"; then
# special case for '.'
if test "$thisdir" = "."; then
thisdir=`pwd`
fi
# remove .libs from thisdir
case "$thisdir" in
*[\\/].libs ) thisdir=`$ECHO "$thisdir" | /bin/sed 's%[\\/][^\\/]*$%%'` ;;
.libs ) thisdir=. ;;
esac
fi
# Try to get the absolute directory name.
absdir=`cd "$thisdir" && pwd`
test -n "$absdir" && thisdir="$absdir"
program='dtdquery.exe'
progdir="$thisdir/.libs"
if test -f "$progdir/$program"; then
if test "$libtool_execute_magic" != "%%%MAGIC variable%%%"; then
# Run the actual program with our arguments.
func_exec_program ${1+"$@"}
fi
else
# The program doesn't exist.
$ECHO "$0: error: \`$progdir/$program' does not exist" 1>&2
$ECHO "This script is just a wrapper for $program." 1>&2
$ECHO "See the libtool documentation for more information." 1>&2
exit 1
fi
fi

Binary file not shown.

View file

@ -0,0 +1,201 @@
#! /bin/sh
# html2xhtml - temporary wrapper script for .libs/html2xhtml.exe
# Generated by libtool (GNU libtool) 2.4.2 Debian-2.4.2-1.11
#
# The html2xhtml program cannot be directly executed until all the libtool
# libraries that it depends on are installed.
#
# This wrapper script should never be moved out of the build directory.
# If it is, it will not operate correctly.
# Sed substitution that helps us do robust quoting. It backslashifies
# metacharacters that are still active within double-quoted strings.
sed_quote_subst='s/\([`"$\\]\)/\\\1/g'
# Be Bourne compatible
if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then
emulate sh
NULLCMD=:
# Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which
# is contrary to our usage. Disable this feature.
alias -g '${1+"$@"}'='"$@"'
setopt NO_GLOB_SUBST
else
case `(set -o) 2>/dev/null` in *posix*) set -o posix;; esac
fi
BIN_SH=xpg4; export BIN_SH # for Tru64
DUALCASE=1; export DUALCASE # for MKS sh
# The HP-UX ksh and POSIX shell print the target directory to stdout
# if CDPATH is set.
(unset CDPATH) >/dev/null 2>&1 && unset CDPATH
relink_command=""
# This environment variable determines our operation mode.
if test "$libtool_install_magic" = "%%%MAGIC variable%%%"; then
# install mode needs the following variables:
generated_by_libtool_version='2.4.2'
notinst_deplibs=''
else
# When we are sourced in execute mode, $file and $ECHO are already set.
if test "$libtool_execute_magic" != "%%%MAGIC variable%%%"; then
file="$0"
# A function that is used when there is no print builtin or printf.
# It writes its first argument, followed by a newline, by expanding $1
# inside a here-document that is fed to cat (the eval defers that expansion
# until the function is called).
# NOTE(review): nothing in the visible part of this wrapper calls it, since
# ECHO is set to printf right after the definition.
func_fallback_echo ()
{
eval 'cat <<_LTECHO_EOF
$1
_LTECHO_EOF'
}
ECHO="printf %s\\n"
fi
# Very basic option parsing. These options are (a) specific to
# the libtool wrapper, (b) are identical between the wrapper
# /script/ and the wrapper /executable/ which is used only on
# windows platforms, and (c) all begin with the string --lt-
# (application programs are unlikely to have options which match
# this pattern).
#
# There are only two supported options: --lt-debug and
# --lt-dump-script. There is, deliberately, no --lt-help.
#
# The first argument to this parsing function should be the
# script's ../libtool value, followed by yes.
lt_option_debug=
# Scans the arguments for wrapper-specific --lt-* options.
#   --lt-debug        sets lt_option_debug=1
#   --lt-dump-script  prints this wrapper script to stdout and exits 0
#   any other --lt-*  prints an error to stderr and exits 1
# Arguments that do not start with --lt- are ignored here.
func_parse_lt_options ()
{
lt_script_arg0=$0
# Discard the first argument (the caller passes "$0" there) so that the
# loop below only sees the user's arguments.
shift
for lt_opt
do
case "$lt_opt" in
--lt-debug) lt_option_debug=1 ;;
--lt-dump-script)
# Split the script path into its directory (lt_dump_D) and file name
# (lt_dump_F); the X prefix protects paths that begin with a dash.
lt_dump_D=`$ECHO "X$lt_script_arg0" | /bin/sed -e 's/^X//' -e 's%/[^/]*$%%'`
# No slash in the path means the script is in the current directory.
test "X$lt_dump_D" = "X$lt_script_arg0" && lt_dump_D=.
lt_dump_F=`$ECHO "X$lt_script_arg0" | /bin/sed -e 's/^X//' -e 's%^.*/%%'`
cat "$lt_dump_D/$lt_dump_F"
exit 0
;;
--lt-*)
$ECHO "Unrecognized --lt- option: '$lt_opt'" 1>&2
exit 1
;;
esac
done
# Print the debug banner immediately:
if test -n "$lt_option_debug"; then
echo "html2xhtml.exe:html2xhtml:${LINENO}: libtool wrapper (GNU libtool) 2.4.2 Debian-2.4.2-1.11" 1>&2
fi
}
# Used when --lt-debug. Prints its arguments to stdout
# (redirection is the responsibility of the caller)
# Each argument is printed on its own line as newargv[N], with N counting
# up from 1.
func_lt_dump_args ()
{
lt_dump_args_N=1;
for lt_arg
do
$ECHO "html2xhtml.exe:html2xhtml:${LINENO}: newargv[$lt_dump_args_N]: $lt_arg"
# Increment the counter with expr rather than shell arithmetic.
lt_dump_args_N=`expr $lt_dump_args_N + 1`
done
}
# Core function for launching the target application
# Replaces this shell with $progdir/$program, passing all arguments through.
# When --lt-debug was given, the command line is first dumped to stderr.
func_exec_program_core ()
{
if test -n "$lt_option_debug"; then
$ECHO "html2xhtml.exe:html2xhtml:${LINENO}: newargv[0]: $progdir/$program" 1>&2
func_lt_dump_args ${1+"$@"} 1>&2
fi
exec "$progdir/$program" ${1+"$@"}
# Only reached when exec itself failed.
$ECHO "$0: cannot exec $program $*" 1>&2
exit 1
}
# A function to encapsulate launching the target application
# Strips options in the --lt-* namespace from $@ and
# launches target application with the remaining arguments.
func_exec_program ()
{
# The argument list is only rewritten when some argument starts with --lt-.
case " $* " in
*\ --lt-*)
for lt_wr_arg
do
case $lt_wr_arg in
--lt-*) ;;
# Keep this argument by appending it to the end of the positional
# parameters; the leading x is a placeholder removed by the shift.
*) set x "$@" "$lt_wr_arg"; shift;;
esac
# Drop the original copy from the front of the list. Once every original
# argument has been visited only the kept ones remain, in order.
shift
done ;;
esac
func_exec_program_core ${1+"$@"}
}
# Parse options
func_parse_lt_options "$0" ${1+"$@"}
# Find the directory that this script lives in.
thisdir=`$ECHO "$file" | /bin/sed 's%/[^/]*$%%'`
test "x$thisdir" = "x$file" && thisdir=.
# Follow symbolic links until we get to the real thisdir.
file=`ls -ld "$file" | /bin/sed -n 's/.*-> //p'`
while test -n "$file"; do
destdir=`$ECHO "$file" | /bin/sed 's%/[^/]*$%%'`
# If there was a directory component, then change thisdir.
if test "x$destdir" != "x$file"; then
case "$destdir" in
[\\/]* | [A-Za-z]:[\\/]*) thisdir="$destdir" ;;
*) thisdir="$thisdir/$destdir" ;;
esac
fi
file=`$ECHO "$file" | /bin/sed 's%^.*/%%'`
file=`ls -ld "$thisdir/$file" | /bin/sed -n 's/.*-> //p'`
done
# Usually 'no', except on cygwin/mingw when embedded into
# the cwrapper.
WRAPPER_SCRIPT_BELONGS_IN_OBJDIR=yes
if test "$WRAPPER_SCRIPT_BELONGS_IN_OBJDIR" = "yes"; then
# special case for '.'
if test "$thisdir" = "."; then
thisdir=`pwd`
fi
# remove .libs from thisdir
case "$thisdir" in
*[\\/].libs ) thisdir=`$ECHO "$thisdir" | /bin/sed 's%[\\/][^\\/]*$%%'` ;;
.libs ) thisdir=. ;;
esac
fi
# Try to get the absolute directory name.
absdir=`cd "$thisdir" && pwd`
test -n "$absdir" && thisdir="$absdir"
program='html2xhtml.exe'
progdir="$thisdir/.libs"
if test -f "$progdir/$program"; then
if test "$libtool_execute_magic" != "%%%MAGIC variable%%%"; then
# Run the actual program with our arguments.
func_exec_program ${1+"$@"}
fi
else
# The program doesn't exist.
$ECHO "$0: error: \`$progdir/$program' does not exist" 1>&2
$ECHO "This script is just a wrapper for $program." 1>&2
$ECHO "See the libtool documentation for more information." 1>&2
exit 1
fi
fi

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

Binary file not shown.

Binary file not shown.

95
src/book.rs Normal file
View file

@ -0,0 +1,95 @@
use scraper::Html;
use url::Url;
use crate::{html, http};
/// A struct representing a book & all the needed data to generate one.
pub struct Book {
    /// The RoyalRoad Url for the book.
    book_url: Url,

    /// The book's title.
    title: String,

    /// The book's author.
    author: String,

    /// A Url to the book's cover image.
    cover_image_url: Url,

    /// The parsed html of the RoyalRoad index page.
    index_html: Html,

    /// A vector of the book's chapters, in the order they are listed on the index page.
    chapters: Vec<Chapter>,
}

impl Book {
    /// Generate a new book instance with all the needed data from a given url.
    ///
    /// This downloads the index page at `book_url` and then downloads every chapter listed
    /// on it, one blocking request per chapter. The `http` and `html` helpers used here
    /// print an error and exit the program when a download or a lookup fails.
    pub fn new(book_url: Url) -> Book {
        let index_html = html::string_to_html_document(&http::get_response(book_url.clone()).get_text());

        // Each entry is `[chapter name, chapter url]`.
        let chapters: Vec<Chapter> = html::get_chapter_names_and_urls_from_index(&index_html)
            .iter()
            .map(|[chapter_name, chapter_url]| Chapter::new(chapter_name, chapter_url))
            .collect();

        // `index_html` is borrowed by the metadata lookups before it is moved into the struct.
        Book {
            book_url,
            title: html::get_title_from_index(&index_html),
            author: html::get_author_from_index(&index_html),
            cover_image_url: http::string_to_url(&html::get_cover_image_url_from_index(&index_html)),
            index_html,
            chapters,
        }
    }

    /// Count how many paragraphs are in the book.
    ///
    /// Not implemented yet: this currently always returns 0.
    pub fn count_paragraphs(&self) -> u128 {
        // TODO!
        0
    }
}
/// A struct representing a chapter.
struct Chapter {
    /// The Url of the chapter.
    chapter_url: Url,

    /// The name of the chapter.
    chapter_name: String,

    /// The parsed html of the whole chapter page.
    raw_chapter_html: Html,

    /// The isolated chapter html.
    isolated_chapter_html: Html,
}

impl Chapter {
    /// Download the page at `chapter_url` and build a chapter called `chapter_name` from it.
    ///
    /// The helpers used here print an error and exit the program if the url cannot be
    /// parsed, the page cannot be fetched, or the chapter content cannot be isolated.
    fn new(chapter_name: &str, chapter_url: &str) -> Self {
        let chapter_url = http::string_to_url(chapter_url);
        let raw_chapter_html = html::string_to_html_document(&http::get_response(chapter_url.clone()).get_text());

        // `isolate_chapter_content` takes ownership of its argument, so it gets a copy and
        // the original document is kept for the `raw_chapter_html` field.
        let isolated_chapter_html = html::isolate_chapter_content(raw_chapter_html.clone());

        Chapter {
            chapter_url,
            chapter_name: chapter_name.to_string(),
            raw_chapter_html,
            isolated_chapter_html,
        }
    }
}
// TODO!
/// Placeholder type with no fields yet.
/// Presumably intended to hold the images used by a book; confirm when implementing.
struct BookImages {
}
// TODO!
/// Placeholder type with no fields yet.
/// Presumably intended to hold the css used by a book; confirm when implementing.
struct BookCss {
}

141
src/html.rs Normal file
View file

@ -0,0 +1,141 @@
use std::process::exit;
use regex::Regex;
use scraper::{Html, Selector};
/// Convert a string to an html document.
///
/// Thin wrapper around [`Html::parse_document`]; use it for complete pages.
pub fn string_to_html_document(document_string: &str) -> Html {
Html::parse_document(document_string)
}
/// Convert a string to an html fragment.
///
/// Thin wrapper around [`Html::parse_fragment`]; use it for a piece of markup rather than
/// a complete page.
pub fn string_to_html_fragment(fragment_string: &str) -> Html {
Html::parse_fragment(fragment_string)
}
/// Get the book's title from the index.
///
/// The title is read from the `content` attribute of the first
/// `<meta name="twitter:title">` tag that has one. If no such tag is found, an error is
/// printed to stderr and the program exits with status 1.
pub fn get_title_from_index(index_html: &Html) -> String {
    let selector = Selector::parse("meta").expect("'meta' is a valid CSS selector");

    let title = index_html
        .select(&selector)
        .filter(|element| element.value().attr("name") == Some("twitter:title"))
        // A matching tag without a `content` attribute is skipped instead of panicking.
        .find_map(|element| element.value().attr("content"));

    match title {
        Some(title) => title.to_owned(),
        None => {
            eprintln!("Error! Unable to find book title. Royal road have probably changed their front-end code. Please report this to me on:\nhttps://github.com/Raine-gay/royal_road_archiver");
            exit(1)
        }
    }
}
/// Get the book's author from index
///
/// The author is read from the `content` attribute of the first
/// `<meta property="books:author">` tag that has one. If no such tag is found, an error is
/// printed to stderr and the program exits with status 1.
pub fn get_author_from_index(index_html: &Html) -> String {
    let selector = Selector::parse("meta").expect("'meta' is a valid CSS selector");

    let author = index_html
        .select(&selector)
        .filter(|element| element.value().attr("property") == Some("books:author"))
        // A matching tag without a `content` attribute is skipped instead of panicking.
        .find_map(|element| element.value().attr("content"));

    match author {
        Some(author) => author.to_owned(),
        None => {
            eprintln!("Error! Unable to find book author. Royal road have probably changed their front-end code. Please report this to me on:\nhttps://github.com/Raine-gay/royal_road_archiver");
            exit(1)
        }
    }
}
/// Get the book's cover image url from the index
///
/// The url is read from the `content` attribute of the first `<meta property="og:image">`
/// tag that has one. If no such tag is found, an error is printed to stderr and the
/// program exits with status 1.
pub fn get_cover_image_url_from_index(index_html: &Html) -> String {
    let selector = Selector::parse("meta").expect("'meta' is a valid CSS selector");

    let cover_image_url = index_html
        .select(&selector)
        .filter(|element| element.value().attr("property") == Some("og:image"))
        // A matching tag without a `content` attribute is skipped instead of panicking.
        .find_map(|element| element.value().attr("content"));

    match cover_image_url {
        Some(cover_image_url) => cover_image_url.to_owned(),
        None => {
            eprintln!("Error! Unable to find cover image url. Royal road have probably changed their front-end code. Please report this to me on:\nhttps://github.com/Raine-gay/royal_road_archiver");
            exit(1)
        }
    }
}
/// Gets the chapter names and urls from the index.
///
/// This gets stored in a vector where index 0 is the chapter name, and index 1 is the url.
///
/// The index page embeds the chapter list as a json array assigned to `window.chapters`
/// inside a `<script>` tag. This function finds that script, cuts the array out with a
/// regex, parses it, and reads the `title` and `url` string of every entry. The `url` is
/// appended to `https://www.royalroad.com` to form an absolute url.
///
/// If any of those steps fails, an error is printed to stderr and the program exits with
/// status 1.
pub fn get_chapter_names_and_urls_from_index(index_html: &Html) -> Vec<[String; 2]> {
    /// Print a scraping error in the same format as the other lookups, then exit.
    fn bail(problem: &str) -> ! {
        eprintln!("Error! {problem}. Royal road have probably changed their front-end code. Please report this to me on:\nhttps://github.com/Raine-gay/royal_road_archiver");
        exit(1)
    }

    // Find the first script tag whose contents mention "window.chapters".
    let selector = Selector::parse("script").expect("'script' is a valid CSS selector");
    let raw_json_data = index_html
        .select(&selector)
        .map(|element| element.inner_html())
        .find(|script| script.contains("window.chapters"))
        .unwrap_or_else(|| bail("Unable to find json chapter data"));

    // Capture group 1 is the shortest text that starts with `[` right after
    // `window.chapters = ` and ends with the `]` of the first `];` that follows it,
    // i.e. the json array itself. `.` does not match newlines, so the whole assignment
    // has to be on a single line.
    const REGEX: &str = r#"window.chapters = (\[.*?]);"#;
    let regex = Regex::new(REGEX).expect("the chapter list regex is valid");
    let chapter_raw_json = regex
        .captures(&raw_json_data)
        .and_then(|captures| captures.get(1))
        .map(|matched| matched.as_str())
        .unwrap_or_else(|| bail("Unable to extract json chapter data"));

    let chapter_json: serde_json::Value = serde_json::from_str(chapter_raw_json)
        .unwrap_or_else(|_| bail("Unable to parse json chapter data"));
    let chapter_list = chapter_json
        .as_array()
        .unwrap_or_else(|| bail("Json chapter data is not a list"));

    chapter_list
        .iter()
        .map(|chapter| {
            // `as_str` yields the decoded string. Serialising the value and deleting the
            // quote characters would also delete quotes that belong to the title and
            // leave the backslashes of json escapes behind.
            match (chapter["title"].as_str(), chapter["url"].as_str()) {
                (Some(chapter_name), Some(path)) => [
                    chapter_name.to_owned(),
                    format!("https://www.royalroad.com{path}"),
                ],
                _ => bail("A chapter is missing its title or url"),
            }
        })
        .collect()
}
/// Isolate chapter content from the rest of the page.
///
/// Returns the inner html of the first `<div>` whose `class` attribute is exactly
/// `chapter-inner chapter-content`, parsed as an html fragment. If the page has no such
/// div, an error is printed to stderr and the program exits with status 1.
pub fn isolate_chapter_content(raw_chapter_html: Html) -> Html {
    let selector = Selector::parse("div").expect("'div' is a valid CSS selector");

    // The document is already parsed, so it is searched directly instead of being
    // serialised to a string and parsed a second time.
    let chapter_div = raw_chapter_html
        .select(&selector)
        .find(|element| element.value().attr("class") == Some("chapter-inner chapter-content"));

    match chapter_div {
        Some(element) => string_to_html_fragment(&element.inner_html()),
        None => {
            eprintln!("Error! Unable to isolate chapter content");
            exit(1)
        }
    }
}

63
src/http.rs Normal file
View file

@ -0,0 +1,63 @@
use std::process::exit;
use reqwest::{blocking::Response, header::HeaderMap};
use url::Url;
/// An http response paired with the Url it was requested from.
pub struct HttpResponse {
    /// The Url the request was sent to; only used in error messages.
    url: Url,
    /// The underlying blocking reqwest response.
    pub response: Response,
}

impl HttpResponse {
    /// Borrow the headers of the response.
    pub fn get_headers(&self) -> &HeaderMap {
        self.response.headers()
    }

    /// Consume the response and return its body as text.
    ///
    /// Prints an error to stderr and exits with status 1 if the body cannot be read as text.
    pub fn get_text(self) -> String {
        let HttpResponse { url, response } = self;

        response.text().unwrap_or_else(|error| {
            eprintln!("Error! Unable to convert response from {0} into text\n{error}", url);
            exit(1)
        })
    }

    /// Consume the response and return its body as raw bytes. Used for images.
    ///
    /// Prints an error to stderr and exits with status 1 if the body cannot be read.
    pub fn get_bytes(self) -> bytes::Bytes {
        let HttpResponse { url, response } = self;

        response.bytes().unwrap_or_else(|error| {
            eprintln!("Error! Unable to convert response from {0} into bytes\n{error}", url);
            exit(1)
        })
    }
}
/// Get an http response for a given url. Exits the program if it fails.
///
/// A response with a 4xx or 5xx status counts as a failure too, so that an error page is
/// never handed on to be parsed as if it were the requested content. On failure the error
/// is printed to stderr and the program exits with status 1.
pub fn get_response(url: Url) -> HttpResponse {
    // The url is cloned because it is still needed for the result and the error message.
    let response_result = reqwest::blocking::get(url.clone())
        .and_then(|response| response.error_for_status());

    match response_result {
        Ok(response) => HttpResponse { url, response },
        Err(error) => {
            eprintln!("Error! Unable to get a response from: {url}\n{error}");
            exit(1)
        }
    }
}
/// A function to convert a string to a url. Exits the program if it fails.
///
/// On failure the offending string and the parse error are printed to stderr and the
/// program exits with status 1.
pub fn string_to_url(url: &str) -> Url {
    match Url::parse(url) {
        Ok(parsed_url) => parsed_url,
        Err(error) => {
            // `url` is the original string here; the reason it was rejected is reported too.
            eprintln!("Error! Unable to parse: {url} into a valid url.\n{error}");
            exit(1)
        }
    }
}

View file

@ -3,6 +3,10 @@ use std::path::PathBuf;
use clap::Args; use clap::Args;
use url::Url; use url::Url;
mod book;
mod html;
mod http;
/// struct that corresponds to arguments for Audiobook generation. /// struct that corresponds to arguments for Audiobook generation.
#[derive(Args, Debug)] #[derive(Args, Debug)]
pub struct AudiobookArgs { pub struct AudiobookArgs {
@ -71,5 +75,5 @@ pub fn generate_html(html_args: HtmlArgs, book_url: Url, output_directory: PathB
/// This function DOES NOT do any error checking on the Url or output directory & WILL panic if they are wrong. /// This function DOES NOT do any error checking on the Url or output directory & WILL panic if they are wrong.
/// Make sure the Url is valid and the output directory is writable BEFORE passing them to this. /// Make sure the Url is valid and the output directory is writable BEFORE passing them to this.
pub fn generate_markdown(markdown_args: MarkdownArgs, book_url: Url, output_directory: PathBuf) { pub fn generate_markdown(markdown_args: MarkdownArgs, book_url: Url, output_directory: PathBuf) {
eprintln!("This is not implemented yet."); let book = book::Book::new(book_url);
} }