Bundled both Windows and Linux builds of html2xhtml and generally did a bunch of work

2025-01-05 10:28:19 -06:00 · 2024-01-25 14:49:55 +00:00 · 2024-01-25 14:49:55 +00:00 · 80f3d5b423
parent 778b1adf6a
commit 80f3d5b423
18 changed files with 4371 additions and 14 deletions
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@ -0,0 +1,64 @@
+{
+    // Use IntelliSense to learn about possible attributes.
+    // Hover to view descriptions of existing attributes.
+    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
+    "version": "0.2.0",
+    "configurations": [
+        {
+            "type": "lldb",
+            "request": "launch",
+            "name": "Debug unit tests in library 'royal_road_archiver_lib'",
+            "cargo": {
+                "args": [
+                    "test",
+                    "--no-run",
+                    "--lib",
+                    "--package=royal_road_archiver"
+                ],
+                "filter": {
+                    "name": "royal_road_archiver_lib",
+                    "kind": "lib"
+                }
+            },
+            "args": [],
+            "cwd": "${workspaceFolder}"
+        },
+        {
+            "type": "lldb",
+            "request": "launch",
+            "name": "Debug executable 'royal_road_archiver_bin'",
+            "cargo": {
+                "args": [
+                    "build",
+                    "--bin=royal_road_archiver_bin",
+                    "--package=royal_road_archiver"
+                ],
+                "filter": {
+                    "name": "royal_road_archiver_bin",
+                    "kind": "bin"
+                }
+            },
+            "args": ["https://www.royalroad.com/fiction/22848/post-human", "markdown"],
+            "cwd": "${workspaceFolder}"
+        },
+        {
+            "type": "lldb",
+            "request": "launch",
+            "name": "Debug unit tests in executable 'royal_road_archiver_bin'",
+            "cargo": {
+                "args": [
+                    "test",
+                    "--no-run",
+                    "--bin=royal_road_archiver_bin",
+                    "--package=royal_road_archiver"
+                ],
+                "filter": {
+                    "name": "royal_road_archiver_bin",
+                    "kind": "bin"
+                }
+            },
+            "args": [],
+            "cwd": "${workspaceFolder}"
+        }
+    ]
+}
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@ -7,6 +7,9 @@
    ],
    "rust-analyzer.showUnlinkedFileNotification": false,
    "cSpell.ignoreWords": [
-        "royalroad"
+        "autotools",
+        "reqwest",
+        "royalroad",
+        "ureq"
    ]
 }
--- a/Cargo.lock
+++ b/Cargo.lock
--- a/Cargo.toml
+++ b/Cargo.toml
@ -14,5 +14,10 @@ name = "royal_road_archiver_bin"
 path = "src/binary.rs"

 [dependencies]
+bytes = "1.5.0"
 clap = { version = "4.4.18", features = ["derive"] }
-url = "2.5.0"
+regex = "1.10.3"
+reqwest = { version = "0.11.23", features = ["rustls", "blocking"] }
+scraper = "0.18.1"
+serde_json = "1.0.111"
+url = "2.5.0"
--- a/html2xhtml-linux/dtdquery
+++ b/html2xhtml-linux/dtdquery
--- a/html2xhtml-linux/html2xhtml
+++ b/html2xhtml-linux/html2xhtml
--- a/html2xhtml-windows/.libs/dtdquery.exe
+++ b/html2xhtml-windows/.libs/dtdquery.exe
--- a/html2xhtml-windows/.libs/dtdquery_ltshwrapper
+++ b/html2xhtml-windows/.libs/dtdquery_ltshwrapper
@ -0,0 +1,201 @@
+#! /bin/sh
+
+# dtdquery - temporary wrapper script for .libs/dtdquery.exe
+# Generated by libtool (GNU libtool) 2.4.2 Debian-2.4.2-1.11
+#
+# The dtdquery program cannot be directly executed until all the libtool
+# libraries that it depends on are installed.
+#
+# This wrapper script should never be moved out of the build directory.
+# If it is, it will not operate correctly.
+
+# Sed substitution that helps us do robust quoting.  It backslashifies
+# metacharacters that are still active within double-quoted strings.
+sed_quote_subst='s/\([`"$\\]\)/\\\1/g'
+
+# Be Bourne compatible
+if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then
+  emulate sh
+  NULLCMD=:
+  # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which
+  # is contrary to our usage.  Disable this feature.
+  alias -g '${1+"$@"}'='"$@"'
+  setopt NO_GLOB_SUBST
+else
+  case `(set -o) 2>/dev/null` in *posix*) set -o posix;; esac
+fi
+BIN_SH=xpg4; export BIN_SH # for Tru64
+DUALCASE=1; export DUALCASE # for MKS sh
+
+# The HP-UX ksh and POSIX shell print the target directory to stdout
+# if CDPATH is set.
+(unset CDPATH) >/dev/null 2>&1 && unset CDPATH
+
+relink_command=""
+
+# This environment variable determines our operation mode.
+if test "$libtool_install_magic" = "%%%MAGIC variable%%%"; then
+  # install mode needs the following variables:
+  generated_by_libtool_version='2.4.2'
+  notinst_deplibs=''
+else
+  # When we are sourced in execute mode, $file and $ECHO are already set.
+  if test "$libtool_execute_magic" != "%%%MAGIC variable%%%"; then
+    file="$0"
+
+# Fallback echo for shells with no print builtin or printf: prints $1 plus a newline using cat and a here-document.
+func_fallback_echo ()
+{
+  eval 'cat <<_LTECHO_EOF
+$1
+_LTECHO_EOF'
+}
+    ECHO="printf %s\\n"
+  fi
+
+# Very basic option parsing. These options are (a) specific to
+# the libtool wrapper, (b) are identical between the wrapper
+# /script/ and the wrapper /executable/ which is used only on
+# windows platforms, and (c) all begin with the string --lt-
+# (application programs are unlikely to have options which match
+# this pattern).
+#
+# There are only two supported options: --lt-debug and
+# --lt-dump-script. Any other --lt-* option is an error.
+#
+# The first argument passed to this function is discarded by the
+# shift below; the remaining arguments are the ones scanned.
+lt_option_debug=
+func_parse_lt_options ()
+{
+  lt_script_arg0=$0
+  shift
+  for lt_opt
+  do
+    case "$lt_opt" in
+    --lt-debug) lt_option_debug=1 ;;
+    --lt-dump-script)
+        lt_dump_D=`$ECHO "X$lt_script_arg0" | /bin/sed -e 's/^X//' -e 's%/[^/]*$%%'`
+        test "X$lt_dump_D" = "X$lt_script_arg0" && lt_dump_D=.
+        lt_dump_F=`$ECHO "X$lt_script_arg0" | /bin/sed -e 's/^X//' -e 's%^.*/%%'`
+        cat "$lt_dump_D/$lt_dump_F"
+        exit 0
+      ;;
+    --lt-*)
+        $ECHO "Unrecognized --lt- option: '$lt_opt'" 1>&2
+        exit 1
+      ;;
+    esac
+  done
+
+  # Print the debug banner to stderr once all options are parsed:
+  if test -n "$lt_option_debug"; then
+    echo "dtdquery.exe:dtdquery:${LINENO}: libtool wrapper (GNU libtool) 2.4.2 Debian-2.4.2-1.11" 1>&2
+  fi
+}
+
+# Used when --lt-debug. Prints each argument on its own line to stdout,
+# numbered from 1 (redirection is the responsibility of the caller)
+func_lt_dump_args ()
+{
+  lt_dump_args_N=1;
+  for lt_arg
+  do
+    $ECHO "dtdquery.exe:dtdquery:${LINENO}: newargv[$lt_dump_args_N]: $lt_arg"
+    lt_dump_args_N=`expr $lt_dump_args_N + 1`
+  done
+}
+
+# Core function for launching the target application: execs $progdir/$program with the given arguments
+func_exec_program_core ()
+{
+
+      if test -n "$lt_option_debug"; then
+        $ECHO "dtdquery.exe:dtdquery:${LINENO}: newargv[0]: $progdir/$program" 1>&2
+        func_lt_dump_args ${1+"$@"} 1>&2
+      fi
+      exec "$progdir/$program" ${1+"$@"}
+
+      $ECHO "$0: cannot exec $program $*" 1>&2
+      exit 1
+}
+
+# A function to encapsulate launching the target application.
+# Strips options in the --lt-* namespace from $@ (keeping the order of the
+# rest) and launches the target application with the remaining arguments.
+func_exec_program ()
+{
+  case " $* " in
+  *\ --lt-*)
+    for lt_wr_arg
+    do
+      case $lt_wr_arg in
+      --lt-*) ;;
+      *) set x "$@" "$lt_wr_arg"; shift;;
+      esac
+      shift
+    done ;;
+  esac
+  func_exec_program_core ${1+"$@"}
+}
+
+  # Parse options
+  func_parse_lt_options "$0" ${1+"$@"}
+
+  # Find the directory that this script lives in.
+  thisdir=`$ECHO "$file" | /bin/sed 's%/[^/]*$%%'`
+  test "x$thisdir" = "x$file" && thisdir=.
+
+  # Follow symbolic links until we get to the real thisdir.
+  file=`ls -ld "$file" | /bin/sed -n 's/.*-> //p'`
+  while test -n "$file"; do
+    destdir=`$ECHO "$file" | /bin/sed 's%/[^/]*$%%'`
+
+    # If there was a directory component, then change thisdir.
+    if test "x$destdir" != "x$file"; then
+      case "$destdir" in
+      [\\/]* | [A-Za-z]:[\\/]*) thisdir="$destdir" ;;
+      *) thisdir="$thisdir/$destdir" ;;
+      esac
+    fi
+
+    file=`$ECHO "$file" | /bin/sed 's%^.*/%%'`
+    file=`ls -ld "$thisdir/$file" | /bin/sed -n 's/.*-> //p'`
+  done
+
+  # Usually 'no', except on cygwin/mingw when embedded into
+  # the cwrapper.
+  WRAPPER_SCRIPT_BELONGS_IN_OBJDIR=yes
+  if test "$WRAPPER_SCRIPT_BELONGS_IN_OBJDIR" = "yes"; then
+    # special case for '.'
+    if test "$thisdir" = "."; then
+      thisdir=`pwd`
+    fi
+    # remove .libs from thisdir
+    case "$thisdir" in
+    *[\\/].libs ) thisdir=`$ECHO "$thisdir" | /bin/sed 's%[\\/][^\\/]*$%%'` ;;
+    .libs )   thisdir=. ;;
+    esac
+  fi
+
+  # Try to get the absolute directory name.
+  absdir=`cd "$thisdir" && pwd`
+  test -n "$absdir" && thisdir="$absdir"
+
+  program='dtdquery.exe'
+  progdir="$thisdir/.libs"
+
+
+  if test -f "$progdir/$program"; then
+    if test "$libtool_execute_magic" != "%%%MAGIC variable%%%"; then
+      # Run the actual program with our arguments.
+      func_exec_program ${1+"$@"}
+    fi
+  else
+    # The program doesn't exist.
+    $ECHO "$0: error: \`$progdir/$program' does not exist" 1>&2
+    $ECHO "This script is just a wrapper for $program." 1>&2
+    $ECHO "See the libtool documentation for more information." 1>&2
+    exit 1
+  fi
+fi
--- a/html2xhtml-windows/.libs/html2xhtml.exe
+++ b/html2xhtml-windows/.libs/html2xhtml.exe
--- a/html2xhtml-windows/.libs/html2xhtml_ltshwrapper
+++ b/html2xhtml-windows/.libs/html2xhtml_ltshwrapper
@ -0,0 +1,201 @@
+#! /bin/sh
+
+# html2xhtml - temporary wrapper script for .libs/html2xhtml.exe
+# Generated by libtool (GNU libtool) 2.4.2 Debian-2.4.2-1.11
+#
+# The html2xhtml program cannot be directly executed until all the libtool
+# libraries that it depends on are installed.
+#
+# This wrapper script should never be moved out of the build directory.
+# If it is, it will not operate correctly.
+
+# Sed substitution that helps us do robust quoting.  It backslashifies
+# metacharacters that are still active within double-quoted strings.
+sed_quote_subst='s/\([`"$\\]\)/\\\1/g'
+
+# Be Bourne compatible
+if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then
+  emulate sh
+  NULLCMD=:
+  # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which
+  # is contrary to our usage.  Disable this feature.
+  alias -g '${1+"$@"}'='"$@"'
+  setopt NO_GLOB_SUBST
+else
+  case `(set -o) 2>/dev/null` in *posix*) set -o posix;; esac
+fi
+BIN_SH=xpg4; export BIN_SH # for Tru64
+DUALCASE=1; export DUALCASE # for MKS sh
+
+# The HP-UX ksh and POSIX shell print the target directory to stdout
+# if CDPATH is set.
+(unset CDPATH) >/dev/null 2>&1 && unset CDPATH
+
+relink_command=""
+
+# This environment variable determines our operation mode.
+if test "$libtool_install_magic" = "%%%MAGIC variable%%%"; then
+  # install mode needs the following variables:
+  generated_by_libtool_version='2.4.2'
+  notinst_deplibs=''
+else
+  # When we are sourced in execute mode, $file and $ECHO are already set.
+  if test "$libtool_execute_magic" != "%%%MAGIC variable%%%"; then
+    file="$0"
+
+# Fallback echo for shells with no print builtin or printf: prints $1 plus a newline using cat and a here-document.
+func_fallback_echo ()
+{
+  eval 'cat <<_LTECHO_EOF
+$1
+_LTECHO_EOF'
+}
+    ECHO="printf %s\\n"
+  fi
+
+# Very basic option parsing. These options are (a) specific to
+# the libtool wrapper, (b) are identical between the wrapper
+# /script/ and the wrapper /executable/ which is used only on
+# windows platforms, and (c) all begin with the string --lt-
+# (application programs are unlikely to have options which match
+# this pattern).
+#
+# There are only two supported options: --lt-debug and
+# --lt-dump-script. Any other --lt-* option is an error.
+#
+# The first argument passed to this function is discarded by the
+# shift below; the remaining arguments are the ones scanned.
+lt_option_debug=
+func_parse_lt_options ()
+{
+  lt_script_arg0=$0
+  shift
+  for lt_opt
+  do
+    case "$lt_opt" in
+    --lt-debug) lt_option_debug=1 ;;
+    --lt-dump-script)
+        lt_dump_D=`$ECHO "X$lt_script_arg0" | /bin/sed -e 's/^X//' -e 's%/[^/]*$%%'`
+        test "X$lt_dump_D" = "X$lt_script_arg0" && lt_dump_D=.
+        lt_dump_F=`$ECHO "X$lt_script_arg0" | /bin/sed -e 's/^X//' -e 's%^.*/%%'`
+        cat "$lt_dump_D/$lt_dump_F"
+        exit 0
+      ;;
+    --lt-*)
+        $ECHO "Unrecognized --lt- option: '$lt_opt'" 1>&2
+        exit 1
+      ;;
+    esac
+  done
+
+  # Print the debug banner to stderr once all options are parsed:
+  if test -n "$lt_option_debug"; then
+    echo "html2xhtml.exe:html2xhtml:${LINENO}: libtool wrapper (GNU libtool) 2.4.2 Debian-2.4.2-1.11" 1>&2
+  fi
+}
+
+# Used when --lt-debug. Prints each argument on its own line to stdout,
+# numbered from 1 (redirection is the responsibility of the caller)
+func_lt_dump_args ()
+{
+  lt_dump_args_N=1;
+  for lt_arg
+  do
+    $ECHO "html2xhtml.exe:html2xhtml:${LINENO}: newargv[$lt_dump_args_N]: $lt_arg"
+    lt_dump_args_N=`expr $lt_dump_args_N + 1`
+  done
+}
+
+# Core function for launching the target application: execs $progdir/$program with the given arguments
+func_exec_program_core ()
+{
+
+      if test -n "$lt_option_debug"; then
+        $ECHO "html2xhtml.exe:html2xhtml:${LINENO}: newargv[0]: $progdir/$program" 1>&2
+        func_lt_dump_args ${1+"$@"} 1>&2
+      fi
+      exec "$progdir/$program" ${1+"$@"}
+
+      $ECHO "$0: cannot exec $program $*" 1>&2
+      exit 1
+}
+
+# A function to encapsulate launching the target application.
+# Strips options in the --lt-* namespace from $@ (keeping the order of the
+# rest) and launches the target application with the remaining arguments.
+func_exec_program ()
+{
+  case " $* " in
+  *\ --lt-*)
+    for lt_wr_arg
+    do
+      case $lt_wr_arg in
+      --lt-*) ;;
+      *) set x "$@" "$lt_wr_arg"; shift;;
+      esac
+      shift
+    done ;;
+  esac
+  func_exec_program_core ${1+"$@"}
+}
+
+  # Parse options
+  func_parse_lt_options "$0" ${1+"$@"}
+
+  # Find the directory that this script lives in.
+  thisdir=`$ECHO "$file" | /bin/sed 's%/[^/]*$%%'`
+  test "x$thisdir" = "x$file" && thisdir=.
+
+  # Follow symbolic links until we get to the real thisdir.
+  file=`ls -ld "$file" | /bin/sed -n 's/.*-> //p'`
+  while test -n "$file"; do
+    destdir=`$ECHO "$file" | /bin/sed 's%/[^/]*$%%'`
+
+    # If there was a directory component, then change thisdir.
+    if test "x$destdir" != "x$file"; then
+      case "$destdir" in
+      [\\/]* | [A-Za-z]:[\\/]*) thisdir="$destdir" ;;
+      *) thisdir="$thisdir/$destdir" ;;
+      esac
+    fi
+
+    file=`$ECHO "$file" | /bin/sed 's%^.*/%%'`
+    file=`ls -ld "$thisdir/$file" | /bin/sed -n 's/.*-> //p'`
+  done
+
+  # Usually 'no', except on cygwin/mingw when embedded into
+  # the cwrapper.
+  WRAPPER_SCRIPT_BELONGS_IN_OBJDIR=yes
+  if test "$WRAPPER_SCRIPT_BELONGS_IN_OBJDIR" = "yes"; then
+    # special case for '.'
+    if test "$thisdir" = "."; then
+      thisdir=`pwd`
+    fi
+    # remove .libs from thisdir
+    case "$thisdir" in
+    *[\\/].libs ) thisdir=`$ECHO "$thisdir" | /bin/sed 's%[\\/][^\\/]*$%%'` ;;
+    .libs )   thisdir=. ;;
+    esac
+  fi
+
+  # Try to get the absolute directory name.
+  absdir=`cd "$thisdir" && pwd`
+  test -n "$absdir" && thisdir="$absdir"
+
+  program='html2xhtml.exe'
+  progdir="$thisdir/.libs"
+
+
+  if test -f "$progdir/$program"; then
+    if test "$libtool_execute_magic" != "%%%MAGIC variable%%%"; then
+      # Run the actual program with our arguments.
+      func_exec_program ${1+"$@"}
+    fi
+  else
+    # The program doesn't exist.
+    $ECHO "$0: error: \`$progdir/$program' does not exist" 1>&2
+    $ECHO "This script is just a wrapper for $program." 1>&2
+    $ECHO "See the libtool documentation for more information." 1>&2
+    exit 1
+  fi
+fi
--- a/html2xhtml-windows/.libs/lt-dtdquery.c
+++ b/html2xhtml-windows/.libs/lt-dtdquery.c
--- a/html2xhtml-windows/.libs/lt-html2xhtml.c
+++ b/html2xhtml-windows/.libs/lt-html2xhtml.c
--- a/html2xhtml-windows/dtdquery.exe
+++ b/html2xhtml-windows/dtdquery.exe
--- a/html2xhtml-windows/html2xhtml.exe
+++ b/html2xhtml-windows/html2xhtml.exe
--- a/src/book.rs
+++ b/src/book.rs
@ -0,0 +1,95 @@
+use scraper::Html;
+use url::Url;
+
+use crate::{html, http};
+
+/// Everything needed to generate an archive of a single RoyalRoad book.
+pub struct Book {
+    /// Url of the book's RoyalRoad index page.
+    book_url: Url,
+
+    /// Title of the book.
+    title: String,
+
+    /// Name of the book's author.
+    author: String,
+
+    /// Url of the book's cover image.
+    cover_image_url: Url,
+
+    /// Parsed html of the RoyalRoad index page.
+    index_html: Html,
+
+    /// The chapters that make up the book.
+    chapters: Vec<Chapter>,
+}
+
+impl Book {
+    /// Generate a new book instance with all the needed data from a given url.
+    ///
+    /// Downloads the index page at `book_url`, then downloads every chapter listed on it
+    /// (one request per chapter, in listing order) before extracting the title, author and
+    /// cover image url from the index.
+    pub fn new(book_url: Url) -> Book {
+        let index_html = html::string_to_html_document(&http::get_response(book_url.clone()).get_text());
+
+        // Each entry is `[chapter_name, chapter_url]`.
+        let chapters: Vec<Chapter> = html::get_chapter_names_and_urls_from_index(&index_html)
+            .iter()
+            .map(|[chapter_name, chapter_url]| Chapter::new(chapter_name, chapter_url))
+            .collect();
+
+        // Field order matters: `index_html` is borrowed by the extractors before it is moved in.
+        Book {
+            book_url,
+            title: html::get_title_from_index(&index_html),
+            author: html::get_author_from_index(&index_html),
+            cover_image_url: http::string_to_url(&html::get_cover_image_url_from_index(&index_html)),
+            index_html,
+            chapters,
+        }
+    }
+
+    /// Count how many paragraphs are in the book.
+    ///
+    /// Not implemented yet: currently always returns 0.
+    pub fn count_paragraphs(&self) -> u128 {
+        // TODO!
+        0
+    }
+}
+
+/// A single chapter of a book.
+struct Chapter {
+    /// Url the chapter was downloaded from.
+    chapter_url: Url,
+
+    /// Name of the chapter.
+    chapter_name: String,
+
+    /// Parsed html of the whole chapter page.
+    raw_chapter_html: Html,
+
+    /// Html of just the chapter content, separated from the rest of the page.
+    isolated_chapter_html: Html,
+}
+
+impl Chapter {
+    /// Download the chapter page at `chapter_url` and isolate its content.
+    ///
+    /// `chapter_url` is parsed with `http::string_to_url`, so it must be an absolute url.
+    fn new(chapter_name: &str, chapter_url: &str) -> Self {
+        let chapter_url = http::string_to_url(chapter_url);
+        let raw_chapter_html = html::string_to_html_document(&http::get_response(chapter_url.clone()).get_text());
+
+        Chapter {
+            chapter_url,
+            chapter_name: chapter_name.to_string(),
+            // `isolate_chapter_content` takes ownership, so hand it a copy and keep the original.
+            isolated_chapter_html: html::isolate_chapter_content(raw_chapter_html.clone()),
+            raw_chapter_html,
+        }
+    }
+}
+
+// TODO! Empty placeholder; no fields have been defined yet.
+struct BookImages {}
+
+// TODO! Empty placeholder; no fields have been defined yet.
+struct BookCss {}
--- a/src/html.rs
+++ b/src/html.rs
@ -0,0 +1,141 @@
+use std::process::exit;
+
+use regex::Regex;
+use scraper::{Html, Selector};
+
+/// Parse a complete html document from a string.
+///
+/// Thin wrapper around [`Html::parse_document`].
+pub fn string_to_html_document(document_string: &str) -> Html {
+    Html::parse_document(document_string)
+}
+
+/// Parse an html fragment from a string.
+///
+/// Thin wrapper around [`Html::parse_fragment`].
+pub fn string_to_html_fragment(fragment_string: &str) -> Html {
+    Html::parse_fragment(fragment_string)
+}
+
+/// Get the book's title from the index.
+///
+/// The title is the `content` attribute of the first `<meta name="twitter:title">` tag that
+/// has one. If no such tag exists, an error is printed to stderr and the process exits with
+/// status 1.
+pub fn get_title_from_index(index_html: &Html) -> String {
+    // "meta" is a constant, valid selector, so parsing it cannot fail.
+    let selector = Selector::parse("meta").unwrap();
+
+    let title = index_html
+        .select(&selector)
+        .filter(|element| element.value().attr("name") == Some("twitter:title"))
+        // A matching tag without `content` is skipped rather than treated as a panic.
+        .find_map(|element| element.value().attr("content"));
+
+    match title {
+        Some(title) => title.to_owned(),
+        None => {
+            eprintln!("Error! Unable to find book title. Royal road have probably changed their front-end code. Please report this to me on:\nhttps://github.com/Raine-gay/royal_road_archiver");
+            exit(1);
+        }
+    }
+}
+
+/// Get the book's author from the index.
+///
+/// The author is the `content` attribute of the first `<meta property="books:author">` tag
+/// that has one. If no such tag exists, an error is printed to stderr and the process exits
+/// with status 1.
+pub fn get_author_from_index(index_html: &Html) -> String {
+    // "meta" is a constant, valid selector, so parsing it cannot fail.
+    let selector = Selector::parse("meta").unwrap();
+
+    let author = index_html
+        .select(&selector)
+        .filter(|element| element.value().attr("property") == Some("books:author"))
+        // A matching tag without `content` is skipped rather than treated as a panic.
+        .find_map(|element| element.value().attr("content"));
+
+    match author {
+        Some(author) => author.to_owned(),
+        None => {
+            eprintln!("Error! Unable to find book author. Royal road have probably changed their front-end code. Please report this to me on:\nhttps://github.com/Raine-gay/royal_road_archiver");
+            exit(1);
+        }
+    }
+}
+
+/// Get the book's cover image url from the index.
+///
+/// The url is the `content` attribute of the first `<meta property="og:image">` tag that has
+/// one. If no such tag exists, an error is printed to stderr and the process exits with
+/// status 1.
+pub fn get_cover_image_url_from_index(index_html: &Html) -> String {
+    // "meta" is a constant, valid selector, so parsing it cannot fail.
+    let selector = Selector::parse("meta").unwrap();
+
+    let cover_image_url = index_html
+        .select(&selector)
+        .filter(|element| element.value().attr("property") == Some("og:image"))
+        // A matching tag without `content` is skipped rather than treated as a panic.
+        .find_map(|element| element.value().attr("content"));
+
+    match cover_image_url {
+        Some(cover_image_url) => cover_image_url.to_owned(),
+        None => {
+            eprintln!("Error! Unable to find cover image url. Royal road have probably changed their front-end code. Please report this to me on:\nhttps://github.com/Raine-gay/royal_road_archiver");
+            exit(1);
+        }
+    }
+}
+
+/// Gets the chapter names and urls from the index.
+///
+/// This gets stored in a vector where index 0 is the chapter name, and index 1 is the url.
+///
+/// The chapter list is expected in an inline `<script>` as `window.chapters = [...];`, where
+/// every array entry is a json object with a string `title` and a site-relative string `url`.
+/// If that data cannot be found, extracted or parsed, an error is printed to stderr and the
+/// process exits with status 1.
+pub fn get_chapter_names_and_urls_from_index(index_html: &Html) -> Vec<[String; 2]> {
+    /// Report a scraping failure in the same wording as the other index helpers, then exit.
+    fn exit_with_error(problem: &str) -> ! {
+        eprintln!("Error! {problem}. Royal road have probably changed their front-end code. Please report this to me on:\nhttps://github.com/Raine-gay/royal_road_archiver");
+        exit(1);
+    }
+
+    // Find the first script tag whose source mentions "window.chapters".
+    let selector = Selector::parse("script").unwrap();
+    let raw_json_data = index_html
+        .select(&selector)
+        .map(|element| element.inner_html())
+        .find(|script_source| script_source.contains("window.chapters"))
+        .unwrap_or_else(|| exit_with_error("Unable to find json chapter data"));
+
+    // Capture group 1 is the json array assigned to `window.chapters`: everything from the
+    // opening `[` up to the first `];`. The match is lazy and `.` does not cross newlines, so
+    // the assignment must sit on a single line.
+    let regex = Regex::new(r"window\.chapters = (\[.*?]);").unwrap();
+    let chapter_raw_json = regex
+        .captures(&raw_json_data)
+        .and_then(|captures| captures.get(1))
+        .map(|group| group.as_str())
+        .unwrap_or_else(|| exit_with_error("Unable to extract json chapter data"));
+
+    let chapter_json: serde_json::Value = serde_json::from_str(chapter_raw_json)
+        .unwrap_or_else(|_| exit_with_error("Unable to parse json chapter data"));
+
+    // Read the fields as real strings (`as_str`) so quotes and escapes inside a title survive
+    // intact instead of being mangled by stripping quotes from the serialised json.
+    chapter_json
+        .as_array()
+        .into_iter()
+        .flatten()
+        .map(|chapter| match (chapter["title"].as_str(), chapter["url"].as_str()) {
+            (Some(title), Some(url)) => [title.to_owned(), format!("https://www.royalroad.com{url}")],
+            _ => exit_with_error("Unable to read a chapter title and url from the json chapter data"),
+        })
+        .collect()
+}
+
+/// Isolate the chapter content from the rest of the page.
+///
+/// Returns the inner html of the first `<div>` whose `class` attribute is exactly
+/// `chapter-inner chapter-content`, re-parsed as an html fragment. If no such div exists, an
+/// error is printed to stderr and the process exits with status 1.
+pub fn isolate_chapter_content(raw_chapter_html: Html) -> Html {
+    let selector = Selector::parse("div").unwrap();
+
+    // Query the parsed page directly; serialising and re-parsing it first is unnecessary.
+    let chapter_div = raw_chapter_html
+        .select(&selector)
+        .find(|element| element.value().attr("class") == Some("chapter-inner chapter-content"));
+
+    match chapter_div {
+        Some(element) => string_to_html_fragment(&element.inner_html()),
+        None => {
+            eprintln!("Error! Unable to isolate chapter content");
+            exit(1);
+        }
+    }
+}
--- a/src/http.rs
+++ b/src/http.rs
@ -0,0 +1,63 @@
+use std::process::exit;
+
+use reqwest::{blocking::Response, header::HeaderMap};
+use url::Url;
+
+/// An http response paired with the url it was requested from.
+pub struct HttpResponse {
+    /// The url the request was sent to; used in error messages.
+    url: Url,
+    /// The underlying blocking `reqwest` response.
+    pub response: Response,
+}
+
+impl HttpResponse {
+    /// Borrow the headers of the underlying response.
+    pub fn get_headers(&self) -> &HeaderMap {
+        let HttpResponse { response, .. } = self;
+        response.headers()
+    }
+
+    /// Consume the response and read its body as text. Exits the program if that fails.
+    pub fn get_text(self) -> String {
+        let HttpResponse { url, response } = self;
+
+        response.text().unwrap_or_else(|error| {
+            eprintln!("Error! Unable to convert response from {0} into text\n{error}", url);
+            exit(1)
+        })
+    }
+
+    /// Consume the response and read its body as raw bytes (used for images). Exits the
+    /// program if that fails.
+    pub fn get_bytes(self) -> bytes::Bytes {
+        let HttpResponse { url, response } = self;
+
+        response.bytes().unwrap_or_else(|error| {
+            eprintln!("Error! Unable to convert response from {0} into bytes\n{error}", url);
+            exit(1)
+        })
+    }
+}
+
+/// Get an http response for a given url. Exits the program if it fails.
+///
+/// A response with a 4xx or 5xx status counts as a failure, so an error page is reported
+/// here instead of being handed to the html scrapers as if it were real content.
+pub fn get_response(url: Url) -> HttpResponse {
+    // `url` is cloned because it is still needed for the result and the error message.
+    let response_result = reqwest::blocking::get(url.clone()).and_then(Response::error_for_status);
+
+    match response_result {
+        Ok(response) => HttpResponse { url, response },
+        Err(error) => {
+            eprintln!("Error! Unable to get a response from: {url}\n{error}");
+            exit(1);
+        }
+    }
+}
+
+/// A function to convert a string to a url. Exits the program if it fails.
+///
+/// On failure the offending string and the parse error are printed to stderr before the
+/// process exits with status 1.
+pub fn string_to_url(url: &str) -> Url {
+    match Url::parse(url) {
+        Ok(url) => url,
+        Err(error) => {
+            // In this arm `url` is still the original `&str` argument.
+            eprintln!("Error! Unable to parse: {url} into a valid url.\n{error}");
+            exit(1);
+        }
+    }
+}
--- a/src/library.rs
+++ b/src/library.rs
@ -3,6 +3,10 @@ use std::path::PathBuf;
 use clap::Args;
 use url::Url;

+mod book;
+mod html;
+mod http;
+
 /// struct that corresponds to arguments for Audiobook generation.
 #[derive(Args, Debug)]
 pub struct AudiobookArgs {
@ -71,5 +75,5 @@ pub fn generate_html(html_args: HtmlArgs, book_url: Url, output_directory: PathB
 /// This function DOES NOT do any error checking on the Url or output directory & WILL panic if they are wrong. 
 /// Make sure the Url is valid and the output directory is writable BEFORE passing them to this.
 pub fn generate_markdown(markdown_args: MarkdownArgs, book_url: Url, output_directory: PathBuf) {
-    eprintln!("This is not implemented yet.");
+    let book = book::Book::new(book_url);
 }