Bundled both windows and linux builds of html2xhtml and just generally made a bunch of work

2024-11-27 01:18:41 -06:00 · 2024-01-25 14:49:55 +00:00 · 2024-01-25 14:49:55 +00:00 · 80f3d5b423
parent 778b1adf6a
commit 80f3d5b423
18 changed files with 4371 additions and 14 deletions
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@ -0,0 +1,64 @@
 {
    // Use IntelliSense to learn about possible attributes.
    // Hover to view descriptions of existing attributes.
    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
    "version": "0.2.0",
    "configurations": [
        // Builds the unit tests of the library target with `cargo test --no-run --lib` and debugs them under lldb.
        {
            "type": "lldb",
            "request": "launch",
            "name": "Debug unit tests in library 'royal_road_archiver_lib'",
            "cargo": {
                "args": [
                    "test",
                    "--no-run",
                    "--lib",
                    "--package=royal_road_archiver"
                ],
                "filter": {
                    "name": "royal_road_archiver_lib",
                    "kind": "lib"
                }
            },
            "args": [],
            "cwd": "${workspaceFolder}"
        },
        // Builds the binary target and debugs it, passing a sample fiction url and `markdown` as program arguments.
        {
            "type": "lldb",
            "request": "launch",
            "name": "Debug executable 'royal_road_archiver_bin'",
            "cargo": {
                "args": [
                    "build",
                    "--bin=royal_road_archiver_bin",
                    "--package=royal_road_archiver"
                ],
                "filter": {
                    "name": "royal_road_archiver_bin",
                    "kind": "bin"
                }
            },
            "args": ["https://www.royalroad.com/fiction/22848/post-human", "markdown"],
            "cwd": "${workspaceFolder}"
        },
        // Builds the unit tests of the binary target with `cargo test --no-run --bin=...` and debugs them under lldb.
        {
            "type": "lldb",
            "request": "launch",
            "name": "Debug unit tests in executable 'royal_road_archiver_bin'",
            "cargo": {
                "args": [
                    "test",
                    "--no-run",
                    "--bin=royal_road_archiver_bin",
                    "--package=royal_road_archiver"
                ],
                "filter": {
                    "name": "royal_road_archiver_bin",
                    "kind": "bin"
                }
            },
            "args": [],
            "cwd": "${workspaceFolder}"
        }
    ]
 }
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@ -7,6 +7,9 @@
    ],
    "rust-analyzer.showUnlinkedFileNotification": false,
    "cSpell.ignoreWords": [
-        "royalroad"
+        "autotools",
        "reqwest",
        "royalroad",
        "ureq"
    ]
 }
--- a/Cargo.lock
+++ b/Cargo.lock
--- a/Cargo.toml
+++ b/Cargo.toml
@ -14,5 +14,10 @@ name = "royal_road_archiver_bin"
 path = "src/binary.rs"
 [dependencies]
 bytes = "1.5.0"
 clap = { version = "4.4.18", features = ["derive"] }
 regex = "1.10.3"
 reqwest = { version = "0.11.23", features = ["rustls", "blocking"] }
 scraper = "0.18.1"
 serde_json = "1.0.111"
 url = "2.5.0"
--- a/html2xhtml-linux/dtdquery
+++ b/html2xhtml-linux/dtdquery
--- a/html2xhtml-linux/html2xhtml
+++ b/html2xhtml-linux/html2xhtml
--- a/html2xhtml-windows/.libs/dtdquery.exe
+++ b/html2xhtml-windows/.libs/dtdquery.exe
--- a/html2xhtml-windows/.libs/dtdquery_ltshwrapper
+++ b/html2xhtml-windows/.libs/dtdquery_ltshwrapper
@ -0,0 +1,201 @@
 #! /bin/sh
 # dtdquery - temporary wrapper script for .libs/dtdquery.exe
 # Generated by libtool (GNU libtool) 2.4.2 Debian-2.4.2-1.11
 #
 # The dtdquery program cannot be directly executed until all the libtool
 # libraries that it depends on are installed.
 #
 # This wrapper script should never be moved out of the build directory.
 # If it is, it will not operate correctly.
 # Sed substitution that helps us do robust quoting.  It backslashifies
 # metacharacters that are still active within double-quoted strings.
 sed_quote_subst='s/\([`"$\\]\)/\\\1/g'
 # Be Bourne compatible
 if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then
  emulate sh
  NULLCMD=:
  # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which
  # is contrary to our usage.  Disable this feature.
  alias -g '${1+"$@"}'='"$@"'
  setopt NO_GLOB_SUBST
 else
  case `(set -o) 2>/dev/null` in *posix*) set -o posix;; esac
 fi
 BIN_SH=xpg4; export BIN_SH # for Tru64
 DUALCASE=1; export DUALCASE # for MKS sh
 # The HP-UX ksh and POSIX shell print the target directory to stdout
 # if CDPATH is set.
 (unset CDPATH) >/dev/null 2>&1 && unset CDPATH
 relink_command=""
 # This environment variable determines our operation mode.
 if test "$libtool_install_magic" = "%%%MAGIC variable%%%"; then
  # install mode needs the following variables:
  generated_by_libtool_version='2.4.2'
  notinst_deplibs=''
 else
  # When we are sourced in execute mode, $file and $ECHO are already set.
  if test "$libtool_execute_magic" != "%%%MAGIC variable%%%"; then
    file="$0"
 # A function that is used when there is no print builtin or printf.
 # Writes its first argument, followed by a newline, by feeding it to cat
 # through a here-document.
 # NOTE(review): ECHO is set to printf right after this definition and nothing
 # in the visible part of this wrapper calls this function.
 func_fallback_echo ()
 {
  eval 'cat <<_LTECHO_EOF
 $1
 _LTECHO_EOF'
 }
    ECHO="printf %s\\n"
  fi
 # Very basic option parsing. These options are (a) specific to
 # the libtool wrapper, (b) are identical between the wrapper
 # /script/ and the wrapper /executable/ which is used only on
 # windows platforms, and (c) all begin with the string --lt-
 # (application programs are unlikely to have options which match
 # this pattern).
 #
 # There are only two supported options: --lt-debug and
 # --lt-dump-script. There is, deliberately, no --lt-help.
 #
 # The first argument to this parsing function should be the
 # script's ../libtool value, followed by yes.
 lt_option_debug=
 func_parse_lt_options ()
 {
  # Remember the path this wrapper was invoked as; --lt-dump-script prints that file.
  lt_script_arg0=$0
  # Drop the first argument; only the remaining ones are scanned for --lt- options.
  shift
  for lt_opt
  do
    case "$lt_opt" in
    --lt-debug) lt_option_debug=1 ;;
    --lt-dump-script)
        # Split lt_script_arg0 into its directory (lt_dump_D, "." when there is
        # none) and file name (lt_dump_F), print that file and stop.
        lt_dump_D=`$ECHO "X$lt_script_arg0" | /bin/sed -e 's/^X//' -e 's%/[^/]*$%%'`
        test "X$lt_dump_D" = "X$lt_script_arg0" && lt_dump_D=.
        lt_dump_F=`$ECHO "X$lt_script_arg0" | /bin/sed -e 's/^X//' -e 's%^.*/%%'`
        cat "$lt_dump_D/$lt_dump_F"
        exit 0
      ;;
    --lt-*)
        # Any other option in the --lt- namespace is an error.
        $ECHO "Unrecognized --lt- option: '$lt_opt'" 1>&2
        exit 1
      ;;
    esac
  done
  # Print the debug banner immediately:
  if test -n "$lt_option_debug"; then
    echo "dtdquery.exe:dtdquery:${LINENO}: libtool wrapper (GNU libtool) 2.4.2 Debian-2.4.2-1.11" 1>&2
  fi
 }
 # Used when --lt-debug. Prints its arguments to stdout
 # (redirection is the responsibility of the caller)
 # Each argument goes on its own line, labelled newargv[1], newargv[2], ...;
 # newargv[0] is printed by the caller, func_exec_program_core.
 func_lt_dump_args ()
 {
  lt_dump_args_N=1;
  for lt_arg
  do
    $ECHO "dtdquery.exe:dtdquery:${LINENO}: newargv[$lt_dump_args_N]: $lt_arg"
    lt_dump_args_N=`expr $lt_dump_args_N + 1`
  done
 }
 # Core function for launching the target application
 # Reads $progdir and $program, which are set further down in this script
 # before the function is called.
 func_exec_program_core ()
 {
      # With --lt-debug, show the full command line on stderr before launching.
      if test -n "$lt_option_debug"; then
        $ECHO "dtdquery.exe:dtdquery:${LINENO}: newargv[0]: $progdir/$program" 1>&2
        func_lt_dump_args ${1+"$@"} 1>&2
      fi
      # exec replaces this shell with the program, so the two lines below are
      # only reached when the exec itself fails.
      exec "$progdir/$program" ${1+"$@"}
      $ECHO "$0: cannot exec $program $*" 1>&2
      exit 1
 }
 # A function to encapsulate launching the target application
 # Strips options in the --lt-* namespace from $@ and
 # launches target application with the remaining arguments.
 func_exec_program ()
 {
  # Only rewrite the argument list when some argument starts with --lt-.
  case " $* " in
  *\ --lt-*)
    # Each pass drops the front argument; an argument that is not --lt-* is
    # first re-appended at the back, so after the loop only those arguments
    # remain, in their original order.
    for lt_wr_arg
    do
      case $lt_wr_arg in
      --lt-*) ;;
      *) set x "$@" "$lt_wr_arg"; shift;;
      esac
      shift
    done ;;
  esac
  func_exec_program_core ${1+"$@"}
 }
  # Parse options
  func_parse_lt_options "$0" ${1+"$@"}
  # Find the directory that this script lives in.
  thisdir=`$ECHO "$file" | /bin/sed 's%/[^/]*$%%'`
  test "x$thisdir" = "x$file" && thisdir=.
  # Follow symbolic links until we get to the real thisdir.
  file=`ls -ld "$file" | /bin/sed -n 's/.*-> //p'`
  while test -n "$file"; do
    destdir=`$ECHO "$file" | /bin/sed 's%/[^/]*$%%'`
    # If there was a directory component, then change thisdir.
    if test "x$destdir" != "x$file"; then
      case "$destdir" in
      [\\/]* | [A-Za-z]:[\\/]*) thisdir="$destdir" ;;
      *) thisdir="$thisdir/$destdir" ;;
      esac
    fi
    file=`$ECHO "$file" | /bin/sed 's%^.*/%%'`
    file=`ls -ld "$thisdir/$file" | /bin/sed -n 's/.*-> //p'`
  done
  # Usually 'no', except on cygwin/mingw when embedded into
  # the cwrapper.
  WRAPPER_SCRIPT_BELONGS_IN_OBJDIR=yes
  if test "$WRAPPER_SCRIPT_BELONGS_IN_OBJDIR" = "yes"; then
    # special case for '.'
    if test "$thisdir" = "."; then
      thisdir=`pwd`
    fi
    # remove .libs from thisdir
    case "$thisdir" in
    *[\\/].libs ) thisdir=`$ECHO "$thisdir" | /bin/sed 's%[\\/][^\\/]*$%%'` ;;
    .libs )   thisdir=. ;;
    esac
  fi
  # Try to get the absolute directory name.
  absdir=`cd "$thisdir" && pwd`
  test -n "$absdir" && thisdir="$absdir"
  program='dtdquery.exe'
  progdir="$thisdir/.libs"
  if test -f "$progdir/$program"; then
    if test "$libtool_execute_magic" != "%%%MAGIC variable%%%"; then
      # Run the actual program with our arguments.
      func_exec_program ${1+"$@"}
    fi
  else
    # The program doesn't exist.
    $ECHO "$0: error: \`$progdir/$program' does not exist" 1>&2
    $ECHO "This script is just a wrapper for $program." 1>&2
    $ECHO "See the libtool documentation for more information." 1>&2
    exit 1
  fi
 fi
--- a/html2xhtml-windows/.libs/html2xhtml.exe
+++ b/html2xhtml-windows/.libs/html2xhtml.exe
--- a/html2xhtml-windows/.libs/html2xhtml_ltshwrapper
+++ b/html2xhtml-windows/.libs/html2xhtml_ltshwrapper
@ -0,0 +1,201 @@
 #! /bin/sh
 # html2xhtml - temporary wrapper script for .libs/html2xhtml.exe
 # Generated by libtool (GNU libtool) 2.4.2 Debian-2.4.2-1.11
 #
 # The html2xhtml program cannot be directly executed until all the libtool
 # libraries that it depends on are installed.
 #
 # This wrapper script should never be moved out of the build directory.
 # If it is, it will not operate correctly.
 # Sed substitution that helps us do robust quoting.  It backslashifies
 # metacharacters that are still active within double-quoted strings.
 sed_quote_subst='s/\([`"$\\]\)/\\\1/g'
 # Be Bourne compatible
 if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then
  emulate sh
  NULLCMD=:
  # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which
  # is contrary to our usage.  Disable this feature.
  alias -g '${1+"$@"}'='"$@"'
  setopt NO_GLOB_SUBST
 else
  case `(set -o) 2>/dev/null` in *posix*) set -o posix;; esac
 fi
 BIN_SH=xpg4; export BIN_SH # for Tru64
 DUALCASE=1; export DUALCASE # for MKS sh
 # The HP-UX ksh and POSIX shell print the target directory to stdout
 # if CDPATH is set.
 (unset CDPATH) >/dev/null 2>&1 && unset CDPATH
 relink_command=""
 # This environment variable determines our operation mode.
 if test "$libtool_install_magic" = "%%%MAGIC variable%%%"; then
  # install mode needs the following variables:
  generated_by_libtool_version='2.4.2'
  notinst_deplibs=''
 else
  # When we are sourced in execute mode, $file and $ECHO are already set.
  if test "$libtool_execute_magic" != "%%%MAGIC variable%%%"; then
    file="$0"
 # A function that is used when there is no print builtin or printf.
 # Writes its first argument, followed by a newline, by feeding it to cat
 # through a here-document.
 # NOTE(review): ECHO is set to printf right after this definition and nothing
 # in the visible part of this wrapper calls this function.
 func_fallback_echo ()
 {
  eval 'cat <<_LTECHO_EOF
 $1
 _LTECHO_EOF'
 }
    ECHO="printf %s\\n"
  fi
 # Very basic option parsing. These options are (a) specific to
 # the libtool wrapper, (b) are identical between the wrapper
 # /script/ and the wrapper /executable/ which is used only on
 # windows platforms, and (c) all begin with the string --lt-
 # (application programs are unlikely to have options which match
 # this pattern).
 #
 # There are only two supported options: --lt-debug and
 # --lt-dump-script. There is, deliberately, no --lt-help.
 #
 # The first argument to this parsing function should be the
 # script's ../libtool value, followed by yes.
 lt_option_debug=
 func_parse_lt_options ()
 {
  # Remember the path this wrapper was invoked as; --lt-dump-script prints that file.
  lt_script_arg0=$0
  # Drop the first argument; only the remaining ones are scanned for --lt- options.
  shift
  for lt_opt
  do
    case "$lt_opt" in
    --lt-debug) lt_option_debug=1 ;;
    --lt-dump-script)
        # Split lt_script_arg0 into its directory (lt_dump_D, "." when there is
        # none) and file name (lt_dump_F), print that file and stop.
        lt_dump_D=`$ECHO "X$lt_script_arg0" | /bin/sed -e 's/^X//' -e 's%/[^/]*$%%'`
        test "X$lt_dump_D" = "X$lt_script_arg0" && lt_dump_D=.
        lt_dump_F=`$ECHO "X$lt_script_arg0" | /bin/sed -e 's/^X//' -e 's%^.*/%%'`
        cat "$lt_dump_D/$lt_dump_F"
        exit 0
      ;;
    --lt-*)
        # Any other option in the --lt- namespace is an error.
        $ECHO "Unrecognized --lt- option: '$lt_opt'" 1>&2
        exit 1
      ;;
    esac
  done
  # Print the debug banner immediately:
  if test -n "$lt_option_debug"; then
    echo "html2xhtml.exe:html2xhtml:${LINENO}: libtool wrapper (GNU libtool) 2.4.2 Debian-2.4.2-1.11" 1>&2
  fi
 }
 # Used when --lt-debug. Prints its arguments to stdout
 # (redirection is the responsibility of the caller)
 # Each argument goes on its own line, labelled newargv[1], newargv[2], ...;
 # newargv[0] is printed by the caller, func_exec_program_core.
 func_lt_dump_args ()
 {
  lt_dump_args_N=1;
  for lt_arg
  do
    $ECHO "html2xhtml.exe:html2xhtml:${LINENO}: newargv[$lt_dump_args_N]: $lt_arg"
    lt_dump_args_N=`expr $lt_dump_args_N + 1`
  done
 }
 # Core function for launching the target application
 # Reads $progdir and $program, which are set further down in this script
 # before the function is called.
 func_exec_program_core ()
 {
      # With --lt-debug, show the full command line on stderr before launching.
      if test -n "$lt_option_debug"; then
        $ECHO "html2xhtml.exe:html2xhtml:${LINENO}: newargv[0]: $progdir/$program" 1>&2
        func_lt_dump_args ${1+"$@"} 1>&2
      fi
      # exec replaces this shell with the program, so the two lines below are
      # only reached when the exec itself fails.
      exec "$progdir/$program" ${1+"$@"}
      $ECHO "$0: cannot exec $program $*" 1>&2
      exit 1
 }
 # A function to encapsulate launching the target application
 # Strips options in the --lt-* namespace from $@ and
 # launches target application with the remaining arguments.
 func_exec_program ()
 {
  # Only rewrite the argument list when some argument starts with --lt-.
  case " $* " in
  *\ --lt-*)
    # Each pass drops the front argument; an argument that is not --lt-* is
    # first re-appended at the back, so after the loop only those arguments
    # remain, in their original order.
    for lt_wr_arg
    do
      case $lt_wr_arg in
      --lt-*) ;;
      *) set x "$@" "$lt_wr_arg"; shift;;
      esac
      shift
    done ;;
  esac
  func_exec_program_core ${1+"$@"}
 }
  # Parse options
  func_parse_lt_options "$0" ${1+"$@"}
  # Find the directory that this script lives in.
  thisdir=`$ECHO "$file" | /bin/sed 's%/[^/]*$%%'`
  test "x$thisdir" = "x$file" && thisdir=.
  # Follow symbolic links until we get to the real thisdir.
  file=`ls -ld "$file" | /bin/sed -n 's/.*-> //p'`
  while test -n "$file"; do
    destdir=`$ECHO "$file" | /bin/sed 's%/[^/]*$%%'`
    # If there was a directory component, then change thisdir.
    if test "x$destdir" != "x$file"; then
      case "$destdir" in
      [\\/]* | [A-Za-z]:[\\/]*) thisdir="$destdir" ;;
      *) thisdir="$thisdir/$destdir" ;;
      esac
    fi
    file=`$ECHO "$file" | /bin/sed 's%^.*/%%'`
    file=`ls -ld "$thisdir/$file" | /bin/sed -n 's/.*-> //p'`
  done
  # Usually 'no', except on cygwin/mingw when embedded into
  # the cwrapper.
  WRAPPER_SCRIPT_BELONGS_IN_OBJDIR=yes
  if test "$WRAPPER_SCRIPT_BELONGS_IN_OBJDIR" = "yes"; then
    # special case for '.'
    if test "$thisdir" = "."; then
      thisdir=`pwd`
    fi
    # remove .libs from thisdir
    case "$thisdir" in
    *[\\/].libs ) thisdir=`$ECHO "$thisdir" | /bin/sed 's%[\\/][^\\/]*$%%'` ;;
    .libs )   thisdir=. ;;
    esac
  fi
  # Try to get the absolute directory name.
  absdir=`cd "$thisdir" && pwd`
  test -n "$absdir" && thisdir="$absdir"
  program='html2xhtml.exe'
  progdir="$thisdir/.libs"
  if test -f "$progdir/$program"; then
    if test "$libtool_execute_magic" != "%%%MAGIC variable%%%"; then
      # Run the actual program with our arguments.
      func_exec_program ${1+"$@"}
    fi
  else
    # The program doesn't exist.
    $ECHO "$0: error: \`$progdir/$program' does not exist" 1>&2
    $ECHO "This script is just a wrapper for $program." 1>&2
    $ECHO "See the libtool documentation for more information." 1>&2
    exit 1
  fi
 fi
--- a/html2xhtml-windows/.libs/lt-dtdquery.c
+++ b/html2xhtml-windows/.libs/lt-dtdquery.c
--- a/html2xhtml-windows/.libs/lt-html2xhtml.c
+++ b/html2xhtml-windows/.libs/lt-html2xhtml.c
--- a/html2xhtml-windows/dtdquery.exe
+++ b/html2xhtml-windows/dtdquery.exe
--- a/html2xhtml-windows/html2xhtml.exe
+++ b/html2xhtml-windows/html2xhtml.exe
--- a/src/book.rs
+++ b/src/book.rs
@ -0,0 +1,95 @@
 use scraper::Html;
 use url::Url;
 use crate::{html, http};
 /// A struct representing a book & all the needed data to generate one.
 ///
 /// Every field is filled in by [`Book::new`], starting from the book's RoyalRoad index page.
 pub struct Book {
    /// The RoyalRoad Url for the book.
    book_url: Url,
    /// The book's title.
    title: String,
    /// The book's author.
    author: String,
    /// A Url to the book's cover image.
    cover_image_url: Url,
    /// The parsed html document of the RoyalRoad index page.
    index_html: Html,
    /// A vector of the book's chapters, in the order the index page lists them.
    chapters: Vec<Chapter>,
 }
 impl Book {
     /// Build a [`Book`] for the RoyalRoad fiction at `book_url`.
     ///
     /// Downloads and parses the index page, then downloads every chapter the index lists, and finally
     /// reads the title, author and cover image url from the index.
     /// Any failure along the way is reported by the `http` / `html` helpers, which exit the program.
     pub fn new(book_url: Url) -> Book {
         let index_page_text = http::get_response(book_url.clone()).get_text();
         let index_html = html::string_to_html_document(&index_page_text);
         // Each entry is `[chapter name, chapter url]`; building a `Chapter` downloads that chapter.
         let chapters: Vec<Chapter> = html::get_chapter_names_and_urls_from_index(&index_html)
             .iter()
             .map(|[name, url]| Chapter::new(name, url))
             .collect();
         let title = html::get_title_from_index(&index_html);
         let author = html::get_author_from_index(&index_html);
         let cover_image_url = http::string_to_url(&html::get_cover_image_url_from_index(&index_html));
         Book {
             book_url,
             title,
             author,
             cover_image_url,
             index_html,
             chapters,
         }
     }
     /// Count how many paragraphs are in the book.
     ///
     /// Not implemented yet: currently always returns 0.
     pub fn count_paragraphs(&self) -> u128 {
         // TODO!
         0
     }
 }
 /// A struct representing a chapter.
 ///
 /// Every field is filled in by `Chapter::new`, which downloads the chapter page.
 struct Chapter {
    /// The Url of the chapter.
    chapter_url: Url,
    /// The name of the chapter.
    chapter_name: String,
    /// The parsed html document of the whole chapter page.
    raw_chapter_html: Html,
    /// The chapter content on its own, as returned by `html::isolate_chapter_content`.
    isolated_chapter_html: Html,
 }
 impl Chapter {
     /// Download the chapter at `chapter_url` and build a [`Chapter`] named `chapter_name` from it.
     ///
     /// Keeps both the full parsed page and the isolated chapter content.
     /// An invalid url or a failed download is reported by the `http` helpers, which exit the program.
     fn new(chapter_name: &str, chapter_url: &str) -> Self {
         let chapter_url = http::string_to_url(chapter_url);
         let page_text = http::get_response(chapter_url.clone()).get_text();
         let raw_chapter_html = html::string_to_html_document(&page_text);
         let chapter_name = chapter_name.to_string();
         // `isolate_chapter_content` takes its argument by value, so it gets a copy and the page is kept as well.
         let isolated_chapter_html = html::isolate_chapter_content(raw_chapter_html.clone());
         Self {
             chapter_url,
             chapter_name,
             raw_chapter_html,
             isolated_chapter_html,
         }
     }
 }
 // TODO!
 /// Placeholder with no fields yet; nothing in the visible code constructs or uses it.
 struct BookImages {
 }
 // TODO!
 /// Placeholder with no fields yet; nothing in the visible code constructs or uses it.
 struct BookCss {
 }
--- a/src/html.rs
+++ b/src/html.rs
@ -0,0 +1,141 @@
 use std::process::exit;
 use regex::Regex;
 use scraper::{Html, Selector};
 /// Convert a string to an html document.
 ///
 /// Thin wrapper around [`Html::parse_document`]: `document_string` is parsed as a complete document.
 pub fn string_to_html_document(document_string: &str) -> Html {
    Html::parse_document(document_string)
 }
 /// Convert a string to an html fragment.
 ///
 /// Thin wrapper around [`Html::parse_fragment`]: `fragment_string` is parsed as a fragment rather than a full document.
 pub fn string_to_html_fragment(fragment_string: &str) -> Html {
    Html::parse_fragment(fragment_string)
 }
 /// Get the book's title from the index.
 ///
 /// The title is the `content` attribute of the first `<meta name="twitter:title">` tag that has one.
 /// Exits the program with an error message if no such tag exists.
 pub fn get_title_from_index(index_html: &Html) -> String {
     // "meta" is a constant, valid selector, so parsing it cannot fail.
     let selector = Selector::parse("meta").unwrap();
     // A matching tag without a `content` attribute is skipped instead of causing a panic.
     let title = index_html
         .select(&selector)
         .filter(|element| element.value().attr("name") == Some("twitter:title"))
         .find_map(|element| element.value().attr("content"));
     match title {
         Some(title) => title.to_owned(),
         None => {
             eprintln!("Error! Unable to find book title. Royal road have probably changed their front-end code. Please report this to me on:\nhttps://github.com/Raine-gay/royal_road_archiver");
             exit(1);
         }
     }
 }
 /// Get the book's author from the index.
 ///
 /// The author is the `content` attribute of the first `<meta property="books:author">` tag that has one.
 /// Exits the program with an error message if no such tag exists.
 pub fn get_author_from_index(index_html: &Html) -> String {
     // "meta" is a constant, valid selector, so parsing it cannot fail.
     let selector = Selector::parse("meta").unwrap();
     // A matching tag without a `content` attribute is skipped instead of causing a panic.
     let author = index_html
         .select(&selector)
         .filter(|element| element.value().attr("property") == Some("books:author"))
         .find_map(|element| element.value().attr("content"));
     match author {
         Some(author) => author.to_owned(),
         None => {
             eprintln!("Error! Unable to find book author. Royal road have probably changed their front-end code. Please report this to me on:\nhttps://github.com/Raine-gay/royal_road_archiver");
             exit(1);
         }
     }
 }
 /// Get the book's cover image url from the index.
 ///
 /// The url is the `content` attribute of the first `<meta property="og:image">` tag that has one.
 /// It is returned as found in the page, without being validated.
 /// Exits the program with an error message if no such tag exists.
 pub fn get_cover_image_url_from_index(index_html: &Html) -> String {
     // "meta" is a constant, valid selector, so parsing it cannot fail.
     let selector = Selector::parse("meta").unwrap();
     // A matching tag without a `content` attribute is skipped instead of causing a panic.
     let cover_image_url = index_html
         .select(&selector)
         .filter(|element| element.value().attr("property") == Some("og:image"))
         .find_map(|element| element.value().attr("content"));
     match cover_image_url {
         Some(cover_image_url) => cover_image_url.to_owned(),
         None => {
             eprintln!("Error! Unable to find cover image url. Royal road have probably changed their front-end code. Please report this to me on:\nhttps://github.com/Raine-gay/royal_road_archiver");
             exit(1);
         }
     }
 }
 /// Gets the chapter names and urls from the index.
 ///
 /// This gets stored in a vector where index 0 is the chapter name, and index 1 is the url.
 /// Entries are in the same order as the json array they are read from.
 ///
 /// The chapter list is read from the inline `<script>` tag that assigns a json array to `window.chapters`.
 /// Each array element is expected to have a string `title` and a string `url` (a path on www.royalroad.com).
 /// Exits the program with an error message if the data cannot be found or does not have that shape.
 pub fn get_chapter_names_and_urls_from_index(index_html: &Html) -> Vec<[String; 2]> {
     // Report that the chapter json could not be processed, then exit.
     fn exit_with_error(problem: &str) -> ! {
         eprintln!("Error! Unable to {problem} json chapter data. Royal road have probably changed their front-end code. Please report this to me on:\nhttps://github.com/Raine-gay/royal_road_archiver");
         exit(1);
     }
     // Find the first script tag that has "window.chapters" inside its inner html.
     let selector = Selector::parse("script").unwrap();
     let raw_json_data = match index_html
         .select(&selector)
         .map(|element| element.inner_html())
         .find(|script| script.contains("window.chapters"))
     {
         Some(script) => script,
         None => exit_with_error("find"),
     };
     // Capture group 1 is the json array literal assigned to `window.chapters`;
     // the lazy `.*?` stops at the first `];` after the opening bracket.
     const REGEX: &str = r#"window.chapters = (\[.*?]);"#;
     let regex = Regex::new(REGEX).unwrap();
     let chapter_raw_json = match regex
         .captures(&raw_json_data)
         .and_then(|captures| captures.get(1))
     {
         Some(json_match) => json_match.as_str(),
         None => exit_with_error("find"),
     };
     let chapter_json: serde_json::Value = match serde_json::from_str(chapter_raw_json) {
         Ok(value) => value,
         Err(_) => exit_with_error("parse"),
     };
     let chapter_list = match chapter_json.as_array() {
         Some(list) => list,
         None => exit_with_error("parse"),
     };
     let mut chapters: Vec<[String; 2]> = Vec::with_capacity(chapter_list.len());
     for chapter in chapter_list {
         // `as_str` yields the decoded string, so quotes and backslashes inside a title survive intact
         // (serialising the value and deleting every '"' would mangle them).
         match (chapter["title"].as_str(), chapter["url"].as_str()) {
             (Some(chapter_name), Some(path)) => {
                 chapters.push([chapter_name.to_owned(), format!("https://www.royalroad.com{path}")]);
             }
             _ => exit_with_error("parse"),
         }
     }
     chapters
 }
 /// Isolate the chapter content from the rest of the page.
 ///
 /// Returns the inner html of the first `<div>` whose `class` attribute is exactly
 /// `chapter-inner chapter-content`, re-parsed as an html fragment.
 /// Exits the program with an error message if the page has no such `<div>`.
 pub fn isolate_chapter_content(raw_chapter_html: Html) -> Html {
     // "div" is a constant, valid selector, so parsing it cannot fail.
     let selector = Selector::parse("div").unwrap();
     // Search the already-parsed document directly; serialising and re-parsing it first is wasted work.
     let chapter_content = raw_chapter_html
         .select(&selector)
         .find(|element| element.value().attr("class") == Some("chapter-inner chapter-content"));
     match chapter_content {
         Some(element) => string_to_html_fragment(&element.inner_html()),
         None => {
             eprintln!("Error! Unable to isolate chapter content");
             exit(1);
         }
     }
 }
--- a/src/http.rs
+++ b/src/http.rs
@ -0,0 +1,63 @@
 use std::process::exit;
 use reqwest::{blocking::Response, header::HeaderMap};
 use url::Url;
 /// A struct representing an HttpResponse and the Url it originated from.
 pub struct HttpResponse {
    /// The Url the response came from; it appears in the error messages of `get_text` and `get_bytes`.
    url: Url,
    /// The blocking `reqwest` response itself.
    pub response: Response,
 }
 impl HttpResponse {
    /// Get the response headers.
    pub fn get_headers(&self) -> &HeaderMap {
        self.response.headers()
    }
    /// Attempt to convert the response to text. Exits the program if it fails.
    pub fn get_text(self) -> String {
        match self.response.text() {
            Ok(response_text) => response_text,
            Err(error) => {
                eprintln!("Error! Unable to convert response from {0} into text\n{error}", self.url);
                exit(1);
            }
        }
    }
    /// Attempt to convert the response to bytes. Used for images. Exits the program if it fails.
    pub fn get_bytes(self) -> bytes::Bytes{
        match self.response.bytes() {
            Ok(response_bytes) => response_bytes,
            Err(error) => {
                eprintln!("Error! Unable to convert response from {0} into bytes\n{error}", self.url);
                exit(1);
            }
        }
    }
 }
 /// Get an http response for a given url. Exits the program if it fails.
 pub fn get_response(url: Url) -> HttpResponse {
    let response_result = reqwest::blocking::get(url.clone());
    match response_result {
        Ok(response) => HttpResponse { url, response },
        Err(error) => {
            eprintln!("Error! Unable to get a response from: {url}\n{error}");
            exit(1);
        },
    }
 }
 /// A function to convert a string to a url. Exits the program if it fails.
 pub fn string_to_url(url: &str) -> Url {
    match Url::parse(url) {
        Ok(url) => url,
        Err(error) => {
            eprintln!("Error! Unable to parse: {url} into a valid url.");
            exit(1);
        }
    }
 }
--- a/src/library.rs
+++ b/src/library.rs
@ -3,6 +3,10 @@ use std::path::PathBuf;
 use clap::Args;
 use url::Url;
 mod book;
 mod html;
 mod http;
 /// struct that corresponds to arguments for Audiobook generation.
 #[derive(Args, Debug)]
 pub struct AudiobookArgs {
@ -71,5 +75,5 @@ pub fn generate_html(html_args: HtmlArgs, book_url: Url, output_directory: PathB
 /// This function DOES NOT do any error checking on the Url or output directory & WILL panic if they are wrong. 
 /// Make sure the Url is valid and the output directory is writable BEFORE passing them to this.
 pub fn generate_markdown(markdown_args: MarkdownArgs, book_url: Url, output_directory: PathBuf) {
-    eprintln!("This is not implemented yet.");
+    let book = book::Book::new(book_url);
 }