mirror of
https://github.com/Raine-gay/royal_road_archiver.git
synced 2024-11-27 01:18:41 -06:00
Bundled both windows and linux builds of html2xhtml and just generally made a bunch of work
This commit is contained in:
parent
778b1adf6a
commit
80f3d5b423
64
.vscode/launch.json
vendored
Normal file
64
.vscode/launch.json
vendored
Normal file
|
@ -0,0 +1,64 @@
|
||||||
|
{
|
||||||
|
// Use IntelliSense to learn about possible attributes.
|
||||||
|
// Hover to view descriptions of existing attributes.
|
||||||
|
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||||
|
"version": "0.2.0",
|
||||||
|
"configurations": [
|
||||||
|
{
|
||||||
|
"type": "lldb",
|
||||||
|
"request": "launch",
|
||||||
|
"name": "Debug unit tests in library 'royal_road_archiver_lib'",
|
||||||
|
"cargo": {
|
||||||
|
"args": [
|
||||||
|
"test",
|
||||||
|
"--no-run",
|
||||||
|
"--lib",
|
||||||
|
"--package=royal_road_archiver"
|
||||||
|
],
|
||||||
|
"filter": {
|
||||||
|
"name": "royal_road_archiver_lib",
|
||||||
|
"kind": "lib"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"args": [],
|
||||||
|
"cwd": "${workspaceFolder}"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "lldb",
|
||||||
|
"request": "launch",
|
||||||
|
"name": "Debug executable 'royal_road_archiver_bin'",
|
||||||
|
"cargo": {
|
||||||
|
"args": [
|
||||||
|
"build",
|
||||||
|
"--bin=royal_road_archiver_bin",
|
||||||
|
"--package=royal_road_archiver"
|
||||||
|
],
|
||||||
|
"filter": {
|
||||||
|
"name": "royal_road_archiver_bin",
|
||||||
|
"kind": "bin"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"args": ["https://www.royalroad.com/fiction/22848/post-human", "markdown"],
|
||||||
|
"cwd": "${workspaceFolder}"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "lldb",
|
||||||
|
"request": "launch",
|
||||||
|
"name": "Debug unit tests in executable 'royal_road_archiver_bin'",
|
||||||
|
"cargo": {
|
||||||
|
"args": [
|
||||||
|
"test",
|
||||||
|
"--no-run",
|
||||||
|
"--bin=royal_road_archiver_bin",
|
||||||
|
"--package=royal_road_archiver"
|
||||||
|
],
|
||||||
|
"filter": {
|
||||||
|
"name": "royal_road_archiver_bin",
|
||||||
|
"kind": "bin"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"args": [],
|
||||||
|
"cwd": "${workspaceFolder}"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
5
.vscode/settings.json
vendored
5
.vscode/settings.json
vendored
|
@ -7,6 +7,9 @@
|
||||||
],
|
],
|
||||||
"rust-analyzer.showUnlinkedFileNotification": false,
|
"rust-analyzer.showUnlinkedFileNotification": false,
|
||||||
"cSpell.ignoreWords": [
|
"cSpell.ignoreWords": [
|
||||||
"royalroad"
|
"autotools",
|
||||||
|
"reqwest",
|
||||||
|
"royalroad",
|
||||||
|
"ureq"
|
||||||
]
|
]
|
||||||
}
|
}
|
1527
Cargo.lock
generated
1527
Cargo.lock
generated
File diff suppressed because it is too large
Load diff
|
@ -14,5 +14,10 @@ name = "royal_road_archiver_bin"
|
||||||
path = "src/binary.rs"
|
path = "src/binary.rs"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
|
bytes = "1.5.0"
|
||||||
clap = { version = "4.4.18", features = ["derive"] }
|
clap = { version = "4.4.18", features = ["derive"] }
|
||||||
url = "2.5.0"
|
regex = "1.10.3"
|
||||||
|
reqwest = { version = "0.11.23", features = ["rustls", "blocking"] }
|
||||||
|
scraper = "0.18.1"
|
||||||
|
serde_json = "1.0.111"
|
||||||
|
url = "2.5.0"
|
BIN
html2xhtml-linux/dtdquery
Executable file
BIN
html2xhtml-linux/dtdquery
Executable file
Binary file not shown.
BIN
html2xhtml-linux/html2xhtml
Executable file
BIN
html2xhtml-linux/html2xhtml
Executable file
Binary file not shown.
BIN
html2xhtml-windows/.libs/dtdquery.exe
Normal file
BIN
html2xhtml-windows/.libs/dtdquery.exe
Normal file
Binary file not shown.
201
html2xhtml-windows/.libs/dtdquery_ltshwrapper
Normal file
201
html2xhtml-windows/.libs/dtdquery_ltshwrapper
Normal file
|
@ -0,0 +1,201 @@
|
||||||
|
#! /bin/sh
|
||||||
|
|
||||||
|
# dtdquery - temporary wrapper script for .libs/dtdquery.exe
|
||||||
|
# Generated by libtool (GNU libtool) 2.4.2 Debian-2.4.2-1.11
|
||||||
|
#
|
||||||
|
# The dtdquery program cannot be directly executed until all the libtool
|
||||||
|
# libraries that it depends on are installed.
|
||||||
|
#
|
||||||
|
# This wrapper script should never be moved out of the build directory.
|
||||||
|
# If it is, it will not operate correctly.
|
||||||
|
|
||||||
|
# Sed substitution that helps us do robust quoting. It backslashifies
|
||||||
|
# metacharacters that are still active within double-quoted strings.
|
||||||
|
sed_quote_subst='s/\([`"$\\]\)/\\\1/g'
|
||||||
|
|
||||||
|
# Be Bourne compatible
|
||||||
|
if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then
|
||||||
|
emulate sh
|
||||||
|
NULLCMD=:
|
||||||
|
# Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which
|
||||||
|
# is contrary to our usage. Disable this feature.
|
||||||
|
alias -g '${1+"$@"}'='"$@"'
|
||||||
|
setopt NO_GLOB_SUBST
|
||||||
|
else
|
||||||
|
case `(set -o) 2>/dev/null` in *posix*) set -o posix;; esac
|
||||||
|
fi
|
||||||
|
BIN_SH=xpg4; export BIN_SH # for Tru64
|
||||||
|
DUALCASE=1; export DUALCASE # for MKS sh
|
||||||
|
|
||||||
|
# The HP-UX ksh and POSIX shell print the target directory to stdout
|
||||||
|
# if CDPATH is set.
|
||||||
|
(unset CDPATH) >/dev/null 2>&1 && unset CDPATH
|
||||||
|
|
||||||
|
relink_command=""
|
||||||
|
|
||||||
|
# This environment variable determines our operation mode.
|
||||||
|
if test "$libtool_install_magic" = "%%%MAGIC variable%%%"; then
|
||||||
|
# install mode needs the following variables:
|
||||||
|
generated_by_libtool_version='2.4.2'
|
||||||
|
notinst_deplibs=''
|
||||||
|
else
|
||||||
|
# When we are sourced in execute mode, $file and $ECHO are already set.
|
||||||
|
if test "$libtool_execute_magic" != "%%%MAGIC variable%%%"; then
|
||||||
|
file="$0"
|
||||||
|
|
||||||
|
# A function that is used when there is no print builtin or printf.
|
||||||
|
func_fallback_echo ()
|
||||||
|
{
|
||||||
|
eval 'cat <<_LTECHO_EOF
|
||||||
|
$1
|
||||||
|
_LTECHO_EOF'
|
||||||
|
}
|
||||||
|
ECHO="printf %s\\n"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Very basic option parsing. These options are (a) specific to
|
||||||
|
# the libtool wrapper, (b) are identical between the wrapper
|
||||||
|
# /script/ and the wrapper /executable/ which is used only on
|
||||||
|
# windows platforms, and (c) all begin with the string --lt-
|
||||||
|
# (application programs are unlikely to have options which match
|
||||||
|
# this pattern).
|
||||||
|
#
|
||||||
|
# There are only two supported options: --lt-debug and
|
||||||
|
# --lt-dump-script. There is, deliberately, no --lt-help.
|
||||||
|
#
|
||||||
|
# The first argument to this parsing function should be the
|
||||||
|
# script's ../libtool value, followed by yes.
|
||||||
|
lt_option_debug=
|
||||||
|
func_parse_lt_options ()
|
||||||
|
{
|
||||||
|
lt_script_arg0=$0
|
||||||
|
shift
|
||||||
|
for lt_opt
|
||||||
|
do
|
||||||
|
case "$lt_opt" in
|
||||||
|
--lt-debug) lt_option_debug=1 ;;
|
||||||
|
--lt-dump-script)
|
||||||
|
lt_dump_D=`$ECHO "X$lt_script_arg0" | /bin/sed -e 's/^X//' -e 's%/[^/]*$%%'`
|
||||||
|
test "X$lt_dump_D" = "X$lt_script_arg0" && lt_dump_D=.
|
||||||
|
lt_dump_F=`$ECHO "X$lt_script_arg0" | /bin/sed -e 's/^X//' -e 's%^.*/%%'`
|
||||||
|
cat "$lt_dump_D/$lt_dump_F"
|
||||||
|
exit 0
|
||||||
|
;;
|
||||||
|
--lt-*)
|
||||||
|
$ECHO "Unrecognized --lt- option: '$lt_opt'" 1>&2
|
||||||
|
exit 1
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
done
|
||||||
|
|
||||||
|
# Print the debug banner immediately:
|
||||||
|
if test -n "$lt_option_debug"; then
|
||||||
|
echo "dtdquery.exe:dtdquery:${LINENO}: libtool wrapper (GNU libtool) 2.4.2 Debian-2.4.2-1.11" 1>&2
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
|
# Used when --lt-debug. Prints its arguments to stdout
|
||||||
|
# (redirection is the responsibility of the caller)
|
||||||
|
func_lt_dump_args ()
|
||||||
|
{
|
||||||
|
lt_dump_args_N=1;
|
||||||
|
for lt_arg
|
||||||
|
do
|
||||||
|
$ECHO "dtdquery.exe:dtdquery:${LINENO}: newargv[$lt_dump_args_N]: $lt_arg"
|
||||||
|
lt_dump_args_N=`expr $lt_dump_args_N + 1`
|
||||||
|
done
|
||||||
|
}
|
||||||
|
|
||||||
|
# Core function for launching the target application
|
||||||
|
func_exec_program_core ()
|
||||||
|
{
|
||||||
|
|
||||||
|
if test -n "$lt_option_debug"; then
|
||||||
|
$ECHO "dtdquery.exe:dtdquery:${LINENO}: newargv[0]: $progdir/$program" 1>&2
|
||||||
|
func_lt_dump_args ${1+"$@"} 1>&2
|
||||||
|
fi
|
||||||
|
exec "$progdir/$program" ${1+"$@"}
|
||||||
|
|
||||||
|
$ECHO "$0: cannot exec $program $*" 1>&2
|
||||||
|
exit 1
|
||||||
|
}
|
||||||
|
|
||||||
|
# A function to encapsulate launching the target application
|
||||||
|
# Strips options in the --lt-* namespace from $@ and
|
||||||
|
# launches target application with the remaining arguments.
|
||||||
|
func_exec_program ()
|
||||||
|
{
|
||||||
|
case " $* " in
|
||||||
|
*\ --lt-*)
|
||||||
|
for lt_wr_arg
|
||||||
|
do
|
||||||
|
case $lt_wr_arg in
|
||||||
|
--lt-*) ;;
|
||||||
|
*) set x "$@" "$lt_wr_arg"; shift;;
|
||||||
|
esac
|
||||||
|
shift
|
||||||
|
done ;;
|
||||||
|
esac
|
||||||
|
func_exec_program_core ${1+"$@"}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Parse options
|
||||||
|
func_parse_lt_options "$0" ${1+"$@"}
|
||||||
|
|
||||||
|
# Find the directory that this script lives in.
|
||||||
|
thisdir=`$ECHO "$file" | /bin/sed 's%/[^/]*$%%'`
|
||||||
|
test "x$thisdir" = "x$file" && thisdir=.
|
||||||
|
|
||||||
|
# Follow symbolic links until we get to the real thisdir.
|
||||||
|
file=`ls -ld "$file" | /bin/sed -n 's/.*-> //p'`
|
||||||
|
while test -n "$file"; do
|
||||||
|
destdir=`$ECHO "$file" | /bin/sed 's%/[^/]*$%%'`
|
||||||
|
|
||||||
|
# If there was a directory component, then change thisdir.
|
||||||
|
if test "x$destdir" != "x$file"; then
|
||||||
|
case "$destdir" in
|
||||||
|
[\\/]* | [A-Za-z]:[\\/]*) thisdir="$destdir" ;;
|
||||||
|
*) thisdir="$thisdir/$destdir" ;;
|
||||||
|
esac
|
||||||
|
fi
|
||||||
|
|
||||||
|
file=`$ECHO "$file" | /bin/sed 's%^.*/%%'`
|
||||||
|
file=`ls -ld "$thisdir/$file" | /bin/sed -n 's/.*-> //p'`
|
||||||
|
done
|
||||||
|
|
||||||
|
# Usually 'no', except on cygwin/mingw when embedded into
|
||||||
|
# the cwrapper.
|
||||||
|
WRAPPER_SCRIPT_BELONGS_IN_OBJDIR=yes
|
||||||
|
if test "$WRAPPER_SCRIPT_BELONGS_IN_OBJDIR" = "yes"; then
|
||||||
|
# special case for '.'
|
||||||
|
if test "$thisdir" = "."; then
|
||||||
|
thisdir=`pwd`
|
||||||
|
fi
|
||||||
|
# remove .libs from thisdir
|
||||||
|
case "$thisdir" in
|
||||||
|
*[\\/].libs ) thisdir=`$ECHO "$thisdir" | /bin/sed 's%[\\/][^\\/]*$%%'` ;;
|
||||||
|
.libs ) thisdir=. ;;
|
||||||
|
esac
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Try to get the absolute directory name.
|
||||||
|
absdir=`cd "$thisdir" && pwd`
|
||||||
|
test -n "$absdir" && thisdir="$absdir"
|
||||||
|
|
||||||
|
program='dtdquery.exe'
|
||||||
|
progdir="$thisdir/.libs"
|
||||||
|
|
||||||
|
|
||||||
|
if test -f "$progdir/$program"; then
|
||||||
|
if test "$libtool_execute_magic" != "%%%MAGIC variable%%%"; then
|
||||||
|
# Run the actual program with our arguments.
|
||||||
|
func_exec_program ${1+"$@"}
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
# The program doesn't exist.
|
||||||
|
$ECHO "$0: error: \`$progdir/$program' does not exist" 1>&2
|
||||||
|
$ECHO "This script is just a wrapper for $program." 1>&2
|
||||||
|
$ECHO "See the libtool documentation for more information." 1>&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
fi
|
BIN
html2xhtml-windows/.libs/html2xhtml.exe
Normal file
BIN
html2xhtml-windows/.libs/html2xhtml.exe
Normal file
Binary file not shown.
201
html2xhtml-windows/.libs/html2xhtml_ltshwrapper
Normal file
201
html2xhtml-windows/.libs/html2xhtml_ltshwrapper
Normal file
|
@ -0,0 +1,201 @@
|
||||||
|
#! /bin/sh
|
||||||
|
|
||||||
|
# html2xhtml - temporary wrapper script for .libs/html2xhtml.exe
|
||||||
|
# Generated by libtool (GNU libtool) 2.4.2 Debian-2.4.2-1.11
|
||||||
|
#
|
||||||
|
# The html2xhtml program cannot be directly executed until all the libtool
|
||||||
|
# libraries that it depends on are installed.
|
||||||
|
#
|
||||||
|
# This wrapper script should never be moved out of the build directory.
|
||||||
|
# If it is, it will not operate correctly.
|
||||||
|
|
||||||
|
# Sed substitution that helps us do robust quoting. It backslashifies
|
||||||
|
# metacharacters that are still active within double-quoted strings.
|
||||||
|
sed_quote_subst='s/\([`"$\\]\)/\\\1/g'
|
||||||
|
|
||||||
|
# Be Bourne compatible
|
||||||
|
if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then
|
||||||
|
emulate sh
|
||||||
|
NULLCMD=:
|
||||||
|
# Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which
|
||||||
|
# is contrary to our usage. Disable this feature.
|
||||||
|
alias -g '${1+"$@"}'='"$@"'
|
||||||
|
setopt NO_GLOB_SUBST
|
||||||
|
else
|
||||||
|
case `(set -o) 2>/dev/null` in *posix*) set -o posix;; esac
|
||||||
|
fi
|
||||||
|
BIN_SH=xpg4; export BIN_SH # for Tru64
|
||||||
|
DUALCASE=1; export DUALCASE # for MKS sh
|
||||||
|
|
||||||
|
# The HP-UX ksh and POSIX shell print the target directory to stdout
|
||||||
|
# if CDPATH is set.
|
||||||
|
(unset CDPATH) >/dev/null 2>&1 && unset CDPATH
|
||||||
|
|
||||||
|
relink_command=""
|
||||||
|
|
||||||
|
# This environment variable determines our operation mode.
|
||||||
|
if test "$libtool_install_magic" = "%%%MAGIC variable%%%"; then
|
||||||
|
# install mode needs the following variables:
|
||||||
|
generated_by_libtool_version='2.4.2'
|
||||||
|
notinst_deplibs=''
|
||||||
|
else
|
||||||
|
# When we are sourced in execute mode, $file and $ECHO are already set.
|
||||||
|
if test "$libtool_execute_magic" != "%%%MAGIC variable%%%"; then
|
||||||
|
file="$0"
|
||||||
|
|
||||||
|
# A function that is used when there is no print builtin or printf.
|
||||||
|
func_fallback_echo ()
|
||||||
|
{
|
||||||
|
eval 'cat <<_LTECHO_EOF
|
||||||
|
$1
|
||||||
|
_LTECHO_EOF'
|
||||||
|
}
|
||||||
|
ECHO="printf %s\\n"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Very basic option parsing. These options are (a) specific to
|
||||||
|
# the libtool wrapper, (b) are identical between the wrapper
|
||||||
|
# /script/ and the wrapper /executable/ which is used only on
|
||||||
|
# windows platforms, and (c) all begin with the string --lt-
|
||||||
|
# (application programs are unlikely to have options which match
|
||||||
|
# this pattern).
|
||||||
|
#
|
||||||
|
# There are only two supported options: --lt-debug and
|
||||||
|
# --lt-dump-script. There is, deliberately, no --lt-help.
|
||||||
|
#
|
||||||
|
# The first argument to this parsing function should be the
|
||||||
|
# script's ../libtool value, followed by yes.
|
||||||
|
lt_option_debug=
|
||||||
|
func_parse_lt_options ()
|
||||||
|
{
|
||||||
|
lt_script_arg0=$0
|
||||||
|
shift
|
||||||
|
for lt_opt
|
||||||
|
do
|
||||||
|
case "$lt_opt" in
|
||||||
|
--lt-debug) lt_option_debug=1 ;;
|
||||||
|
--lt-dump-script)
|
||||||
|
lt_dump_D=`$ECHO "X$lt_script_arg0" | /bin/sed -e 's/^X//' -e 's%/[^/]*$%%'`
|
||||||
|
test "X$lt_dump_D" = "X$lt_script_arg0" && lt_dump_D=.
|
||||||
|
lt_dump_F=`$ECHO "X$lt_script_arg0" | /bin/sed -e 's/^X//' -e 's%^.*/%%'`
|
||||||
|
cat "$lt_dump_D/$lt_dump_F"
|
||||||
|
exit 0
|
||||||
|
;;
|
||||||
|
--lt-*)
|
||||||
|
$ECHO "Unrecognized --lt- option: '$lt_opt'" 1>&2
|
||||||
|
exit 1
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
done
|
||||||
|
|
||||||
|
# Print the debug banner immediately:
|
||||||
|
if test -n "$lt_option_debug"; then
|
||||||
|
echo "html2xhtml.exe:html2xhtml:${LINENO}: libtool wrapper (GNU libtool) 2.4.2 Debian-2.4.2-1.11" 1>&2
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
|
# Used when --lt-debug. Prints its arguments to stdout
|
||||||
|
# (redirection is the responsibility of the caller)
|
||||||
|
func_lt_dump_args ()
|
||||||
|
{
|
||||||
|
lt_dump_args_N=1;
|
||||||
|
for lt_arg
|
||||||
|
do
|
||||||
|
$ECHO "html2xhtml.exe:html2xhtml:${LINENO}: newargv[$lt_dump_args_N]: $lt_arg"
|
||||||
|
lt_dump_args_N=`expr $lt_dump_args_N + 1`
|
||||||
|
done
|
||||||
|
}
|
||||||
|
|
||||||
|
# Core function for launching the target application
|
||||||
|
func_exec_program_core ()
|
||||||
|
{
|
||||||
|
|
||||||
|
if test -n "$lt_option_debug"; then
|
||||||
|
$ECHO "html2xhtml.exe:html2xhtml:${LINENO}: newargv[0]: $progdir/$program" 1>&2
|
||||||
|
func_lt_dump_args ${1+"$@"} 1>&2
|
||||||
|
fi
|
||||||
|
exec "$progdir/$program" ${1+"$@"}
|
||||||
|
|
||||||
|
$ECHO "$0: cannot exec $program $*" 1>&2
|
||||||
|
exit 1
|
||||||
|
}
|
||||||
|
|
||||||
|
# A function to encapsulate launching the target application
|
||||||
|
# Strips options in the --lt-* namespace from $@ and
|
||||||
|
# launches target application with the remaining arguments.
|
||||||
|
func_exec_program ()
|
||||||
|
{
|
||||||
|
case " $* " in
|
||||||
|
*\ --lt-*)
|
||||||
|
for lt_wr_arg
|
||||||
|
do
|
||||||
|
case $lt_wr_arg in
|
||||||
|
--lt-*) ;;
|
||||||
|
*) set x "$@" "$lt_wr_arg"; shift;;
|
||||||
|
esac
|
||||||
|
shift
|
||||||
|
done ;;
|
||||||
|
esac
|
||||||
|
func_exec_program_core ${1+"$@"}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Parse options
|
||||||
|
func_parse_lt_options "$0" ${1+"$@"}
|
||||||
|
|
||||||
|
# Find the directory that this script lives in.
|
||||||
|
thisdir=`$ECHO "$file" | /bin/sed 's%/[^/]*$%%'`
|
||||||
|
test "x$thisdir" = "x$file" && thisdir=.
|
||||||
|
|
||||||
|
# Follow symbolic links until we get to the real thisdir.
|
||||||
|
file=`ls -ld "$file" | /bin/sed -n 's/.*-> //p'`
|
||||||
|
while test -n "$file"; do
|
||||||
|
destdir=`$ECHO "$file" | /bin/sed 's%/[^/]*$%%'`
|
||||||
|
|
||||||
|
# If there was a directory component, then change thisdir.
|
||||||
|
if test "x$destdir" != "x$file"; then
|
||||||
|
case "$destdir" in
|
||||||
|
[\\/]* | [A-Za-z]:[\\/]*) thisdir="$destdir" ;;
|
||||||
|
*) thisdir="$thisdir/$destdir" ;;
|
||||||
|
esac
|
||||||
|
fi
|
||||||
|
|
||||||
|
file=`$ECHO "$file" | /bin/sed 's%^.*/%%'`
|
||||||
|
file=`ls -ld "$thisdir/$file" | /bin/sed -n 's/.*-> //p'`
|
||||||
|
done
|
||||||
|
|
||||||
|
# Usually 'no', except on cygwin/mingw when embedded into
|
||||||
|
# the cwrapper.
|
||||||
|
WRAPPER_SCRIPT_BELONGS_IN_OBJDIR=yes
|
||||||
|
if test "$WRAPPER_SCRIPT_BELONGS_IN_OBJDIR" = "yes"; then
|
||||||
|
# special case for '.'
|
||||||
|
if test "$thisdir" = "."; then
|
||||||
|
thisdir=`pwd`
|
||||||
|
fi
|
||||||
|
# remove .libs from thisdir
|
||||||
|
case "$thisdir" in
|
||||||
|
*[\\/].libs ) thisdir=`$ECHO "$thisdir" | /bin/sed 's%[\\/][^\\/]*$%%'` ;;
|
||||||
|
.libs ) thisdir=. ;;
|
||||||
|
esac
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Try to get the absolute directory name.
|
||||||
|
absdir=`cd "$thisdir" && pwd`
|
||||||
|
test -n "$absdir" && thisdir="$absdir"
|
||||||
|
|
||||||
|
program='html2xhtml.exe'
|
||||||
|
progdir="$thisdir/.libs"
|
||||||
|
|
||||||
|
|
||||||
|
if test -f "$progdir/$program"; then
|
||||||
|
if test "$libtool_execute_magic" != "%%%MAGIC variable%%%"; then
|
||||||
|
# Run the actual program with our arguments.
|
||||||
|
func_exec_program ${1+"$@"}
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
# The program doesn't exist.
|
||||||
|
$ECHO "$0: error: \`$progdir/$program' does not exist" 1>&2
|
||||||
|
$ECHO "This script is just a wrapper for $program." 1>&2
|
||||||
|
$ECHO "See the libtool documentation for more information." 1>&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
fi
|
1037
html2xhtml-windows/.libs/lt-dtdquery.c
Normal file
1037
html2xhtml-windows/.libs/lt-dtdquery.c
Normal file
File diff suppressed because it is too large
Load diff
1038
html2xhtml-windows/.libs/lt-html2xhtml.c
Normal file
1038
html2xhtml-windows/.libs/lt-html2xhtml.c
Normal file
File diff suppressed because it is too large
Load diff
BIN
html2xhtml-windows/dtdquery.exe
Normal file
BIN
html2xhtml-windows/dtdquery.exe
Normal file
Binary file not shown.
BIN
html2xhtml-windows/html2xhtml.exe
Normal file
BIN
html2xhtml-windows/html2xhtml.exe
Normal file
Binary file not shown.
95
src/book.rs
Normal file
95
src/book.rs
Normal file
|
@ -0,0 +1,95 @@
|
||||||
|
use scraper::Html;
|
||||||
|
use url::Url;
|
||||||
|
|
||||||
|
use crate::{html, http};
|
||||||
|
|
||||||
|
/// A struct representing a book & all the needed data to generate one.
|
||||||
|
pub struct Book {
|
||||||
|
/// The RoyalRoad Url for the book.
|
||||||
|
book_url: Url,
|
||||||
|
|
||||||
|
/// The book's title.
|
||||||
|
title: String,
|
||||||
|
|
||||||
|
/// The book's author.
|
||||||
|
author: String,
|
||||||
|
|
||||||
|
/// A Url to the book's cover image.
|
||||||
|
cover_image_url: Url,
|
||||||
|
|
||||||
|
/// The raw html data of the RoyalRoad index page.
|
||||||
|
index_html: Html,
|
||||||
|
|
||||||
|
/// A vector of the book's chapters.
|
||||||
|
chapters: Vec<Chapter>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Book {
|
||||||
|
/// Generate a new book instance with all the needed data from a given url.
|
||||||
|
pub fn new(book_url: Url) -> Book {
|
||||||
|
let index_html = html::string_to_html_document(&http::get_response(book_url.clone()).get_text());
|
||||||
|
|
||||||
|
let chapter_names_and_urls = html::get_chapter_names_and_urls_from_index(&index_html);
|
||||||
|
|
||||||
|
let mut chapters: Vec<Chapter> = Vec::with_capacity(chapter_names_and_urls.len());
|
||||||
|
|
||||||
|
for i in 0..chapter_names_and_urls.len() {
|
||||||
|
let chapter = Chapter::new(&chapter_names_and_urls[i][0], &chapter_names_and_urls[i][1]);
|
||||||
|
chapters.push(chapter);
|
||||||
|
}
|
||||||
|
|
||||||
|
Book {
|
||||||
|
book_url: book_url,
|
||||||
|
title: html::get_title_from_index(&index_html),
|
||||||
|
author: html::get_author_from_index(&index_html),
|
||||||
|
cover_image_url: http::string_to_url(&html::get_cover_image_url_from_index(&index_html)),
|
||||||
|
index_html: index_html,
|
||||||
|
chapters: chapters,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Count how many paragraphs are in the book.
|
||||||
|
pub fn count_paragraphs(&self) -> u128 {
|
||||||
|
// TODO!
|
||||||
|
0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A struct representing a chapter.
|
||||||
|
struct Chapter {
|
||||||
|
/// The Url of the chapter.
|
||||||
|
chapter_url: Url,
|
||||||
|
|
||||||
|
/// The name of the chapter.
|
||||||
|
chapter_name: String,
|
||||||
|
|
||||||
|
/// The raw html data of the page.
|
||||||
|
raw_chapter_html: Html,
|
||||||
|
|
||||||
|
/// The isolated chapter html.
|
||||||
|
isolated_chapter_html: Html,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Chapter {
|
||||||
|
fn new(chapter_name: &str, chapter_url: &str) -> Self {
|
||||||
|
let chapter_url = http::string_to_url(&chapter_url);
|
||||||
|
let raw_chapter_html = html::string_to_html_document(&http::get_response(chapter_url.clone()).get_text());
|
||||||
|
|
||||||
|
Chapter {
|
||||||
|
chapter_url: chapter_url,
|
||||||
|
chapter_name: chapter_name.to_string(),
|
||||||
|
raw_chapter_html: raw_chapter_html.clone(),
|
||||||
|
isolated_chapter_html: html::isolate_chapter_content(raw_chapter_html)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: placeholder, no fields have been defined yet.
struct BookImages {}
|
||||||
|
|
||||||
|
// TODO: placeholder, no fields have been defined yet.
struct BookCss {}
|
141
src/html.rs
Normal file
141
src/html.rs
Normal file
|
@ -0,0 +1,141 @@
|
||||||
|
use std::process::exit;
|
||||||
|
|
||||||
|
use regex::Regex;
|
||||||
|
use scraper::{Html, Selector};
|
||||||
|
|
||||||
|
/// Convert a string to an html document.
|
||||||
|
pub fn string_to_html_document(document_string: &str) -> Html {
|
||||||
|
Html::parse_document(document_string)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Convert a string to an html fragment.
|
||||||
|
pub fn string_to_html_fragment(fragment_string: &str) -> Html {
|
||||||
|
Html::parse_fragment(fragment_string)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get the book's title from the index.
|
||||||
|
pub fn get_title_from_index(index_html: &Html) -> String {
|
||||||
|
let selector = Selector::parse("meta").unwrap(); // Build a selector that finds the 'meta' html tag
|
||||||
|
for element in index_html.select(&selector) {
|
||||||
|
// Loop through all meta tags in the html document.
|
||||||
|
match element.value().attr("name") {
|
||||||
|
// Check if the meta tag contains attribute: "name"
|
||||||
|
None => continue,
|
||||||
|
Some(x) => {
|
||||||
|
if x == "twitter:title" {
|
||||||
|
// If it does contain attribute "name", check if the content of that attribute is "twitter:title"
|
||||||
|
return element.value().attr("content").unwrap().to_owned();
|
||||||
|
// If it is, extract the data from the content attribute.
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
eprintln!("Error! Unable to find book title. Royal road have probably changed their front-end code. Please report this to me on:\nhttps://github.com/Raine-gay/royal_road_archiver");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get the book's author from index
|
||||||
|
pub fn get_author_from_index(index_html: &Html) -> String {
|
||||||
|
let selector = Selector::parse("meta").unwrap();
|
||||||
|
for element in index_html.select(&selector) {
|
||||||
|
match element.value().attr("property") {
|
||||||
|
None => continue,
|
||||||
|
Some(x) => {
|
||||||
|
if x == "books:author" {
|
||||||
|
return element.value().attr("content").unwrap().to_owned();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
eprintln!("Error! Unable to find book author. Royal road have probably changed their front-end code. Please report this to me on:\nhttps://github.com/Raine-gay/royal_road_archiver");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get the book's cover image url from the index
|
||||||
|
pub fn get_cover_image_url_from_index(index_html: &Html) -> String {
|
||||||
|
let selector = Selector::parse("meta").unwrap();
|
||||||
|
for element in index_html.select(&selector) {
|
||||||
|
match element.value().attr("property") {
|
||||||
|
None => continue,
|
||||||
|
Some(x) => {
|
||||||
|
if x == "og:image" {
|
||||||
|
return element.value().attr("content").unwrap().to_owned();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
eprintln!("Error! Unable to find cover image url. Royal road have probably changed their front-end code. Please report this to me on:\nhttps://github.com/Raine-gay/royal_road_archiver");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Gets the chapter names and urls from the index.
|
||||||
|
///
|
||||||
|
/// This gets stored in a vector where index 0 is the chapter name, and index 1 is the url.
|
||||||
|
pub fn get_chapter_names_and_urls_from_index(index_html: &Html) -> Vec<[String; 2]> {
|
||||||
|
// I wont lie. I have almost 0 idea what a bunch of this shit does since it's highly specific to RoyalRoad.
|
||||||
|
// I've commented in the gist of it, but we have no memory actually writing this function.
|
||||||
|
|
||||||
|
let mut chapters: Vec<[String; 2]> = Vec::new();
|
||||||
|
let mut raw_json_data = String::new();
|
||||||
|
|
||||||
|
// Find a script tag that has "window.chapters" inside the inner html. This is all in json format.
|
||||||
|
let selector = Selector::parse("script").unwrap();
|
||||||
|
for element in index_html.select(&selector) {
|
||||||
|
if element.inner_html().contains("window.chapters") {
|
||||||
|
raw_json_data = element.inner_html();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Exit it if unable to find the needed json data. That probably means royal road has changed their code.
|
||||||
|
if raw_json_data.is_empty() {
|
||||||
|
eprintln!("Error! Unable to find json chapter data. Royal road have probably changed their front-end code. Please report this to me on:\nhttps://github.com/Raine-gay/royal_road_archiver");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// I have absolutely no idea what this regex does; but it's probably important.
|
||||||
|
const REGEX: &str = r#"window.chapters = (\[.*?]);"#;
|
||||||
|
let regex = Regex::new(REGEX).unwrap();
|
||||||
|
|
||||||
|
// I still have no fucking clue what this magic part does; but it works so we ain't fucking touching it.
|
||||||
|
let chapter_raw_json = regex
|
||||||
|
.captures(&raw_json_data)
|
||||||
|
.unwrap()
|
||||||
|
.get(1)
|
||||||
|
.map_or("[]", |m| m.as_str());
|
||||||
|
|
||||||
|
// and it just spits out json when done. Neat.
|
||||||
|
let chapter_json: serde_json::Value = serde_json::from_str(chapter_raw_json).unwrap();
|
||||||
|
|
||||||
|
// For each chapter in the json, do some processing to remove the quotes then shove it onto the vector.
|
||||||
|
for chapter in chapter_json.as_array().unwrap() {
|
||||||
|
let chapter_name = chapter["title"].to_string().replace('"', "");
|
||||||
|
let url = format!(
|
||||||
|
"https://www.royalroad.com{}",
|
||||||
|
chapter["url"].to_string().replace('"', "")
|
||||||
|
);
|
||||||
|
|
||||||
|
chapters.push([chapter_name, url]);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return that wanker.
|
||||||
|
return chapters;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Isolate chapter content from the rest of the shit on the page.
|
||||||
|
pub fn isolate_chapter_content(raw_chapter_html: Html) -> Html {
|
||||||
|
let page_html = Html::parse_document(&raw_chapter_html.html());
|
||||||
|
|
||||||
|
let selector = Selector::parse("div").unwrap();
|
||||||
|
for element in page_html.select(&selector) {
|
||||||
|
match element.value().attr("class") {
|
||||||
|
None => continue,
|
||||||
|
Some(x) => {
|
||||||
|
if x == "chapter-inner chapter-content" {
|
||||||
|
return string_to_html_fragment(&element.inner_html());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
eprintln!("Error! Unable to isolate chapter content");
|
||||||
|
exit(1);
|
||||||
|
}
|
63
src/http.rs
Normal file
63
src/http.rs
Normal file
|
@ -0,0 +1,63 @@
|
||||||
|
use std::process::exit;
|
||||||
|
|
||||||
|
use reqwest::{blocking::Response, header::HeaderMap};
|
||||||
|
use url::Url;
|
||||||
|
|
||||||
|
// A struct representing an HttpResponse and the Url it originated from.
|
||||||
|
pub struct HttpResponse {
|
||||||
|
url: Url,
|
||||||
|
pub response: Response,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl HttpResponse {
|
||||||
|
/// Get the response headers.
|
||||||
|
pub fn get_headers(&self) -> &HeaderMap {
|
||||||
|
self.response.headers()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Attempt to convert the response to text. Exits the program if it fails.
|
||||||
|
pub fn get_text(self) -> String {
|
||||||
|
match self.response.text() {
|
||||||
|
Ok(response_text) => response_text,
|
||||||
|
Err(error) => {
|
||||||
|
eprintln!("Error! Unable to convert response from {0} into text\n{error}", self.url);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Attempt to convert the response to bytes. Used for images. Exits the program if it fails.
|
||||||
|
pub fn get_bytes(self) -> bytes::Bytes{
|
||||||
|
match self.response.bytes() {
|
||||||
|
Ok(response_bytes) => response_bytes,
|
||||||
|
Err(error) => {
|
||||||
|
eprintln!("Error! Unable to convert response from {0} into bytes\n{error}", self.url);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get an http response for a given url. Exits the program if it fails.
|
||||||
|
pub fn get_response(url: Url) -> HttpResponse {
|
||||||
|
let response_result = reqwest::blocking::get(url.clone());
|
||||||
|
|
||||||
|
match response_result {
|
||||||
|
Ok(response) => HttpResponse { url, response },
|
||||||
|
Err(error) => {
|
||||||
|
eprintln!("Error! Unable to get a response from: {url}\n{error}");
|
||||||
|
exit(1);
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A function to convert a string to a url. Exits the program if it fails.
|
||||||
|
pub fn string_to_url(url: &str) -> Url {
|
||||||
|
match Url::parse(url) {
|
||||||
|
Ok(url) => url,
|
||||||
|
Err(error) => {
|
||||||
|
eprintln!("Error! Unable to parse: {url} into a valid url.");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -3,6 +3,10 @@ use std::path::PathBuf;
|
||||||
use clap::Args;
|
use clap::Args;
|
||||||
use url::Url;
|
use url::Url;
|
||||||
|
|
||||||
|
mod book;
|
||||||
|
mod html;
|
||||||
|
mod http;
|
||||||
|
|
||||||
/// struct that corresponds to arguments for Audiobook generation.
|
/// struct that corresponds to arguments for Audiobook generation.
|
||||||
#[derive(Args, Debug)]
|
#[derive(Args, Debug)]
|
||||||
pub struct AudiobookArgs {
|
pub struct AudiobookArgs {
|
||||||
|
@ -71,5 +75,5 @@ pub fn generate_html(html_args: HtmlArgs, book_url: Url, output_directory: PathB
|
||||||
/// This function DOES NOT do any error checking on the Url or output directory & WILL panic if they are wrong.
|
/// This function DOES NOT do any error checking on the Url or output directory & WILL panic if they are wrong.
|
||||||
/// Make sure the Url is valid and the output directory is writable BEFORE passing them to this.
|
/// Make sure the Url is valid and the output directory is writable BEFORE passing them to this.
|
||||||
pub fn generate_markdown(markdown_args: MarkdownArgs, book_url: Url, output_directory: PathBuf) {
|
pub fn generate_markdown(markdown_args: MarkdownArgs, book_url: Url, output_directory: PathBuf) {
|
||||||
eprintln!("This is not implemented yet.");
|
let book = book::Book::new(book_url);
|
||||||
}
|
}
|
Loading…
Reference in a new issue