2024-01-25 11:16:03 -06:00
|
|
|
use std::collections::HashMap;
|
|
|
|
|
2024-01-25 10:13:09 -06:00
|
|
|
use indicatif::{ProgressBar, ProgressStyle};
|
2024-01-25 21:17:50 -06:00
|
|
|
use crate::misc::HashMapExt;
|
2024-01-25 08:49:55 -06:00
|
|
|
use scraper::Html;
|
|
|
|
use url::Url;
|
|
|
|
|
2024-01-25 21:17:50 -06:00
|
|
|
use crate::{file_system_crap::remove_illegal_chars, html, http};
|
2024-01-25 08:49:55 -06:00
|
|
|
|
|
|
|
/// A struct representing a book & all the needed data to generate one.
|
|
|
|
pub struct Book {
|
|
|
|
/// The RoyalRoad Url for the book.
|
2024-01-25 21:17:50 -06:00
|
|
|
pub book_url: Url,
|
2024-01-25 08:49:55 -06:00
|
|
|
|
|
|
|
/// The book's title.
|
2024-01-25 10:13:09 -06:00
|
|
|
pub title: String,
|
2024-01-25 08:49:55 -06:00
|
|
|
|
2024-01-25 21:17:50 -06:00
|
|
|
/// Book title used for the filename.
|
|
|
|
/// Should have illegal chars expunged via file_system_crap::remove_illegal_chars.
|
|
|
|
pub file_name_title: String,
|
|
|
|
|
2024-01-25 08:49:55 -06:00
|
|
|
/// The book's author.
|
2024-01-25 10:13:09 -06:00
|
|
|
pub author: String,
|
2024-01-25 08:49:55 -06:00
|
|
|
|
|
|
|
/// A Url to the book's cover image.
|
2024-01-25 21:17:50 -06:00
|
|
|
pub cover_image_url: Url,
|
2024-01-25 08:49:55 -06:00
|
|
|
|
|
|
|
/// The raw html data of the RoyalRoad index page.
|
|
|
|
index_html: Html,
|
|
|
|
|
|
|
|
/// A vector of the book's chapters.
|
2024-01-25 10:13:09 -06:00
|
|
|
pub chapters: Vec<Chapter>,
|
2024-01-25 11:16:03 -06:00
|
|
|
|
|
|
|
/// A hashmap representing the book image urls and their corresponding img html tags.
|
2024-01-25 21:17:50 -06:00
|
|
|
pub image_urls_and_tags: HashMap<Url, Vec<String>>,
|
2024-01-25 08:49:55 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
impl Book {
|
|
|
|
/// Generate a new book instance with all the needed data from a given url.
|
|
|
|
pub fn new(book_url: Url) -> Book {
|
|
|
|
let index_html = html::string_to_html_document(&http::get_response(book_url.clone()).get_text());
|
|
|
|
|
|
|
|
let chapter_names_and_urls = html::get_chapter_names_and_urls_from_index(&index_html);
|
|
|
|
let mut chapters: Vec<Chapter> = Vec::with_capacity(chapter_names_and_urls.len());
|
|
|
|
|
2024-01-25 21:17:50 -06:00
|
|
|
let mut image_urls_and_tags: HashMap<Url, Vec<String>> = HashMap::new();
|
2024-01-25 11:16:03 -06:00
|
|
|
|
2024-01-25 10:13:09 -06:00
|
|
|
println!("\nDownloading and processing chapters:");
|
|
|
|
// Spawn a progress bar showing how many chapters have been downloaded & processed.
|
|
|
|
let progress_bar = ProgressBar::new(chapter_names_and_urls.len().try_into().unwrap());
|
|
|
|
progress_bar.set_style(
|
|
|
|
ProgressStyle::with_template("[{elapsed_precise}] [{wide_bar:.cyan/blue}] {percent}% ")
|
|
|
|
.unwrap()
|
|
|
|
.progress_chars("#>-"),
|
|
|
|
);
|
|
|
|
|
|
|
|
// Generate the chapters and add em to the book.
|
2024-01-25 08:49:55 -06:00
|
|
|
for i in 0..chapter_names_and_urls.len() {
|
|
|
|
let chapter = Chapter::new(&chapter_names_and_urls[i][0], &chapter_names_and_urls[i][1]);
|
2024-01-25 11:16:03 -06:00
|
|
|
|
2024-01-25 21:17:50 -06:00
|
|
|
// extract the image urls and add em to the image_urls_and_tags hashmap.
|
|
|
|
image_urls_and_tags = image_urls_and_tags.join(html::extract_urls_and_img_tag(&chapter.isolated_chapter_html));
|
2024-01-25 11:16:03 -06:00
|
|
|
|
2024-01-25 08:49:55 -06:00
|
|
|
chapters.push(chapter);
|
2024-01-25 10:13:09 -06:00
|
|
|
|
|
|
|
progress_bar.inc(1);
|
2024-01-25 08:49:55 -06:00
|
|
|
}
|
|
|
|
|
2024-01-25 10:13:09 -06:00
|
|
|
progress_bar.finish();
|
|
|
|
|
2024-01-25 21:17:50 -06:00
|
|
|
let title = html::get_title_from_index(&index_html);
|
|
|
|
|
2024-01-25 08:49:55 -06:00
|
|
|
Book {
|
|
|
|
book_url: book_url,
|
2024-01-25 21:17:50 -06:00
|
|
|
title: title.clone(),
|
|
|
|
file_name_title: remove_illegal_chars(title),
|
2024-01-25 08:49:55 -06:00
|
|
|
author: html::get_author_from_index(&index_html),
|
|
|
|
cover_image_url: http::string_to_url(&html::get_cover_image_url_from_index(&index_html)),
|
|
|
|
index_html: index_html,
|
|
|
|
chapters: chapters,
|
2024-01-25 21:17:50 -06:00
|
|
|
image_urls_and_tags,
|
2024-01-25 08:49:55 -06:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Count how many paragraphs are in the book.
|
|
|
|
pub fn count_paragraphs(&self) -> u128 {
|
|
|
|
// TODO!
|
|
|
|
0
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// A struct representing a chapter.
|
2024-01-25 10:13:09 -06:00
|
|
|
pub struct Chapter {
|
2024-01-25 08:49:55 -06:00
|
|
|
/// The Url of the chapter.
|
|
|
|
chapter_url: Url,
|
|
|
|
|
|
|
|
/// The name of the chapter.
|
2024-01-25 10:13:09 -06:00
|
|
|
pub chapter_name: String,
|
2024-01-25 08:49:55 -06:00
|
|
|
|
2024-01-25 21:17:50 -06:00
|
|
|
/// The raw html data of the chapter page.
|
2024-01-25 08:49:55 -06:00
|
|
|
raw_chapter_html: Html,
|
|
|
|
|
|
|
|
/// The isolated chapter html.
|
2024-01-25 10:13:09 -06:00
|
|
|
pub isolated_chapter_html: Html,
|
2024-01-25 08:49:55 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
impl Chapter {
|
|
|
|
fn new(chapter_name: &str, chapter_url: &str) -> Self {
|
|
|
|
let chapter_url = http::string_to_url(&chapter_url);
|
|
|
|
let raw_chapter_html = html::string_to_html_document(&http::get_response(chapter_url.clone()).get_text());
|
|
|
|
|
2024-01-25 10:13:09 -06:00
|
|
|
Chapter {
|
2024-01-25 08:49:55 -06:00
|
|
|
chapter_url: chapter_url,
|
|
|
|
chapter_name: chapter_name.to_string(),
|
2024-01-25 21:17:50 -06:00
|
|
|
isolated_chapter_html: html::isolate_chapter_content(&raw_chapter_html),
|
|
|
|
raw_chapter_html: raw_chapter_html,
|
2024-01-25 08:49:55 -06:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|