royal_road_archiver/src/book.rs

use scraper::Html;
use url::Url;

use crate::{html, http};

/// A struct representing a book & all the needed data to generate one.
///
/// Instances are built by [`Book::new`], which fills every field from the book's
/// RoyalRoad index page.
pub struct Book {
    /// The RoyalRoad Url for the book; this is the url handed to [`Book::new`]
    /// and the one the index page is requested from.
    book_url: Url,

    /// The book's title, as returned by `html::get_title_from_index`.
    title: String,

    /// The book's author, as returned by `html::get_author_from_index`.
    author: String,
    
    /// A Url to the book's cover image, parsed with `http::string_to_url` from
    /// the string returned by `html::get_cover_image_url_from_index`.
    cover_image_url: Url,

    /// The parsed html document of the RoyalRoad index page.
    index_html: Html,

    /// The book's chapters, one per entry found on the index page and kept in
    /// the order `html::get_chapter_names_and_urls_from_index` returned them.
    chapters: Vec<Chapter>,
}

impl Book {
    /// Generate a new book instance with all the needed data from a given url.
    ///
    /// Work happens in this order:
    /// 1. The response for `book_url` is requested through `http::get_response`
    ///    and parsed into the index document.
    /// 2. Every chapter listed on the index is built with `Chapter::new`, in
    ///    listing order. Each `Chapter::new` call issues its own request, so
    ///    this step performs one request per chapter.
    /// 3. Title, author and cover image url are extracted from the index.
    ///
    /// Each entry returned by `html::get_chapter_names_and_urls_from_index` is
    /// read as `[name, url]` (index 0 is the chapter name, index 1 its url).
    /// NOTE(review): an entry with fewer than two elements would make the
    /// indexing below panic; confirm the helper always yields pairs.
    pub fn new(book_url: Url) -> Book {
        let index_html = html::string_to_html_document(&http::get_response(book_url.clone()).get_text());

        let chapter_names_and_urls = html::get_chapter_names_and_urls_from_index(&index_html);

        // Chapters are built eagerly here, before any other index field is
        // extracted. `collect` sizes the vector from the iterator's length.
        let chapters: Vec<Chapter> = chapter_names_and_urls
            .iter()
            .map(|name_and_url| Chapter::new(&name_and_url[0], &name_and_url[1]))
            .collect();

        Book {
            book_url,
            title: html::get_title_from_index(&index_html),
            author: html::get_author_from_index(&index_html),
            cover_image_url: http::string_to_url(&html::get_cover_image_url_from_index(&index_html)),
            index_html,
            chapters,
        }
    }

    /// Count how many paragraphs are in the book.
    ///
    /// Not implemented yet: this is a placeholder that always returns `0`
    /// regardless of the book's contents.
    pub fn count_paragraphs(&self) -> u128 {
        // TODO!
        0
    }
}

/// A struct representing a chapter.
///
/// Built by `Chapter::new` from a chapter name and a chapter url string.
struct Chapter {
    /// The Url of the chapter, parsed with `http::string_to_url` from the url
    /// string passed to `Chapter::new`.
    chapter_url: Url,
    
    /// The name of the chapter, copied from the name passed to `Chapter::new`.
    chapter_name: String,
    
    /// The parsed html document of the whole chapter page, i.e. the response
    /// text for `chapter_url` run through `html::string_to_html_document`.
    raw_chapter_html: Html,

    /// The isolated chapter html: the result of
    /// `html::isolate_chapter_content` applied to the raw chapter page.
    isolated_chapter_html: Html,
}

impl Chapter {
    /// Build a chapter from its display name and its url given as a string.
    ///
    /// The url string is converted with `http::string_to_url`, the response for
    /// that url is requested through `http::get_response`, and its text is
    /// parsed into an html document. That document is stored untouched as
    /// `raw_chapter_html`, while a copy of it is passed through
    /// `html::isolate_chapter_content` to produce `isolated_chapter_html`.
    fn new(chapter_name: &str, chapter_url: &str) -> Self {
        let chapter_url = http::string_to_url(chapter_url);
        let page_html = html::string_to_html_document(&http::get_response(chapter_url.clone()).get_text());

        // `isolate_chapter_content` takes the document by value, so it gets its
        // own copy and the full page stays available for the struct.
        let isolated_chapter_html = html::isolate_chapter_content(page_html.clone());

        Self {
            chapter_url,
            chapter_name: chapter_name.to_owned(),
            raw_chapter_html: page_html,
            isolated_chapter_html,
        }
    }
}

// TODO!
/// Placeholder type with no fields yet; nothing in this file uses it.
/// NOTE(review): presumably meant to hold the images belonging to a book;
/// confirm the intended contents before relying on it.
struct BookImages {

}

// TODO!
/// Placeholder type with no fields yet; nothing in this file uses it.
/// NOTE(review): presumably meant to hold the stylesheet data for a book;
/// confirm the intended contents before relying on it.
struct BookCss {

}
Bundled both windows and linux builds of html2xhtml and just generally made a bunch of work 2024-01-25 08:49:55 -06:00			`use scraper::Html;`
			`use url::Url;`

			`use crate::{html, http};`

			`/// A struct representing a book & all the needed data to generate one.`
			`pub struct Book {`
			`/// The RoyalRoad Url for the book.`
			`book_url: Url,`

			`/// The book's title.`
			`title: String,`

			`/// The book's author.`
			`author: String,`

			`/// A Url to the book's cover image.`
			`cover_image_url: Url,`

			`/// The raw html data of the RoyalRoad index page.`
			`index_html: Html,`

			`/// A vector of the book's chapters.`
			`chapters: Vec<Chapter>,`
			`}`

			`impl Book {`
			`/// Generate a new book instance with all the needed data from a given url.`
			`pub fn new(book_url: Url) -> Book {`
			`let index_html = html::string_to_html_document(&http::get_response(book_url.clone()).get_text());`

			`let chapter_names_and_urls = html::get_chapter_names_and_urls_from_index(&index_html);`

			`let mut chapters: Vec<Chapter> = Vec::with_capacity(chapter_names_and_urls.len());`

			`for i in 0..chapter_names_and_urls.len() {`
			`let chapter = Chapter::new(&chapter_names_and_urls[i][0], &chapter_names_and_urls[i][1]);`
			`chapters.push(chapter);`
			`}`

			`Book {`
			`book_url: book_url,`
			`title: html::get_title_from_index(&index_html),`
			`author: html::get_author_from_index(&index_html),`
			`cover_image_url: http::string_to_url(&html::get_cover_image_url_from_index(&index_html)),`
			`index_html: index_html,`
			`chapters: chapters,`
			`}`
			`}`

			`/// Count how many paragraphs are in the book.`
			`pub fn count_paragraphs(&self) -> u128 {`
			`// TODO!`
			`0`
			`}`
			`}`

			`/// A struct representing a chapter.`
			`struct Chapter {`
			`/// The Url of the chapter.`
			`chapter_url: Url,`

			`/// The name of the chapter.`
			`chapter_name: String,`

			`/// The raw html data of the page.`
			`raw_chapter_html: Html,`

			`/// The isolated chapter html.`
			`isolated_chapter_html: Html,`
			`}`

			`impl Chapter {`
			`fn new(chapter_name: &str, chapter_url: &str) -> Self {`
			`let chapter_url = http::string_to_url(&chapter_url);`
			`let raw_chapter_html = html::string_to_html_document(&http::get_response(chapter_url.clone()).get_text());`

			`Chapter {`
			`chapter_url: chapter_url,`
			`chapter_name: chapter_name.to_string(),`
			`raw_chapter_html: raw_chapter_html.clone(),`
			`isolated_chapter_html: html::isolate_chapter_content(raw_chapter_html)`
			`}`
			`}`
			`}`

			`// TODO!`
			`struct BookImages {`

			`}`

			`// TODO!`
			`struct BookCss {`

			`}`