//! Parsing of image-search result pages into [`SearchResults`] and [`Match`] values.

use scraper::{
    element_ref::ElementRef,
    Html,
    Selector,
};
use std::borrow::Cow;
use url::Url;

/// Error that may occur while parsing a [`SearchResults`] from html.
///
/// Every variant except [`FromHtmlError::InvalidMatch`] reports a structural
/// element that [`SearchResults::from_html`] looked for and did not find.
#[derive(Debug, thiserror::Error)]
pub enum FromHtmlError {
    /// No element matching the `.pages` selector was found in the document.
    #[error("missing pages div")]
    MissingPagesDiv,

    /// The `.pages` element yielded no `div` at all; the first `div` is the
    /// one treated as the "your image" entry.
    #[error("missing your image div")]
    MissingYourImageDiv,

    /// The `.pages` element yielded no second `div`; that is the one treated
    /// as the best match entry.
    #[error("missing best match div")]
    MissingBestMatchDiv,

    /// A best or possible match `div` could not be converted into a [`Match`].
    ///
    /// Wraps the underlying [`FromElementError`], which `?` converts
    /// automatically thanks to `#[from]`.
    #[error("failed to parse a match")]
    InvalidMatch(#[from] FromElementError),
}

/// The results of an image search
///
/// Built from a parsed page by [`SearchResults::from_html`].
#[derive(Debug)]
pub struct SearchResults {
    /// The best match.
    ///
    /// `None` when the best-match `div` carries the `nomatch` class.
    pub best_match: Option<Match>,

    /// Possible matches: one entry for every `div` that follows the
    /// best-match `div`, in the order the selector yielded them.
    pub possible_matches: Vec<Match>,
}

impl SearchResults {
    /// Extract the search results from a parsed results page.
    ///
    /// The first element matching `.pages` is located and its `div`s are
    /// consumed in order: the first is the uploaded image (only its presence
    /// is checked), the second is the best match, and all remaining ones are
    /// possible matches.
    ///
    /// # Errors
    ///
    /// * [`FromHtmlError::MissingPagesDiv`] if nothing matches `.pages`.
    /// * [`FromHtmlError::MissingYourImageDiv`] if there is no first `div`.
    /// * [`FromHtmlError::MissingBestMatchDiv`] if there is no second `div`.
    /// * [`FromHtmlError::InvalidMatch`] if any match `div` fails to parse.
    pub fn from_html(html: &Html) -> Result<Self, FromHtmlError> {
        lazy_static::lazy_static! {
            static ref PAGES_SELECTOR: Selector = Selector::parse(".pages").expect("invalid pages selector");
            static ref DIV_SELECTOR: Selector = Selector::parse("div").expect("invalid div selector");
        };

        let container = match html.select(&PAGES_SELECTOR).next() {
            Some(el) => el,
            None => return Err(FromHtmlError::MissingPagesDiv),
        };

        let mut divs = container.select(&DIV_SELECTOR);

        // The leading div describes the uploaded image; it only has to exist.
        if divs.next().is_none() {
            return Err(FromHtmlError::MissingYourImageDiv);
        }

        let candidate = match divs.next() {
            Some(el) => el,
            None => return Err(FromHtmlError::MissingBestMatchDiv),
        };

        // A `nomatch` class on the best-match div means there is no best match.
        let is_no_match = candidate.value().classes().any(|class| class == "nomatch");
        let best_match = if is_no_match {
            None
        } else {
            Some(Match::from_element(candidate)?)
        };

        // Everything left over is a possible match; stop at the first failure.
        let mut possible_matches = Vec::new();
        for div in divs {
            possible_matches.push(Match::from_element(div)?);
        }

        Ok(Self {
            best_match,
            possible_matches,
        })
    }
}

/// Error that may occur while parsing a [`Match`] from an element.
#[derive(Debug, thiserror::Error)]
pub enum FromElementError {
    /// No `a` element was found inside the match element.
    #[error("missing link")]
    MissingLink,

    /// The `a` element that was found has no `href` attribute.
    #[error("missing href")]
    MissingHref,

    /// The `href`, after fix-up, could not be parsed as a url.
    ///
    /// Carries the underlying parse error as its source.
    #[error("invalid href")]
    InvalidHref(#[source] url::ParseError),

    /// No `img` element was found inside the match element.
    #[error("missing img")]
    MissingImg,

    /// The `img` element that was found has no `src` attribute.
    #[error("missing img url")]
    MissingImgUrl,

    /// The `src`, after fix-up, could not be parsed as a url.
    ///
    /// Carries the underlying parse error as its source.
    #[error("invalid img url")]
    InvalidImgUrl(#[source] url::ParseError),
}

/// A best or possible image match
///
/// Built from a single match element by [`Match::from_element`].
#[derive(Debug)]
pub struct Match {
    /// The page url of the match, taken from the `href` of the element's
    /// first `a`.
    pub url: Url,

    /// The url of the img, taken from the `src` of the element's first `img`.
    pub image_url: Url,
}

impl Match {
    /// Create an element
    pub fn from_element(element: ElementRef<'_>) -> Result<Self, FromElementError> {
        lazy_static::lazy_static! {
            static ref LINK_SELECTOR: Selector = Selector::parse("a").expect("invalid link selector");
            static ref IMG_SELECTOR: Selector = Selector::parse("img").expect("invalid img selector");
        }

        let link_el = element
            .select(&LINK_SELECTOR)
            .next()
            .ok_or(FromElementError::MissingLink)?;

        let link_href = link_el
            .value()
            .attr("href")
            .ok_or(FromElementError::MissingHref)
            .map(fixup_url)?;

        let url = Url::parse(&link_href).map_err(FromElementError::InvalidHref)?;

        let img_el = element
            .select(&IMG_SELECTOR)
            .next()
            .ok_or(FromElementError::MissingImg)?;

        let img_src = img_el
            .value()
            .attr("src")
            .ok_or(FromElementError::MissingImgUrl)
            .map(fixup_url)?;

        let image_url = Url::parse(&img_src).map_err(FromElementError::InvalidImgUrl)?;

        Ok(Self { url, image_url })
    }
}

/// Fixup a url for parsing
///
/// Turns the link forms handled below into absolute urls so that they can be
/// handed to `Url::parse`:
///
/// * protocol-relative links (`//host/path`) are given the `https:` scheme;
/// * root-relative links (`/path`) are prefixed with `https://iqdb.org`;
/// * anything else is returned unchanged, borrowed from `link`, so the
///   common already-absolute case does not allocate.
fn fixup_url(link: &str) -> Cow<'_, str> {
    if link.starts_with("//") {
        // Checked first: a protocol-relative link also starts with '/'.
        Cow::Owned(format!("https:{}", link))
    } else if link.starts_with('/') {
        Cow::Owned(format!("https://iqdb.org{}", link))
    } else {
        Cow::Borrowed(link)
    }
}

#[cfg(test)]
mod test {
    use super::*;

    // Fixture pages: one with a best match, one without.
    const VALID: &str = include_str!("../../test_data/valid.html");
    const INVALID: &str = include_str!("../../test_data/invalid.html");

    /// Parse a fixture page, panicking if it cannot be turned into results.
    fn parse_fixture(source: &str) -> SearchResults {
        let document = Html::parse_document(source);
        SearchResults::from_html(&document).expect("failed to parse")
    }

    /// The valid fixture must produce a best match.
    #[test]
    fn parse_valid_search_results() {
        let results = parse_fixture(VALID);
        dbg!(&results);
        assert!(results.best_match.is_some());
    }

    /// The invalid fixture must still parse, but without a best match.
    #[test]
    fn parse_invalid_search_results() {
        let results = parse_fixture(INVALID);
        dbg!(&results);
        assert!(results.best_match.is_none());
    }
}