scraper/
lib.rs

1//! HTML parsing and querying with CSS selectors.
2//!
3//! `scraper` is on [Crates.io][crate] and [GitHub][github].
4//!
5//! [crate]: https://crates.io/crates/scraper
6//! [github]: https://github.com/programble/scraper
7//!
8//! Scraper provides an interface to Servo's `html5ever` and `selectors` crates, for browser-grade
9//! parsing and querying.
10//!
11//! # Examples
12//!
13//! ## Parsing a document
14//!
15//! ```
16//! use scraper::Html;
17//!
18//! let html = r#"
19//!     <!DOCTYPE html>
20//!     <meta charset="utf-8">
21//!     <title>Hello, world!</title>
22//!     <h1 class="foo">Hello, <i>world!</i></h1>
23//! "#;
24//!
25//! let document = Html::parse_document(html);
26//! ```
27//!
28//! ## Parsing a fragment
29//!
30//! ```
31//! use scraper::Html;
32//! let fragment = Html::parse_fragment("<h1>Hello, <i>world!</i></h1>");
33//! ```
34//!
35//! ## Parsing a selector
36//!
37//! ```
38//! use scraper::Selector;
39//! let selector = Selector::parse("h1.foo").unwrap();
40//! ```
41//!
42//! ## Selecting elements
43//!
44//! ```
45//! use scraper::{Html, Selector};
46//!
47//! let html = r#"
48//!     <ul>
49//!         <li>Foo</li>
50//!         <li>Bar</li>
51//!         <li>Baz</li>
52//!     </ul>
53//! "#;
54//!
55//! let fragment = Html::parse_fragment(html);
56//! let selector = Selector::parse("li").unwrap();
57//!
58//! for element in fragment.select(&selector) {
59//!     assert_eq!("li", element.value().name());
60//! }
61//! ```
62//!
//! ## Selecting descendant elements
64//!
65//! ```
66//! use scraper::{Html, Selector};
67//!
68//! let html = r#"
69//!     <ul>
70//!         <li>Foo</li>
71//!         <li>Bar</li>
72//!         <li>Baz</li>
73//!     </ul>
74//! "#;
75//!
76//! let fragment = Html::parse_fragment(html);
77//! let ul_selector = Selector::parse("ul").unwrap();
78//! let li_selector = Selector::parse("li").unwrap();
79//!
80//! let ul = fragment.select(&ul_selector).next().unwrap();
81//! for element in ul.select(&li_selector) {
82//!     assert_eq!("li", element.value().name());
83//! }
84//! ```
85//!
86//! ## Accessing element attributes
87//!
88//! ```
89//! use scraper::{Html, Selector};
90//!
91//! let fragment = Html::parse_fragment(r#"<input name="foo" value="bar">"#);
92//! let selector = Selector::parse(r#"input[name="foo"]"#).unwrap();
93//!
94//! let input = fragment.select(&selector).next().unwrap();
95//! assert_eq!(Some("bar"), input.value().attr("value"));
96//! ```
97//!
98//! ## Serializing HTML and inner HTML
99//!
100//! ```
101//! use scraper::{Html, Selector};
102//!
103//! let fragment = Html::parse_fragment("<h1>Hello, <i>world!</i></h1>");
104//! let selector = Selector::parse("h1").unwrap();
105//!
106//! let h1 = fragment.select(&selector).next().unwrap();
107//!
108//! assert_eq!("<h1>Hello, <i>world!</i></h1>", h1.html());
109//! assert_eq!("Hello, <i>world!</i>", h1.inner_html());
110//! ```
111//!
//! ## Accessing descendant text
113//!
114//! ```
115//! use scraper::{Html, Selector};
116//!
117//! let fragment = Html::parse_fragment("<h1>Hello, <i>world!</i></h1>");
118//! let selector = Selector::parse("h1").unwrap();
119//!
120//! let h1 = fragment.select(&selector).next().unwrap();
121//! let text = h1.text().collect::<Vec<_>>();
122//!
123//! assert_eq!(vec!["Hello, ", "world!"], text);
124//! ```
125
126#![warn(
127    missing_docs,
128    missing_debug_implementations,
129    missing_copy_implementations,
130    trivial_casts,
131    trivial_numeric_casts,
132    unused_extern_crates,
133    unused_import_braces,
134    unused_qualifications,
135    variant_size_differences
136)]
137
138#[macro_use]
139extern crate html5ever;
140
141pub use crate::element_ref::ElementRef;
142pub use crate::html::{Html, HtmlTreeSink};
143pub use crate::node::Node;
144pub use crate::selector::Selector;
145
146pub use selectors::{attr::CaseSensitivity, Element};
147
148pub mod element_ref;
149pub mod error;
150pub mod html;
151pub mod node;
152pub mod selectable;
153pub mod selector;
154
#[cfg(feature = "atomic")]
pub(crate) mod tendril_util {
    // Variant of this module that is compiled when the `atomic` feature is enabled.
    use html5ever::tendril::{self, fmt::UTF8, Atomic, Tendril};

    /// UTF-8 string tendril parameterised with `tendril::Atomic`; stands in for the
    /// default `StrTendril` when the `atomic` feature is on.
    pub type StrTendril = Tendril<UTF8, Atomic>;

    /// Turns a standard `tendril::StrTendril` into the atomic [`StrTendril`].
    ///
    /// The input is first converted with `into_send()`, and the result is then
    /// converted into the atomic tendril type.
    pub fn make(s: tendril::StrTendril) -> StrTendril {
        StrTendril::from(s.into_send())
    }
}
166
167#[cfg(not(feature = "atomic"))]
168pub(crate) mod tendril_util {
169    use html5ever::tendril;
170    /// Primary string tendril type.
171    pub type StrTendril = tendril::StrTendril;
172
173    /// Return unaltered.
174    pub fn make(s: StrTendril) -> StrTendril {
175        s
176    }
177}
178
179pub use tendril_util::StrTendril;
180
181#[cfg(test)]
182mod test;