// docx_reader/reader/mod.rs
mod a_graphic;
mod a_graphic_data;
mod attributes;
mod custom_properties;
mod delete;
mod div;
mod doc_defaults;
mod doc_grid;
mod document;
mod document_rels;
mod drawing;
mod errors;
mod font_group;
mod font_scheme;
mod footer;
mod from_xml;
mod header;
mod hyperlink;
mod ignore;
mod insert;
mod level;
mod level_override;
mod mc_fallback;
mod numbering_property;
mod numberings;
mod paragraph;
mod paragraph_property;
mod paragraph_property_change;
mod pic;
mod read_zip;
mod rels;
mod run;
mod run_property;
mod section_property;
mod settings;
mod shading;
mod shape;
mod structured_data_tag;
mod style;
mod styles;
mod tab;
mod table;
mod table_borders;
mod table_cell;
mod table_cell_borders;
mod table_cell_margins;
mod table_cell_property;
mod table_property;
mod table_row;
mod tabs;
mod text_box_content;
mod theme;
mod web_settings;
mod wp_anchor;
mod wps_shape;
mod wps_text_box;
mod xml_element;
use std::{collections::HashMap, io::Cursor};
use zip::ZipArchive;
use crate::documents::*;
pub use attributes::*;
pub use document_rels::*;
pub use errors::ReaderError;
pub use from_xml::*;
pub use mc_fallback::*;
pub use read_zip::*;
pub use xml_element::*;
const DOC_RELATIONSHIP_TYPE: &str =
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument";
const CUSTOM_PROPERTIES_TYPE: &str =
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/custom-properties";
const STYLE_RELATIONSHIP_TYPE: &str =
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles";
const NUMBERING_RELATIONSHIP_TYPE: &str =
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/numbering";
const SETTINGS_TYPE: &str =
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/settings";
const WEB_SETTINGS_TYPE: &str =
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/webSettings";
const HEADER_TYPE: &str =
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/header";
const FOOTER_TYPE: &str =
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/footer";
const THEME_TYPE: &str =
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/theme";
const IMAGE_TYPE: &str =
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/image";
const HYPERLINK_TYPE: &str =
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink";
/// Loads every header part referenced by the document relationships.
///
/// Each relationship of type `HEADER_TYPE` is read from `archive` and parsed with
/// `Header::from_xml`. Loading is best-effort: an entry whose path is not valid
/// UTF-8, that cannot be read from the archive, or that fails to parse is skipped
/// instead of aborting the whole read.
///
/// Returns the successfully parsed headers keyed by their relationship id.
fn read_headers(
    rels: &ReadDocumentRels,
    archive: &mut ZipArchive<Cursor<&[u8]>>,
) -> HashMap<RId, Header> {
    rels.find_target_path(HEADER_TYPE)
        .unwrap_or_default()
        .into_iter()
        .filter_map(|(rid, path, ..)| {
            // `?` on each step drops the entry on failure rather than panicking,
            // so one bad header cannot take down the whole document read.
            let data = read_zip(archive, path.to_str()?).ok()?;
            let header = Header::from_xml(&data[..]).ok()?;
            Some((rid, header))
        })
        .collect()
}
/// Loads every footer part referenced by the document relationships.
///
/// Each relationship of type `FOOTER_TYPE` is read from `archive` and parsed with
/// `Footer::from_xml`. Loading is best-effort: an entry whose path is not valid
/// UTF-8, that cannot be read from the archive, or that fails to parse is skipped
/// instead of aborting the whole read.
///
/// Returns the successfully parsed footers keyed by their relationship id.
fn read_footers(
    rels: &ReadDocumentRels,
    archive: &mut ZipArchive<Cursor<&[u8]>>,
) -> HashMap<RId, Footer> {
    rels.find_target_path(FOOTER_TYPE)
        .unwrap_or_default()
        .into_iter()
        .filter_map(|(rid, path, ..)| {
            // `?` on each step drops the entry on failure rather than panicking,
            // so one bad footer cannot take down the whole document read.
            let data = read_zip(archive, path.to_str()?).ok()?;
            let footer = Footer::from_xml(&data[..]).ok()?;
            Some((rid, footer))
        })
        .collect()
}
/// Loads every theme part referenced by the document relationships.
///
/// Each relationship of type `THEME_TYPE` is read from `archive` and parsed with
/// `Theme::from_xml`. Loading is best-effort: an entry whose path is not valid
/// UTF-8, that cannot be read from the archive, or that fails to parse is skipped
/// instead of aborting the whole read.
///
/// Returns the successfully parsed themes in relationship order; the relationship
/// ids are not retained.
fn read_themes(rels: &ReadDocumentRels, archive: &mut ZipArchive<Cursor<&[u8]>>) -> Vec<Theme> {
    rels.find_target_path(THEME_TYPE)
        .unwrap_or_default()
        .into_iter()
        .filter_map(|(_rid, path, ..)| {
            // `?` on each step drops the entry on failure rather than panicking,
            // so one bad theme cannot take down the whole document read.
            let data = read_zip(archive, path.to_str()?).ok()?;
            Theme::from_xml(&data[..]).ok()
        })
        .collect()
}
pub fn read_docx(buf: &[u8]) -> Result<Docx, ReaderError> {
let mut docx = Docx::new();
let cur = Cursor::new(buf);
let mut archive = zip::ZipArchive::new(cur)?;
let _content_types = {
let data = read_zip(&mut archive, "[Content_Types].xml")?;
ContentTypes::from_xml(&data[..])?
};
let rels = {
let data = read_zip(&mut archive, "_rels/.rels")?;
Rels::from_xml(&data[..])?
};
let main_rel = rels
.find_target(DOC_RELATIONSHIP_TYPE)
.ok_or(ReaderError::DocumentNotFoundError);
let document_path = if let Ok(rel) = main_rel {
rel.2.clone()
} else {
"word/document.xml".to_owned()
};
if let Some(custom_props) = rels.find_target(CUSTOM_PROPERTIES_TYPE) {
let data = read_zip(&mut archive, &custom_props.2);
if let Ok(data) = data {
if let Ok(custom) = CustomProps::from_xml(&data[..]) {
docx.doc_props.custom = custom;
}
}
}
let rels = read_document_rels(&mut archive, &document_path)?;
let headers = read_headers(&rels, &mut archive);
let footers = read_footers(&rels, &mut archive);
docx.themes = read_themes(&rels, &mut archive);
if let Some(h) = docx.document.section_property.header_reference.clone() {
if let Some(header) = headers.get(&h.id) {
docx.document = docx.document.header(header.clone(), &h.id);
let count = docx.document_rels.header_count + 1;
docx.document_rels.header_count = count;
docx.content_type = docx.content_type.add_header();
}
}
if let Some(ref h) = docx
.document
.section_property
.first_header_reference
.clone()
{
if let Some(header) = headers.get(&h.id) {
docx.document = docx.document.first_header(header.clone(), &h.id);
let count = docx.document_rels.header_count + 1;
docx.document_rels.header_count = count;
docx.content_type = docx.content_type.add_header();
}
}
if let Some(ref h) = docx.document.section_property.even_header_reference.clone() {
if let Some(header) = headers.get(&h.id) {
docx.document = docx.document.even_header(header.clone(), &h.id);
let count = docx.document_rels.header_count + 1;
docx.document_rels.header_count = count;
docx.content_type = docx.content_type.add_header();
}
}
if let Some(f) = docx.document.section_property.footer_reference.clone() {
if let Some(footer) = footers.get(&f.id) {
docx.document = docx.document.footer(footer.clone(), &f.id);
let count = docx.document_rels.footer_count + 1;
docx.document_rels.footer_count = count;
docx.content_type = docx.content_type.add_footer();
}
}
if let Some(ref f) = docx
.document
.section_property
.first_footer_reference
.clone()
{
if let Some(footer) = footers.get(&f.id) {
docx.document = docx.document.first_footer(footer.clone(), &f.id);
let count = docx.document_rels.footer_count + 1;
docx.document_rels.footer_count = count;
docx.content_type = docx.content_type.add_footer();
}
}
if let Some(ref f) = docx.document.section_property.even_footer_reference.clone() {
if let Some(footer) = footers.get(&f.id) {
docx.document = docx.document.even_footer(footer.clone(), &f.id);
let count = docx.document_rels.footer_count + 1;
docx.document_rels.footer_count = count;
docx.content_type = docx.content_type.add_footer();
}
}
let style_path = rels.find_target_path(STYLE_RELATIONSHIP_TYPE);
if let Some(paths) = style_path {
if let Some((_, style_path, ..)) = paths.get(0) {
let data = read_zip(
&mut archive,
style_path.to_str().expect("should have styles"),
)?;
let styles = Styles::from_xml(&data[..])?;
docx = docx.styles(styles);
}
}
let num_path = rels.find_target_path(NUMBERING_RELATIONSHIP_TYPE);
if let Some(paths) = num_path {
if let Some((_, num_path, ..)) = paths.get(0) {
let data = read_zip(
&mut archive,
num_path.to_str().expect("should have numberings"),
)?;
let nums = Numberings::from_xml(&data[..])?;
docx = docx.numberings(nums);
}
}
let settings_path = rels.find_target_path(SETTINGS_TYPE);
if let Some(paths) = settings_path {
if let Some((_, settings_path, ..)) = paths.get(0) {
let data = read_zip(
&mut archive,
settings_path.to_str().expect("should have settings"),
)?;
let settings = Settings::from_xml(&data[..])?;
docx = docx.settings(settings);
}
}
let web_settings_path = rels.find_target_path(WEB_SETTINGS_TYPE);
if let Some(paths) = web_settings_path {
if let Some((_, web_settings_path, ..)) = paths.get(0) {
let data = read_zip(
&mut archive,
web_settings_path
.to_str()
.expect("should have web settings"),
)?;
let web_settings = WebSettings::from_xml(&data[..])?;
docx = docx.web_settings(web_settings);
}
}
let media = rels.find_target_path(IMAGE_TYPE);
if let Some(paths) = media {
for (id, media, ..) in paths {
if let Ok(data) = read_zip(&mut archive, media.to_str().expect("should have media")) {
docx = docx.add_image(id, media.to_str().unwrap().to_string(), data);
}
}
}
let links = rels.find_target_path(HYPERLINK_TYPE);
if let Some(paths) = links {
for (id, target, mode) in paths {
if let Some(mode) = mode {
docx =
docx.add_hyperlink(id, target.to_str().expect("should convert to str"), mode);
}
}
}
Ok(docx)
}