======
File path: ./Cargo.toml
[package]
name = "metatron"
version = "0.2.0"
edition = "2021"
authors = ["Evgeny Igumnov <igumnovnsk@gmail.com>"]
repository = "https://github.com/igumnoff/metatron"
documentation = "https://docs.rs/metatron"
license-file = "LICENSE"
keywords = ["report-generation", "data-reporting", "text-report", "pdf", "template-engine"]
categories = ["text-processing", "web-programming"]
readme = "README.md"
description = "Metatron report generation library: builds documents from YAML templates and JSON data"
[dependencies]
anyhow = "1.0.75"
thiserror = "1.0.44"
shiva = {path = "../shiva/lib"}
#shiva = "0.1.14"
serde_yaml = "0.9.34"
serde_json = "1.0.115"
bytes = "1.5.0"
[dev-dependencies]
env_logger = "0.10.0"
======
File path: ./src/core.rs
use bytes::Bytes;
use std::collections::HashMap;
use std::fmt::Debug;
use thiserror::Error;
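/// In-memory representation of a document: its content elements plus page geometry
/// (dimensions in millimetres) and optional per-page header and footer elements.
/// `Document::new` defaults to an A4 page (210 x 297 mm) with 10 mm indents.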
#[derive(Debug)]
pub struct Document {
pub elements: Vec<Element>,
pub page_width: f32,
pub page_height: f32,
pub left_page_indent: f32,
pub right_page_indent: f32,
pub top_page_indent: f32,
pub bottom_page_indent: f32,
pub page_header: Vec<Element>,
pub page_footer: Vec<Element>,
}
impl Document {
pub fn new(elements: Vec<Element>) -> Document {
Document {
elements,
page_width: 210.0,
page_height: 297.0,
left_page_indent: 10.0,
right_page_indent: 10.0,
top_page_indent: 10.0,
bottom_page_indent: 10.0,
page_header: vec![],
page_footer: vec![],
}
}
}
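/// Conversion between raw bytes and a [`Document`]: `parse` builds a document from
/// input bytes plus named image resources, while `generate` renders a document back
/// into bytes together with any extracted images.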
pub trait TransformerTrait {
fn parse(document: &Bytes, images: &HashMap<String, Bytes>) -> anyhow::Result<Document>;
fn generate(document: &Document) -> anyhow::Result<(Bytes, HashMap<String, Bytes>)>;
}
#[derive(Error, Debug)]
pub enum ParserError {
#[error("Parser error")]
Common,
}
#[derive(Error, Debug)]
pub enum GeneratorError {
#[error("Generator error")]
Common,
}
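/// A single building block of a [`Document`]: plain text, headers, paragraphs,
/// tables, lists, images and hyperlinks.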
#[derive(Debug, Clone)]
pub enum Element {
Text {
text: String,
size: u8,
},
Header {
level: u8,
text: String,
},
Paragraph {
elements: Vec<Element>,
},
Table {
headers: Vec<TableHeader>,
rows: Vec<TableRow>,
},
List {
elements: Vec<ListItem>,
numbered: bool,
},
Image {
bytes: Bytes,
title: String,
alt: String,
image_type: ImageType,
},
Hyperlink {
title: String,
url: String,
alt: String,
},
}
#[derive(Debug, Clone)]
pub struct ListItem {
pub element: Element,
}
#[derive(Debug, Clone)]
pub struct TableHeader {
pub element: Element,
pub width: f32,
}
#[derive(Debug, Clone)]
pub struct TableRow {
pub cells: Vec<TableCell>,
}
#[derive(Debug, Clone)]
pub struct TableCell {
pub element: Element,
}
#[derive(Debug, Clone)]
pub enum ImageType {
Png,
Jpeg,
}
======
File path: ./src/lib.rs
//!
#![doc = include_str!("../README.md")]
//!
use bytes::Bytes;
use serde_json::Value as JValue;
use serde_yaml::Value as YValue;
use shiva::core::Element::{Header, Paragraph, Table, Text};
use shiva::core::{Document, Element, TableCell, TableHeader, TableRow};
use std::collections::HashMap;
use crate::ReportError::Common;
use thiserror::Error;
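/// Report generator: merges a YAML report template with JSON data into a
/// [`shiva::core::Document`] that can then be rendered by a shiva transformer.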
pub struct Report;
impl Report {
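/// Builds a [`shiva::core::Document`] from a YAML `template` and a JSON `data` string.
///
/// The template may define `title`, `column_header`, `row`, `column_footer`,
/// `page_header`, `page_footer` and `summary` sections. Two placeholder forms are
/// resolved while generating:
///
/// * `$P{name}` is replaced with the value of `data["params"]["name"]`
///   (in titles, column footers and summary paragraphs);
/// * `$F(field)` in a `row` value is replaced with `field` taken from each entry
///   of `data["rows"]`.
///
/// A hypothetical template/data pair (the keys mirror what this function reads; the
/// concrete field names are only illustrative):
///
/// ```yaml
/// title:
///   - header: "Report for $P{company}"
///     level: 1
/// column_header:
///   - name: "Name"
///     width: 50
/// row:
///   - value: "$F(name)"
/// column_footer:
///   - value: "Total: $P{total}"
/// summary:
///   - paragraph:
///       - text: "Generated for $P{company}"
///         size: 10
/// ```
///
/// ```json
/// { "rows": [ { "name": "Alice" } ], "params": { "company": "Acme", "total": 1 } }
/// ```
///
/// A minimal usage sketch (the file names are illustrative):
///
/// ```no_run
/// use std::collections::HashMap;
/// use metatron::Report;
///
/// let template = std::fs::read_to_string("report-template.yaml").expect("read template");
/// let data = std::fs::read_to_string("report-data.json").expect("read data");
/// let images = HashMap::new();
/// let document = Report::generate(&template, &data, &images).expect("generate report");
/// // Pass `document` to a shiva transformer (e.g. shiva::pdf::Transformer) to render it.
/// ```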
pub fn generate(
template: &str,
data: &str,
_images: &HashMap<String, Bytes>,
) -> anyhow::Result<Document> {
let template: YValue = serde_yaml::from_str(template)?;
let data: JValue = serde_json::from_str(data)?;
let params_src = data["params"]
.as_object()
.ok_or(Common("Missing 'params' in data".to_string()))?;
let params: HashMap<String, String> = params_src
.iter()
.map(|(k, v)| {
let value: String = if v.is_number() {
v.to_string()
} else {
v.as_str().unwrap_or("").to_string()
};
(k.clone(), value)
})
.collect();
let mut elements: Vec<Element> = vec![];
let mut page_header: Vec<Element> = vec![];
let mut page_footer: Vec<Element> = vec![];
if let Some(title) = template["title"].as_sequence() {
for header in title {
if let Some(header_text) = header["header"].as_str() {
let mut resolved_text = header_text.to_string();
for (key, value) in &params {
resolved_text = resolved_text.replace(&format!("$P{{{}}}", key), &value);
}
let header_element = Header {
text: resolved_text,
level: header["level"].as_u64().unwrap_or(1) as u8,
};
elements.push(header_element);
}
}
}
if let Some(columns) = template["column_header"].as_sequence() {
let mut headers = Vec::new();
for column in columns {
if let Some(name) = column["name"].as_str() {
let text_element = Text {
text: name.to_string(),
size: 8,
};
let width = column["width"].as_f64().unwrap_or(20.0) as f32;
let header_element = TableHeader {
element: text_element,
width,
};
headers.push(header_element);
}
}
let mut rows = Vec::new();
if let Some(data_rows) = data["rows"].as_array() {
for data_row in data_rows {
let mut cells = Vec::new();
if let Some(row_configs) = template["row"].as_sequence() {
for row_config in row_configs {
if let Some(value_key) = row_config["value"].as_str() {
let field_name =
value_key.trim_start_matches("$F(").trim_end_matches(")");
if let Some(value) = data_row[field_name].as_str() {
let text_element = Text {
text: value.to_string(),
size: 8,
};
cells.push(TableCell {
element: text_element,
});
}
if let Some(value) = data_row[field_name].as_number() {
let value = value.to_string();
let text_element = Text {
text: value,
size: 8,
}; // Default font size for cells
cells.push(TableCell {
element: text_element,
});
}
}
}
}
rows.push(TableRow { cells });
}
}
if let Some(footer_configs) = template["column_footer"].as_sequence() {
let mut footer_cells = Vec::new();
for footer_config in footer_configs {
if let Some(value_key) = footer_config["value"].as_str() {
let mut resolved_text = value_key.to_string();
for (key, value) in &params {
resolved_text =
resolved_text.replace(&format!("$P{{{}}}", key), &value);
}
if resolved_text.is_empty() {
resolved_text = " ".to_string();
}
let text_element = Text {
text: resolved_text,
size: 8,
};
footer_cells.push(TableCell {
element: text_element,
});
} else {
let text_element = Text {
text: " ".to_string(),
size: 8,
};
footer_cells.push(TableCell {
element: text_element,
});
}
}
if !footer_cells.is_empty() {
let footer_row = TableRow {
cells: footer_cells,
};
rows.push(footer_row);
}
}
let table_element_with_footer = Table { headers, rows };
elements.push(table_element_with_footer);
}
if let Some(headers) = template["page_header"].as_sequence() {
for header in headers {
if let Some(header_text) = header["text"].as_str() {
let text_size = header["size"].as_u64().unwrap_or(7) as u8; // Default size if not specified
let text_element = Text {
text: header_text.to_string(),
size: text_size,
};
page_header.push(text_element);
}
}
}
if let Some(footers) = template["page_footer"].as_sequence() {
for footer in footers {
if let Some(footer_text) = footer["text"].as_str() {
let text_size = footer["size"].as_u64().unwrap_or(7) as u8; // Default size if not specified
let text_element = Text {
text: footer_text.to_string(),
size: text_size,
};
page_footer.push(text_element);
}
}
}
if let Some(summary) = template["summary"].as_sequence() {
for paragraph_config in summary {
if let Some(paragraph_items) = paragraph_config["paragraph"].as_sequence() {
let mut paragraph_elements: Vec<Element> = vec![];
for text_item in paragraph_items {
if let Some(text_value) = text_item["text"].as_str() {
let mut resolved_text = text_value.to_string();
for (key, value) in &params {
resolved_text =
resolved_text.replace(&format!("$P{{{}}}", key), value);
}
let text_size = text_item["size"].as_u64().unwrap_or(10) as u8; // Default size if not specified
let text_element = Text {
text: resolved_text,
size: text_size,
};
paragraph_elements.push(text_element);
}
}
if !paragraph_elements.is_empty() {
let paragraph_element = Paragraph {
elements: paragraph_elements,
};
elements.push(paragraph_element);
}
}
}
}
let mut document = Document::new(elements);
document.page_header = page_header;
document.page_footer = page_footer;
Ok(document)
}
}
#[derive(Error, Debug)]
pub enum ReportError {
#[error("Report error: {0}")]
Common(String),
}
#[cfg(test)]
mod tests {
use super::*;
use shiva::core::TransformerTrait;
use std::collections::HashMap;
#[test]
fn test_generate() -> anyhow::Result<()> {
let template_vec = std::fs::read("data/report-template.yaml")?;
let template = std::str::from_utf8(&template_vec).unwrap();
let data_vec = std::fs::read("data/report-data.json")?;
let data = std::str::from_utf8(&data_vec).unwrap();
let images = HashMap::new();
let result = Report::generate(template, data, &images);
assert!(result.is_ok());
let doc = result?;
println!("{:?}", doc);
println!("=========================");
let result = shiva::pdf::Transformer::generate(&doc)?;
std::fs::write("./data/report.pdf", result.0)?;
Ok(())
}
}
======
File path: ./src/pdf.rs
use crate::core::Element::{Header, List, Paragraph, Table, Text};
use crate::core::{Document, Element, ListItem, ParserError, TableHeader, TransformerTrait};
use bytes::Bytes;
use lopdf::content::Content;
use lopdf::{Document as PdfDocument, Object, ObjectId};
use std::collections::{BTreeMap, HashMap};
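/// PDF transformer: `parse` extracts text content from a PDF via `lopdf`,
/// and `generate` renders a [`Document`] to PDF via `printpdf`.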
pub struct Transformer;
impl TransformerTrait for Transformer {
fn parse(document: &Bytes, _images: &HashMap<String, Bytes>) -> anyhow::Result<Document> {
let mut elements: Vec<Element> = Vec::new();
let pdf_document = PdfDocument::load_mem(&document)?;
for (_id, page_id) in pdf_document.get_pages() {
let objects = pdf_document.get_page_contents(page_id);
for object_id in objects {
let object = pdf_document.get_object(object_id)?;
parse_object(page_id, &pdf_document, &object, &mut elements)?;
}
}
Ok(Document::new(elements))
}
fn generate(document: &Document) -> anyhow::Result<(Bytes, HashMap<String, Bytes>)> {
use printpdf::*;
const PAGE_WIDTH: f32 = 210.0;
const PAGE_HEIGHT: f32 = 297.0;
let (mut pdf, mut page1, mut layer1) =
PdfDocument::new("PDF Document", Mm(PAGE_WIDTH), Mm(PAGE_HEIGHT), "Layer 1");
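// printpdf's coordinate origin is the bottom-left corner, so vertical positions
// tracked from the top of the page are converted with `page_height - vertical_position`
// when text is placed.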
fn render_table_header(
header: &TableHeader,
pdf: &mut PdfDocumentReference,
page: &mut PdfPageIndex,
layer: &mut PdfLayerIndex,
vertical_position: &mut f32,
horizontal_position: &mut f32,
document: &Document
) -> anyhow::Result<()> {
let font_size: f32 = match &header.element {
Text { size, .. } => *size as f32,
_ => 10.0,
};
let font = pdf.add_builtin_font(BuiltinFont::Helvetica)?;
let max_text_width = header.width;
let max_chars = (max_text_width / (0.3528 * font_size)) as usize;
let text_elements: Vec<String> = match &header.element {
Text { text, .. } => split_string(text, max_chars),
_ => vec![String::new()],
};
for text in text_elements {
let step: f32 = 0.3528 * font_size;
if (*vertical_position + step) > (document.page_height - document.bottom_page_indent) {
let (new_page, new_layer) = pdf.add_page(
Mm(document.page_width),
Mm(document.page_height),
"Layer 1"
);
*vertical_position = document.top_page_indent;
*page = new_page;
*layer = new_layer;
}
let current_layer = pdf.get_page(*page).get_layer(*layer);
current_layer.use_text(
text,
font_size,
Mm(document.left_page_indent + *horizontal_position),
Mm(document.page_height - *vertical_position),
&font
);
*vertical_position += step + 2.5; // Adjust vertical position for next element
}
Ok(())
}
fn generate_pdf(
document: &Document,
element: &Element,
pdf: &mut PdfDocumentReference,
page: &mut PdfPageIndex,
layer: &mut PdfLayerIndex,
vertical_position: &mut f32,
) -> anyhow::Result<()> {
match element {
Header { level, text } => {
let font_size = match level {
1 => 18.0, // Example font size for level 1 header
2 => 16.0, // Example font size for level 2 header
3 => 14.0, // Example font size for level 3 header
// Additional levels as needed...
_ => 12.0, // Default font size for other header levels
};
let font_width = (0.3528 * (font_size as f32) * 0.87) as f32;
let max_text_width = document.page_width
- document.left_page_indent
- document.right_page_indent;
let max_chars = (max_text_width / font_width) as usize;
let text_elements = split_string(text, max_chars);
for text in text_elements {
let step: f32 = 0.3528 * font_size as f32;
if (*vertical_position + step)
> (document.page_height - document.bottom_page_indent)
{
let (new_page, new_layer) = pdf.add_page(
Mm(document.page_width),
Mm(document.page_height),
"Layer 1",
);
*vertical_position = document.top_page_indent;
*layer = new_layer;
*page = new_page;
}
*vertical_position += step;
let font = pdf.add_builtin_font(BuiltinFont::Courier)?;
let current_layer = pdf.get_page(*page).get_layer(*layer);
current_layer.use_text(
text,
font_size as f32,
Mm(document.left_page_indent + 0.0),
Mm(document.page_height - *vertical_position),
&font,
);
*vertical_position = *vertical_position + 2.5;
}
}
Paragraph { elements } => {
for paragraph_element in elements {
match paragraph_element {
Text { text, size } => {
let font_width = (0.3528 * (*size as f32) * 0.87) as f32;
let max_text_width = document.page_width
- document.left_page_indent
- document.right_page_indent;
let max_chars = (max_text_width / font_width) as usize;
let text_elements = split_string(text, max_chars);
for text in text_elements {
let step: f32 = 0.3528 * *size as f32;
if (*vertical_position + step)
> (document.page_height - document.bottom_page_indent)
{
let (new_page, new_layer) = pdf.add_page(
Mm(document.page_width),
Mm(document.page_height),
"Layer 1",
);
*vertical_position = document.top_page_indent;
*layer = new_layer;
*page = new_page;
}
*vertical_position += step;
let font = pdf.add_builtin_font(BuiltinFont::Courier)?;
let current_layer = pdf.get_page(*page).get_layer(*layer);
current_layer.use_text(
text,
*size as f32,
Mm(document.left_page_indent + 0.0),
Mm(document.page_height - *vertical_position),
&font,
);
}
}
_ => {}
}
}
}
Table { headers, rows } => {
let mut vertical_position_max: f32 = *vertical_position;
if !headers.is_empty() {
let mut horizontal_position: f32 = 0.0;
let vertical_position_backup: f32 = *vertical_position;
for header in headers {
render_table_header(
&header,
pdf,
page,
layer,
vertical_position,
&mut horizontal_position,
document
)?;
horizontal_position = horizontal_position + header.width;
if *vertical_position > vertical_position_max {
vertical_position_max = *vertical_position;
}
*vertical_position = vertical_position_backup;
}
}
*vertical_position = vertical_position_max;
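// Table body rows are not rendered yet; only the header row is drawn.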
for row in rows {
for _cell in &row.cells {}
}
}
_ => {}
}
Ok(())
}
let mut vertical_position = document.top_page_indent;
for element in &document.elements {
generate_pdf(
document,
element,
&mut pdf,
&mut page1,
&mut layer1,
&mut vertical_position,
)?;
}
let result = pdf.save_to_bytes()?;
let bytes = Bytes::from(result);
Ok((bytes, HashMap::new()))
}
}
fn parse_object(
page_id: ObjectId,
pdf_document: &PdfDocument,
_object: &Object,
elements: &mut Vec<Element>,
) -> anyhow::Result<()> {
fn collect_text(
text: &mut String,
encoding: Option<&str>,
operands: &[Object],
elements: &mut Vec<Element>,
) -> anyhow::Result<()> {
for operand in operands.iter() {
// println!("2 {:?}", operand);
match *operand {
Object::String(ref bytes, _) => {
let decoded_text = PdfDocument::decode_text(encoding, bytes);
text.push_str(&decoded_text);
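// A one-byte string with value 0x01 is treated here as a list-bullet marker: the text
// collected so far is flushed into the current List (as a new item) or into the
// preceding Paragraph, and an empty List is started when none exists yet.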
if bytes.len() == 1 && bytes[0] == 1 {
match elements.last() {
None => {
let list_element = List {
elements: vec![],
numbered: false,
};
elements.push(list_element);
}
Some(el) => {
match el {
List { .. } => {
let old_list = elements.pop().unwrap();
// let list = old_list.list_as_ref()?;
if let List {
elements: list_elements,
numbered,
} = old_list
{
let mut list_item_elements = list_elements.clone();
let text_element = Text {
text: text.clone(),
size: 8,
};
let new_list_item_element = ListItem {
element: text_element,
};
list_item_elements.push(new_list_item_element);
let new_list = List {
elements: list_item_elements,
numbered,
};
elements.push(new_list);
text.clear();
}
}
Paragraph { .. } => {
let old_paragraph = elements.pop().unwrap();
// let paragraph = old_paragraph.paragraph_as_ref()?;
if let Paragraph {
elements: paragraph_elements,
} = old_paragraph
{
let mut paragraph_elements = paragraph_elements.clone();
let text_element = Text {
text: text.clone(),
size: 8,
};
paragraph_elements.push(text_element);
let new_paragraph = Paragraph {
elements: paragraph_elements,
};
elements.push(new_paragraph);
text.clear();
let list_element = List {
elements: vec![],
numbered: false,
};
elements.push(list_element);
}
}
_ => {
let list_element = List {
elements: vec![],
numbered: false,
};
elements.push(list_element);
}
}
}
}
}
}
Object::Array(ref arr) => {
let _ = collect_text(text, encoding, arr, elements);
text.push(' ');
}
Object::Integer(i) => {
if i < -100 {
text.push(' ');
}
}
_ => {}
}
}
Ok(())
}
let mut text = String::new();
let fonts = pdf_document.get_page_fonts(page_id);
let encodings = fonts
.into_iter()
.map(|(name, font)| (name, font.get_font_encoding()))
.collect::<BTreeMap<Vec<u8>, &str>>();
let vec = pdf_document.get_page_content(page_id)?;
let content = Content::decode(&vec)?;
let mut current_encoding = None;
for operation in &content.operations {
// println!("1 {:?}", operation.operator);
match operation.operator.as_ref() {
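// "Tm" (set text matrix) is treated as the start of a new text run: the text collected
// so far is flushed into the current Paragraph (or a new one).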
"Tm" => {
let text_element = Text {
text: text.clone(),
size: 8,
};
match elements.last() {
None => {
let paragraph_element = Paragraph {
elements: vec![text_element],
};
elements.push(paragraph_element);
}
Some(el) => match el {
Paragraph { .. } => {
let old_paragraph = elements.pop().unwrap();
if let Paragraph {
elements: paragraph_elements,
} = old_paragraph
{
let mut paragraph_elements = paragraph_elements.clone();
paragraph_elements.push(text_element);
let new_paragraph = Paragraph {
elements: paragraph_elements,
};
elements.push(new_paragraph);
}
}
_ => {
elements.push(text_element);
}
},
}
text.clear();
}
"Tf" => {
let current_font = operation
.operands
.first()
.ok_or_else(|| ParserError::Common)?
.as_name()?;
current_encoding = encodings.get(current_font).cloned();
}
"Tj" | "TJ" => {
_ = collect_text(&mut text, current_encoding, &operation.operands, elements);
}
"ET" => {
if !text.ends_with('\n') {
text.push('\n')
}
}
_ => {}
}
}
if !text.is_empty() {
let text_element = Text {
text: text.clone(),
size: 8,
};
match elements.last() {
None => {
let paragraph_element = Paragraph {
elements: vec![text_element],
};
elements.push(paragraph_element);
}
Some(el) => {
match el {
Paragraph { .. } => {
let old_paragraph = elements.pop().unwrap();
if let Paragraph {
elements: paragraph_elements,
} = old_paragraph
{
let mut paragraph_elements = paragraph_elements.clone();
paragraph_elements.push(text_element);
let new_paragraph = Paragraph {
elements: paragraph_elements,
};
elements.push(new_paragraph);
}
}
List { .. } => {
let old_list = elements.pop().unwrap();
// let list = old_list.list_as_ref()?;
if let List {
elements: list_elements,
numbered,
} = old_list
{
let mut list_item_elements = list_elements.clone();
let new_list_item_element = ListItem {
element: text_element,
};
list_item_elements.push(new_list_item_element);
let new_list = List {
elements: list_item_elements,
numbered,
};
elements.push(new_list);
}
}
_ => {}
}
}
}
}
// println!("{}", text);
Ok(())
}
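/// Splits `input` into chunks of at most `max_length` characters for naive line
/// wrapping; chunks may break in the middle of a word.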
fn split_string(input: &str, max_length: usize) -> Vec<String> {
let mut result = Vec::new();
let mut current_string = String::new();
for char in input.chars() {
if current_string.chars().count() < max_length {
current_string.push(char);
} else {
result.push(current_string);
current_string = char.to_string();
}
}
if !current_string.is_empty() {
result.push(current_string);
}
result
}
#[cfg(test)]
mod tests {
use crate::core::*;
use crate::pdf::Transformer;
use bytes::Bytes;
use std::collections::HashMap;
#[test]
fn test() -> anyhow::Result<()> {
let pdf = std::fs::read("test/data/document.pdf")?;
let pdf_bytes = Bytes::from(pdf);
let parsed = Transformer::parse(&pdf_bytes, &HashMap::new());
assert!(parsed.is_ok());
let parsed_document = parsed.unwrap();
println!("==========================");
println!("{:?}", parsed_document);
println!("==========================");
let generated_result = Transformer::generate(&parsed_document);
assert!(generated_result.is_ok());
Ok(())
}
}