// llm_chain/document_stores/in_memory_document_store.rs

use std::collections::HashMap;

use crate::document_stores::document_store::*;
use crate::schema::Document;

use async_trait::async_trait;
use serde::{de::DeserializeOwned, Serialize};
use thiserror::Error;

/// A document in the form it is held inside the in-memory store.
///
/// Pairs the document text with optional metadata of type `M`, which must be
/// both serializable and deserializable through serde.
#[derive(Debug, Clone)]
pub struct InMemoryDocument<M: serde::Serialize + serde::de::DeserializeOwned> {
    /// Text content of the document.
    page_content: String,
    /// Optional metadata attached to the document.
    metadata: Option<M>,
}

/// Builds a [`Document`] from a borrowed [`InMemoryDocument`].
///
/// `M` is not required to implement `Clone`, so the metadata is duplicated by
/// serializing it to a JSON string and deserializing that string back into `M`.
///
/// # Panics
///
/// Panics if the metadata cannot be serialized to JSON, or if the produced JSON
/// cannot be deserialized back into `M`.
impl<M> From<&InMemoryDocument<M>> for Document<M>
where
    M: serde::Serialize + serde::de::DeserializeOwned,
{
    fn from(val: &InMemoryDocument<M>) -> Self {
        // JSON round-trip stands in for `Clone`, which `M` is not bound by.
        let metadata = val.metadata.as_ref().map(|meta| {
            let json = serde_json::to_string(meta).unwrap();
            serde_json::from_str::<M>(&json).unwrap()
        });
        let page_content = val.page_content.clone();

        Document {
            page_content,
            metadata,
        }
    }
}

/// Builds an [`InMemoryDocument`] from a borrowed [`Document`].
///
/// `M` is not required to implement `Clone`, so the metadata is duplicated by
/// serializing it to a JSON string and deserializing that string back into `M`.
///
/// # Panics
///
/// Panics if the metadata cannot be serialized to JSON, or if the produced JSON
/// cannot be deserialized back into `M`.
impl<M> From<&Document<M>> for InMemoryDocument<M>
where
    M: serde::Serialize + serde::de::DeserializeOwned,
{
    fn from(val: &Document<M>) -> Self {
        // JSON round-trip stands in for `Clone`, which `M` is not bound by.
        let metadata = val.metadata.as_ref().map(|meta| {
            let json = serde_json::to_string(meta).unwrap();
            serde_json::from_str::<M>(&json).unwrap()
        });
        let page_content = val.page_content.clone();

        InMemoryDocument {
            page_content,
            metadata,
        }
    }
}

/// Errors reported by the in-memory document store.
#[derive(Debug, Error)]
pub enum InMemoryDocumentStoreError {
    /// Wraps a `serde_json::Error`; the `#[from]` attribute derives the
    /// corresponding `From` conversion so `?` can be used on serde_json results.
    #[error("Serde Error: {0}")]
    Serde(#[from] serde_json::Error),
    /// A document was inserted under a key that is already present in the store.
    /// The payload is the conflicting key rendered as a string.
    #[error("Key \"{0}\" already exists!")]
    KeyConflict(String),
}

// Marker impl: tags this error type as a `DocumentStoreError`. No methods are
// implemented here, so the body is empty.
impl DocumentStoreError for InMemoryDocumentStoreError {}

/// A document store that keeps all of its documents in a `HashMap` in memory.
///
/// Documents are keyed by a `usize` id and carry metadata of type `M`.
pub struct InMemoryDocumentStore<M: Serialize + DeserializeOwned + Send + Sync> {
    /// Stored documents, indexed by their id.
    map: HashMap<usize, InMemoryDocument<M>>,
}

impl<M: Serialize + DeserializeOwned + Send + Sync> InMemoryDocumentStore<M> {
    /// Creates a store that contains no documents.
    pub fn new() -> Self {
        Self {
            map: HashMap::new(),
        }
    }
}

impl<M> Default for InMemoryDocumentStore<M>
where
    M: Serialize + DeserializeOwned + Send + Sync,
{
    fn default() -> Self {
        Self::new()
    }
}

/// `DocumentStore` implementation backed by the in-memory map, using `usize` ids.
#[async_trait]
impl<M> DocumentStore<usize, M> for InMemoryDocumentStore<M>
where
    M: Serialize + DeserializeOwned + Send + Sync,
{
    type Error = InMemoryDocumentStoreError;

    /// Looks up the document stored under `id`.
    ///
    /// Returns `Ok(None)` when no document has that id. A found entry is converted
    /// into a fresh `Document` through the `From<&InMemoryDocument<M>>` impl, so the
    /// caller receives an independent copy rather than a reference into the store.
    async fn get(&self, id: &usize) -> Result<Option<Document<M>>, Self::Error> {
        Ok(self.map.get(id).map(|stored| stored.into()))
    }

    /// Returns an id that is not currently used by any stored document.
    ///
    /// The search starts at the number of stored documents, which is the next id
    /// when ids have been handed out contiguously from zero. Because `insert`
    /// accepts arbitrary keys, that value may already be taken; in that case the
    /// candidate is advanced until a free id is found. With `n` stored documents
    /// at most `n` candidates can be occupied, so the loop always terminates.
    async fn next_id(&self) -> Result<usize, Self::Error> {
        let mut candidate = self.map.len();
        while self.map.contains_key(&candidate) {
            candidate += 1;
        }
        Ok(candidate)
    }

    /// Inserts every document in `documents` under its key.
    ///
    /// # Errors
    ///
    /// Returns `InMemoryDocumentStoreError::KeyConflict` if any key in `documents`
    /// is already present. All keys are validated before anything is written, so
    /// a conflict leaves the store unchanged instead of partially updated.
    async fn insert(&mut self, documents: &HashMap<usize, Document<M>>) -> Result<(), Self::Error> {
        // Validate the whole batch first: HashMap iteration order is unspecified,
        // so inserting while checking would leave an arbitrary subset behind.
        if let Some(taken) = documents.keys().find(|key| self.map.contains_key(*key)) {
            return Err(InMemoryDocumentStoreError::KeyConflict(taken.to_string()));
        }

        self.map.reserve(documents.len());
        for (key, value) in documents {
            self.map.insert(*key, value.into());
        }

        Ok(())
    }
}