dicom_anonymization/
processor.rs

1use crate::actions::{Action, HashLength};
2use crate::config::{Config, UidRoot};
3use crate::functions::anonymize::Anonymize;
4use crate::functions::date::HashDateAnonymizer;
5use crate::functions::errors::AnonymizeError;
6use crate::functions::hash::HashAnonymizer;
7use crate::functions::uid::UidAnonymizer;
8use crate::hashing::{Blake3Hasher, Hasher};
9use crate::vr;
10use dicom_core::header::Header;
11use dicom_core::value::{CastValueError, Value};
12use dicom_core::{DataElement, PrimitiveValue};
13use dicom_object::mem::InMemElement;
14use dicom_object::{AccessError, DefaultDicomObject};
15use log::warn;
16use std::borrow::Cow;
17use thiserror::Error;
18
19#[derive(Error, Debug, PartialEq)]
20pub enum Error {
21    #[error("Value error: {}", .0.to_lowercase())]
22    ValueError(String),
23
24    #[error("Element error: {}", .0.to_lowercase())]
25    ElementError(String),
26
27    #[error("Anonymization error: {}", .0.to_lowercase())]
28    AnonymizationError(String),
29}
30
31impl From<CastValueError> for Error {
32    fn from(err: CastValueError) -> Self {
33        Error::ValueError(format!("{err}"))
34    }
35}
36
37impl From<AccessError> for Error {
38    fn from(err: AccessError) -> Self {
39        Error::ElementError(format!("{err}"))
40    }
41}
42
43impl From<AnonymizeError> for Error {
44    fn from(err: AnonymizeError) -> Self {
45        Error::AnonymizationError(format!("{err}"))
46    }
47}
48
49pub type Result<T, E = Error> = std::result::Result<T, E>;
50
51pub trait Processor {
52    fn process_element<'a>(
53        &'a self,
54        obj: &DefaultDicomObject,
55        elem: &'a InMemElement,
56    ) -> Result<Option<Cow<'a, InMemElement>>>;
57}
58
59/// A processor for DICOM data elements that applies anonymization rules based on the given configuration
60///
61/// This processor uses a [`Config`] to determine how to transform individual DICOM elements
62/// according to defined anonymization actions like hashing, replacing, or emptying tag values,
63/// or completely removing tags.
64///
65/// Limitation: only top-level DICOM tags are processed for now, not tags nested inside sequences.
66/// This may change in the future.
67#[derive(Debug, Clone, PartialEq)]
68pub struct DataElementProcessor {
69    config: Config,
70}
71
72impl DataElementProcessor {
73    pub fn new(config: Config) -> Self {
74        Self { config }
75    }
76}
77
78impl Processor for DataElementProcessor {
79    /// Process a DICOM data element according to the configured anonymization rules
80    ///
81    /// Takes a DICOM object and one of its elements, applies the appropriate anonymization
82    /// action based on the configuration, and returns the result.
83    ///
84    /// # Arguments
85    ///
86    /// * `obj` - Reference to the DICOM object containing the element
87    /// * `elem` - Reference to the element to be processed
88    ///
89    /// # Returns
90    ///
91    /// Returns a `Result` containing:
92    /// * `Some(Cow<InMemElement>)` - The processed element, either borrowed or owned
93    /// * `None` - If the element should be removed
94    /// * `Err` - If there was an error processing the element
95    fn process_element<'a>(
96        &'a self,
97        obj: &DefaultDicomObject,
98        elem: &'a InMemElement,
99    ) -> Result<Option<Cow<'a, InMemElement>>> {
100        let hasher = Blake3Hasher::new();
101
102        match self.config.get_action(&elem.tag()) {
103            Action::Empty => process_action_empty(elem),
104            Action::Remove => Ok(None),
105            Action::Replace(new_value) => process_change_action_replace(elem, new_value),
106            Action::Hash(hash_length) => process_change_action_hash(elem, &hasher, *hash_length),
107            Action::HashDate(other_tag) => match obj.element(*other_tag) {
108                Ok(other_elem) => {
109                    if let Ok(other_value) = other_elem.value().string() {
110                        process_change_action_hash_date(elem, &hasher, other_value.into())
111                    } else {
112                        warn!(
113                            "did not change tag {} because the other tag {} does not have a valid value",
114                            elem.tag(),
115                            other_tag
116                        );
117                        Ok(Some(Cow::Borrowed(elem)))
118                    }
119                }
120                Err(_) => {
121                    warn!(
122                        "did not change tag {} because the other tag {} is not available",
123                        elem.tag(),
124                        other_tag
125                    );
126                    Ok(Some(Cow::Borrowed(elem)))
127                }
128            },
129            Action::HashUID => {
130                process_change_action_hash_uid(elem, &hasher, self.config.get_uid_root())
131            }
132            Action::Keep | Action::None => Ok(Some(Cow::Borrowed(elem))),
133        }
134    }
135}
136
137fn is_empty_element(elem: &InMemElement) -> bool {
138    elem.value() == &Value::Primitive(PrimitiveValue::Empty)
139}
140
141fn process_change_action_replace<'a>(
142    elem: &'a InMemElement,
143    new_value: &'a str,
144) -> Result<Option<Cow<'a, InMemElement>>> {
145    let mut elem = elem.clone();
146    elem.update_value(|v| {
147        if let Value::Primitive(p) = v {
148            *p = PrimitiveValue::Str(new_value.into());
149        }
150    });
151    Ok(Some(Cow::Owned(elem)))
152}
153
154fn process_change_action_hash<'a, H: Hasher>(
155    elem: &'a InMemElement,
156    hasher: &H,
157    hash_length: Option<HashLength>,
158) -> Result<Option<Cow<'a, InMemElement>>> {
159    if is_empty_element(elem) {
160        return Ok(Some(Cow::Borrowed(elem)));
161    }
162
163    let max_length = vr::max_length(elem.vr());
164    let length = match hash_length {
165        Some(length) => match max_length {
166            Some(max_length) if max_length < length.0 => Some(HashLength(max_length)),
167            _ => Some(HashLength(length.0)),
168        },
169        None => max_length.map(HashLength),
170    };
171
172    let value_anonymizer = HashAnonymizer::new(hasher, length);
173    let elem_value_as_string = elem.value().string()?;
174    let anonymized_value = value_anonymizer.anonymize(elem_value_as_string)?;
175
176    let new_elem = DataElement::new::<PrimitiveValue>(
177        elem.tag(),
178        elem.vr(),
179        PrimitiveValue::from(anonymized_value),
180    );
181    Ok(Some(Cow::Owned(new_elem)))
182}
183
184fn process_change_action_hash_uid<'a, H: Hasher>(
185    elem: &'a InMemElement,
186    hasher: &H,
187    uid_root: &'a UidRoot,
188) -> Result<Option<Cow<'a, InMemElement>>> {
189    if is_empty_element(elem) {
190        return Ok(Some(Cow::Borrowed(elem)));
191    }
192
193    let value_anonymizer = UidAnonymizer::new(hasher, uid_root);
194    let elem_value_as_string = elem.value().string()?;
195    let anonymized_value = value_anonymizer.anonymize(elem_value_as_string)?;
196
197    let new_elem = DataElement::new::<PrimitiveValue>(
198        elem.tag(),
199        elem.vr(),
200        PrimitiveValue::from(anonymized_value),
201    );
202    Ok(Some(Cow::Owned(new_elem)))
203}
204
205fn process_change_action_hash_date<'a, H: Hasher>(
206    elem: &'a InMemElement,
207    hasher: &H,
208    other_value: String,
209) -> Result<Option<Cow<'a, InMemElement>>> {
210    if is_empty_element(elem) {
211        return Ok(Some(Cow::Borrowed(elem)));
212    }
213
214    let value_anonymizer = HashDateAnonymizer::new(hasher, other_value);
215    let elem_value_as_string = elem.value().string()?;
216    let anonymized_value = value_anonymizer.anonymize(elem_value_as_string)?;
217
218    let new_elem = DataElement::new::<PrimitiveValue>(
219        elem.tag(),
220        elem.vr(),
221        PrimitiveValue::from(anonymized_value),
222    );
223    Ok(Some(Cow::Owned(new_elem)))
224}
225
226fn process_action_empty(elem: &InMemElement) -> Result<Option<Cow<InMemElement>>> {
227    let new_elem = DataElement::new::<PrimitiveValue>(elem.tag(), elem.vr(), PrimitiveValue::Empty);
228    Ok(Some(Cow::Owned(new_elem)))
229}
230
/// Processor that hands every element back unchanged.
struct DoNothingProcessor;
232
233impl DoNothingProcessor {
234    fn new() -> Self {
235        Self {}
236    }
237}
238
239impl Default for DoNothingProcessor {
240    fn default() -> Self {
241        Self::new()
242    }
243}
244
245impl Processor for DoNothingProcessor {
246    fn process_element<'a>(
247        &'a self,
248        _obj: &DefaultDicomObject,
249        elem: &'a InMemElement,
250    ) -> Result<Option<Cow<'a, InMemElement>>> {
251        // just return it as is, without any changes
252        Ok(Some(Cow::Borrowed(elem)))
253    }
254}
255
#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::ConfigBuilder;
    use dicom_core::header::HasLength;
    use dicom_core::{header, VR};
    use dicom_dictionary_std::tags;
    use dicom_object::meta::FileMetaTableBuilder;
    use dicom_object::{FileDicomObject, FileMetaTable, InMemDicomObject};

    /// File meta table shared by the in-memory test objects.
    fn make_file_meta() -> FileMetaTable {
        FileMetaTableBuilder::new()
            .media_storage_sop_class_uid("1.2.3")
            .media_storage_sop_instance_uid("2.3.4")
            .transfer_syntax("1.2.840.10008.1.2.1") // Explicit VR Little Endian
            .build()
            .unwrap()
    }

    /// Accession number element (VR SH) holding a 16-character value.
    fn accession_number() -> InMemElement {
        InMemElement::new(
            tags::ACCESSION_NUMBER,
            VR::SH,
            Value::from("0123456789ABCDEF"),
        )
    }

    /// Patient name element (VR PN) holding "John Doe".
    fn patient_name() -> InMemElement {
        InMemElement::new(tags::PATIENT_NAME, VR::PN, Value::from("John Doe"))
    }

    /// Study instance UID element (VR UI) holding a short UID.
    fn study_instance_uid() -> InMemElement {
        InMemElement::new(
            tags::STUDY_INSTANCE_UID,
            VR::UI,
            Value::from("12.34.56.78.9"),
        )
    }

    /// In-memory DICOM object that contains only `elem`.
    fn object_with(elem: InMemElement) -> FileDicomObject<InMemDicomObject> {
        let mut obj: FileDicomObject<InMemDicomObject> =
            FileDicomObject::new_empty_with_meta(make_file_meta());
        obj.put(elem);
        obj
    }

    #[test]
    fn test_is_empty_element() {
        let elem = InMemElement::new(
            tags::ACCESSION_NUMBER,
            VR::SH,
            Value::Primitive(PrimitiveValue::Empty),
        );
        assert!(is_empty_element(&elem));
    }

    #[test]
    fn test_process_change_action_replace() {
        let elem = accession_number();
        let processed = process_change_action_replace(&elem, "new_value_123").unwrap();
        assert_eq!(processed.unwrap().value(), &Value::from("new_value_123"));
    }

    #[test]
    fn test_process_change_action_hash() {
        let elem = accession_number();
        let hasher = Blake3Hasher::new();
        let processed = process_change_action_hash(&elem, &hasher, None).unwrap();
        assert_eq!(processed.unwrap().value().length(), header::Length(16));
    }

    #[test]
    fn test_process_change_action_hash_with_length() {
        let elem = accession_number();
        let hasher = Blake3Hasher::new();
        let processed = process_change_action_hash(&elem, &hasher, Some(HashLength(10))).unwrap();
        assert_eq!(processed.unwrap().value().length(), header::Length(10));
    }

    #[test]
    fn test_process_change_action_hash_length_more_than_max_length() {
        let elem = accession_number();
        let hasher = Blake3Hasher::new();
        let processed = process_change_action_hash(&elem, &hasher, Some(HashLength(32))).unwrap();
        assert_eq!(processed.unwrap().value().length(), header::Length(16));
    }

    #[test]
    fn test_process_change_action_hash_empty_input_element() {
        let elem = InMemElement::new(
            tags::ACCESSION_NUMBER,
            VR::SH,
            Value::Primitive(PrimitiveValue::Empty),
        );
        let hasher = Blake3Hasher::new();
        let processed = process_change_action_hash(&elem, &hasher, Some(HashLength(8))).unwrap();
        assert_eq!(processed.unwrap().into_owned(), elem);
    }

    #[test]
    fn test_process_change_action_hash_uid() {
        let elem = study_instance_uid();
        let hasher = Blake3Hasher::new();
        let uid_root = "".parse().unwrap();
        let processed = process_change_action_hash_uid(&elem, &hasher, &uid_root).unwrap();
        // make sure it's cut off at the max length for VR UI (i.e. 64)
        assert_eq!(
            processed.unwrap().into_owned().value().length(),
            header::Length(64)
        );
    }

    #[test]
    fn test_process_change_action_hash_uid_with_root() {
        let elem = study_instance_uid();
        let hasher = Blake3Hasher::new();
        let uid_root = "9999".parse().unwrap();
        let processed = process_change_action_hash_uid(&elem, &hasher, &uid_root).unwrap();
        let processed = processed.unwrap().into_owned();
        // make sure it's cut off at the max length for VR UI (i.e. 64)
        assert_eq!(processed.value().length(), header::Length(64));
        let processed_value: String = processed.value().to_str().unwrap().into();
        assert!(processed_value.starts_with("9999."));
    }

    #[test]
    fn test_process_change_action_hash_uid_empty_input_element() {
        let elem = InMemElement::new(
            tags::STUDY_INSTANCE_UID,
            VR::UI,
            Value::Primitive(PrimitiveValue::Empty),
        );
        let hasher = Blake3Hasher::new();
        let uid_root = "".parse().unwrap();
        let processed = process_change_action_hash_uid(&elem, &hasher, &uid_root).unwrap();
        assert_eq!(processed.unwrap().into_owned(), elem);
    }

    #[test]
    fn test_process_action_empty() {
        let elem = patient_name();
        let processed = process_action_empty(&elem).unwrap().unwrap();
        assert_eq!(processed.tag(), tags::PATIENT_NAME);
        assert_eq!(processed.vr(), VR::PN);
        assert_eq!(processed.value(), &Value::Primitive(PrimitiveValue::Empty));
    }

    #[test]
    fn test_process_change_action_hash_date() {
        let other_value = "203087";
        let elem = InMemElement::new(tags::STUDY_DATE, VR::DA, Value::from("20010102"));
        let hasher = Blake3Hasher::new();
        let processed =
            process_change_action_hash_date(&elem, &hasher, other_value.into()).unwrap();
        let processed = processed.unwrap().into_owned();
        assert_eq!(processed.value().length(), header::Length(8));
        assert_eq!(processed.value(), &Value::from("20000921"));
    }

    #[test]
    fn test_process_change_action_hash_date_extended_input_date_format() {
        let other_value = "203087";
        let elem = InMemElement::new(tags::STUDY_DATE, VR::DA, Value::from("2001-01-02"));
        let hasher = Blake3Hasher::new();
        let processed =
            process_change_action_hash_date(&elem, &hasher, other_value.into()).unwrap();
        let processed = processed.unwrap();
        assert_eq!(processed.value().length(), header::Length(10));
        assert_eq!(processed.value(), &Value::from("2000-09-21"));
    }

    #[test]
    fn test_process_change_action_hash_date_empty_input_element() {
        let elem = InMemElement::new(
            tags::STUDY_DATE,
            VR::DA,
            Value::Primitive(PrimitiveValue::Empty),
        );
        let hasher = Blake3Hasher::new();
        let processed = process_change_action_hash_date(&elem, &hasher, "123456".into()).unwrap();
        assert_eq!(processed.unwrap().into_owned(), elem);
    }

    #[test]
    fn test_process_element_hash_length() {
        let obj = object_with(accession_number());
        let config = ConfigBuilder::new()
            .tag_action(tags::ACCESSION_NUMBER, Action::Hash(None))
            .build();
        let processor = DataElementProcessor::new(config);

        let elem = obj.element(tags::ACCESSION_NUMBER).unwrap();
        let processed = processor.process_element(&obj, elem).unwrap();
        assert_eq!(processed.unwrap().value().length(), header::Length(16));
    }

    #[test]
    fn test_process_element_hash_max_length() {
        let obj = object_with(accession_number());
        let config = ConfigBuilder::new()
            .tag_action(tags::ACCESSION_NUMBER, Action::Hash(Some(HashLength(32))))
            .build();
        let processor = DataElementProcessor::new(config);

        let elem = obj.element(tags::ACCESSION_NUMBER).unwrap();
        let processed = processor.process_element(&obj, elem).unwrap();
        // new value length should have been cut off at the max length for SH VR, which is 16
        assert_eq!(processed.unwrap().value().length(), header::Length(16));
    }

    #[test]
    fn test_process_element_hash_length_with_value() {
        let obj = object_with(accession_number());
        let config = ConfigBuilder::new()
            .tag_action(tags::ACCESSION_NUMBER, Action::Hash(Some(HashLength(8))))
            .build();
        let processor = DataElementProcessor::new(config);

        let elem = obj.element(tags::ACCESSION_NUMBER).unwrap();
        let processed = processor.process_element(&obj, elem).unwrap();
        assert_eq!(processed.unwrap().value().length(), header::Length(8));
    }

    #[test]
    fn test_process_element_replace() {
        let obj = object_with(patient_name());
        let config = ConfigBuilder::new()
            .tag_action(tags::PATIENT_NAME, Action::Replace("Jane Doe".into()))
            .build();
        let processor = DataElementProcessor::new(config);

        let elem = obj.element(tags::PATIENT_NAME).unwrap();
        let processed = processor.process_element(&obj, elem).unwrap();
        assert_eq!(processed.unwrap().value(), &Value::from("Jane Doe"));
    }

    #[test]
    fn test_process_element_keep() {
        let obj = object_with(patient_name());
        let config = ConfigBuilder::new()
            .tag_action(tags::PATIENT_NAME, Action::Keep)
            .build();
        let processor = DataElementProcessor::new(config);

        let elem = obj.element(tags::PATIENT_NAME).unwrap();
        let processed = processor.process_element(&obj, elem).unwrap();
        assert_eq!(&processed.unwrap().into_owned(), elem);
    }

    #[test]
    fn test_process_element_empty() {
        let obj = object_with(patient_name());
        let config = ConfigBuilder::new()
            .tag_action(tags::PATIENT_NAME, Action::Empty)
            .build();
        let processor = DataElementProcessor::new(config);

        let elem = obj.element(tags::PATIENT_NAME).unwrap();
        let processed = processor.process_element(&obj, elem).unwrap();
        assert_eq!(
            processed.unwrap().value(),
            &Value::Primitive(PrimitiveValue::Empty)
        );
    }

    #[test]
    fn test_process_element_remove() {
        let obj = object_with(patient_name());
        let config = ConfigBuilder::new()
            .tag_action(tags::PATIENT_NAME, Action::Remove)
            .build();
        let processor = DataElementProcessor::new(config);

        let elem = obj.element(tags::PATIENT_NAME).unwrap();
        let processed = processor.process_element(&obj, elem).unwrap();
        assert_eq!(processed, None);
    }

    #[test]
    fn test_do_nothing_processor() {
        let obj = object_with(patient_name());
        let processor = DoNothingProcessor::new();

        let elem = obj.element(tags::PATIENT_NAME).unwrap();
        let processed = processor.process_element(&obj, elem).unwrap();
        assert_eq!(processed.unwrap().into_owned(), elem.clone());
    }
}