datafusion_common/
error.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! DataFusion error types
19#[cfg(feature = "backtrace")]
20use std::backtrace::{Backtrace, BacktraceStatus};
21
22use std::borrow::Cow;
23use std::collections::VecDeque;
24use std::error::Error;
25use std::fmt::{Display, Formatter};
26use std::io;
27use std::result;
28use std::sync::Arc;
29
30use crate::utils::datafusion_strsim::normalized_levenshtein;
31use crate::utils::quote_identifier;
32use crate::{Column, DFSchema, Diagnostic, TableReference};
33#[cfg(feature = "avro")]
34use apache_avro::Error as AvroError;
35use arrow::error::ArrowError;
36#[cfg(feature = "parquet")]
37use parquet::errors::ParquetError;
38use sqlparser::parser::ParserError;
39use tokio::task::JoinError;
40
/// Result type for operations that could result in a [`DataFusionError`].
///
/// The error type defaults to [`DataFusionError`] but can be overridden.
pub type Result<T, E = DataFusionError> = result::Result<T, E>;

/// Result type for operations that could result in a [`DataFusionError`] that
/// needs to be shared (the error is wrapped in an [`Arc`]).
pub type SharedResult<T> = result::Result<T, Arc<DataFusionError>>;

/// Error type for generic operations that could result in
/// [`DataFusionError::External`].
pub type GenericError = Box<dyn Error + Send + Sync>;
49
/// DataFusion error
///
/// Each variant either wraps an error produced by another library, carries a
/// plain message, or wraps another [`DataFusionError`] with extra information.
#[derive(Debug)]
pub enum DataFusionError {
    /// Error returned by arrow.
    ///
    /// 2nd argument is for optional backtrace
    ArrowError(ArrowError, Option<String>),
    /// Error when reading / writing Parquet data.
    #[cfg(feature = "parquet")]
    ParquetError(ParquetError),
    /// Error when reading Avro data.
    #[cfg(feature = "avro")]
    AvroError(AvroError),
    /// Error when reading / writing to / from an object_store (e.g. S3 or LocalFile)
    #[cfg(feature = "object_store")]
    ObjectStore(object_store::Error),
    /// Error when an I/O operation fails
    IoError(io::Error),
    /// Error when SQL is syntactically incorrect.
    ///
    /// 2nd argument is for optional backtrace
    SQL(ParserError, Option<String>),
    /// Error when a feature is not yet implemented.
    ///
    /// These errors are sometimes returned for features that are still in
    /// development and are not entirely complete. Often, these errors are
    /// tracked in our issue tracker.
    NotImplemented(String),
    /// Error due to bugs in DataFusion
    ///
    /// This error should not happen in normal usage of DataFusion. It results
    /// from something that wasn't expected/anticipated by the implementation
    /// and that is most likely a bug (the error message even encourages users
    /// to open a bug report). A user should not be able to trigger internal
    /// errors under normal circumstances by feeding in malformed queries, bad
    /// data, etc.
    ///
    /// Note that I/O errors (or any error that happens due to external systems)
    /// do NOT fall under this category. See other variants such as
    /// [`Self::IoError`] and [`Self::External`].
    ///
    /// DataFusion has internal invariants that the compiler is not always able
    /// to check. This error is raised when one of those invariants does not
    /// hold for some reason.
    Internal(String),
    /// Error during planning of the query.
    ///
    /// This error happens when the user provides a bad query or plan, for
    /// example the user attempts to call a function that doesn't exist, or if
    /// the types of a function call are not supported.
    Plan(String),
    /// Error for invalid or unsupported configuration options.
    Configuration(String),
    /// Error when there is a problem with the query related to schema.
    ///
    /// This error can be returned in cases such as when schema inference is not
    /// possible and when column names are not unique.
    ///
    /// 2nd argument is for optional backtrace.
    /// The optional backtrace is boxed to prevent <https://rust-lang.github.io/rust-clippy/master/index.html#/result_large_err>
    SchemaError(SchemaError, Box<Option<String>>),
    /// Error during execution of the query.
    ///
    /// This error is returned when an error happens during execution due to a
    /// malformed input. For example, the user passed malformed arguments to a
    /// SQL method, opened a CSV file that is broken, or tried to divide an
    /// integer by zero.
    Execution(String),
    /// [`JoinError`] during execution of the query.
    ///
    /// This error can't occur for unjoined tasks, such as execution shutdown.
    ExecutionJoin(JoinError),
    /// Error when resources (such as memory or scratch disk space) are exhausted.
    ///
    /// This error is thrown when a consumer cannot acquire additional memory
    /// or other resources needed to execute the query from the Memory Manager.
    ResourcesExhausted(String),
    /// Errors originating from outside DataFusion's core codebase.
    ///
    /// For example, a custom S3Error from the crate datafusion-objectstore-s3
    External(GenericError),
    /// Error with additional context
    ///
    /// 1st argument is the context description, 2nd argument is the wrapped error.
    Context(String, Box<DataFusionError>),
    /// Errors from either mapping LogicalPlans to/from Substrait plans
    /// or serializing/deserializing protobytes to Substrait plans
    Substrait(String),
    /// Error wrapped together with additional contextual information intended
    /// for end users, to help them understand what went wrong by providing
    /// human-readable messages, and locations in the source query that relate
    /// to the error in some way.
    Diagnostic(Box<Diagnostic>, Box<DataFusionError>),
    /// A collection of one or more [`DataFusionError`]. Useful in cases where
    /// DataFusion can recover from an erroneous state, and produce more errors
    /// before terminating. e.g. when planning a SELECT clause, DataFusion can
    /// synchronize to the next `SelectItem` if the previous one had errors. The
    /// end result is that the user can see errors about all `SelectItem`,
    /// instead of just the first one.
    Collection(Vec<DataFusionError>),
    /// A [`DataFusionError`] which shares an underlying [`DataFusionError`].
    ///
    /// This is useful when the same underlying [`DataFusionError`] is passed
    /// to multiple receivers. For example, when the source of a repartition
    /// errors and the error is propagated to multiple consumers.
    Shared(Arc<DataFusionError>),
}
155
/// Wraps an error with a description that also records the call site.
///
/// Expands to `$err.context(...)`, where the context string has the form
/// `"<desc> at <file>:<line>"`, built from `file!()` and `line!()` at the
/// macro invocation.
#[macro_export]
macro_rules! context {
    ($desc:expr, $err:expr) => {
        $err.context(format!("{} at {}:{}", $desc, file!(), line!()))
    };
}
162
/// Schema-related errors
///
/// The user-facing text for each variant is produced by the [`Display`]
/// implementation of this type.
#[derive(Debug)]
pub enum SchemaError {
    /// Schema contains a (possibly) qualified and unqualified field with same unqualified name
    AmbiguousReference { field: Column },
    /// Schema contains duplicate qualified field name
    DuplicateQualifiedField {
        // Boxed table qualifier of the duplicated field
        qualifier: Box<TableReference>,
        // Unqualified name of the duplicated field
        name: String,
    },
    /// Schema contains duplicate unqualified field name
    DuplicateUnqualifiedField { name: String },
    /// No field with this name
    FieldNotFound {
        // The column that could not be found
        field: Box<Column>,
        // The columns that were available; used to build suggestions in the
        // error message
        valid_fields: Vec<Column>,
    },
}
181
182impl Display for SchemaError {
183    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
184        match self {
185            Self::FieldNotFound {
186                field,
187                valid_fields,
188            } => {
189                write!(f, "No field named {}", field.quoted_flat_name())?;
190                let lower_valid_fields = valid_fields
191                    .iter()
192                    .map(|column| column.flat_name().to_lowercase())
193                    .collect::<Vec<String>>();
194
195                let valid_fields_names = valid_fields
196                    .iter()
197                    .map(|column| column.flat_name())
198                    .collect::<Vec<String>>();
199                if lower_valid_fields.contains(&field.flat_name().to_lowercase()) {
200                    write!(
201                        f,
202                        ". Column names are case sensitive. You can use double quotes to refer to the \"{}\" column \
203                        or set the datafusion.sql_parser.enable_ident_normalization configuration",
204                        field.quoted_flat_name()
205                    )?;
206                }
207                let field_name = field.name();
208                if let Some(matched) = valid_fields_names
209                    .iter()
210                    .filter(|str| normalized_levenshtein(str, field_name) >= 0.5)
211                    .collect::<Vec<&String>>()
212                    .first()
213                {
214                    write!(f, ". Did you mean '{matched}'?")?;
215                } else if !valid_fields.is_empty() {
216                    write!(
217                        f,
218                        ". Valid fields are {}",
219                        valid_fields
220                            .iter()
221                            .map(|field| field.quoted_flat_name())
222                            .collect::<Vec<String>>()
223                            .join(", ")
224                    )?;
225                }
226                write!(f, ".")
227            }
228            Self::DuplicateQualifiedField { qualifier, name } => {
229                write!(
230                    f,
231                    "Schema contains duplicate qualified field name {}.{}",
232                    qualifier.to_quoted_string(),
233                    quote_identifier(name)
234                )
235            }
236            Self::DuplicateUnqualifiedField { name } => {
237                write!(
238                    f,
239                    "Schema contains duplicate unqualified field name {}",
240                    quote_identifier(name)
241                )
242            }
243            Self::AmbiguousReference { field } => {
244                if field.relation.is_some() {
245                    write!(
246                        f,
247                        "Schema contains qualified field name {} and unqualified field name {} which would be ambiguous",
248                        field.quoted_flat_name(),
249                        quote_identifier(&field.name)
250                    )
251                } else {
252                    write!(
253                        f,
254                        "Ambiguous reference to unqualified field {}",
255                        field.quoted_flat_name()
256                    )
257                }
258            }
259        }
260    }
261}
262
// No overrides: `SchemaError` uses the default `Error` method implementations.
impl Error for SchemaError {}
264
265impl From<std::fmt::Error> for DataFusionError {
266    fn from(_e: std::fmt::Error) -> Self {
267        DataFusionError::Execution("Fail to format".to_string())
268    }
269}
270
271impl From<io::Error> for DataFusionError {
272    fn from(e: io::Error) -> Self {
273        DataFusionError::IoError(e)
274    }
275}
276
277impl From<ArrowError> for DataFusionError {
278    fn from(e: ArrowError) -> Self {
279        DataFusionError::ArrowError(e, None)
280    }
281}
282
283impl From<DataFusionError> for ArrowError {
284    fn from(e: DataFusionError) -> Self {
285        match e {
286            DataFusionError::ArrowError(e, _) => e,
287            DataFusionError::External(e) => ArrowError::ExternalError(e),
288            other => ArrowError::ExternalError(Box::new(other)),
289        }
290    }
291}
292
293impl From<&Arc<DataFusionError>> for DataFusionError {
294    fn from(e: &Arc<DataFusionError>) -> Self {
295        if let DataFusionError::Shared(e_inner) = e.as_ref() {
296            // don't re-wrap
297            DataFusionError::Shared(Arc::clone(e_inner))
298        } else {
299            DataFusionError::Shared(Arc::clone(e))
300        }
301    }
302}
303
/// Wraps a Parquet error in [`DataFusionError::ParquetError`].
#[cfg(feature = "parquet")]
impl From<ParquetError> for DataFusionError {
    fn from(parquet_err: ParquetError) -> Self {
        Self::ParquetError(parquet_err)
    }
}
310
/// Wraps an Avro error in [`DataFusionError::AvroError`].
#[cfg(feature = "avro")]
impl From<AvroError> for DataFusionError {
    fn from(avro_err: AvroError) -> Self {
        Self::AvroError(avro_err)
    }
}
317
/// Wraps an object store error in [`DataFusionError::ObjectStore`].
#[cfg(feature = "object_store")]
impl From<object_store::Error> for DataFusionError {
    fn from(store_err: object_store::Error) -> Self {
        Self::ObjectStore(store_err)
    }
}
324
/// Converts an object store path error into an `object_store::Error` and wraps
/// it in [`DataFusionError::ObjectStore`].
#[cfg(feature = "object_store")]
impl From<object_store::path::Error> for DataFusionError {
    fn from(path_err: object_store::path::Error) -> Self {
        let store_err: object_store::Error = path_err.into();
        Self::ObjectStore(store_err)
    }
}
331
332impl From<ParserError> for DataFusionError {
333    fn from(e: ParserError) -> Self {
334        DataFusionError::SQL(e, None)
335    }
336}
337
338impl From<GenericError> for DataFusionError {
339    fn from(err: GenericError) -> Self {
340        // If the error is already a DataFusionError, not wrapping it.
341        if err.is::<DataFusionError>() {
342            if let Ok(e) = err.downcast::<DataFusionError>() {
343                *e
344            } else {
345                unreachable!()
346            }
347        } else {
348            DataFusionError::External(err)
349        }
350    }
351}
352
353impl Display for DataFusionError {
354    fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
355        let error_prefix = self.error_prefix();
356        let message = self.message();
357        write!(f, "{error_prefix}{message}")
358    }
359}
360
361impl Error for DataFusionError {
362    fn source(&self) -> Option<&(dyn Error + 'static)> {
363        match self {
364            DataFusionError::ArrowError(e, _) => Some(e),
365            #[cfg(feature = "parquet")]
366            DataFusionError::ParquetError(e) => Some(e),
367            #[cfg(feature = "avro")]
368            DataFusionError::AvroError(e) => Some(e),
369            #[cfg(feature = "object_store")]
370            DataFusionError::ObjectStore(e) => Some(e),
371            DataFusionError::IoError(e) => Some(e),
372            DataFusionError::SQL(e, _) => Some(e),
373            DataFusionError::NotImplemented(_) => None,
374            DataFusionError::Internal(_) => None,
375            DataFusionError::Configuration(_) => None,
376            DataFusionError::Plan(_) => None,
377            DataFusionError::SchemaError(e, _) => Some(e),
378            DataFusionError::Execution(_) => None,
379            DataFusionError::ExecutionJoin(e) => Some(e),
380            DataFusionError::ResourcesExhausted(_) => None,
381            DataFusionError::External(e) => Some(e.as_ref()),
382            DataFusionError::Context(_, e) => Some(e.as_ref()),
383            DataFusionError::Substrait(_) => None,
384            DataFusionError::Diagnostic(_, e) => Some(e.as_ref()),
385            // Can't really make a Collection fit into the mold of "an error has
386            // at most one source", but returning the first one is probably good
387            // idea. Especially since `DataFusionError::Collection` is mostly
388            // meant for consumption by the end user, so shouldn't interfere
389            // with programmatic usage too much. Plus, having 1 or 5 errors
390            // doesn't really change the fact that the query is invalid and
391            // can't be executed.
392            DataFusionError::Collection(errs) => errs.first().map(|e| e as &dyn Error),
393            DataFusionError::Shared(e) => Some(e.as_ref()),
394        }
395    }
396}
397
398impl From<DataFusionError> for io::Error {
399    fn from(e: DataFusionError) -> Self {
400        io::Error::new(io::ErrorKind::Other, e)
401    }
402}
403
404impl DataFusionError {
405    /// The separator between the error message and the backtrace
406    pub const BACK_TRACE_SEP: &'static str = "\n\nbacktrace: ";
407
408    /// Get deepest underlying [`DataFusionError`]
409    ///
410    /// [`DataFusionError`]s sometimes form a chain, such as `DataFusionError::ArrowError()` in order to conform
411    /// to the correct error signature. Thus sometimes there is a chain several layers deep that can obscure the
412    /// original error. This function finds the lowest level DataFusionError possible.
413    ///
414    /// For example,  `find_root` will return`DataFusionError::ResourceExhausted` given the input
415    /// ```text
416    /// DataFusionError::ArrowError
417    ///   ArrowError::External
418    ///    Box(DataFusionError::Context)
419    ///      DataFusionError::ResourceExhausted
420    /// ```
421    ///
422    /// This may be the same as `self`.
423    pub fn find_root(&self) -> &Self {
424        // Note: This is a non-recursive algorithm so we do not run
425        // out of stack space, even for long error chains.
426
427        let mut last_datafusion_error = self;
428        let mut root_error: &dyn Error = self;
429        while let Some(source) = root_error.source() {
430            // walk the next level
431            root_error = source;
432            // remember the lowest datafusion error so far
433            if let Some(e) = root_error.downcast_ref::<DataFusionError>() {
434                last_datafusion_error = e;
435            } else if let Some(e) = root_error.downcast_ref::<Arc<DataFusionError>>() {
436                // As `Arc<T>::source()` calls through to `T::source()` we need to
437                // explicitly match `Arc<DataFusionError>` to capture it
438                last_datafusion_error = e.as_ref();
439            }
440        }
441        // return last checkpoint (which may be the original error)
442        last_datafusion_error
443    }
444
445    /// wraps self in Self::Context with a description
446    pub fn context(self, description: impl Into<String>) -> Self {
447        Self::Context(description.into(), Box::new(self))
448    }
449
450    /// Strips backtrace out of the error message
451    /// If backtrace enabled then error has a format "message" [`Self::BACK_TRACE_SEP`] "backtrace"
452    /// The method strips the backtrace and outputs "message"
453    pub fn strip_backtrace(&self) -> String {
454        self.to_string()
455            .split(Self::BACK_TRACE_SEP)
456            .collect::<Vec<&str>>()
457            .first()
458            .unwrap_or(&"")
459            .to_string()
460    }
461
462    /// To enable optional rust backtrace in DataFusion:
463    /// - [`Setup Env Variables`]<https://doc.rust-lang.org/std/backtrace/index.html#environment-variables>
464    /// - Enable `backtrace` cargo feature
465    ///
466    /// Example:
467    /// cargo build --features 'backtrace'
468    /// RUST_BACKTRACE=1 ./app
469    #[inline(always)]
470    pub fn get_back_trace() -> String {
471        #[cfg(feature = "backtrace")]
472        {
473            let back_trace = Backtrace::capture();
474            if back_trace.status() == BacktraceStatus::Captured {
475                return format!("{}{}", Self::BACK_TRACE_SEP, back_trace);
476            }
477
478            "".to_owned()
479        }
480
481        #[cfg(not(feature = "backtrace"))]
482        "".to_owned()
483    }
484
485    /// Return a [`DataFusionErrorBuilder`] to build a [`DataFusionError`]
486    pub fn builder() -> DataFusionErrorBuilder {
487        DataFusionErrorBuilder::default()
488    }
489
490    fn error_prefix(&self) -> &'static str {
491        match self {
492            DataFusionError::ArrowError(_, _) => "Arrow error: ",
493            #[cfg(feature = "parquet")]
494            DataFusionError::ParquetError(_) => "Parquet error: ",
495            #[cfg(feature = "avro")]
496            DataFusionError::AvroError(_) => "Avro error: ",
497            #[cfg(feature = "object_store")]
498            DataFusionError::ObjectStore(_) => "Object Store error: ",
499            DataFusionError::IoError(_) => "IO error: ",
500            DataFusionError::SQL(_, _) => "SQL error: ",
501            DataFusionError::NotImplemented(_) => {
502                "This feature is not implemented: "
503            }
504            DataFusionError::Internal(_) => "Internal error: ",
505            DataFusionError::Plan(_) => "Error during planning: ",
506            DataFusionError::Configuration(_) => {
507                "Invalid or Unsupported Configuration: "
508            }
509            DataFusionError::SchemaError(_, _) => "Schema error: ",
510            DataFusionError::Execution(_) => "Execution error: ",
511            DataFusionError::ExecutionJoin(_) => "ExecutionJoin error: ",
512            DataFusionError::ResourcesExhausted(_) => {
513                "Resources exhausted: "
514            }
515            DataFusionError::External(_) => "External error: ",
516            DataFusionError::Context(_, _) => "",
517            DataFusionError::Substrait(_) => "Substrait error: ",
518            DataFusionError::Diagnostic(_, _) => "",
519            DataFusionError::Collection(errs) => {
520                errs.first().expect("cannot construct DataFusionError::Collection with 0 errors, but got one such case").error_prefix()
521            }
522            DataFusionError::Shared(_) => "",
523        }
524    }
525
526    pub fn message(&self) -> Cow<str> {
527        match *self {
528            DataFusionError::ArrowError(ref desc, ref backtrace) => {
529                let backtrace = backtrace.clone().unwrap_or("".to_owned());
530                Cow::Owned(format!("{desc}{backtrace}"))
531            }
532            #[cfg(feature = "parquet")]
533            DataFusionError::ParquetError(ref desc) => Cow::Owned(desc.to_string()),
534            #[cfg(feature = "avro")]
535            DataFusionError::AvroError(ref desc) => Cow::Owned(desc.to_string()),
536            DataFusionError::IoError(ref desc) => Cow::Owned(desc.to_string()),
537            DataFusionError::SQL(ref desc, ref backtrace) => {
538                let backtrace: String = backtrace.clone().unwrap_or("".to_owned());
539                Cow::Owned(format!("{desc:?}{backtrace}"))
540            }
541            DataFusionError::Configuration(ref desc) => Cow::Owned(desc.to_string()),
542            DataFusionError::NotImplemented(ref desc) => Cow::Owned(desc.to_string()),
543            DataFusionError::Internal(ref desc) => Cow::Owned(format!(
544                "{desc}.\nThis was likely caused by a bug in DataFusion's \
545            code and we would welcome that you file an bug report in our issue tracker"
546            )),
547            DataFusionError::Plan(ref desc) => Cow::Owned(desc.to_string()),
548            DataFusionError::SchemaError(ref desc, ref backtrace) => {
549                let backtrace: &str =
550                    &backtrace.as_ref().clone().unwrap_or("".to_owned());
551                Cow::Owned(format!("{desc}{backtrace}"))
552            }
553            DataFusionError::Execution(ref desc) => Cow::Owned(desc.to_string()),
554            DataFusionError::ExecutionJoin(ref desc) => Cow::Owned(desc.to_string()),
555            DataFusionError::ResourcesExhausted(ref desc) => Cow::Owned(desc.to_string()),
556            DataFusionError::External(ref desc) => Cow::Owned(desc.to_string()),
557            #[cfg(feature = "object_store")]
558            DataFusionError::ObjectStore(ref desc) => Cow::Owned(desc.to_string()),
559            DataFusionError::Context(ref desc, ref err) => {
560                Cow::Owned(format!("{desc}\ncaused by\n{}", *err))
561            }
562            DataFusionError::Substrait(ref desc) => Cow::Owned(desc.to_string()),
563            DataFusionError::Diagnostic(_, ref err) => Cow::Owned(err.to_string()),
564            // Returning the message of the first error is probably fine enough,
565            // and makes `DataFusionError::Collection` a transparent wrapped,
566            // unless the end user explicitly calls `DataFusionError::iter`.
567            DataFusionError::Collection(ref errs) => errs
568                .first()
569                .expect("cannot construct DataFusionError::Collection with 0 errors")
570                .message(),
571            DataFusionError::Shared(ref desc) => Cow::Owned(desc.to_string()),
572        }
573    }
574
575    /// Wraps the error with contextual information intended for end users
576    pub fn with_diagnostic(self, diagnostic: Diagnostic) -> Self {
577        Self::Diagnostic(Box::new(diagnostic), Box::new(self))
578    }
579
580    /// Wraps the error with contextual information intended for end users.
581    /// Takes a function that inspects the error and returns the diagnostic to
582    /// wrap it with.
583    pub fn with_diagnostic_fn<F: FnOnce(&DataFusionError) -> Diagnostic>(
584        self,
585        f: F,
586    ) -> Self {
587        let diagnostic = f(&self);
588        self.with_diagnostic(diagnostic)
589    }
590
591    /// Gets the [`Diagnostic`] associated with the error, if any. If there is
592    /// more than one, only the outermost [`Diagnostic`] is returned.
593    pub fn diagnostic(&self) -> Option<&Diagnostic> {
594        struct DiagnosticsIterator<'a> {
595            head: &'a DataFusionError,
596        }
597
598        impl<'a> Iterator for DiagnosticsIterator<'a> {
599            type Item = &'a Diagnostic;
600
601            fn next(&mut self) -> Option<Self::Item> {
602                loop {
603                    if let DataFusionError::Diagnostic(diagnostics, source) = self.head {
604                        self.head = source.as_ref();
605                        return Some(diagnostics);
606                    }
607
608                    if let Some(source) = self
609                        .head
610                        .source()
611                        .and_then(|source| source.downcast_ref::<DataFusionError>())
612                    {
613                        self.head = source;
614                    } else {
615                        return None;
616                    }
617                }
618            }
619        }
620
621        DiagnosticsIterator { head: self }.next()
622    }
623
624    /// Return an iterator over this [`DataFusionError`] and any other
625    /// [`DataFusionError`]s in a [`DataFusionError::Collection`].
626    ///
627    /// Sometimes DataFusion is able to collect multiple errors in a SQL query
628    /// before terminating, e.g. across different expressions in a SELECT
629    /// statements or different sides of a UNION. This method returns an
630    /// iterator over all the errors in the collection.
631    ///
632    /// For this to work, the top-level error must be a
633    /// `DataFusionError::Collection`, not something that contains it.
634    pub fn iter(&self) -> impl Iterator<Item = &DataFusionError> {
635        struct ErrorIterator<'a> {
636            queue: VecDeque<&'a DataFusionError>,
637        }
638
639        impl<'a> Iterator for ErrorIterator<'a> {
640            type Item = &'a DataFusionError;
641
642            fn next(&mut self) -> Option<Self::Item> {
643                loop {
644                    let popped = self.queue.pop_front()?;
645                    match popped {
646                        DataFusionError::Collection(errs) => self.queue.extend(errs),
647                        _ => return Some(popped),
648                    }
649                }
650            }
651        }
652
653        let mut queue = VecDeque::new();
654        queue.push_back(self);
655        ErrorIterator { queue }
656    }
657}
658
/// A builder for [`DataFusionError`]
///
/// This builder can be used to collect multiple errors and return them as a
/// [`DataFusionError::Collection`].
///
/// # Example: no errors
/// ```
/// # use datafusion_common::DataFusionError;
/// let mut builder = DataFusionError::builder();
/// // error_or returns the value if no errors have been added
/// assert_eq!(builder.error_or(42).unwrap(), 42);
/// ```
///
/// # Example: with errors
/// ```
/// # use datafusion_common::{assert_contains, DataFusionError};
/// let mut builder = DataFusionError::builder();
/// builder.add_error(DataFusionError::Internal("foo".to_owned()));
/// // error_or returns the error because one has been added
/// assert_contains!(builder.error_or(42).unwrap_err().to_string(), "Internal error: foo");
/// ```
#[derive(Debug, Default)]
pub struct DataFusionErrorBuilder(Vec<DataFusionError>);
682
683impl DataFusionErrorBuilder {
684    /// Create a new [`DataFusionErrorBuilder`]
685    pub fn new() -> Self {
686        Default::default()
687    }
688
689    /// Add an error to the in progress list
690    ///
691    /// # Example
692    /// ```
693    /// # use datafusion_common::{assert_contains, DataFusionError};
694    /// let mut builder = DataFusionError::builder();
695    /// builder.add_error(DataFusionError::Internal("foo".to_owned()));
696    /// assert_contains!(builder.error_or(42).unwrap_err().to_string(), "Internal error: foo");
697    /// ```
698    pub fn add_error(&mut self, error: DataFusionError) {
699        self.0.push(error);
700    }
701
702    /// Add an error to the in progress list, returning the builder
703    ///
704    /// # Example
705    /// ```
706    /// # use datafusion_common::{assert_contains, DataFusionError};
707    /// let builder = DataFusionError::builder()
708    ///   .with_error(DataFusionError::Internal("foo".to_owned()));
709    /// assert_contains!(builder.error_or(42).unwrap_err().to_string(), "Internal error: foo");
710    /// ```
711    pub fn with_error(mut self, error: DataFusionError) -> Self {
712        self.0.push(error);
713        self
714    }
715
716    /// Returns `Ok(ok)` if no errors were added to the builder,
717    /// otherwise returns a `Result::Err`
718    pub fn error_or<T>(self, ok: T) -> Result<T, DataFusionError> {
719        match self.0.len() {
720            0 => Ok(ok),
721            1 => Err(self.0.into_iter().next().expect("length matched 1")),
722            _ => Err(DataFusionError::Collection(self.0)),
723        }
724    }
725}
726
/// Unwrap an `Option` if possible. Otherwise return a `DataFusionError::Internal`.
/// In normal usage of DataFusion the unwrap should always succeed.
///
/// The macro expands to an expression ending in `?`, so it can only be used
/// inside a function whose error type can be built from `DataFusionError`.
/// The error message names the identifier that was passed in.
///
/// Example: `let values = unwrap_or_internal_err!(values)`
#[macro_export]
macro_rules! unwrap_or_internal_err {
    ($Value: ident) => {
        $Value.ok_or_else(|| {
            // `$crate::` keeps the macro usable from call sites that have not
            // imported `DataFusionError` themselves.
            $crate::DataFusionError::Internal(format!(
                "{} should not be None",
                stringify!($Value)
            ))
        })?
    };
}
742
/// Adds macros for concise `DataFusionError::*` error declarations.
/// Supports placeholders the same way as `format!`
/// Examples:
///     plan_err!("Error")
///     plan_err!("Error {}", val)
///     plan_err!("Error {:?}", val)
///     plan_err!("Error {val}")
///     plan_err!("Error {val:?}")
///
/// `NAME_ERR` -  macro name for wrapping Err(DataFusionError::*)
/// `NAME_DF_ERR` -  macro name for wrapping DataFusionError::*. Needed to keep backtrace opportunity
/// in construction where DataFusionError::* used directly, like `map_err`, `ok_or_else`, etc
/// `ERR` - name of the `DataFusionError` variant the generated macros construct
macro_rules! make_error {
    // Entry point: forwards to the `@inner` rule, passing a literal `$` token
    // so the generated macros can declare their own `$args` repetitions.
    ($NAME_ERR:ident, $NAME_DF_ERR: ident, $ERR:ident) => { make_error!(@inner ($), $NAME_ERR, $NAME_DF_ERR, $ERR); };
    (@inner ($d:tt), $NAME_ERR:ident, $NAME_DF_ERR:ident, $ERR:ident) => {
        ::paste::paste!{
            /// Macro wraps `$ERR` to add backtrace feature
            #[macro_export]
            macro_rules! $NAME_DF_ERR {
                ($d($d args:expr),*) => {
                    $crate::DataFusionError::$ERR(
                        ::std::format!(
                            "{}{}",
                            ::std::format!($d($d args),*),
                            $crate::DataFusionError::get_back_trace(),
                        ).into()
                    )
                }
            }

            /// Macro wraps Err(`$ERR`) to add backtrace feature
            #[macro_export]
            macro_rules! $NAME_ERR {
                ($d($d args:expr),*) => {
                    Err($crate::[<_ $NAME_DF_ERR>]!($d($d args),*))
                }
            }


            // Note: Certain macros are used in this crate, but not all.
            // This macro generates a `use` for all of them in case they are needed,
            // so we allow unused code to avoid warnings when they are not used
            #[doc(hidden)]
            #[allow(unused)]
            pub use $NAME_ERR as [<_ $NAME_ERR>];
            #[doc(hidden)]
            #[allow(unused)]
            pub use $NAME_DF_ERR as [<_ $NAME_DF_ERR>];
        }
    };
}
794
// Exposes `plan_err!` / `plan_datafusion_err!` to create `DataFusionError::Plan` with optional backtrace
make_error!(plan_err, plan_datafusion_err, Plan);

// Exposes `internal_err!` / `internal_datafusion_err!` to create `DataFusionError::Internal` with optional backtrace
make_error!(internal_err, internal_datafusion_err, Internal);

// Exposes `not_impl_err!` / `not_impl_datafusion_err!` to create `DataFusionError::NotImplemented` with optional backtrace
make_error!(not_impl_err, not_impl_datafusion_err, NotImplemented);

// Exposes `exec_err!` / `exec_datafusion_err!` to create `DataFusionError::Execution` with optional backtrace
make_error!(exec_err, exec_datafusion_err, Execution);

// Exposes `config_err!` / `config_datafusion_err!` to create `DataFusionError::Configuration` with optional backtrace
make_error!(config_err, config_datafusion_err, Configuration);

// Exposes `substrait_err!` / `substrait_datafusion_err!` to create `DataFusionError::Substrait` with optional backtrace
make_error!(substrait_err, substrait_datafusion_err, Substrait);

// Exposes `resources_err!` / `resources_datafusion_err!` to create `DataFusionError::ResourcesExhausted` with optional backtrace
make_error!(resources_err, resources_datafusion_err, ResourcesExhausted);
815
/// Creates a `DataFusionError::SQL` from `$ERR` with optional backtrace.
///
/// The second field of the variant is filled with `Some(..)` of whatever
/// `DataFusionError::get_back_trace()` returns at the expansion site.
#[macro_export]
macro_rules! sql_datafusion_err {
    ($ERR:expr) => {
        // `$crate` paths: the caller does not need `DataFusionError` in scope
        $crate::DataFusionError::SQL(
            $ERR,
            Some($crate::DataFusionError::get_back_trace()),
        )
    };
}
823
/// Creates `Err(DataFusionError::SQL)` from `$ERR` with optional backtrace.
///
/// Thin wrapper that puts the result of `sql_datafusion_err!` into `Err`.
#[macro_export]
macro_rules! sql_err {
    ($ERR:expr) => {
        // `$crate` instead of a hard-coded crate name: resolves inside this crate
        // as well as for users that renamed or re-exported the dependency
        Err($crate::sql_datafusion_err!($ERR))
    };
}
831
/// Creates a `DataFusionError::ArrowError` from `$ERR` with optional backtrace.
///
/// The second field of the variant is filled with `Some(..)` of whatever
/// `DataFusionError::get_back_trace()` returns at the expansion site.
#[macro_export]
macro_rules! arrow_datafusion_err {
    ($ERR:expr) => {
        // `$crate` paths: the caller does not need `DataFusionError` in scope
        $crate::DataFusionError::ArrowError(
            $ERR,
            Some($crate::DataFusionError::get_back_trace()),
        )
    };
}
839
/// Creates `Err(DataFusionError::ArrowError)` from `$ERR` with optional backtrace.
///
/// Thin wrapper that puts the result of `arrow_datafusion_err!` into `Err`.
#[macro_export]
macro_rules! arrow_err {
    ($ERR:expr) => {
        // `$crate` instead of a hard-coded crate name: resolves inside this crate
        // as well as for users that renamed or re-exported the dependency
        Err($crate::arrow_datafusion_err!($ERR))
    };
}
847
/// Creates a `DataFusionError::SchemaError` from the given schema error with optional backtrace.
///
/// The second field of the variant is a boxed `Some(..)` holding whatever
/// `DataFusionError::get_back_trace()` returns at the expansion site.
#[macro_export]
macro_rules! schema_datafusion_err {
    ($schema_error:expr) => {
        $crate::error::DataFusionError::SchemaError(
            $schema_error,
            Box::new(Some(
                $crate::error::DataFusionError::get_back_trace(),
            )),
        )
    };
}
858
/// Creates `Err(DataFusionError::SchemaError)` from the given schema error with optional backtrace.
///
/// Thin wrapper that puts the result of `schema_datafusion_err!` into `Err`.
#[macro_export]
macro_rules! schema_err {
    ($schema_error:expr) => {
        Err($crate::schema_datafusion_err!($schema_error))
    };
}
869
870// To avoid compiler error when using macro in the same crate:
871// macros from the current crate cannot be referred to by absolute paths
872pub use schema_err as _schema_err;
873
874/// Create a "field not found" DataFusion::SchemaError
875pub fn field_not_found<R: Into<TableReference>>(
876    qualifier: Option<R>,
877    name: &str,
878    schema: &DFSchema,
879) -> DataFusionError {
880    schema_datafusion_err!(SchemaError::FieldNotFound {
881        field: Box::new(Column::new(qualifier, name)),
882        valid_fields: schema.columns().to_vec(),
883    })
884}
885
886/// Convenience wrapper over [`field_not_found`] for when there is no qualifier
887pub fn unqualified_field_not_found(name: &str, schema: &DFSchema) -> DataFusionError {
888    schema_datafusion_err!(SchemaError::FieldNotFound {
889        field: Box::new(Column::new_unqualified(name)),
890        valid_fields: schema.columns().to_vec(),
891    })
892}
893
894pub fn add_possible_columns_to_diag(
895    diagnostic: &mut Diagnostic,
896    field: &Column,
897    valid_fields: &[Column],
898) {
899    let field_names: Vec<String> = valid_fields
900        .iter()
901        .filter_map(|f| {
902            if normalized_levenshtein(f.name(), field.name()) >= 0.5 {
903                Some(f.flat_name())
904            } else {
905                None
906            }
907        })
908        .collect();
909
910    for name in field_names {
911        diagnostic.add_note(format!("possible column {}", name), None);
912    }
913}
914
#[cfg(test)]
mod test {
    use std::sync::Arc;

    use crate::error::{DataFusionError, GenericError};
    use arrow::error::ArrowError;

    /// A `DataFusionError` turned into an `ArrowError` renders behind the "External error" prefix
    #[test]
    fn datafusion_error_to_arrow() {
        let rendered = return_arrow_error().unwrap_err().to_string();
        assert!(rendered.starts_with("External error: Error during planning: foo"));
    }

    /// An `ArrowError` turned into a `DataFusionError` renders behind the "Arrow error" prefix
    #[test]
    fn arrow_error_to_datafusion() {
        let err = return_datafusion_error().unwrap_err();
        assert_eq!(err.strip_backtrace(), "Arrow error: Schema error: bar");
    }

    // This test only passes when the environment variable RUST_BACKTRACE is set to 1,
    // which enforces backtrace capture
    #[cfg(feature = "backtrace")]
    #[test]
    #[allow(clippy::unnecessary_literal_unwrap)]
    fn test_enabled_backtrace() {
        let requested = std::env::var("RUST_BACKTRACE");
        assert!(
            matches!(requested.as_deref(), Ok("1")),
            "Environment variable RUST_BACKTRACE must be set to 1"
        );

        let res: Result<(), DataFusionError> = plan_err!("Err");
        let rendered = res.unwrap_err().to_string();
        assert!(rendered.contains(DataFusionError::BACK_TRACE_SEP));

        // First section is the message itself, the second one has to carry the backtrace
        let mut sections = rendered.split(DataFusionError::BACK_TRACE_SEP);
        assert_eq!(sections.next().unwrap(), "Error during planning: Err");
        assert!(!sections.next().unwrap().is_empty());
    }

    /// Without the `backtrace` feature the rendered error is the plain message
    #[cfg(not(feature = "backtrace"))]
    #[test]
    #[allow(clippy::unnecessary_literal_unwrap)]
    fn test_disabled_backtrace() {
        let res: Result<(), DataFusionError> = plan_err!("Err");
        let rendered = res.unwrap_err().to_string();
        assert!(!rendered.contains(DataFusionError::BACK_TRACE_SEP));
        assert_eq!(rendered, "Error during planning: Err");
    }

    /// `find_root` has to dig the same `ResourcesExhausted` error out of every kind of wrapper
    #[test]
    fn test_find_root_error() {
        let exhausted = || DataFusionError::ResourcesExhausted("foo".to_string());

        let wrapped_cases = vec![
            // context around the root
            DataFusionError::Context("it happened!".to_string(), Box::new(exhausted())),
            // root inside an arrow external error
            DataFusionError::ArrowError(
                ArrowError::ExternalError(Box::new(exhausted())),
                None,
            ),
            // root inside a DataFusion external error
            DataFusionError::External(Box::new(exhausted())),
            // DataFusion external -> arrow external -> root
            DataFusionError::External(Box::new(ArrowError::ExternalError(Box::new(
                exhausted(),
            )))),
            // arrow external nested twice
            DataFusionError::ArrowError(
                ArrowError::ExternalError(Box::new(ArrowError::ExternalError(Box::new(
                    exhausted(),
                )))),
                None,
            ),
            // root shared behind an `Arc`
            DataFusionError::External(Box::new(Arc::new(exhausted()))),
            // arrow external error shared behind an `Arc`
            DataFusionError::External(Box::new(Arc::new(ArrowError::ExternalError(
                Box::new(exhausted()),
            )))),
        ];

        for wrapped in wrapped_cases {
            do_root_test(wrapped, exhausted());
        }
    }

    /// The generated macros accept every argument style that `format!` accepts
    #[test]
    #[allow(clippy::unnecessary_literal_unwrap)]
    fn test_make_error_parse_input() {
        let stripped =
            |res: Result<(), DataFusionError>| res.unwrap_err().strip_backtrace();

        let extra1 = "extra1";
        let extra2 = "extra2";

        // plain literal
        assert_eq!(stripped(plan_err!("Err")), "Error during planning: Err");

        // positional arguments, `Display`
        assert_eq!(
            stripped(plan_err!("Err {} {}", extra1, extra2)),
            "Error during planning: Err extra1 extra2"
        );

        // positional arguments, `Debug`
        assert_eq!(
            stripped(plan_err!("Err {:?} {:#?}", extra1, extra2)),
            "Error during planning: Err \"extra1\" \"extra2\""
        );

        // captured identifiers, `Display`
        assert_eq!(
            stripped(plan_err!("Err {extra1} {extra2}")),
            "Error during planning: Err extra1 extra2"
        );

        // captured identifiers, `Debug`
        assert_eq!(
            stripped(plan_err!("Err {extra1:?} {extra2:#?}")),
            "Error during planning: Err \"extra1\" \"extra2\""
        );
    }

    #[test]
    fn external_error() {
        // a boxed DataFusionError must come back without an extra wrapper
        let boxed_plan: GenericError =
            Box::new(DataFusionError::Plan("test".to_string()));
        let from_plan: DataFusionError = boxed_plan.into();
        println!("{}", from_plan.strip_backtrace());
        assert_eq!(from_plan.strip_backtrace(), "Error during planning: test");

        // any other boxed error gets wrapped
        let boxed_io: GenericError =
            Box::new(std::io::Error::new(std::io::ErrorKind::Other, "io error"));
        let from_io: DataFusionError = boxed_io.into();
        println!("{}", from_io.strip_backtrace());
        assert_eq!(from_io.strip_backtrace(), "External error: io error");
    }

    /// Boxing and converting an already converted error must not add a second prefix
    #[test]
    fn external_error_no_recursive() {
        let boxed_io: GenericError =
            Box::new(std::io::Error::new(std::io::ErrorKind::Other, "io error"));
        let converted_once: DataFusionError = boxed_io.into();

        let boxed_again: GenericError = Box::new(converted_once);
        let converted_twice: DataFusionError = boxed_again.into();

        println!("{}", converted_twice);
        let rendered = converted_twice.to_string();
        assert!(rendered.starts_with("External error: io error"));
    }

    /// Model what happens when implementing SendableRecordBatchStream:
    /// DataFusion code needs to return an ArrowError
    fn return_arrow_error() -> arrow::error::Result<()> {
        // Relies on the same conversion that '?' would apply
        let planning_failure = DataFusionError::Plan("foo".to_string());
        Err(planning_failure.into())
    }

    /// Model what happens when using arrow kernels in DataFusion
    /// code: need to turn an ArrowError into a DataFusionError
    fn return_datafusion_error() -> crate::error::Result<()> {
        // Relies on the same conversion that '?' would apply
        let kernel_failure = ArrowError::SchemaError("bar".to_string());
        Err(kernel_failure.into())
    }

    /// Checks that the root of `e` is the same error as `exp`
    fn do_root_test(e: DataFusionError, exp: DataFusionError) {
        let root = e.find_root();

        // DataFusionError does not implement Eq: compare the rendered text
        // and, as a cheap "same variant" check, the enum discriminant
        assert_eq!(root.strip_backtrace(), exp.strip_backtrace());
        assert_eq!(std::mem::discriminant(root), std::mem::discriminant(&exp));
    }

    /// `iter` flattens nested collections, keeping the order of the errors
    #[test]
    fn test_iter() {
        let nested = DataFusionError::Collection(vec![
            DataFusionError::Plan("b".to_string()),
            DataFusionError::Plan("c".to_string()),
        ]);
        let err =
            DataFusionError::Collection(vec![DataFusionError::Plan("a".to_string()), nested]);

        let expected = [
            "Error during planning: a",
            "Error during planning: b",
            "Error during planning: c",
        ];

        let errs = err.iter().collect::<Vec<_>>();
        assert_eq!(errs.len(), expected.len());
        for (found, want) in errs.iter().zip(expected) {
            assert_eq!(found.strip_backtrace(), want);
        }
    }
}