// arrow_json/lib.rs
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
//! Transfer data between the Arrow memory format and JSON line-delimited records.
//!
//! See the module level documentation for the
//! [`reader`] and [`writer`] for usage examples.
//!
//! # Binary Data
//!
//! As per [RFC7159], JSON cannot encode arbitrary binary data. A common way to work around
//! this is to use a [binary-to-text encoding] scheme, such as base64, to encode the
//! input data and then decode it on output.
//!
//! ```
//! # use std::io::Cursor;
//! # use std::sync::Arc;
//! # use arrow_array::{BinaryArray, RecordBatch, StringArray};
//! # use arrow_array::cast::AsArray;
//! # use arrow_cast::base64::{b64_decode, b64_encode, BASE64_STANDARD};
//! # use arrow_json::{LineDelimitedWriter, ReaderBuilder};
//! #
//! // The data we want to write
//! let input = BinaryArray::from(vec![b"\xDE\x00\xFF".as_ref()]);
//!
//! // Base64 encode it to a string
//! let encoded: StringArray = b64_encode(&BASE64_STANDARD, &input);
//!
//! // Write the StringArray to JSON
//! let batch = RecordBatch::try_from_iter([("col", Arc::new(encoded) as _)]).unwrap();
//! let mut buf = Vec::with_capacity(1024);
//! let mut writer = LineDelimitedWriter::new(&mut buf);
//! writer.write(&batch).unwrap();
//! writer.finish().unwrap();
//!
//! // Read the JSON data
//! let cursor = Cursor::new(buf);
//! let mut reader = ReaderBuilder::new(batch.schema()).build(cursor).unwrap();
//! let batch = reader.next().unwrap().unwrap();
//!
//! // Reverse the base64 encoding
//! let col: BinaryArray = batch.column(0).as_string::<i32>().clone().into();
//! let output = b64_decode(&BASE64_STANDARD, &col).unwrap();
//!
//! assert_eq!(input, output);
//! ```
//!
//! [RFC7159]: https://datatracker.ietf.org/doc/html/rfc7159#section-8.1
//! [binary-to-text encoding]: https://en.wikipedia.org/wiki/Binary-to-text_encoding
//!
#![deny(rustdoc::broken_intra_doc_links)]
#![warn(missing_docs)]
pub mod reader;
pub mod writer;
pub use self::reader::{Reader, ReaderBuilder};
pub use self::writer::{ArrayWriter, LineDelimitedWriter, Writer, WriterBuilder};
use half::f16;
use serde_json::{Number, Value};
/// Trait for types that can be converted into a JSON value (`serde_json::Value`).
///
/// This file implements it for `bool`, the signed and unsigned integer types up
/// to 64 bits, `i128`, and the `f16`, `f32` and `f64` floating point types.
pub trait JsonSerializable: 'static {
/// Consumes `self` and returns its JSON representation, or `None` if it's not
/// possible to represent the value as JSON.
fn into_json_value(self) -> Option<Value>;
}
macro_rules! json_serializable {
($t:ty) => {
impl JsonSerializable for $t {
fn into_json_value(self) -> Option<Value> {
Some(self.into())
}
}
};
}
json_serializable!(bool);
json_serializable!(u8);
json_serializable!(u16);
json_serializable!(u32);
json_serializable!(u64);
json_serializable!(i8);
json_serializable!(i16);
json_serializable!(i32);
json_serializable!(i64);
impl JsonSerializable for i128 {
fn into_json_value(self) -> Option<Value> {
// Serialize as string to avoid issues with arbitrary_precision serde_json feature
// - https://github.com/serde-rs/json/issues/559
// - https://github.com/serde-rs/json/issues/845
// - https://github.com/serde-rs/json/issues/846
Some(self.to_string().into())
}
}
impl JsonSerializable for f16 {
    /// Widens the value to `f64`, rounds it to three decimal places and wraps
    /// it in a JSON number. Returns `None` when `Number::from_f64` cannot
    /// build a number from the rounded value.
    fn into_json_value(self) -> Option<Value> {
        let widened = f64::from(self);
        // Scale up, round to the nearest integer, scale back down.
        let rounded = (widened * 1000.0).round() / 1000.0;
        Number::from_f64(rounded).map(Value::Number)
    }
}
impl JsonSerializable for f32 {
    /// Widens the value to `f64`, rounds it to three decimal places and wraps
    /// it in a JSON number. Returns `None` when `Number::from_f64` cannot
    /// build a number from the rounded value (NaN is one such input).
    fn into_json_value(self) -> Option<Value> {
        let widened = f64::from(self);
        // Scale up, round to the nearest integer, scale back down.
        let rounded = (widened * 1000.0).round() / 1000.0;
        Number::from_f64(rounded).map(Value::Number)
    }
}
impl JsonSerializable for f64 {
    /// Wraps the value, unmodified, in a JSON number. Returns `None` when
    /// `Number::from_f64` cannot build a number from it.
    fn into_json_value(self) -> Option<Value> {
        let number = Number::from_f64(self)?;
        Some(Value::Number(number))
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::Value::{Bool, Number as VNumber, String as VString};

    /// Checks the JSON value produced for every native type that implements
    /// `JsonSerializable` in this file.
    #[test]
    fn test_arrow_native_type_to_json() {
        assert_eq!(Some(Bool(true)), true.into_json_value());

        // Signed integers become JSON numbers; i128 is emitted as a string.
        assert_eq!(Some(VNumber(Number::from(1))), 1i8.into_json_value());
        assert_eq!(Some(VNumber(Number::from(1))), 1i16.into_json_value());
        assert_eq!(Some(VNumber(Number::from(1))), 1i32.into_json_value());
        assert_eq!(Some(VNumber(Number::from(1))), 1i64.into_json_value());
        assert_eq!(Some(VString("1".to_string())), 1i128.into_json_value());

        // Unsigned integers become JSON numbers.
        assert_eq!(Some(VNumber(Number::from(1))), 1u8.into_json_value());
        assert_eq!(Some(VNumber(Number::from(1))), 1u16.into_json_value());
        assert_eq!(Some(VNumber(Number::from(1))), 1u32.into_json_value());
        assert_eq!(Some(VNumber(Number::from(1))), 1u64.into_json_value());

        // The literal carries an explicit `f32` suffix: an unsuffixed `0.01`
        // falls back to `f64`, which would leave the f32 impl's finite path
        // untested and merely repeat the f64 assertion below.
        assert_eq!(
            Some(VNumber(Number::from_f64(0.01f64).unwrap())),
            0.01f32.into_json_value()
        );
        assert_eq!(
            Some(VNumber(Number::from_f64(0.01f64).unwrap())),
            0.01f64.into_json_value()
        );
        // 1.5 is exactly representable in half precision, so the expected
        // value is unaffected by the widening and rounding in the f16 impl.
        assert_eq!(
            Some(VNumber(Number::from_f64(1.5f64).unwrap())),
            f16::from_f32(1.5).into_json_value()
        );

        // NaN has no JSON number representation for any float width.
        assert_eq!(None, f16::NAN.into_json_value());
        assert_eq!(None, f32::NAN.into_json_value());
        assert_eq!(None, f64::NAN.into_json_value());
    }
}