diff --git a/src/dataset.rs b/src/dataset.rs
index cf18fce4..5d845d9a 100644
--- a/src/dataset.rs
+++ b/src/dataset.rs
@@ -2,8 +2,9 @@ use std::{collections::HashMap, sync::Arc};
 
 use crate::{
     AddNodeError, ArrayIndices, AttributesTable, ChunkPayload, Dataset, ManifestRef,
-    ManifestsTable, NodeData, NodeId, NodeStructure, ObjectId, Path, Storage, StructureTable,
-    UpdateNodeError, UserAttributes, UserAttributesStructure, ZarrArrayMetadata,
+    ManifestsTable, NodeData, NodeId, NodeStructure, ObjectId, Path, Storage,
+    StructureTable, UpdateNodeError, UserAttributes, UserAttributesStructure,
+    ZarrArrayMetadata,
 };
 
 /// FIXME: what do we want to do with implicit groups?
@@ -62,10 +63,7 @@ impl Dataset {
     ) -> Result<(), UpdateNodeError> {
         match self.get_node(&path).await {
             None => Err(UpdateNodeError::NotFound),
-            Some(NodeStructure {
-                node_data: NodeData::Array(..),
-                ..
-            }) => {
+            Some(NodeStructure { node_data: NodeData::Array(..), .. }) => {
                 self.updated_arrays.insert(path, metadata);
                 Ok(())
             }
@@ -99,10 +97,7 @@ impl Dataset {
     ) -> Result<(), UpdateNodeError> {
         match self.get_node(&path).await {
             None => Err(UpdateNodeError::NotFound),
-            Some(NodeStructure {
-                node_data: NodeData::Array(..),
-                ..
-            }) => {
+            Some(NodeStructure { node_data: NodeData::Array(..), .. }) => {
                 self.set_chunks.insert((path, coord), data);
                 Ok(())
             }
@@ -114,17 +109,14 @@ impl Dataset {
         // FIXME: errors
         match self.storage.fetch_structure(&self.structure_id).await.ok() {
             None => 0,
-            Some(structure) => structure
-                .iter()
-                .max_by_key(|s| s.id)
-                .map_or(0, |node| node.id),
+            Some(structure) => {
+                structure.iter().max_by_key(|s| s.id).map_or(0, |node| node.id)
+            }
         }
     }
 
     async fn reserve_node_id(&mut self) -> NodeId {
-        let last = self
-            .last_node_id
-            .unwrap_or(self.compute_last_node_id().await);
+        let last = self.last_node_id.unwrap_or(self.compute_last_node_id().await);
         let new = last + 1;
         self.last_node_id = Some(new);
         new
@@ -134,16 +126,11 @@ impl Dataset {
 
     // FIXME: we should have errors here, not only None
     pub async fn get_node(&self, path: &Path) -> Option<NodeStructure> {
-        self.get_new_node(path)
-            .or(self.get_existing_node(path).await)
+        self.get_new_node(path).or(self.get_existing_node(path).await)
     }
 
     async fn get_existing_node(&self, path: &Path) -> Option<NodeStructure> {
-        let structure = self
-            .storage
-            .fetch_structure(&self.structure_id)
-            .await
-            .ok()?;
+        let structure = self.storage.fetch_structure(&self.structure_id).await.ok()?;
         let session_atts = self
             .updated_attributes
             .get(path)
@@ -199,7 +186,11 @@ impl Dataset {
         })
     }
 
-    pub async fn get_chunk(&self, path: &Path, coords: &ArrayIndices) -> Option<ChunkPayload> {
+    pub async fn get_chunk(
+        &self,
+        path: &Path,
+        coords: &ArrayIndices,
+    ) -> Option<ChunkPayload> {
         // FIXME: better error type
         let node = self.get_node(path).await?;
         match node.node_data {
             NodeData::Group => None,
             NodeData::Array(_, manifests) => {
                 // check the chunks modified in this session first
                 // TODO: I hate that Rust forces me to clone to search in a hashmap. How to do better?
-                let session_chunk = self
-                    .set_chunks
-                    .get(&(path.clone(), coords.clone()))
-                    .cloned();
+                let session_chunk =
+                    self.set_chunks.get(&(path.clone(), coords.clone())).cloned();
                 // If session_chunk is not None we have to return it, because it is the update the
                 // user made in the current session
                 // If session_chunk == None, the user hasn't modified the chunk in this session and we
                 // need to fall back to fetching the manifests
-                session_chunk.unwrap_or(self.get_old_chunk(manifests.as_slice(), coords).await)
+                session_chunk
+                    .unwrap_or(self.get_old_chunk(manifests.as_slice(), coords).await)
             }
         }
     }
@@ -227,11 +217,8 @@ impl Dataset {
     ) -> Option<ChunkPayload> {
         // FIXME: use manifest extents
         for manifest in manifests {
-            let manifest_structure = self
-                .storage
-                .fetch_manifests(&manifest.object_id)
-                .await
-                .ok()?;
+            let manifest_structure =
+                self.storage.fetch_manifests(&manifest.object_id).await.ok()?;
             if let Some(payload) = manifest_structure
                 .get_chunk_info(coords, &manifest.location)
                 .map(|info| info.payload)
@@ -250,11 +237,7 @@ impl Dataset {
     /// Files that are reused from previous commits are not returned because they don't need saving
     pub async fn consolidate(
         &mut self,
-    ) -> (
-        Arc<StructureTable>,
-        Vec<Arc<AttributesTable>>,
-        Vec<Arc<ManifestsTable>>,
-    ) {
+    ) -> (Arc<StructureTable>, Vec<Arc<AttributesTable>>, Vec<Arc<ManifestsTable>>) {
         todo!()
     }
 }
@@ -264,9 +247,10 @@ mod tests {
     use std::{error::Error, num::NonZeroU64, path::PathBuf};
 
     use crate::{
-        manifest::mk_manifests_table, storage::InMemoryStorage, structure::mk_structure_table,
-        ChunkInfo, ChunkKeyEncoding, ChunkRef, ChunkShape, Codecs, DataType, FillValue, Flags,
-        ManifestExtents, StorageTransformers, TableRegion,
+        manifest::mk_manifests_table, storage::InMemoryStorage,
+        structure::mk_structure_table, ChunkInfo, ChunkKeyEncoding, ChunkRef, ChunkShape,
+        Codecs, DataType, FillValue, Flags, ManifestExtents, StorageTransformers,
+        TableRegion,
     };
 
     use super::*;
@@ -335,7 +319,9 @@ mod tests {
             NodeStructure {
                 path: array1_path.clone(),
                 id: array_id,
-                user_attributes: Some(UserAttributesStructure::Inline("{foo:1}".to_string())),
+                user_attributes: Some(UserAttributesStructure::Inline(
+                    "{foo:1}".to_string(),
+                )),
                 node_data: NodeData::Array(zarr_meta1.clone(), vec![manifest_ref]),
             },
         ];
@@ -394,7 +380,9 @@ mod tests {
             Some(NodeStructure {
                 path: "/group/array2".into(),
                 id: 4,
-                user_attributes: Some(UserAttributesStructure::Inline("{n:42}".to_string(),)),
+                user_attributes: Some(UserAttributesStructure::Inline(
+                    "{n:42}".to_string(),
+                )),
                 node_data: NodeData::Array(zarr_meta2.clone(), vec![]),
             })
         );
@@ -422,16 +410,11 @@ mod tests {
         let node = ds.get_node(&array1_path).await.unwrap();
         assert_eq!(
             node.user_attributes,
-            Some(UserAttributesStructure::Inline(
-                "{updated: true}".to_string()
-            ))
+            Some(UserAttributesStructure::Inline("{updated: true}".to_string()))
         );
 
         // update old array zarr metadata and check it
-        let new_zarr_meta1 = ZarrArrayMetadata {
-            shape: vec![2, 2, 3],
-            ..zarr_meta1
-        };
+        let new_zarr_meta1 = ZarrArrayMetadata { shape: vec![2, 2, 3], ..zarr_meta1 };
         ds.update_array(array1_path.clone(), new_zarr_meta1)
             .await
             .map_err(|err| format!("{err:#?}"))?;
@@ -455,9 +438,7 @@ mod tests {
             .await
             .map_err(|err| format!("{err:#?}"))?;
 
-        let chunk = ds
-            .get_chunk(&array1_path, &ArrayIndices(vec![0, 0, 0]))
-            .await;
+        let chunk = ds.get_chunk(&array1_path, &ArrayIndices(vec![0, 0, 0])).await;
         assert_eq!(chunk, Some(ChunkPayload::Inline(vec![0, 0, 0, 99])));
 
         Ok(())
diff --git a/src/lib.rs b/src/lib.rs
index c0d6dafa..9d555475 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -28,16 +28,14 @@ pub mod structure;
 
 use async_trait::async_trait;
 use manifest::ManifestsTable;
-use std::{collections::HashMap, fmt::Display, num::NonZeroU64, path::PathBuf, sync::Arc};
+use std::{
+    collections::HashMap, fmt::Display, num::NonZeroU64, path::PathBuf, sync::Arc,
+};
 use structure::StructureTable;
 
 #[derive(Debug, Clone)]
 pub enum IcechunkFormatError {
-    FillValueDecodeError {
-        found_size: usize,
-        target_size: usize,
-        target_type: DataType,
-    },
+    FillValueDecodeError { found_size: usize, target_size: usize, target_type: DataType },
     NullFillValueError,
 }
@@ -181,7 +179,10 @@ pub enum FillValue {
 }
 
 impl FillValue {
-    fn from_data_type_and_value(dt: &DataType, value: &[u8]) -> Result<Self, IcechunkFormatError> {
+    fn from_data_type_and_value(
+        dt: &DataType,
+        value: &[u8],
+    ) -> Result<Self, IcechunkFormatError> {
         use IcechunkFormatError::FillValueDecodeError;
 
         match dt {
@@ -415,8 +416,7 @@ impl TryFrom<&[u8]> for ObjectId {
 
     fn try_from(value: &[u8]) -> Result<Self, Self::Error> {
         let buf = value.try_into();
-        buf.map(ObjectId)
-            .map_err(|_| "Invalid ObjectId buffer length")
+        buf.map(ObjectId).map_err(|_| "Invalid ObjectId buffer length")
     }
 }
@@ -543,9 +543,18 @@ pub enum StorageError {
 /// Implementations are free to assume files are never overwritten.
 #[async_trait]
 pub trait Storage {
-    async fn fetch_structure(&self, id: &ObjectId) -> Result<Arc<StructureTable>, StorageError>; // FIXME: format flags
-    async fn fetch_attributes(&self, id: &ObjectId) -> Result<Arc<AttributesTable>, StorageError>; // FIXME: format flags
-    async fn fetch_manifests(&self, id: &ObjectId) -> Result<Arc<ManifestsTable>, StorageError>; // FIXME: format flags
+    async fn fetch_structure(
+        &self,
+        id: &ObjectId,
+    ) -> Result<Arc<StructureTable>, StorageError>; // FIXME: format flags
+    async fn fetch_attributes(
+        &self,
+        id: &ObjectId,
+    ) -> Result<Arc<AttributesTable>, StorageError>; // FIXME: format flags
+    async fn fetch_manifests(
+        &self,
+        id: &ObjectId,
+    ) -> Result<Arc<ManifestsTable>, StorageError>; // FIXME: format flags
 
     async fn write_structure(
         &self,
diff --git a/src/manifest.rs b/src/manifest.rs
index 6541622d..1c6dd7c1 100644
--- a/src/manifest.rs
+++ b/src/manifest.rs
@@ -2,8 +2,8 @@ use std::sync::Arc;
 
 use arrow::{
     array::{
-        Array, AsArray, BinaryArray, FixedSizeBinaryArray, GenericBinaryBuilder, RecordBatch,
-        StringArray, UInt32Array, UInt64Array,
+        Array, AsArray, BinaryArray, FixedSizeBinaryArray, GenericBinaryBuilder,
+        RecordBatch, StringArray, UInt32Array, UInt64Array,
     },
     datatypes::{Field, Schema, UInt32Type, UInt64Type},
 };
@@ -19,7 +19,11 @@ pub struct ManifestsTable {
 }
 
 impl ManifestsTable {
-    pub fn get_chunk_info(&self, coords: &ArrayIndices, region: &TableRegion) -> Option<ChunkInfo> {
+    pub fn get_chunk_info(
+        &self,
+        coords: &ArrayIndices,
+        region: &TableRegion,
+    ) -> Option<ChunkInfo> {
         // FIXME: make this fast, currently it's a linear search
         // FIXME: return error type
         let idx = self.get_chunk_info_index(coords, region)?;
@@ -31,41 +35,27 @@ impl ManifestsTable {
             return None;
         }
 
-        let id_col = self
-            .batch
-            .column_by_name("array_id")?
-            .as_primitive_opt::<UInt32Type>()?;
-        let coords_col = self
-            .batch
-            .column_by_name("coords")?
-            .as_binary_opt::<i32>()?;
-        let offset_col = self
-            .batch
-            .column_by_name("offset")?
-            .as_primitive_opt::<UInt64Type>()?;
-        let length_col = self
-            .batch
-            .column_by_name("length")?
-            .as_primitive_opt::<UInt64Type>()?;
-        let inline_col = self
-            .batch
-            .column_by_name("inline_data")?
-            .as_binary_opt::<i32>()?;
-        let chunk_id_col = self
-            .batch
-            .column_by_name("chunk_id")?
-            .as_fixed_size_binary_opt()?;
-        let virtual_path_col = self
-            .batch
-            .column_by_name("virtual_path")?
-            .as_string_opt::<i32>()?;
+        let id_col =
+            self.batch.column_by_name("array_id")?.as_primitive_opt::<UInt32Type>()?;
+        let coords_col = self.batch.column_by_name("coords")?.as_binary_opt::<i32>()?;
+        let offset_col =
+            self.batch.column_by_name("offset")?.as_primitive_opt::<UInt64Type>()?;
+        let length_col =
+            self.batch.column_by_name("length")?.as_primitive_opt::<UInt64Type>()?;
+        let inline_col =
+            self.batch.column_by_name("inline_data")?.as_binary_opt::<i32>()?;
+        let chunk_id_col =
+            self.batch.column_by_name("chunk_id")?.as_fixed_size_binary_opt()?;
+        let virtual_path_col =
+            self.batch.column_by_name("virtual_path")?.as_string_opt::<i32>()?;
         // FIXME: do something with extras
         let _extra_col = self.batch.column_by_name("extra")?.as_string_opt::<i32>()?;
 
         // These arrays cannot contain null values, we don't need to check using `is_null`
         let idx = row as usize;
         let id = id_col.value(idx);
-        let coords = ArrayIndices::unchecked_try_from_slice(coords_col.value(idx)).ok()?;
+        let coords =
+            ArrayIndices::unchecked_try_from_slice(coords_col.value(idx)).ok()?;
 
         if inline_col.is_valid(idx) {
             // we have an inline chunk
@@ -97,11 +87,7 @@ impl ManifestsTable {
         Some(ChunkInfo {
             node: id,
             coord: coords,
-            payload: ChunkPayload::Ref(ChunkRef {
-                id: chunk_id,
-                offset,
-                length,
-            }),
+            payload: ChunkPayload::Ref(ChunkRef { id: chunk_id, offset, length }),
         })
     }
 }
@@ -135,11 +121,7 @@ impl ArrayIndices {
     // FIXME: better error type
     pub fn try_from_slice(rank: usize, slice: &[u8]) -> Result<Self, String> {
         if slice.len() != rank * 8 {
-            Err(format!(
-                "Invalid slice length {}, expecting {}",
-                slice.len(),
-                rank
-            ))
+            Err(format!("Invalid slice length {}, expecting {}", slice.len(), rank))
         } else {
             ArrayIndices::unchecked_try_from_slice(slice)
         }
@@ -198,11 +180,7 @@ pub fn mk_manifests_table<T: IntoIterator<Item = ChunkInfo>>(coll: T) -> ManifestsTable {
                 virtual_paths.push(None);
                 offsets.push(Some(offset));
             }
-            ChunkPayload::Virtual(VirtualChunkRef {
-                location,
-                offset,
-                length,
-            }) => {
+            ChunkPayload::Virtual(VirtualChunkRef { location, offset, length }) => {
                 lengths.push(length);
                 inline_data.push(None);
                 chunk_ids.push(None);
@@ -246,7 +224,8 @@ pub fn mk_manifests_table<T: IntoIterator<Item = ChunkInfo>>(coll: T) -> ManifestsTable {
         Field::new("virtual_path", arrow::datatypes::DataType::Utf8, true),
         Field::new("extra", arrow::datatypes::DataType::Utf8, true),
     ]));
-    let batch = RecordBatch::try_new(schema, columns).expect("Error creating record batch");
+    let batch =
+        RecordBatch::try_new(schema, columns).expect("Error creating record batch");
     ManifestsTable { batch }
 }
 
@@ -254,7 +233,9 @@ fn mk_offsets_array<T: IntoIterator<Item = Option<u64>>>(coll: T) -> UInt64Array {
     coll.into_iter().collect()
 }
 
-fn mk_virtual_paths_array<T: IntoIterator<Item = Option<String>>>(coll: T) -> StringArray {
+fn mk_virtual_paths_array<T: IntoIterator<Item = Option<String>>>(
+    coll: T,
+) -> StringArray {
     coll.into_iter().collect()
 }
@@ -284,7 +265,9 @@ fn mk_coords_array<T: IntoIterator<Item = ArrayIndices>>(coll: T) -> BinaryArray {
     builder.finish()
 }
 
-fn mk_chunk_ids_array<T: IntoIterator<Item = Option<ObjectId>>>(coll: T) -> FixedSizeBinaryArray {
+fn mk_chunk_ids_array<T: IntoIterator<Item = Option<ObjectId>>>(
+    coll: T,
+) -> FixedSizeBinaryArray {
     let iter = coll.into_iter().map(|oid| oid.map(|oid| oid.0));
     FixedSizeBinaryArray::try_from_sparse_iter_with_size(iter, ObjectId::SIZE as i32)
         .expect("Bad ObjectId size")
diff --git a/src/rustfmt.toml b/src/rustfmt.toml
deleted file mode 100644
index 7d2cf549..00000000
--- a/src/rustfmt.toml
+++ /dev/null
@@ -1 +0,0 @@
-merge_imports = true
diff --git a/src/storage.rs b/src/storage.rs
index 6163d820..3d9b0a5d 100644
--- a/src/storage.rs
+++ b/src/storage.rs
@@ -5,7 +5,9 @@ use std::{
 
 use async_trait::async_trait;
 
-use crate::{AttributesTable, ManifestsTable, ObjectId, Storage, StorageError, StructureTable};
+use crate::{
+    AttributesTable, ManifestsTable, ObjectId, Storage, StorageError, StructureTable,
+};
 
 #[derive(Default)]
 pub struct InMemoryStorage {
@@ -26,7 +28,10 @@ impl InMemoryStorage {
 
 #[async_trait]
 impl Storage for InMemoryStorage {
-    async fn fetch_structure(&self, id: &ObjectId) -> Result<Arc<StructureTable>, StorageError> {
+    async fn fetch_structure(
+        &self,
+        id: &ObjectId,
+    ) -> Result<Arc<StructureTable>, StorageError> {
         self.struct_files
             .read()
             .or(Err(StorageError::Deadlock))?
             .get(id)
             .cloned()
             .ok_or(StorageError::NotFound)
     }
 
-    async fn fetch_attributes(&self, id: &ObjectId) -> Result<Arc<AttributesTable>, StorageError> {
+    async fn fetch_attributes(
+        &self,
+        id: &ObjectId,
+    ) -> Result<Arc<AttributesTable>, StorageError> {
         self.attr_files
             .read()
             .or(Err(StorageError::Deadlock))?
             .get(id)
             .cloned()
             .ok_or(StorageError::NotFound)
     }
 
-    async fn fetch_manifests(&self, id: &ObjectId) -> Result<Arc<ManifestsTable>, StorageError> {
+    async fn fetch_manifests(
+        &self,
+        id: &ObjectId,
+    ) -> Result<Arc<ManifestsTable>, StorageError> {
         self.man_files
             .read()
             .or(Err(StorageError::Deadlock))?
diff --git a/src/structure.rs b/src/structure.rs
index 8c8be553..3be3b8b1 100644
--- a/src/structure.rs
+++ b/src/structure.rs
@@ -2,9 +2,9 @@ use std::{num::NonZeroU64, sync::Arc};
 
 use arrow::{
     array::{
-        Array, ArrayRef, AsArray, BinaryArray, FixedSizeBinaryArray, FixedSizeBinaryBuilder,
-        ListArray, ListBuilder, RecordBatch, StringArray, StringBuilder, StructArray, UInt32Array,
-        UInt32Builder, UInt8Array,
+        Array, ArrayRef, AsArray, BinaryArray, FixedSizeBinaryArray,
+        FixedSizeBinaryBuilder, ListArray, ListBuilder, RecordBatch, StringArray,
+        StringBuilder, StructArray, UInt32Array, UInt32Builder, UInt8Array,
     },
     datatypes::{Field, Fields, Schema, UInt32Type, UInt64Type, UInt8Type},
 };
@@ -12,9 +12,9 @@ use itertools::izip;
 
 use crate::{
     ChunkKeyEncoding, ChunkShape, Codecs, DataType, DimensionName, FillValue, Flags,
-    ManifestExtents, ManifestRef, NodeData, NodeId, NodeStructure, NodeType, ObjectId, Path,
-    StorageTransformers, TableRegion, UserAttributes, UserAttributesRef, UserAttributesStructure,
-    ZarrArrayMetadata,
+    ManifestExtents, ManifestRef, NodeData, NodeId, NodeStructure, NodeType, ObjectId,
+    Path, StorageTransformers, TableRegion, UserAttributes, UserAttributesRef,
+    UserAttributesStructure, ZarrArrayMetadata,
 };
 
 pub struct StructureTable {
@@ -49,10 +49,7 @@ impl StructureTable {
             .flatten()
             .collect();
         let data_type = DataType::try_from(
-            self.batch
-                .column_by_name("data_type")?
-                .as_string_opt::<i32>()?
-                .value(idx),
+            self.batch.column_by_name("data_type")?.as_string_opt::<i32>()?.value(idx),
         )
         .ok()?;
         let chunk_shape = ChunkShape(
@@ -81,22 +78,16 @@ impl StructureTable {
                 .to_string(),
         );
 
-        let storage_transformers = self
-            .batch
-            .column_by_name("storage_transformers")?
-            .as_string_opt::<i32>()?;
+        let storage_transformers =
+            self.batch.column_by_name("storage_transformers")?.as_string_opt::<i32>()?;
         let storage_transformers = if storage_transformers.is_null(idx) {
             None
         } else {
-            Some(StorageTransformers(
-                storage_transformers.value(idx).to_string(),
-            ))
+            Some(StorageTransformers(storage_transformers.value(idx).to_string()))
         };
 
-        let dimension_names = self
-            .batch
-            .column_by_name("dimension_names")?
-            .as_list_opt::<i32>()?;
+        let dimension_names =
+            self.batch.column_by_name("dimension_names")?.as_list_opt::<i32>()?;
         let dimension_names = if dimension_names.is_null(idx) {
             None
         } else {
@@ -110,11 +101,8 @@ impl StructureTable {
             )
         };
 
-        let encoded_fill_value = self
-            .batch
-            .column_by_name("fill_value")?
-            .as_binary_opt::<i32>()?
-            .value(idx);
+        let encoded_fill_value =
+            self.batch.column_by_name("fill_value")?.as_binary_opt::<i32>()?.value(idx);
         let fill_value =
             FillValue::from_data_type_and_value(&data_type, encoded_fill_value).ok()?;
 
@@ -132,10 +120,8 @@ impl StructureTable {
 
     // FIXME: there should be a failure reason here, so return a Result
     fn build_manifest_refs(&self, idx: usize) -> Option<Vec<ManifestRef>> {
-        let manifest_refs_array = self
-            .batch
-            .column_by_name("manifest_references")?
-            .as_struct_opt()?;
+        let manifest_refs_array =
+            self.batch.column_by_name("manifest_references")?.as_struct_opt()?;
         if manifest_refs_array.is_valid(idx) {
             let refs = manifest_refs_array
                 .column_by_name("reference")?
@@ -176,21 +162,11 @@ impl StructureTable {
     }
 
     fn build_node_structure(&self, idx: usize) -> Option<NodeStructure> {
-        let node_type = self
-            .batch
-            .column_by_name("type")?
-            .as_string_opt::<i32>()?
-            .value(idx);
-        let id = self
-            .batch
-            .column_by_name("id")?
-            .as_primitive_opt::<UInt32Type>()?
-            .value(idx);
-        let path = self
-            .batch
-            .column_by_name("path")?
-            .as_string_opt::<i32>()?
-            .value(idx);
+        let node_type =
+            self.batch.column_by_name("type")?.as_string_opt::<i32>()?.value(idx);
+        let id =
+            self.batch.column_by_name("id")?.as_primitive_opt::<UInt32Type>()?.value(idx);
+        let path = self.batch.column_by_name("path")?.as_string_opt::<i32>()?.value(idx);
         let user_attributes = self.build_user_attributes(idx);
         match node_type {
             "group" => Some(NodeStructure {
@@ -213,14 +189,10 @@ impl StructureTable {
     }
 
     fn build_user_attributes(&self, idx: usize) -> Option<UserAttributesStructure> {
-        let inline = self
-            .batch
-            .column_by_name("user_attributes")?
-            .as_string_opt::<i32>()?;
+        let inline =
+            self.batch.column_by_name("user_attributes")?.as_string_opt::<i32>()?;
         if inline.is_valid(idx) {
-            Some(UserAttributesStructure::Inline(
-                inline.value(idx).to_string(),
-            ))
+            Some(UserAttributesStructure::Inline(inline.value(idx).to_string()))
         } else {
             self.build_user_attributes_ref(idx)
         }
@@ -284,17 +256,11 @@ where
     T: IntoIterator<Item = Option<P>>,
     P: IntoIterator<Item = u64>,
 {
-    let iter = coll
-        .into_iter()
-        .map(|opt| opt.map(|p| p.into_iter().map(Some)));
+    let iter = coll.into_iter().map(|opt| opt.map(|p| p.into_iter().map(Some)));
     // I don't know how to create a ListArray that has non-nullable elements
     let res = ListArray::from_iter_primitive::<UInt64Type, _, _>(iter);
     let (_, offsets, values, nulls) = res.into_parts();
-    let field = Arc::new(Field::new(
-        "item",
-        arrow::datatypes::DataType::UInt64,
-        false,
-    ));
+    let field = Arc::new(Field::new("item", arrow::datatypes::DataType::UInt64, false));
     ListArray::new(field, offsets, values, nulls)
 }
 
@@ -312,11 +278,7 @@ where
         .map(|opt| opt.map(|p| p.0.iter().map(|n| Some(n.get())).collect::<Vec<_>>()));
     let res = ListArray::from_iter_primitive::<UInt64Type, _, _>(iter);
     let (_, offsets, values, nulls) = res.into_parts();
-    let field = Arc::new(Field::new(
-        "item",
-        arrow::datatypes::DataType::UInt64,
-        false,
-    ));
+    let field = Arc::new(Field::new("item", arrow::datatypes::DataType::UInt64, false));
     ListArray::new(field, offsets, values, nulls)
 }
 
@@ -332,9 +294,7 @@ fn mk_fill_values_array<T>(coll: T) -> BinaryArray
 where
     T: IntoIterator<Item = Option<FillValue>>,
 {
-    let iter = coll
-        .into_iter()
-        .map(|fv| fv.as_ref().map(|f| f.to_be_bytes()));
+    let iter = coll.into_iter().map(|fv| fv.as_ref().map(|f| f.to_be_bytes()));
     BinaryArray::from_iter(iter)
 }
 
@@ -376,7 +336,9 @@ fn mk_user_attributes_ref_array<T: IntoIterator<Item = Option<ObjectId>>>(
         .expect("Bad ObjectId size")
 }
 
-fn mk_user_attributes_row_array<T: IntoIterator<Item = Option<u32>>>(coll: T) -> UInt32Array {
+fn mk_user_attributes_row_array<T: IntoIterator<Item = Option<u32>>>(
+    coll: T,
+) -> UInt32Array {
     UInt32Array::from_iter(coll)
 }
 
@@ -385,7 +347,8 @@ where
     T: IntoIterator<Item = Option<P>>,
     P: IntoIterator<Item = ManifestRef>,
 {
-    let mut ref_array = ListBuilder::new(FixedSizeBinaryBuilder::new(ObjectId::SIZE as i32));
+    let mut ref_array =
+        ListBuilder::new(FixedSizeBinaryBuilder::new(ObjectId::SIZE as i32));
     let mut from_row_array = ListBuilder::new(UInt32Builder::new());
     let mut to_row_array = ListBuilder::new(UInt32Builder::new());
 
@@ -425,19 +388,11 @@ where
     let ref_array = ListArray::new(field, offsets, values, nulls);
 
     let (_, offsets, values, nulls) = from_row_array.into_parts();
-    let field = Arc::new(Field::new(
-        "item",
-        arrow::datatypes::DataType::UInt32,
-        false,
-    ));
+    let field = Arc::new(Field::new("item", arrow::datatypes::DataType::UInt32, false));
    let from_row_array = ListArray::new(field, offsets, values, nulls);
 
     let (_, offsets, values, nulls) = to_row_array.into_parts();
-    let field = Arc::new(Field::new(
-        "item",
-        arrow::datatypes::DataType::UInt32,
-        false,
-    ));
+    let field = Arc::new(Field::new("item", arrow::datatypes::DataType::UInt32, false));
     let to_row_array = ListArray::new(field, offsets, values, nulls);
 
     StructArray::from(vec![
@@ -473,7 +428,9 @@ where
 }
 
 // For testing only
-pub fn mk_structure_table<T: IntoIterator<Item = NodeStructure>>(coll: T) -> StructureTable {
+pub fn mk_structure_table<T: IntoIterator<Item = NodeStructure>>(
+    coll: T,
+) -> StructureTable {
     let mut ids = Vec::new();
     let mut types = Vec::new();
     let mut paths = Vec::new();
@@ -591,18 +548,10 @@ pub fn mk_structure_table<T: IntoIterator<Item = NodeStructure>>(coll: T) -> StructureTable {
             Field::new("item", arrow::datatypes::DataType::UInt64, false),
             true,
         ),
-        Field::new(
-            "chunk_key_encoding",
-            arrow::datatypes::DataType::UInt8,
-            true,
-        ),
+        Field::new("chunk_key_encoding", arrow::datatypes::DataType::UInt8, true),
         Field::new("fill_value", arrow::datatypes::DataType::Binary, true),
         Field::new("codecs", arrow::datatypes::DataType::Utf8, true),
-        Field::new(
-            "storage_transformers",
-            arrow::datatypes::DataType::Utf8,
-            true,
-        ),
+        Field::new("storage_transformers", arrow::datatypes::DataType::Utf8, true),
         Field::new_list(
             "dimension_names",
             Field::new("item", arrow::datatypes::DataType::Utf8, true),
@@ -614,11 +563,7 @@ pub fn mk_structure_table<T: IntoIterator<Item = NodeStructure>>(coll: T) -> StructureTable {
             arrow::datatypes::DataType::FixedSizeBinary(ObjectId::SIZE as i32),
             true,
         ),
-        Field::new(
-            "user_attributes_row",
-            arrow::datatypes::DataType::UInt32,
-            true,
-        ),
+        Field::new("user_attributes_row", arrow::datatypes::DataType::UInt32, true),
         Field::new(
             "manifest_references",
             arrow::datatypes::DataType::Struct(Fields::from(vec![
                 Field::new_list(
                     "reference",
                     Field::new(
                         "item",
-                        arrow::datatypes::DataType::FixedSizeBinary(ObjectId::SIZE as i32),
+                        arrow::datatypes::DataType::FixedSizeBinary(
+                            ObjectId::SIZE as i32,
+                        ),
                         false,
                     ),
                     true,
                 ),
@@ -645,7 +592,8 @@ pub fn mk_structure_table<T: IntoIterator<Item = NodeStructure>>(coll: T) -> StructureTable {
             true,
         ),
     ]));
-    let batch = RecordBatch::try_new(schema, columns).expect("Error creating record batch");
+    let batch =
+        RecordBatch::try_new(schema, columns).expect("Error creating record batch");
     StructureTable { batch }
 }
 
@@ -671,13 +619,16 @@ mod strategies {
             any::<f32>().prop_map(FillValue::Float16),
             any::<f32>().prop_map(FillValue::Float32),
             any::<f64>().prop_map(FillValue::Float64),
-            (any::<f32>(), any::<f32>()).prop_map(|(real, imag)| FillValue::Complex64(real, imag)),
-            (any::<f64>(), any::<f64>()).prop_map(|(real, imag)| FillValue::Complex128(real, imag)),
+            (any::<f32>(), any::<f32>())
+                .prop_map(|(real, imag)| FillValue::Complex64(real, imag)),
+            (any::<f64>(), any::<f64>())
+                .prop_map(|(real, imag)| FillValue::Complex128(real, imag)),
             vec(any::<u8>(), 0..64).prop_map(FillValue::RawBits),
         ]
     }
 
-    pub(crate) fn fill_values_vec_strategy() -> impl Strategy<Value = Vec<Option<FillValue>>> {
+    pub(crate) fn fill_values_vec_strategy(
+    ) -> impl Strategy<Value = Vec<Option<FillValue>>> {
         use proptest::collection::vec;
         vec(proptest::option::of(fill_value_strategy()), 0..10)
     }
@@ -718,10 +669,8 @@ mod tests {
             fill_value: FillValue::Int32(0i32),
             ..zarr_meta1.clone()
         };
-        let zarr_meta3 = ZarrArrayMetadata {
-            dimension_names: None,
-            ..zarr_meta2.clone()
-        };
+        let zarr_meta3 =
+            ZarrArrayMetadata { dimension_names: None, ..zarr_meta2.clone() };
         let man_ref1 = ManifestRef {
             object_id: ObjectId::random(),
             location: TableRegion(0, 1),
@@ -758,7 +707,9 @@ mod tests {
             NodeStructure {
                 path: "/b/c".into(),
                 id: 4,
-                user_attributes: Some(UserAttributesStructure::Inline("some inline".to_string())),
+                user_attributes: Some(UserAttributesStructure::Inline(
+                    "some inline".to_string(),
+                )),
                 node_data: NodeData::Group,
             },
             NodeStructure {
@@ -796,7 +747,9 @@ mod tests {
             Some(NodeStructure {
                 path: "/b/c".into(),
                 id: 4,
-                user_attributes: Some(UserAttributesStructure::Inline("some inline".to_string())),
+                user_attributes: Some(UserAttributesStructure::Inline(
+                    "some inline".to_string()
+                )),
                 node_data: NodeData::Group,
             }),
         );
@@ -889,10 +842,8 @@ mod tests {
             Some(FillValue::RawBits(vec![b'1'])),
         ];
 
-        let dtypes: Vec<Option<DataType>> = fill_values
-            .iter()
-            .map(|x| x.as_ref().map(|x| x.get_data_type()))
-            .collect();
+        let dtypes: Vec<Option<DataType>> =
+            fill_values.iter().map(|x| x.as_ref().map(|x| x.get_data_type())).collect();
 
         let encoded = mk_fill_values_array(fill_values.clone());
         let decoded = decode_fill_values_array(dtypes, encoded).unwrap();
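
Note: the patch above is almost entirely mechanical reformatting, plus the deletion of src/rustfmt.toml, whose single option (merge_imports) has long been deprecated in rustfmt. The replacement configuration is not shown in the diff; a root rustfmt.toml consistent with the new line breaks might look like the following sketch — the values are inferred from where lines now wrap, not stated anywhere in the patch:

```toml
# Hypothetical root rustfmt.toml, inferred from the formatting in this patch.
max_width = 90                # long chains and signatures now wrap near column 90
use_small_heuristics = "Max"  # short struct literals/patterns collapse onto one line
```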
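The reformatted Dataset::get_chunk also spells out the write-shadowing rule in its comments: a chunk set in the current session must win over whatever the committed manifests contain, and the manifest lookup runs only as a fallback. Here is a minimal, self-contained sketch of that two-tier lookup; Path, ArrayIndices, and ChunkPayload are simplified stand-ins, not the crate's real types:

```rust
use std::collections::HashMap;

type Path = String;
type ArrayIndices = Vec<u64>;
type ChunkPayload = Vec<u8>;

struct Session {
    // chunks written in this session, keyed by (array path, chunk coordinates)
    set_chunks: HashMap<(Path, ArrayIndices), ChunkPayload>,
}

impl Session {
    fn get_chunk(&self, path: &str, coords: &[u64]) -> Option<ChunkPayload> {
        // The clone-to-look-up mirrors the TODO in the patch: `HashMap::get` needs
        // a borrowed form of the owned (Path, ArrayIndices) key tuple.
        let session_chunk =
            self.set_chunks.get(&(path.to_string(), coords.to_vec())).cloned();
        // Only fall back to the committed manifests when the session has no update.
        session_chunk.or_else(|| self.get_old_chunk(path, coords))
    }

    fn get_old_chunk(&self, _path: &str, _coords: &[u64]) -> Option<ChunkPayload> {
        None // stand-in for the manifest scan in the real code
    }
}

fn main() {
    let mut set_chunks = HashMap::new();
    set_chunks.insert(("/array".to_string(), vec![0, 0]), vec![0, 0, 0, 99]);
    let session = Session { set_chunks };
    // The session write shadows the stored chunk ...
    assert_eq!(session.get_chunk("/array", &[0, 0]), Some(vec![0, 0, 0, 99]));
    // ... and an untouched chunk falls through to the manifest lookup.
    assert_eq!(session.get_chunk("/array", &[0, 1]), None);
}
```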
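Elsewhere, the reformat touches both halves of the fill-value round trip: mk_fill_values_array stores each fill value via to_be_bytes, and FillValue::from_data_type_and_value decodes those bytes against the column's declared data type, reporting found_size/target_size on a mismatch. A sketch of one size-checked decode arm under those assumptions — the single-variant enum and helper name are illustrative, not the crate's API:

```rust
#[derive(Debug, PartialEq)]
enum FillValue {
    Int32(i32),
}

#[derive(Debug)]
struct FillValueDecodeError {
    found_size: usize,
    target_size: usize,
}

// Size-checked big-endian decode, shaped like one arm of
// `FillValue::from_data_type_and_value`.
fn decode_i32_fill_value(value: &[u8]) -> Result<FillValue, FillValueDecodeError> {
    let bytes: [u8; 4] = value.try_into().map_err(|_| FillValueDecodeError {
        found_size: value.len(),
        target_size: 4,
    })?;
    Ok(FillValue::Int32(i32::from_be_bytes(bytes)))
}

fn main() {
    // Round trip: encode with to_be_bytes (as mk_fill_values_array does), decode back.
    let encoded = 42i32.to_be_bytes();
    assert_eq!(decode_i32_fill_value(&encoded).unwrap(), FillValue::Int32(42));
    // A wrong-sized buffer surfaces as a decode error instead of a panic.
    assert!(decode_i32_fill_value(&[0u8; 3]).is_err());
}
```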