Skip to content

Commit

Permalink
Default zarr.open to open_group if shape is not provided (zarr-develo…
Browse files Browse the repository at this point in the history
…pers#2158)

* Default zarr.open to open_group if shape is not provided

* linting

* Address failing tests

* Add check if store_path contains array to open()

* Allow AsyncArray constructor to accept dictionary metadata

* Explicitly construct array from metadata in open()

* Check if metadata input is dict rather than ArrayMetadata

* fixup

---------

Co-authored-by: Joe Hamman <[email protected]>
Co-authored-by: Joe Hamman <[email protected]>
  • Loading branch information
3 people authored Sep 27, 2024
1 parent 5ca080d commit 1560d21
Show file tree
Hide file tree
Showing 3 changed files with 76 additions and 48 deletions.
14 changes: 13 additions & 1 deletion src/zarr/api/asynchronous.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import numpy as np
import numpy.typing as npt

from zarr.core.array import Array, AsyncArray
from zarr.core.array import Array, AsyncArray, get_array_metadata
from zarr.core.common import JSON, AccessModeLiteral, ChunkCoords, MemoryOrder, ZarrFormat
from zarr.core.config import config
from zarr.core.group import AsyncGroup
Expand Down Expand Up @@ -230,6 +230,18 @@ async def open(
if path is not None:
store_path = store_path / path

if "shape" not in kwargs and mode in {"a", "w", "w-"}:
try:
metadata_dict = await get_array_metadata(store_path, zarr_format=zarr_format)
# for v2, the above would already have raised an exception if not an array
zarr_format = metadata_dict["zarr_format"]
is_v3_array = zarr_format == 3 and metadata_dict.get("node_type") == "array"
if is_v3_array or zarr_format == 2:
return AsyncArray(store_path=store_path, metadata=metadata_dict)
except (AssertionError, FileNotFoundError):
pass
return await open_group(store=store_path, zarr_format=zarr_format, mode=mode, **kwargs)

try:
return await open_array(store=store_path, zarr_format=zarr_format, mode=mode, **kwargs)
except KeyError:
Expand Down
103 changes: 57 additions & 46 deletions src/zarr/core/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,53 @@ def create_codec_pipeline(metadata: ArrayV2Metadata | ArrayV3Metadata) -> CodecP
raise TypeError


async def get_array_metadata(
    store_path: StorePath, zarr_format: ZarrFormat | None = 3
) -> dict[str, Any]:
    """Fetch and decode the raw array metadata stored under ``store_path``.

    Parameters
    ----------
    store_path
        Location whose metadata objects are read.
    zarr_format
        ``2`` reads the ``ZARRAY_JSON`` / ``ZATTRS_JSON`` objects, ``3`` reads
        the ``ZARR_JSON`` object, and ``None`` requests all three and picks
        the format from whichever metadata object is present.

    Returns
    -------
    dict[str, Any]
        The decoded JSON metadata document. For format 2 the decoded
        attributes object (or ``{}`` when it is absent) is stored under the
        ``"attributes"`` key of the returned dictionary.

    Raises
    ------
    FileNotFoundError
        If the metadata object required for the requested format is missing,
        or if neither format's metadata object is found when ``zarr_format``
        is ``None``.
    ValueError
        If ``zarr_format`` is ``None`` and both a v3 and a v2 metadata object
        exist, or if ``zarr_format`` is not ``2``, ``3`` or ``None``.
    """
    v3_doc = None
    v2_doc = None
    v2_attrs = None

    if zarr_format == 2:
        # The array document and the attributes document are fetched concurrently.
        v2_doc, v2_attrs = await gather(
            (store_path / ZARRAY_JSON).get(),
            (store_path / ZATTRS_JSON).get(),
        )
        if v2_doc is None:
            raise FileNotFoundError(store_path)
    elif zarr_format == 3:
        v3_doc = await (store_path / ZARR_JSON).get()
        if v3_doc is None:
            raise FileNotFoundError(store_path)
    elif zarr_format is None:
        # Format unknown: request every candidate object and decide afterwards.
        v3_doc, v2_doc, v2_attrs = await gather(
            (store_path / ZARR_JSON).get(),
            (store_path / ZARRAY_JSON).get(),
            (store_path / ZATTRS_JSON).get(),
        )
        has_v3 = v3_doc is not None
        has_v2 = v2_doc is not None
        if has_v3 and has_v2:
            # TODO: revisit this exception type
            # alternatively, we could warn and favor v3
            raise ValueError("Both zarr.json and .zarray objects exist")
        if not (has_v3 or has_v2):
            raise FileNotFoundError(store_path)
        # The format follows from whichever metadata object was found.
        zarr_format = 3 if has_v3 else 2
    else:
        raise ValueError(f"unexpected zarr_format: {zarr_format}")

    if zarr_format == 2:
        # A v2 array consists of a .zarray object plus an optional .zattrs object.
        assert v2_doc is not None
        document: dict[str, Any] = json.loads(v2_doc.to_bytes())
        attributes = json.loads(v2_attrs.to_bytes()) if v2_attrs is not None else {}
        document["attributes"] = attributes
        return document

    # A v3 array consists of a single zarr.json object.
    assert v3_doc is not None
    return json.loads(v3_doc.to_bytes())


@dataclass(frozen=True)
class AsyncArray:
metadata: ArrayMetadata
Expand All @@ -115,10 +162,17 @@ class AsyncArray:

def __init__(
self,
metadata: ArrayMetadata,
metadata: ArrayMetadata | dict[str, Any],
store_path: StorePath,
order: Literal["C", "F"] | None = None,
) -> None:
if isinstance(metadata, dict):
zarr_format = metadata["zarr_format"]
if zarr_format == 2:
metadata = ArrayV2Metadata.from_dict(metadata)
else:
metadata = ArrayV3Metadata.from_dict(metadata)

metadata_parsed = parse_array_metadata(metadata)
order_parsed = parse_indexing_order(order or config.get("array.order"))

Expand Down Expand Up @@ -341,51 +395,8 @@ async def open(
zarr_format: ZarrFormat | None = 3,
) -> AsyncArray:
store_path = await make_store_path(store)

if zarr_format == 2:
zarray_bytes, zattrs_bytes = await gather(
(store_path / ZARRAY_JSON).get(), (store_path / ZATTRS_JSON).get()
)
if zarray_bytes is None:
raise FileNotFoundError(store_path)
elif zarr_format == 3:
zarr_json_bytes = await (store_path / ZARR_JSON).get()
if zarr_json_bytes is None:
raise FileNotFoundError(store_path)
elif zarr_format is None:
zarr_json_bytes, zarray_bytes, zattrs_bytes = await gather(
(store_path / ZARR_JSON).get(),
(store_path / ZARRAY_JSON).get(),
(store_path / ZATTRS_JSON).get(),
)
if zarr_json_bytes is not None and zarray_bytes is not None:
# TODO: revisit this exception type
# alternatively, we could warn and favor v3
raise ValueError("Both zarr.json and .zarray objects exist")
if zarr_json_bytes is None and zarray_bytes is None:
raise FileNotFoundError(store_path)
# set zarr_format based on which keys were found
if zarr_json_bytes is not None:
zarr_format = 3
else:
zarr_format = 2
else:
raise ValueError(f"unexpected zarr_format: {zarr_format}")

if zarr_format == 2:
# V2 arrays are comprised of a .zarray and .zattrs objects
assert zarray_bytes is not None
zarray_dict = json.loads(zarray_bytes.to_bytes())
zattrs_dict = json.loads(zattrs_bytes.to_bytes()) if zattrs_bytes is not None else {}
zarray_dict["attributes"] = zattrs_dict
return cls(store_path=store_path, metadata=ArrayV2Metadata.from_dict(zarray_dict))
else:
# V3 arrays are comprised of a zarr.json object
assert zarr_json_bytes is not None
return cls(
store_path=store_path,
metadata=ArrayV3Metadata.from_dict(json.loads(zarr_json_bytes.to_bytes())),
)
metadata_dict = await get_array_metadata(store_path, zarr_format=zarr_format)
return cls(store_path=store_path, metadata=metadata_dict)

@property
def ndim(self) -> int:
Expand Down
7 changes: 6 additions & 1 deletion tests/v3/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,12 @@ def test_open_with_mode_r_plus(tmp_path: pathlib.Path) -> None:
z2[:] = 3


def test_open_with_mode_a(tmp_path: pathlib.Path) -> None:
async def test_open_with_mode_a(tmp_path: pathlib.Path) -> None:
# Open without shape argument should default to group
g = zarr.open(store=tmp_path, mode="a")
assert isinstance(g, Group)
await g.store_path.delete()

# 'a' means read/write (create if doesn't exist)
arr = zarr.open(store=tmp_path, mode="a", shape=(3, 3))
assert isinstance(arr, Array)
Expand Down

0 comments on commit 1560d21

Please sign in to comment.