Skip to content

Commit

Permalink
Backport PR #1774: (fix): python debugger dask h5 meta array (#1775)
Browse files Browse the repository at this point in the history
Co-authored-by: Ilan Gold <[email protected]>
  • Loading branch information
meeseeksmachine and ilan-gold authored Nov 22, 2024
1 parent 0143681 commit 0d43eb3
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 10 deletions.
6 changes: 4 additions & 2 deletions src/anndata/_io/specs/lazy_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ def read_h5_array(
c if c not in {None, -1} else s for c, s in zip(chunks, shape, strict=True)
)
if chunks is not None
else (_DEFAULT_STRIDE,) * len(shape)
else tuple(min(_DEFAULT_STRIDE, s) for s in shape)
)

chunk_layout = tuple(
Expand All @@ -159,7 +159,9 @@ def read_h5_array(
)

make_chunk = partial(make_dask_chunk, path, elem_name)
return da.map_blocks(make_chunk, dtype=dtype, chunks=chunk_layout)
return da.map_blocks(
make_chunk, dtype=dtype, chunks=chunk_layout, meta=np.array([])
)


@_LAZY_REGISTRY.register_read(ZarrArray, IOSpec("array", "0.2.0"))
Expand Down
27 changes: 19 additions & 8 deletions tests/test_io_elementwise.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,22 +66,25 @@ def store(request, tmp_path) -> H5Group | ZarrGroup:

sparse_formats = ["csr", "csc"]
SIZE = 2500
DEFAULT_SHAPE = (SIZE, SIZE * 2)


@pytest.fixture(params=sparse_formats)
def sparse_format(request):
    """Return the current parametrized value, one entry of ``sparse_formats`` per run."""
    current_format = request.param
    return current_format


def create_dense_store(store, n_dims: int = 2):
X = np.random.randn(*[SIZE * (i + 1) for i in range(n_dims)])
def create_dense_store(
    store: H5Group | ZarrGroup, *, shape: tuple[int, ...] = DEFAULT_SHAPE
) -> H5Group | ZarrGroup:
    """Write a random dense array under the key ``"X"`` and return the store.

    Parameters
    ----------
    store
        Group the array is written into. It is handed to ``write_elem`` and
        returned unchanged, so it is annotated as a group rather than ``str``.
    shape
        Shape of the generated array (keyword-only); defaults to
        ``DEFAULT_SHAPE``.

    Returns
    -------
    The same ``store`` object that was passed in, now holding ``"X"``.
    """
    # Standard-normal samples; the values themselves are arbitrary test data.
    X = np.random.randn(*shape)

    write_elem(store, "X", X)
    return store


def create_sparse_store(
sparse_format: Literal["csc", "csr"], store: G, shape=(SIZE, SIZE * 2)
sparse_format: Literal["csc", "csr"], store: G, shape=DEFAULT_SHAPE
) -> G:
"""Returns a store
Expand Down Expand Up @@ -289,7 +292,7 @@ def test_read_lazy_2d_dask(sparse_format, store):
],
)
def test_read_lazy_subsets_nd_dask(store, n_dims, chunks):
arr_store = create_dense_store(store, n_dims)
arr_store = create_dense_store(store, shape=DEFAULT_SHAPE[:n_dims])
X_dask_from_disk = read_elem_as_dask(arr_store["X"], chunks=chunks)
X_from_disk = read_elem(arr_store["X"])
assert_equal(X_from_disk, X_dask_from_disk)
Expand Down Expand Up @@ -317,6 +320,14 @@ def test_read_lazy_h5_cluster(sparse_format, tmp_path):
assert_equal(X_from_disk, X_dask_from_disk)


def test_undersized_shape_to_default(store: H5Group | ZarrGroup):
    """Read a dense array lazily with default chunking and check the chunk sizes.

    The array has a short second axis (50 elements), presumably smaller than
    the default chunk size used when ``chunks`` is not passed. The resulting
    dask array must keep the on-disk shape, and no chunk may be larger than
    the axis it belongs to.
    """
    shape = (3000, 50)
    arr_store = create_dense_store(store, shape=shape)
    X_dask_from_disk = read_elem_as_dask(arr_store["X"])
    # ``all(...)`` is required here: asserting on a bare generator expression
    # always passes because a generator object is truthy. The comparison is
    # ``<=`` because a chunk may legitimately span an entire (short) axis.
    assert all(
        c <= s for c, s in zip(X_dask_from_disk.chunksize, shape, strict=True)
    )
    assert X_dask_from_disk.shape == shape


@pytest.mark.parametrize(
("arr_type", "chunks", "expected_chunksize"),
[
Expand All @@ -329,10 +340,10 @@ def test_read_lazy_h5_cluster(sparse_format, tmp_path):
("csc", (-1, 10), (SIZE, 10)),
("csr", (10, None), (10, SIZE * 2)),
("csc", (None, 10), (SIZE, 10)),
("csc", (None, None), (SIZE, SIZE * 2)),
("csr", (None, None), (SIZE, SIZE * 2)),
("csr", (-1, -1), (SIZE, SIZE * 2)),
("csc", (-1, -1), (SIZE, SIZE * 2)),
("csc", (None, None), DEFAULT_SHAPE),
("csr", (None, None), DEFAULT_SHAPE),
("csr", (-1, -1), DEFAULT_SHAPE),
("csc", (-1, -1), DEFAULT_SHAPE),
],
)
def test_read_lazy_2d_chunk_kwargs(
Expand Down

0 comments on commit 0d43eb3

Please sign in to comment.