Fixed problem when passing slices opened from NetCDF files #15

Merged 1 commit on Jan 12, 2024
12 changes: 11 additions & 1 deletion CHANGES.md
@@ -1,12 +1,22 @@
## Version 0.1.2 (in development)

### Enhancements

* Introduced new configuration flag `persist_mem_slices`.
  If set, in-memory `xr.Dataset` instances will first be persisted to a
  temporary Zarr, then reopened, and then appended to the target dataset. [#11]
  (See the usage sketch after this file's diff.)
* Fixed problem where info about closing slice was logged twice. [#9]
* Improved readability of generated configuration documentation.
* Using `requirements-dev.txt` for development package dependencies.

### Fixes

* Fixed problem when passing slices opened from NetCDF files. The error was
  `TypeError: VariableEncoding.__init__() got an unexpected keyword argument 'chunksizes'`.
  [#14] (See the reproduction sketch after the test diff below.)

* Fixed problem where info about closing slice was logged twice. [#9]


## Version 0.1.1

Metadata fixes in `setup.cfg`. No actual code changes.
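
For context on the `persist_mem_slices` entry above, here is a minimal usage sketch. It is hypothetical: the `zappend.api.zappend` import, the keyword-argument call style, and the `target_dir` key are assumptions about the library's API and are not part of this PR; only the `persist_mem_slices` flag name comes from the changelog.

# Hypothetical sketch; the call signature and "target_dir" are assumptions (see note above).
import numpy as np
import xarray as xr
from zappend.api import zappend

# Three in-memory slices, each one step along the "time" append dimension.
slices = [
    xr.Dataset({"a": (("time", "y", "x"), np.zeros((1, 3, 4)))})
    for _ in range(3)
]

zappend(
    slices,
    target_dir="target.zarr",   # assumed config key for the target Zarr dataset
    persist_mem_slices=True,    # persist each in-memory slice to a temporary Zarr,
                                # reopen it, then append it to the target
)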
34 changes: 34 additions & 0 deletions tests/test_metadata.py
@@ -238,6 +238,40 @@ def test_variable_defaults(self):
            ).to_dict(),
        )

    def test_variable_encoding_from_netcdf(self):
        a = xr.DataArray(np.zeros((2, 3, 4)), dims=("time", "y", "x"))
        a.encoding.update(chunksizes=(16, 2, 2))  # turned into "chunks"
        b = xr.DataArray(np.zeros((2, 3, 4)), dims=("time", "y", "x"))
        b.encoding.update(contiguous=True, endian="big")  # logs warning
        self.assertEqual(
            {
                "attrs": {},
                "dims": {"time": 2, "x": 4, "y": 3},
                "variables": {
                    "a": {
                        "attrs": {},
                        "dims": ("time", "y", "x"),
                        "encoding": {"chunks": (16, 2, 2)},
                        "shape": (2, 3, 4),
                    },
                    "b": {
                        "attrs": {},
                        "dims": ("time", "y", "x"),
                        "encoding": {},
                        "shape": (2, 3, 4),
                    },
                },
            },
            DatasetMetadata.from_dataset(
                xr.Dataset(
                    {
                        "a": a,
                        "b": b,
                    }
                )
            ).to_dict(),
        )

    def test_variable_encoding_normalisation(self):
        def normalize(k, v):
            metadata = DatasetMetadata.from_dataset(
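
The new test above simulates NetCDF backend encoding by updating `.encoding` directly. As a companion illustration, here is a hedged sketch of how such keys arise in practice when a slice is opened from an actual NetCDF file; it assumes the netCDF4 engine is installed, and the exact set of encoding keys depends on the backend.

# Sketch of where "chunksizes" comes from in practice (assumes the netCDF4 engine).
import numpy as np
import xarray as xr

ds = xr.Dataset({"a": (("time", "y", "x"), np.zeros((2, 3, 4)))})
ds.to_netcdf("slice.nc", encoding={"a": {"chunksizes": (1, 3, 4)}})

slice_ds = xr.open_dataset("slice.nc")
print(slice_ds["a"].encoding)
# Typically includes backend keys such as "chunksizes", "contiguous", and "source".
# Before this fix, passing such a slice raised:
# TypeError: VariableEncoding.__init__() got an unexpected keyword argument 'chunksizes'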
8 changes: 8 additions & 0 deletions zappend/metadata.py
@@ -10,6 +10,7 @@
import xarray as xr

from .config import merge_configs, DEFAULT_APPEND_DIM
from .log import logger


class Undefined:
@@ -34,6 +35,7 @@ def __init__(
        calendar: str | Undefined = UNDEFINED,
        compressor: Codec | None | Undefined = UNDEFINED,
        filters: list[Codec] | None | Undefined = UNDEFINED,
        **unknown_settings,
    ):
"""All arguments default to UNDEFINED, so they can be distinguished
from None, which is has a special meaning for some values.
@@ -47,6 +49,8 @@
        self.calendar = calendar
        self.compressor = compressor
        self.filters = filters
        if unknown_settings:
            logger.warning("Ignoring unknown encoding settings: %s", unknown_settings)

    def to_dict(self):
        d = {
@@ -271,6 +275,10 @@ def _get_effective_variables(
            encoding["fill_value"] = fill_value
        if "preferred_chunks" in encoding:
            encoding.pop("preferred_chunks")
        if "chunksizes" in encoding:
            chunk_sizes = encoding.pop("chunksizes")
            if "chunks" not in encoding:
                encoding["chunks"] = chunk_sizes
        variables[var_name] = VariableMetadata(
            dims=dims, shape=shape, encoding=VariableEncoding(**encoding), attrs=attrs
        )
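
To summarise the change in `zappend/metadata.py`: NetCDF `chunksizes` are renamed to Zarr `chunks` unless `chunks` is already set, `preferred_chunks` is dropped, and any keys that `VariableEncoding` does not recognise are reported via `logger.warning` and ignored. Below is a condensed, standalone sketch of that behaviour; it is illustrative, not the library's exact code.

# Standalone sketch of the normalisation in the diff above; the function name
# "normalise_encoding" is illustrative and does not exist in the library.

def normalise_encoding(encoding: dict) -> dict:
    encoding = dict(encoding)
    # Drop xarray's backend hint; it is not a Zarr encoding setting.
    encoding.pop("preferred_chunks", None)
    # Rename NetCDF chunk sizes to Zarr chunks unless chunks were given explicitly.
    if "chunksizes" in encoding:
        chunk_sizes = encoding.pop("chunksizes")
        encoding.setdefault("chunks", chunk_sizes)
    return encoding

print(normalise_encoding({"chunksizes": (16, 2, 2), "contiguous": True}))
# -> {'contiguous': True, 'chunks': (16, 2, 2)}; "contiguous" would then be caught
#    by VariableEncoding's **unknown_settings and reported with logger.warning.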