Skip to content

Commit

Permalink
DAS-2232 -small functions added to support the main solution in the t… (
Browse files Browse the repository at this point in the history
#16)

* DAS-2232 -small functions added to support the main solution in the ticket

* DAS-2232 - updated notebook version to 7.2.2

* DAS-2232 - Updates based on PR feedback

* DAS-2232 - updates to unit tests based on PR feedback

* DAS-2232 - removed commented code

* DAS-2232 - updated get_valid_indices method

* DAS-2232 - updated get_valid_indices method and some unit tests based on PR feedback

* DAS-2232 - updated get_variables_with_anonymous_dims

* DAS-2232 - added unit tests for any_absent_dimension_variables function
  • Loading branch information
sudha-murthy authored Nov 1, 2024
1 parent 065415e commit 3fc06f0
Show file tree
Hide file tree
Showing 9 changed files with 4,176 additions and 50 deletions.
2 changes: 1 addition & 1 deletion docs/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,5 @@
#
harmony-py~=0.4.10
netCDF4~=1.6.4
notebook~=7.0.4
notebook~=7.2.2
xarray~=2023.9.0
220 changes: 220 additions & 0 deletions hoss/coordinate_utilities.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,220 @@
""" This module contains utility functions used for
coordinate variables and functions to convert the
coordinate variable data to projected x/y dimension values
"""

import numpy as np
from netCDF4 import Dataset

# from numpy import ndarray
from varinfo import VariableFromDmr, VarInfoFromDmr

from hoss.exceptions import (
IncompatibleCoordinateVariables,
InvalidCoordinateDataset,
InvalidCoordinateVariable,
MissingCoordinateVariable,
MissingVariable,
)


def get_projected_dimension_names(
    varinfo: VarInfoFromDmr, variable_name: str
) -> list[str]:
    """Return the projected y/x dimension names for the group of a variable.

    The names are built from the group path of the requested variable, and
    are returned in the order ['<group>/projected_y', '<group>/projected_x'].

    Raises MissingVariable if `varinfo.get_variable` returns `None` for
    `variable_name`.
    """
    variable = varinfo.get_variable(variable_name)

    # Guard clause: without a variable there is no group path to build on.
    if variable is None:
        raise MissingVariable(variable_name)

    return [
        f'{variable.group_path}/projected_y',
        f'{variable.group_path}/projected_x',
    ]


def get_projected_dimension_names_from_coordinate_variables(
    varinfo: VarInfoFromDmr,
    variable_name: str,
) -> list[str]:
    """Derive the projected dimension names for a variable from its
    coordinate variables.

    - If the variable refers to exactly one latitude and exactly one
      longitude coordinate, the names are based on the group of that
      latitude coordinate.
    - Otherwise, if the variable is itself a latitude or longitude variable,
      the names are based on the variable's own group.
    - In any other case an empty list is returned.
    """
    latitudes, longitudes = get_coordinate_variables(varinfo, [variable_name])

    if len(latitudes) == 1 and len(longitudes) == 1:
        return get_projected_dimension_names(varinfo, latitudes[0])

    # The requested variable may itself be the coordinate variable.
    requested_variable = varinfo.get_variable(variable_name)
    if requested_variable.is_latitude() or requested_variable.is_longitude():
        return get_projected_dimension_names(varinfo, variable_name)

    return []


def get_variables_with_anonymous_dims(
    varinfo: VarInfoFromDmr, variables: set[str]
) -> set[str]:
    """Return the subset of `variables` that either list no dimensions at
    all, or list at least one dimension for which `varinfo` has no
    variable (see `any_absent_dimension_variables`).
    """
    anonymous_variables = set()

    for variable_name in variables:
        has_no_dimensions = len(varinfo.get_variable(variable_name).dimensions) == 0

        # The absent-dimension check only runs when dimensions are listed.
        if has_no_dimensions or any_absent_dimension_variables(
            varinfo, variable_name
        ):
            anonymous_variables.add(variable_name)

    return anonymous_variables


def any_absent_dimension_variables(varinfo: VarInfoFromDmr, variable: str) -> bool:
    """Return `True` if any dimension listed for `variable` has no matching
    variable in `varinfo`, and `False` otherwise (including when the
    variable lists no dimensions).

    Per the original author's note, such dimensions are "fake" dimensions
    created by OPeNDAP that are not really dimension variables.
    """
    for dimension_name in varinfo.get_variable(variable).dimensions:
        if varinfo.get_variable(dimension_name) is None:
            return True

    return False


def get_coordinate_variables(
    varinfo: VarInfoFromDmr,
    requested_variables: list,
) -> tuple[list, list]:
    """Return the latitude and longitude variables referenced by the
    `coordinates` metadata attribute of the requested variables.

    The result is a tuple of two lists, always in the order
    (latitude variables, longitude variables).
    """
    coordinate_references = varinfo.get_references_for_attribute(
        requested_variables, 'coordinates'
    )

    latitude_variables = list(
        filter(
            lambda name: varinfo.get_variable(name).is_latitude(),
            coordinate_references,
        )
    )
    longitude_variables = list(
        filter(
            lambda name: varinfo.get_variable(name).is_longitude(),
            coordinate_references,
        )
    )

    return latitude_variables, longitude_variables


def get_row_col_sizes_from_coordinate_datasets(
    lat_arr: np.ndarray,
    lon_arr: np.ndarray,
) -> tuple[int, int]:
    """Return the (row size, column size) implied by the latitude and
    longitude coordinate arrays.

    - Multi-dimensional arrays of identical shape: the first two entries of
      the latitude shape are used as (rows, columns).
    - Two non-empty 1-D arrays: the latitude size gives the rows and the
      longitude size gives the columns.

    Raises IncompatibleCoordinateVariables for any other combination.
    """
    # ToDo - if the coordinates are 3D
    if lat_arr.ndim > 1 and lon_arr.shape == lat_arr.shape:
        return lat_arr.shape[0], lat_arr.shape[1]

    both_one_dimensional = lat_arr.ndim == 1 and lon_arr.ndim == 1
    if both_one_dimensional and lat_arr.size > 0 and lon_arr.size > 0:
        # Todo: The ordering needs to be checked
        return lat_arr.size, lon_arr.size

    raise IncompatibleCoordinateVariables(lon_arr.shape, lat_arr.shape)


def get_coordinate_array(
    prefetch_dataset: Dataset,
    coordinate_name: str,
) -> np.ndarray:
    """Read the full contents of the named coordinate variable from the
    prefetch dataset and return it as an array.

    Raises MissingCoordinateVariable (chained to the original error) if
    the lookup raises an `IndexError`.
    """
    try:
        # `[:]` reads every element of the variable.
        return prefetch_dataset[coordinate_name][:]
    except IndexError as exception:
        raise MissingCoordinateVariable(coordinate_name) from exception


def get_1D_dim_array_data_from_dimvalues(
    dim_values: np.ndarray, dim_indices: np.ndarray, dim_size: int
) -> np.ndarray:
    """Build a full, evenly spaced 1-D dimension array from two known points.

    `dim_values[0]` and `dim_values[1]` are the dimension values located at
    grid indices `dim_indices[0]` and `dim_indices[1]`. The spacing derived
    from these two points is extrapolated to index 0 and to index
    `dim_size - 1`, and `dim_size` evenly spaced values are returned.

    Raises InvalidCoordinateDataset if the two indices or the two values
    are equal, as no resolution can then be computed.
    """
    if (dim_indices[1] == dim_indices[0]) or (dim_values[1] == dim_values[0]):
        raise InvalidCoordinateDataset(dim_values[0], dim_indices[0])

    value_span = dim_values[1] - dim_values[0]
    index_span = dim_indices[1] - dim_indices[0]
    resolution = value_span / index_span

    # Extrapolate from the two known points out to both ends of the grid.
    first_value = dim_values[0] - (resolution * dim_indices[0])
    last_value = dim_values[1] + (resolution * (dim_size - 1 - dim_indices[1]))

    return np.linspace(first_value, last_value, dim_size)


def get_valid_indices(
    coordinate_row_col: np.ndarray, coordinate: VariableFromDmr
) -> np.ndarray:
    """Return the indices of the valid entries in a coordinate array.

    An entry is valid when it is not close to the `_FillValue` of the
    coordinate (if one is defined) and lies within the accepted range:
    [-180, 360] for a longitude variable, [-90, 90] for a latitude
    variable. The returned indices are those along the first axis of
    `coordinate_row_col`.

    For a coordinate that is neither longitude nor latitude, an empty
    array of shape (0, 0) is returned.
    """
    # get_attribute_value returns a value of type `str`
    fill_value = coordinate.get_attribute_value('_FillValue')

    if fill_value is None:
        # No fill value defined: treat every element as non-fill.
        not_fill_mask = np.ones_like(coordinate_row_col, dtype=bool)
    else:
        not_fill_mask = ~np.isclose(coordinate_row_col, float(fill_value))

    if coordinate.is_longitude():
        lower_bound, upper_bound = -180.0, 360.0
    elif coordinate.is_latitude():
        lower_bound, upper_bound = -90.0, 90.0
    else:
        return np.empty((0, 0))

    within_range = np.logical_and(
        coordinate_row_col >= lower_bound, coordinate_row_col <= upper_bound
    )

    return np.where(np.logical_and(not_fill_mask, within_range))[0]
75 changes: 74 additions & 1 deletion hoss/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ class InvalidRequestedRange(CustomError):
def __init__(self):
super().__init__(
'InvalidRequestedRange',
'Input request specified range outside supported ' 'dimension range',
'Input request specified range outside supported dimension range',
)


Expand Down Expand Up @@ -108,6 +108,79 @@ def __init__(self):
)


class MissingVariable(CustomError):
    """Raised when HOSS looks up a variable that is missing or empty in
    the source granule file.
    """

    def __init__(self, referring_variable):
        message = f'"{referring_variable}" is not present in source granule file.'
        super().__init__('MissingVariable', message)


class MissingCoordinateVariable(CustomError):
    """Raised when HOSS looks up latitude or longitude variables and they
    are missing or empty. These are the variables that science variables
    refer to through their coordinate attributes.
    """

    def __init__(self, referring_variable):
        message = (
            f'Coordinate: "{referring_variable}" '
            'is not present in source granule file.'
        )
        super().__init__('MissingCoordinateVariable', message)


class InvalidCoordinateVariable(CustomError):
    """Raised when the latitude or longitude variables contain fill values
    to the extent that they cannot be used. These are the variables that
    science variables refer to through their coordinate attributes.
    """

    def __init__(self, referring_variable):
        message = (
            f'Coordinate: "{referring_variable}" '
            'is not valid in source granule file.'
        )
        super().__init__('InvalidCoordinateVariable', message)


class IncompatibleCoordinateVariables(CustomError):
    """This exception is raised when HOSS tries to get latitude and longitude
    coordinate variables and they do not match in shape or have a size of 0.
    """

    def __init__(self, longitude_shape, latitude_shape):
        super().__init__(
            'IncompatibleCoordinateVariables',
            # The trailing space keeps the two adjacent literals from
            # running together in the rendered message.
            f'Longitude coordinate shape: "{longitude_shape}" '
            f'does not match the latitude coordinate shape: "{latitude_shape}"',
        )


class InvalidCoordinateDataset(CustomError):
    """Raised when the two values passed to the function computing the
    dimension resolution are equal. This can happen when there are too
    many fill values and distinct valid indices could not be obtained.
    """

    def __init__(self, dim_value, dim_index):
        message = (
            'Cannot compute the dimension resolution for '
            f'dim_value: "{dim_value}" dim_index: "{dim_index}"'
        )
        super().__init__('InvalidCoordinateDataset', message)


class UnsupportedShapeFileFormat(CustomError):
"""This exception is raised when the shape file included in the input
Harmony message is not GeoJSON.
Expand Down
Loading

0 comments on commit 3fc06f0

Please sign in to comment.