Skip to content

Commit

Permalink
Merge pull request #52 from CDJellen/user/cjellen/joss-review-improvements
Browse files Browse the repository at this point in the history

JOSS Review Improvements
  • Loading branch information
CDJellen authored Nov 17, 2024
2 parents da192a3 + 308ec67 commit 249aa21
Show file tree
Hide file tree
Showing 58 changed files with 32,855 additions and 29,503 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ jobs:
fail-fast: false
matrix:
os: [ubuntu-latest]
python-version: ["3.8", "3.9", "3.10"]
python-version: ["3.9", "3.10"]
steps:
- uses: actions/checkout@v2
with:
Expand Down
15 changes: 8 additions & 7 deletions ndbc_api/api/handlers/http/stations.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,8 @@ def historical_stations(cls, handler: Any) -> pd.DataFrame:
except (AttributeError, ValueError, TypeError) as e:
raise ResponseException(
'Failed to execute `station` request.') from e
return HistoricalStationsParser.df_from_response(resp, use_timestamp=False)
return HistoricalStationsParser.df_from_response(resp,
use_timestamp=False)

@classmethod
def nearest_station(
Expand Down Expand Up @@ -154,9 +155,9 @@ def _nearest(df: pd.DataFrame, lat_a: float, lon_a: float):

# Calculate distances using Haversine formula
df_filtered['distance'] = df_filtered.apply(
lambda row: StationsHandler._distance(lat_a, lon_a, row['Lat'], row['Lon']),
axis=1
)
lambda row: StationsHandler._distance(lat_a, lon_a, row['Lat'], row[
'Lon']),
axis=1)

# Find the index of the closest row
smallest_distance = df_filtered['distance'].min()
Expand All @@ -173,9 +174,9 @@ def _radial_search(df: pd.DataFrame, lat_a: float, lon_a: float,

# Calculate distances using Haversine formula
df_filtered['distance'] = df_filtered.apply(
lambda row: StationsHandler._distance(lat_a, lon_a, row['Lat'], row['Lon']),
axis=1
)
lambda row: StationsHandler._distance(lat_a, lon_a, row['Lat'], row[
'Lon']),
axis=1)

df_filtered.sort_values(by='distance', inplace=True)

Expand Down
34 changes: 17 additions & 17 deletions ndbc_api/api/handlers/opendap/data.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from datetime import datetime, timedelta
from typing import Any

import netCDF4 as nc
import xarray

from ndbc_api.api.handlers._base import BaseHandler
from ndbc_api.api.parsers.opendap.adcp import AdcpParser
Expand Down Expand Up @@ -31,7 +31,7 @@ def adcp(
start_time: datetime = datetime.now() - timedelta(days=30),
end_time: datetime = datetime.now(),
use_timestamp: bool = True,
) -> 'nc.Dataset':
) -> xarray.Dataset:
"""adcp"""
try:
reqs = AdcpRequest.build_request(station_id=station_id,
Expand All @@ -54,7 +54,7 @@ def cwind(
start_time: datetime = datetime.now() - timedelta(days=30),
end_time: datetime = datetime.now(),
use_timestamp: bool = True,
) -> 'nc.Dataset':
) -> xarray.Dataset:
"""cwind"""
try:
reqs = CwindRequest.build_request(station_id=station_id,
Expand All @@ -77,7 +77,7 @@ def ocean(
start_time: datetime = datetime.now() - timedelta(days=30),
end_time: datetime = datetime.now(),
use_timestamp: bool = True,
) -> 'nc.Dataset':
) -> xarray.Dataset:
"""ocean"""
try:
reqs = OceanRequest.build_request(station_id=station_id,
Expand All @@ -100,20 +100,20 @@ def pwind(
start_time: datetime = datetime.now() - timedelta(days=30),
end_time: datetime = datetime.now(),
use_timestamp: bool = True,
) -> 'nc.Dataset':
) -> xarray.Dataset:
"""pwind"""
try:
reqs = PwindRequest.build_request(station_id=station_id,
start_time=start_time,
end_time=end_time)
start_time=start_time,
end_time=end_time)
except Exception as e:
raise RequestException('Failed to build request.') from e
try:
resps = handler.handle_requests(station_id=station_id, reqs=reqs)
except Exception as e:
raise ResponseException('Failed to execute requests.') from e
return PwindParser.nc_from_responses(responses=resps,
use_timestamp=use_timestamp)
use_timestamp=use_timestamp)

@classmethod
def stdmet(
Expand All @@ -123,7 +123,7 @@ def stdmet(
start_time: datetime = datetime.now() - timedelta(days=30),
end_time: datetime = datetime.now(),
use_timestamp: bool = True,
) -> 'nc.Dataset':
) -> xarray.Dataset:
"""stdmet"""
try:
reqs = StdmetRequest.build_request(station_id=station_id,
Expand All @@ -146,20 +146,20 @@ def swden(
start_time: datetime = datetime.now() - timedelta(days=30),
end_time: datetime = datetime.now(),
use_timestamp: bool = True,
) -> 'nc.Dataset':
) -> xarray.Dataset:
"""swden"""
try:
reqs = SwdenRequest.build_request(station_id=station_id,
start_time=start_time,
end_time=end_time)
start_time=start_time,
end_time=end_time)
except Exception as e:
raise RequestException('Failed to build request.') from e
try:
resps = handler.handle_requests(station_id=station_id, reqs=reqs)
except Exception as e:
raise ResponseException('Failed to execute requests.') from e
return SwdenParser.nc_from_responses(responses=resps,
use_timestamp=use_timestamp)
use_timestamp=use_timestamp)

@classmethod
def wlevel(
Expand All @@ -169,17 +169,17 @@ def wlevel(
start_time: datetime = datetime.now() - timedelta(days=30),
end_time: datetime = datetime.now(),
use_timestamp: bool = True,
) -> 'nc.Dataset':
) -> xarray.Dataset:
"""wlevel"""
try:
reqs = WlevelRequest.build_request(station_id=station_id,
start_time=start_time,
end_time=end_time)
start_time=start_time,
end_time=end_time)
except Exception as e:
raise RequestException('Failed to build request.') from e
try:
resps = handler.handle_requests(station_id=station_id, reqs=reqs)
except Exception as e:
raise ResponseException('Failed to execute requests.') from e
return WlevelParser.nc_from_responses(responses=resps,
use_timestamp=use_timestamp)
use_timestamp=use_timestamp)
5 changes: 3 additions & 2 deletions ndbc_api/api/parsers/http/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def df_from_responses(cls,
if use_timestamp:
try:
df = df.reset_index().drop_duplicates(subset='timestamp',
keep='first')
keep='first')
df = df.set_index('timestamp').sort_index()
except KeyError as e:
raise ParserException from e
Expand All @@ -43,7 +43,8 @@ def _read_response(cls, response: dict,
if not data:
return pd.DataFrame()
# check that parsed names match parsed values or revert
if len([v.strip() for v in data[0].strip('\n').split(' ') if v]) != len(names):
if len([v.strip() for v in data[0].strip('\n').split(' ') if v
]) != len(names):
names = cls.REVERT_COL_NAMES
if '(' in data[0]:
data = cls._clean_data(data)
Expand Down
5 changes: 3 additions & 2 deletions ndbc_api/api/parsers/http/_xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ def root_from_response(cls, response: dict) -> ET.ElementTree:

try:
root = ET.fromstring(body)
return ET.ElementTree(root)
return ET.ElementTree(root)
except Exception as e:
raise ParserException("failed to obtain XML root from response body") from e
raise ParserException(
"failed to obtain XML root from response body") from e
46 changes: 31 additions & 15 deletions ndbc_api/api/parsers/http/active_stations.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@ class ActiveStationsParser(XMLParser):
"""

@classmethod
def df_from_response(cls, response: dict, use_timestamp: bool = False) -> pd.DataFrame:
def df_from_response(cls,
response: dict,
use_timestamp: bool = False) -> pd.DataFrame:
"""
Reads the response body and parses it into a DataFrame.
Expand All @@ -27,24 +29,38 @@ def df_from_response(cls, response: dict, use_timestamp: bool = False) -> pd.Dat
station_data = []
for station in root.findall('station'):
station_info = {
'Station': station.get('id'),
'Lat': float(station.get('lat')),
'Lon': float(station.get('lon')),
'Elevation': float(station.get('elev')) if station.get('elev') else pd.NA,
'Name': station.get('name'),
'Owner': station.get('owner'),
'Program': station.get('pgm'),
'Type': station.get('type'),
'Includes Meteorology': station.get('met') == 'y',
'Includes Currents': station.get('currents') == 'y',
'Includes Water Quality': station.get('waterquality') == 'y',
'DART Program': station.get('dart') == 'y'
'Station':
station.get('id'),
'Lat':
float(station.get('lat')),
'Lon':
float(station.get('lon')),
'Elevation':
float(station.get('elev'))
if station.get('elev') else pd.NA,
'Name':
station.get('name'),
'Owner':
station.get('owner'),
'Program':
station.get('pgm'),
'Type':
station.get('type'),
'Includes Meteorology':
station.get('met') == 'y',
'Includes Currents':
station.get('currents') == 'y',
'Includes Water Quality':
station.get('waterquality') == 'y',
'DART Program':
station.get('dart') == 'y'
}
station_data.append(station_info)

df = pd.DataFrame(station_data) # Create DataFrame from the extracted data
df = pd.DataFrame(
station_data) # Create DataFrame from the extracted data

except ET.ParseError as e:
raise ParserException(f"Error parsing XML data: {e}") from e

return df
return df
51 changes: 34 additions & 17 deletions ndbc_api/api/parsers/http/historical_stations.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,11 @@ class HistoricalStationsParser(XMLParser):
"""
Parser for active station information from XML data.
"""

@classmethod
def df_from_response(cls, response: dict, use_timestamp: bool = False) -> pd.DataFrame:
def df_from_response(cls,
response: dict,
use_timestamp: bool = False) -> pd.DataFrame:
"""
Reads the response body and parses it into a DataFrame.
Expand All @@ -31,22 +33,37 @@ def df_from_response(cls, response: dict, use_timestamp: bool = False) -> pd.Dat
station_owner = station.get('owner')
station_program = station.get('pgm')
station_type = station.get('type')

for history in station.findall('history'):
station_info = {
'Station': station_id,
'Lat': float(history.get('lat')),
'Lon': float(history.get('lng')),
'Elevation': float(history.get('elev')) if history.get('elev') else pd.NA,
'Name': station_name,
'Owner': station_owner,
'Program': station_program,
'Type': station_type,
'Includes Meteorology': history.get('met') == 'y',
'Hull Type': history.get('hull'),
'Anemometer Height': float(history.get('anemom_height')) if history.get('anemom_height') else pd.NA,
'Start Date': history.get('start'),
'End Date': history.get('stop'),
'Station':
station_id,
'Lat':
float(history.get('lat')),
'Lon':
float(history.get('lng')),
'Elevation':
float(history.get('elev'))
if history.get('elev') else pd.NA,
'Name':
station_name,
'Owner':
station_owner,
'Program':
station_program,
'Type':
station_type,
'Includes Meteorology':
history.get('met') == 'y',
'Hull Type':
history.get('hull'),
'Anemometer Height':
float(history.get('anemom_height'))
if history.get('anemom_height') else pd.NA,
'Start Date':
history.get('start'),
'End Date':
history.get('stop'),
}
station_data.append(station_info)

Expand All @@ -55,4 +72,4 @@ def df_from_response(cls, response: dict, use_timestamp: bool = False) -> pd.Dat
except ET.ParseError as e:
raise ParserException(f"Error parsing XML data: {e}") from e

return df
return df
Loading

0 comments on commit 249aa21

Please sign in to comment.