Skip to content

Commit

Permalink
Merge pull request #129 from ecmwf-projects/COPDS-1859-overrides
Browse files: browse the repository at this point in the history
more supported override keys
  • Loading branch information
alex75 authored Aug 28, 2024
2 parents: c8e3f94 + 6fe9a48; commit a6d740b
Show file tree
Hide file tree
Showing 4 changed files with 120 additions and 30 deletions.
60 changes: 54 additions & 6 deletions cads_catalogue/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,7 @@ def load_resource_metadata_file(folder_path: str | pathlib.Path) -> dict[str, An
-------
dict: dictionary of metadata collected
"""
metadata = dict()
metadata: dict[str, Any] = dict()
metadata_file_path = os.path.join(folder_path, "metadata.json")
if not os.path.isfile(metadata_file_path):
# some fields are required
Expand Down Expand Up @@ -297,7 +297,9 @@ def load_resource_metadata_file(folder_path: str | pathlib.Path) -> dict[str, An
"ds_responsible_organisation_role"
)
end_date = data.get("end_date")
if end_date != "now":
if end_date == "now":
metadata["end_date"] = None
else:
metadata["end_date"] = end_date
metadata["file_format"] = data.get("file_format")
metadata["format_version"] = data.get("format_version")
Expand Down Expand Up @@ -372,6 +374,8 @@ def parse_override_md(override_path: str | pathlib.Path | None) -> dict[str, Any
dict: dictionary of metadata extracted
"""
ret_value: dict[str, Any] = dict()

# base extraction and validation
if not override_path:
return ret_value
if not os.path.exists(override_path):
Expand All @@ -380,30 +384,74 @@ def parse_override_md(override_path: str | pathlib.Path | None) -> dict[str, Any
logger.warning(f"detected override file {override_path}")
with open(override_path) as fp:
try:
data = yaml.safe_load(fp)
data = yaml.load(fp.read(), Loader=yaml.loader.BaseLoader)
except Exception: # noqa
logger.exception(f"override file {override_path} is not a valid YAML")
return ret_value
if data is None:
logger.warning(f"override file {override_path} is empty")
return ret_value
if not isinstance(data, dict):
logger.error(
f"override file {override_path} has a wrong format and cannot be parsed"
)
return ret_value

# normalization
supported_keys_str = (
"abstract",
"begin_date",
"contactemail",
"disabled_reason",
"doi",
"ds_contactemail",
"ds_responsible_organisation",
"ds_responsible_organisation_role",
"format_version",
"high_priority_terms",
"lineage",
"portal",
"publication_date",
"responsible_organisation",
"responsible_organisation_role",
"responsible_organisation_website",
"title",
"topic",
"unit_measure",
"use_limitation",
)
supported_keys_bool = (
"api_enforce_constraints",
"qa_flag",
"hidden",
)
supported_keys_int = ("popularity",)
supported_keys_floats = ("representative_fraction",)
for dataset_uid in data:
ret_value[dataset_uid] = dict()
dataset_md = data[dataset_uid]
if not dataset_md:
continue
for key, value in dataset_md.items():
if key in ("qa_flag", "disabled_reason", "portal"):
ret_value[dataset_uid][key] = value
elif key == "hidden":
if value == "null":
ret_value[dataset_uid][key] = None
continue
if key in supported_keys_bool:
if isinstance(value, bool):
ret_value[dataset_uid][key] = value # type: ignore
else:
ret_value[dataset_uid][key]: bool = utils.str2bool(value) # type: ignore
elif key in supported_keys_str:
ret_value[dataset_uid][key] = value
elif key in supported_keys_int:
ret_value[dataset_uid][key] = int(value)
elif key in supported_keys_floats:
ret_value[dataset_uid][key] = float(value)
else:
logger.warning(
f"unknown key '{key}' found in override file for {dataset_uid}. It will be ignored"
)
continue
return ret_value


Expand Down
42 changes: 21 additions & 21 deletions tests/data/dumped_resources7.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1486,8 +1486,8 @@
{
"resource_id": 1,
"resource_uid": "reanalysis-era5-land",
"popularity": 500,
"api_enforce_constraints": true,
"popularity": 200,
"api_enforce_constraints": false,
"constraints": "an url",
"form": "an url",
"layout": "an url",
Expand All @@ -1502,43 +1502,43 @@
"bboxS": -89,
"bboxW": 0
},
"begin_date": "1950-01-01",
"begin_date": "1960-11-02",
"end_date": "2023-02-11",
"publication_date": "2019-07-12",
"publication_date": "2022-06-01",
"record_update": "2023-12-11 08:50:52.748454+01:00",
"resource_update": "2023-02-17",
"abstract": "ERA5-Land is a reanalysis dataset providing a consistent view of the evolution of land variables over several decades at an enhanced resolution compared to ERA5. ERA5-Land has been produced by replaying the land component of the ECMWF ERA5 climate reanalysis. Reanalysis combines model data with observations from across the world into a globally complete and consistent dataset using the laws of physics. Reanalysis produces data that goes several decades back in time, providing an accurate description of the climate of the past.",
"abstract": "an abstract",
"citation": null,
"contactemail": "https://support.ecmwf.int",
"contactemail": "an@email",
"description": [],
"disabled_reason": "A reason",
"documentation": [],
"doi": "10.24381/cds.e2161bac",
"ds_contactemail": "https://support.ecmwf.int",
"ds_responsible_organisation": "ECMWF",
"ds_responsible_organisation_role": "publisher",
"doi": "20.24381/cds.22161bac",
"ds_contactemail": "https://support.ecmwf.com",
"ds_responsible_organisation": "ORG1",
"ds_responsible_organisation_role": null,
"file_format": "{grib,netcdf}",
"format_version": null,
"hidden": true,
"high_priority_terms": "reanalysis ERA5 land",
"fts": "'era5':2 'land':3 'reanalysi':1",
"lineage": "EC Copernicus program",
"representative_fraction": 0.25,
"responsible_organisation": "ECMWF",
"responsible_organisation_role": "pointOfContact",
"responsible_organisation_website": "https://www.ecmwf.int/",
"high_priority_terms": "ERA5 reanalysis temperature",
"fts": "'era5':1 'reanalysi':2 'temperatur':3",
"lineage": "Copernicus Atmospheric Monitoring Service",
"representative_fraction": 0.5,
"responsible_organisation": "Org2",
"responsible_organisation_role": "pointOfContact2",
"responsible_organisation_website": "http://a/website.com",
"portal": "c3s2",
"qa_flag": false,
"qos_tags": [
"tag1",
"tag2",
"tag3"
],
"title": "ERA5-Land hourly data from 1950 to present",
"topic": "climatologyMeteorologyAtmosphere",
"title": "a title",
"topic": "a topic",
"type": "dataset",
"unit_measure": "dd",
"use_limitation": "Content accessible through the CDS may only be used under the terms of the licenses attributed to each particular resource.",
"unit_measure": "degree",
"use_limitation": "Content accessible through the CDS",
"variables": [],
"fulltext": null,
"search_field": "'1950':7A 'accur':88B 'across':61B 'back':83B 'climat':52B,92B 'combin':55B 'compar':34B 'complet':67B 'compon':47B 'consist':19B,69B 'data':5A,57B,78B 'dataset':16B,70B 'decad':29B,82B 'descript':89B 'ecmwf':50B 'enhanc':32B 'era5':2A,11B,36B,38B,51B 'era5-land':1A,10B,37B 'evolut':23B 'global':66B 'goe':80B 'hour':4A 'land':3A,12B,25B,39B,46B 'law':73B 'model':56B 'observ':59B 'past':95B 'physic':75B 'present':9A 'produc':42B,77B 'provid':17B,86B 'reanalysi':15B,53B,54B,76B 'replay':44B 'resolut':33B 'sever':28B,81B 'time':85B 'use':71B 'variabl':26B 'view':20B 'world':63B"
Expand Down
21 changes: 21 additions & 0 deletions tests/data/override2.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,27 @@ reanalysis-era5-land:
hidden: True
disabled_reason: A reason
portal: c3s2
abstract: an abstract
begin_date: 1960-11-02
contactemail: an@email
doi: 20.24381/cds.22161bac
ds_contactemail: https://support.ecmwf.com
ds_responsible_organisation: ORG1
ds_responsible_organisation_role: null
format_version: null
high_priority_terms: ERA5 reanalysis temperature
lineage: Copernicus Atmospheric Monitoring Service
popularity: 200
publication_date: 2022-06-01
representative_fraction: 0.5
responsible_organisation: Org2
responsible_organisation_role: pointOfContact2
responsible_organisation_website: http://a/website.com
title: a title
topic: a topic
unit_measure: degree
use_limitation: Content accessible through the CDS
api_enforce_constraints: False
reanalysis-era5-pressure-levels:
reanalysis-era5-single-levels:
hidden: False
Expand Down
27 changes: 24 additions & 3 deletions tests/test_40_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,13 +39,34 @@ def test_parse_override_md() -> None:
# consistent override info
overrides_path = os.path.join(TESTDATA_PATH, "override2.yaml")
expected = {
"reanalysis-era5-pressure-levels": {},
"reanalysis-era5-land": {
"disabled_reason": "A reason",
"qa_flag": False,
"hidden": True,
"disabled_reason": "A reason",
"portal": "c3s2",
"qa_flag": False,
"abstract": "an abstract",
"begin_date": "1960-11-02",
"contactemail": "an@email",
"doi": "20.24381/cds.22161bac",
"ds_contactemail": "https://support.ecmwf.com",
"ds_responsible_organisation": "ORG1",
"ds_responsible_organisation_role": None,
"format_version": None,
"high_priority_terms": "ERA5 reanalysis temperature",
"lineage": "Copernicus Atmospheric Monitoring Service",
"popularity": 200,
"publication_date": "2022-06-01",
"representative_fraction": 0.5,
"responsible_organisation": "Org2",
"responsible_organisation_role": "pointOfContact2",
"responsible_organisation_website": "http://a/website.com",
"title": "a title",
"topic": "a topic",
"unit_measure": "degree",
"use_limitation": "Content accessible through the CDS",
"api_enforce_constraints": False,
},
"reanalysis-era5-pressure-levels": {},
"reanalysis-era5-single-levels": {
"hidden": False,
"portal": "ads",
Expand Down

0 comments on commit a6d740b

Please sign in to comment.