Skip to content

Commit

Permalink
extended keys parsed by override yaml
Browse files Browse the repository at this point in the history
  • Loading branch information
alex75 committed Aug 23, 2024
1 parent b513fee commit 54f854e
Show file tree
Hide file tree
Showing 4 changed files with 90 additions and 32 deletions.
40 changes: 30 additions & 10 deletions cads_catalogue/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -382,34 +382,52 @@ def parse_override_md(override_path: str | pathlib.Path | None) -> dict[str, Any
logger.warning(f"detected override file {override_path}")
with open(override_path) as fp:
try:
data = yaml.safe_load(fp)
data = yaml.load(fp.read(), Loader=yaml.loader.BaseLoader)
except Exception: # noqa
logger.exception(f"override file {override_path} is not a valid YAML")
return ret_value
if data is None:
logger.warning(f"override file {override_path} is empty")
return ret_value
if not isinstance(data, dict):
logger.error(f"override file {override_path} has a wrong format and cannot be parsed")
logger.error(
f"override file {override_path} has a wrong format and cannot be parsed"
)
return ret_value

# normalization
supported_keys_str = (
"abstract", "begin_date", "contactemail", "disabled_reason", "doi", "ds_contactemail",
"ds_responsible_organisation", "ds_responsible_organisation_role", "file_format",
"format_version", "high_priority_terms", "lineage", "portal", "publication_date",
"responsible_organisation", "responsible_organisation_role", "responsible_organisation_website",
"title", "topic", "unit_measure", "use_limitation",

"abstract",
"begin_date",
"contactemail",
"disabled_reason",
"doi",
"ds_contactemail",
"ds_responsible_organisation",
"ds_responsible_organisation_role",
"format_version",
"high_priority_terms",
"lineage",
"portal",
"publication_date",
"responsible_organisation",
"responsible_organisation_role",
"responsible_organisation_website",
"title",
"topic",
"unit_measure",
"use_limitation",
)
supported_keys_bool = (
"api_enforce_constraints", "qa_flag", "hidden",
"api_enforce_constraints",
"qa_flag",
"hidden",
)
# integers = ("popularity",)
# floats = ("representative_fraction",)
# jsons = ("description", "geo_extent",) DO NOT WANT
# arrays = ("qos_tags", "related_resources_keywords",) DO NOT WANT
# to_be_managed_apart = ("end_date", "keywords", "licence_uids", "update_date", "type")
# to_be_managed_apart = ("end_date", "keywords", "licence_uids", "update_date", "type", "file_format",)
for dataset_uid in data:
ret_value[dataset_uid] = dict()
dataset_md = data[dataset_uid]
Expand All @@ -422,6 +440,8 @@ def parse_override_md(override_path: str | pathlib.Path | None) -> dict[str, Any
else:
ret_value[dataset_uid][key]: bool = utils.str2bool(value) # type: ignore
elif key in supported_keys_str:
if value == "null":
value = None
ret_value[dataset_uid][key] = value
else:
logger.warning(
Expand Down
38 changes: 19 additions & 19 deletions tests/data/dumped_resources7.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1487,7 +1487,7 @@
"resource_id": 1,
"resource_uid": "reanalysis-era5-land",
"popularity": 500,
"api_enforce_constraints": true,
"api_enforce_constraints": false,
"constraints": "an url",
"form": "an url",
"layout": "an url",
Expand All @@ -1502,43 +1502,43 @@
"bboxS": -89,
"bboxW": 0
},
"begin_date": "1950-01-01",
"begin_date": "1960-11-02",
"end_date": "2023-02-11",
"publication_date": "2019-07-12",
"publication_date": "2022-06-01",
"record_update": "2023-12-11 08:50:52.748454+01:00",
"resource_update": "2023-02-17",
"abstract": "ERA5-Land is a reanalysis dataset providing a consistent view of the evolution of land variables over several decades at an enhanced resolution compared to ERA5. ERA5-Land has been produced by replaying the land component of the ECMWF ERA5 climate reanalysis. Reanalysis combines model data with observations from across the world into a globally complete and consistent dataset using the laws of physics. Reanalysis produces data that goes several decades back in time, providing an accurate description of the climate of the past.",
"abstract": "an abstract",
"citation": null,
"contactemail": "https://support.ecmwf.int",
"contactemail": "an@email",
"description": [],
"disabled_reason": "A reason",
"documentation": [],
"doi": "10.24381/cds.e2161bac",
"ds_contactemail": "https://support.ecmwf.int",
"ds_responsible_organisation": "ECMWF",
"ds_responsible_organisation_role": "publisher",
"doi": "20.24381/cds.22161bac",
"ds_contactemail": "https://support.ecmwf.com",
"ds_responsible_organisation": "ORG1",
"ds_responsible_organisation_role": null,
"file_format": "{grib,netcdf}",
"format_version": null,
"hidden": true,
"high_priority_terms": "reanalysis ERA5 land",
"fts": "'era5':2 'land':3 'reanalysi':1",
"lineage": "EC Copernicus program",
"high_priority_terms": "ERA5 reanalysis temperature",
"fts": "'era5':1 'reanalysi':2 'temperatur':3",
"lineage": "Copernicus Atmospheric Monitoring Service",
"representative_fraction": 0.25,
"responsible_organisation": "ECMWF",
"responsible_organisation_role": "pointOfContact",
"responsible_organisation_website": "https://www.ecmwf.int/",
"responsible_organisation": "Org2",
"responsible_organisation_role": "pointOfContact2",
"responsible_organisation_website": "http://a/website.com",
"portal": "c3s2",
"qa_flag": false,
"qos_tags": [
"tag1",
"tag2",
"tag3"
],
"title": "ERA5-Land hourly data from 1950 to present",
"topic": "climatologyMeteorologyAtmosphere",
"title": "a title",
"topic": "a topic",
"type": "dataset",
"unit_measure": "dd",
"use_limitation": "Content accessible through the CDS may only be used under the terms of the licenses attributed to each particular resource.",
"unit_measure": "degree",
"use_limitation": "Content accessible through the CDS",
"variables": [],
"fulltext": null,
"search_field": "'1950':7A 'accur':88B 'across':61B 'back':83B 'climat':52B,92B 'combin':55B 'compar':34B 'complet':67B 'compon':47B 'consist':19B,69B 'data':5A,57B,78B 'dataset':16B,70B 'decad':29B,82B 'descript':89B 'ecmwf':50B 'enhanc':32B 'era5':2A,11B,36B,38B,51B 'era5-land':1A,10B,37B 'evolut':23B 'global':66B 'goe':80B 'hour':4A 'land':3A,12B,25B,39B,46B 'law':73B 'model':56B 'observ':59B 'past':95B 'physic':75B 'present':9A 'produc':42B,77B 'provid':17B,86B 'reanalysi':15B,53B,54B,76B 'replay':44B 'resolut':33B 'sever':28B,81B 'time':85B 'use':71B 'variabl':26B 'view':20B 'world':63B"
Expand Down
19 changes: 19 additions & 0 deletions tests/data/override2.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,25 @@ reanalysis-era5-land:
hidden: True
disabled_reason: A reason
portal: c3s2
abstract: an abstract
begin_date: 1960-11-02
contactemail: an@email
doi: 20.24381/cds.22161bac
ds_contactemail: https://support.ecmwf.com
ds_responsible_organisation: ORG1
ds_responsible_organisation_role: null
format_version: null
high_priority_terms: ERA5 reanalysis temperature
lineage: Copernicus Atmospheric Monitoring Service
publication_date: 2022-06-01
responsible_organisation: Org2
responsible_organisation_role: pointOfContact2
responsible_organisation_website: http://a/website.com
title: a title
topic: a topic
unit_measure: degree
use_limitation: Content accessible through the CDS
api_enforce_constraints: False
reanalysis-era5-pressure-levels:
reanalysis-era5-single-levels:
hidden: False
Expand Down
25 changes: 22 additions & 3 deletions tests/test_40_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,13 +39,32 @@ def test_parse_override_md() -> None:
# consistent override info
overrides_path = os.path.join(TESTDATA_PATH, "override2.yaml")
expected = {
"reanalysis-era5-pressure-levels": {},
"reanalysis-era5-land": {
"disabled_reason": "A reason",
"qa_flag": False,
"hidden": True,
"disabled_reason": "A reason",
"portal": "c3s2",
"qa_flag": False,
"abstract": "an abstract",
"begin_date": "1960-11-02",
"contactemail": "an@email",
"doi": "20.24381/cds.22161bac",
"ds_contactemail": "https://support.ecmwf.com",
"ds_responsible_organisation": "ORG1",
"ds_responsible_organisation_role": None,
"format_version": None,
"high_priority_terms": "ERA5 reanalysis temperature",
"lineage": "Copernicus Atmospheric Monitoring Service",
"publication_date": "2022-06-01",
"responsible_organisation": "Org2",
"responsible_organisation_role": "pointOfContact2",
"responsible_organisation_website": "http://a/website.com",
"title": "a title",
"topic": "a topic",
"unit_measure": "degree",
"use_limitation": "Content accessible through the CDS",
"api_enforce_constraints": False,
},
"reanalysis-era5-pressure-levels": {},
"reanalysis-era5-single-levels": {
"hidden": False,
"portal": "ads",
Expand Down

0 comments on commit 54f854e

Please sign in to comment.