diff --git a/kgforge/core/archetypes/read_only_store.py b/kgforge/core/archetypes/read_only_store.py index 3f0560a1..a28b2acf 100644 --- a/kgforge/core/archetypes/read_only_store.py +++ b/kgforge/core/archetypes/read_only_store.py @@ -26,7 +26,7 @@ ) from kgforge.core.commons.execution import not_supported from kgforge.core.commons.sparql_query_builder import SPARQLQueryBuilder -from kgforge.core.reshaping import collect_values +from kgforge.core.reshaping import collect_values, collect_values_jp from kgforge.core.wrappings import Filter from kgforge.core.wrappings.dict import DictWrapper @@ -107,9 +107,13 @@ def download( # path: DirPath. urls = [] store_metadata = [] + constraint_dict = None + if content_type: + constraint_dict = {'encodingFormat': content_type} to_download = [data] if isinstance(data, Resource) else data for d in to_download: - collected_values = collect_values(d, follow, DownloadingError) + # collected_values = collect_values(d, follow, DownloadingError) + collected_values = collect_values_jp(d, follow, DownloadingError, constraint_dict) urls.extend(collected_values) store_metadata.extend( [d._store_metadata for _ in range(len(collected_values))] diff --git a/kgforge/core/reshaping.py b/kgforge/core/reshaping.py index 8744bb1d..7fc1d596 100644 --- a/kgforge/core/reshaping.py +++ b/kgforge/core/reshaping.py @@ -12,7 +12,8 @@ # You should have received a copy of the GNU Lesser General Public License # along with Blue Brain Nexus Forge. If not, see . 
-from typing import Dict, Iterator, List, Union, Type
+from typing import Dict, Iterator, List, Union, Type, Optional
+import jsonpath_ng as jp
 
 from kgforge.core.resource import Resource
 from kgforge.core.commons.attributes import repr_class
@@ -106,3 +107,46 @@ def _collect(things: List) -> Iterator[str]:
         raise exception(
             f"An error occur when collecting values for path to follow '{follow}': {str(e)}"
         ) from e
+
+
+def collect_values_jp(data: Resource, follow: str,
+                      exception: Type[Exception] = Exception,
+                      constraint_dict: Optional[Dict] = None) -> List[str]:
+    """Collect the values reached by a dotted path, using a JSONPath query.
+
+    The path ``a.b.c`` is translated to the JSONPath ``$.a[*].b[*].c`` and
+    evaluated against the JSON form of ``data`` produced by ``as_json``.
+
+    :param data: the resource to collect values from
+    :param follow: the dot-separated property path to follow
+    :param exception: the exception type raised on any failure
+    :param constraint_dict: an optional single ``{key: value}`` pair; a value
+        is kept only when the object holding it has ``key`` equal to ``value``
+    :return: the collected values, in the order the query reports them
+    :raises exception: when the path matches nothing, when more than one
+        constraint is given, or when any other error occurs while collecting
+    """
+    try:
+        pattern = "$." + "[*].".join(follow.split('.'))
+        query = jp.parse(pattern)
+        document = as_json(data, False, False, None, None, None)
+        matches = query.find(document)
+        if not matches:
+            raise exception("Path not found")
+        if not constraint_dict:
+            return [match.value for match in matches]
+        if len(constraint_dict) != 1:
+            raise NotImplementedError("Only one constraint can be impossed at the moment")
+        [(key, expected)] = constraint_dict.items()
+        # Objects lacking the constrained key are filtered out rather than
+        # aborting the whole collection with a KeyError.
+        return [
+            match.value for match in matches
+            if isinstance(match.context.value, dict)
+            and key in match.context.value
+            and match.context.value[key] == expected
+        ]
+    except Exception as e:
+        raise exception(
+            f"An error occur when collecting values for path to follow '{follow}': {str(e)}"
+        ) from e
diff --git a/kgforge/specializations/stores/bluebrain_nexus.py b/kgforge/specializations/stores/bluebrain_nexus.py
index 4a9f603c..c9bb127d 100644
--- a/kgforge/specializations/stores/bluebrain_nexus.py
+++ b/kgforge/specializations/stores/bluebrain_nexus.py
@@ -601,8 +601,6 @@ def _download_one(
         params_download = copy.deepcopy(self.service.params.get("download", {}))
         headers = (
             self.service.headers_download
-            if not content_type
-            else update_dict(self.service.headers_download, {"Accept": content_type})
         )
 
         response = requests.get(
diff --git a/setup.py b/setup.py
index 7c2a8b66..288db0db 100644
--- a/setup.py
+++ b/setup.py
@@ -51,7 +51,8 @@
         "owlrl>=5.2.3",
         "elasticsearch_dsl==7.4.0",
         "requests==2.32.0",
-
"typing-extensions"
+        "typing-extensions",
+        "jsonpath-ng"
     ],
     extras_require={
         "dev": [
diff --git a/tests/core/test_reshaping.py b/tests/core/test_reshaping.py
index 4a000886..d0078a4e 100644
--- a/tests/core/test_reshaping.py
+++ b/tests/core/test_reshaping.py
@@ -16,7 +16,7 @@
 import pytest
 from kgforge.core.resource import Resource
 from kgforge.core.forge import KnowledgeGraphForge
-from kgforge.core.reshaping import collect_values, Reshaper
+from kgforge.core.reshaping import collect_values, collect_values_jp, Reshaper
 
 
 def test_collect_values():
@@ -43,6 +43,48 @@ def test_collect_values():
         collect_values(None, "hasPart.url",ValueError)
 
 
+def test_collect_values_jasonpath():
+    # A property held directly by the resource.
+    simple = Resource(type="Experiment", url="file.gz")
+    r = collect_values_jp(simple, "url")
+    assert simple.url in r, "url should be in the list"
+    # A property reached through nested resources.
+    deep = Resource(type="Experiment", level1=Resource(level2=Resource(url="file.gz")))
+    r = collect_values_jp(deep, "level1.level2.url")
+    assert deep.level1.level2.url in r, "url should be in the list"
+    # A property spread over a list of parts, present on only some of them.
+    files = [Resource(type="Experiment", url=f"file{i}") for i in range(3)]
+    files.append(Resource(type="Experiment", contentUrl="file3"))
+    data_set = Resource(type="Dataset", hasPart=files)
+    r = collect_values_jp(data_set, "hasPart.contentUrl")
+    assert ["file3"] == r, "one element should be in the list"
+    r = collect_values_jp(data_set, "hasPart.url")
+    assert ["file0", "file1", "file2"] == r, "three elements should be in the list"
+    # A single constraint keeps only the parts whose property has the given value.
+    files = [
+        Resource(type="Experiment", url=f"file{i}", encodingFormat=f"application/{ext}")
+        for i, ext in enumerate(['csv', 'swc'])
+    ]
+    data_set = Resource(type="Dataset", hasPart=files)
+    r = collect_values_jp(data_set, "hasPart.url",
+                          constraint_dict={'encodingFormat': 'application/swc'})
+    assert ["file1"] == r, "only the file with encodingFormat `application/swc` must be returned"
+    r = collect_values_jp(data_set, "hasPart.url",
+                          constraint_dict={'encodingFormat': 'application/json'})
+    assert [] == r, "no file should be returned when no part satisfies the constraint"
+    # More than one constraint is rejected.
+    with pytest.raises(Exception):
+        collect_values_jp(data_set, "hasPart.url",
+                          constraint_dict={'encodingFormat': 'application/swc',
+                                           'contentUrl': 'something'})
+    # An unknown path is rejected.
+    with pytest.raises(Exception):
+        collect_values_jp(data_set, "fake.path")
+    # The requested exception type is raised on failure.
+    with pytest.raises(ValueError):
+        collect_values_jp(None, "hasPart.url", ValueError)
+
+
 def test_reshape(config):
     forge = KnowledgeGraphForge(config)
     reshaper = Reshaper(versioned_id_template="{x.id}?_version={x._store_metadata.version}")