diff --git a/.gitignore b/.gitignore index 0cf36ee..eb3f315 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ .DS_STORE -output/** \ No newline at end of file +output/** +.idea/** +**/__pycache__/** diff --git a/README.md b/README.md index 7200690..473b9d0 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,25 @@ Docker should be used to generate the s2 coverings, which can be done with ```bash docker compose up -d docker exec -it s2 bash -python3 src/s2.py +python3 src/s2.py --level +``` + +Cell integration can be disabled by adding the `--ni` flag to the command, + +```bash +python3 src/s2.py --level --ni +``` + +A complete list of options can be found by running +```bash +python3 src/s2.py --help +options: + -h, --help show this help message and exit + --level LEVEL Level at which the s2 cells are generated for + --format [FORMAT] The format to write the RDF in. Options are xml, n3, turtle, nt, pretty-xml, trix, trig, nquads, json-ld, hext + --ni [NI] When used, s2 integration is disabled + --compressed [COMPRESSED] + use the S2 hierarchy to write a compressed collection of relations at various levels ``` Results will be written to the `output/` folder. The results can then be loaded into your graph database and queried. For more information on querying with the KnowWhereGraph ontology, visit the [docs site](https://knowwheregraph.github.io/#/). @@ -28,6 +46,25 @@ Results will be written to the `output/` folder. The results can then be loaded Due to the steps involved with installing the s2 libray bindings and different approaches needed for each architecture - running outside of Docker isn't supported. If you're inspired, the Dockerfile has all necessary steps to install the requirements to run the tool. -## Contributing +## Development + +### Linting + +This repository adheres to the Black tool's default settings. It also makes use of isort for import sorting. Run these before each pull request. + +**Black and isort are competing for formatting the imports differently**. Run isort _after_ running black. + +```commandline +black . +isort . +``` + +### Tests + +Unit tests should be run before each pull request. To run them, + +`python -m pytest .` + +### Contributing Contributions are welcome! Before working on any new features, please file an issue to make sure the work is in scope with this project. New code should be submitted as a pull request to the `develop` branch. \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 41a7c5f..a24eb32 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,4 +5,4 @@ rasterstats==0.19.0 rdflib==7.0.0 s2sphere==0.2.5 Shapely==2.0.5 -SPARQLWrapper==2.0.0 \ No newline at end of file +SPARQLWrapper==2.0.0 diff --git a/src/lib/__pycache__/__init__.cpython-312.pyc b/src/lib/__pycache__/__init__.cpython-312.pyc deleted file mode 100644 index 61366f1..0000000 Binary files a/src/lib/__pycache__/__init__.cpython-312.pyc and /dev/null differ diff --git a/src/lib/__pycache__/geometric_feature.cpython-312.pyc b/src/lib/__pycache__/geometric_feature.cpython-312.pyc deleted file mode 100644 index 96c9fb7..0000000 Binary files a/src/lib/__pycache__/geometric_feature.cpython-312.pyc and /dev/null differ diff --git a/src/lib/__pycache__/kwg_ont.cpython-312.pyc b/src/lib/__pycache__/kwg_ont.cpython-312.pyc deleted file mode 100644 index 62258fd..0000000 Binary files a/src/lib/__pycache__/kwg_ont.cpython-312.pyc and /dev/null differ diff --git a/src/lib/config.py b/src/lib/config.py new file mode 100644 index 0000000..4f8d942 --- /dev/null +++ b/src/lib/config.py @@ -0,0 +1,10 @@ +import os + + +class Config: + max_level = os.getenv("MAXLEVEL", 13) + min_level = os.getenv("MIN_LEVEL", 13) + tolerance = os.getenv("TOLERANCE", 1e-2) + + +config = Config() diff --git a/src/lib/geometric_feature.py b/src/lib/geometric_feature.py index 320112b..3902f26 100644 --- a/src/lib/geometric_feature.py +++ b/src/lib/geometric_feature.py @@ -1,14 +1,311 @@ +import os +from functools import partial +from pathlib import Path +from typing import Generator + +from rdflib import Graph, URIRef from rdflib.query import ResultRow +from s2geometry import (S2Cell, S2CellId, S2LatLng, S2Loop, S2Point, S2Polygon, + S2Polyline, S2RegionCoverer) +from shapely import (LinearRing, LineString, MultiLineString, MultiPolygon, + Point, Polygon, buffer) +from shapely.geometry.polygon import signed_area from shapely.wkt import loads +from .config import config +from .kwg_ont import KWGOnt, generate_cell_iri + class GeometricFeature: """ Represents an abstract geometric feature with an IRI """ + def __init__(self, query_solution: ResultRow) -> None: - self.iri = query_solution['feature_iri'] - self.geometry = loads(query_solution['wkt']) + self.iri = query_solution["feature_iri"] + self.geometry = loads(query_solution["wkt"]) def geometry(self): return self.geometry + + def yield_overlapping_ids( + self, geometry: Polygon | MultiPolygon, tolerance: float = config.tolerance + ) -> Generator[S2CellId, None, None]: + """yields the cell IDs of those level 13 cells that overlap + the given 2D geometry to a certain value of tolerance + + Args: + geometry (Polygon | MultiPolygon): a 2-dimensional geometry + tolerance (float, optional): maximal segment width. Defaults to 1e-2. + + Yields: + Generator[S2CellId, None, None]: a generator through the overlapping IDs + """ + homogeneous_coverer = S2RegionCoverer() + homogeneous_coverer.set_min_level(config.min_level) + homogeneous_coverer.set_max_level(config.max_level) + for boundary in self.boundaries(geometry): + segmented_boundary = boundary.segmentize(tolerance) + buff = buffer(segmented_boundary, tolerance / 100, 2) + for cell_id in self.covering( + buff, coverer=homogeneous_coverer, tolerance=tolerance + ): + yield cell_id + + def yield_crossing_ids( + self, + line_obj: LineString | MultiLineString, + tolerance: float = config.tolerance, + ) -> Generator[S2CellId, None, None]: + """Yields those Cell IDs in a level 13 covering of a + small (multi)polygon buffer around the given (multi)line string + to a certain degree of tolerance. The buffer is constructed + so that its border is at a distance of tolerance/100 from the + (multi)line string. + + Args: + line_obj (LineString | MultiLineString): a 1D geometry + tolerance (float, optional): maximal segment width for the buffer. Defaults to 1e-2. + + Yields: + Generator[S2CellId, None, None]: a generator of crossing cell IDs + """ + homogeneous_coverer = S2RegionCoverer() + homogeneous_coverer.set_min_level(config.min) + homogeneous_coverer.set_max_level(config.max_level) + buff = buffer(line_obj, tolerance / 100, 2) + for cell_id in self.covering(buff, homogeneous_coverer, tolerance=tolerance): + yield cell_id + + def yield_s2_relations( + self, coverer: S2RegionCoverer, tolerance: float = config.tolerance + ) -> Generator[tuple[URIRef, URIRef, URIRef], None, None]: + if isinstance(self.geometry, (Polygon, MultiPolygon)): + predicate = KWGOnt.sfContains + inverse = KWGOnt.sfWithin + for cell_id in self.filling(self.geometry, coverer, tolerance): + yield self.iri, predicate, generate_cell_iri(cell_id) + yield generate_cell_iri(cell_id), inverse, self.iri + + predicate = KWGOnt.sfOverlaps + for cell_id in self.yield_overlapping_ids(self.geometry, tolerance): + yield self.iri, predicate, generate_cell_iri(cell_id) + yield generate_cell_iri(cell_id), predicate, self.iri + + elif isinstance(self.geometry, (LineString, MultiLineString)): + predicate = KWGOnt.sfCrosses + for cell_id in self.yield_crossing_ids(self.geometry, tolerance): + yield self.iri, predicate, generate_cell_iri(cell_id) + yield generate_cell_iri(cell_id), predicate, self.iri + + elif isinstance(self.geometry, Point): + s2_point = self.s2_from_coords(self.geometry) + cell_id = S2CellId(s2_point).parent(config.max_level) + yield self.iri, KWGOnt.sfWithin, generate_cell_iri(cell_id) + yield generate_cell_iri(cell_id), KWGOnt.sfContains, self.iri + + else: + geom_type = self.geometry.geom_type + msg = f"Geometry of type {geom_type} not supported for s2 relations" + raise ValueError(msg) + + def s2_graph( + self, coverer: S2RegionCoverer, tolerance: float = config.tolerance + ) -> Graph: + graph = Graph() + for triple in self.yield_s2_relations(coverer, tolerance): + graph.add(triple) + return graph + + def covering( + self, + geometry: Polygon | MultiPolygon, + coverer: S2RegionCoverer, + tolerance: float = config.tolerance, + ) -> list[S2CellId]: + """Returns a list of s2 cell IDs appearing in a homogeneous + covering of a 2-dimensional geometry by level 13 cells to a + certain value of tolerance + Args: + geometry: + coverer: + tolerance (float, optional): maximal segment width. Defaults to 1e-2. + Returns: + list[S2CellId]: a list of cell IDs in a level 13 covering + """ + s2_obj = self.s2_approximation(geometry, tolerance) + covering = coverer.GetCovering(s2_obj) + return covering + + def s2_from_coords( + self, + geometry: Point | LinearRing | LineString | Polygon | MultiPolygon, + ) -> S2Point | S2Loop | S2Polyline | S2Polygon: + """ + Returns a corresponding S2 object whose vertices + are obtained from the coordinates of the given geometry. + Note that the returned object lives on the sphere and any + linear edges are replaced by geodesic curves. As such, the resulting + S2 object is an APPROXIMATION of the given geometry living on + the sphere, and the approximation is worse when vertices are farther apart + (because geodesics will curve more in that case). + Args: + geometry (tuple | Point | LinearRing | LineString | Polygon | MultiPolygon): a geometry + Returns: + S2Point | S2Loop | S2Polyline | S2Polygon: an S2 geometric object + """ + if isinstance(geometry, tuple): + return S2LatLng.FromDegrees(*geometry[::-1]).ToPoint() + elif isinstance(geometry, Point): + return S2LatLng.FromDegrees(*geometry.coords[0][::-1]).ToPoint() + elif isinstance(geometry, LinearRing): + s2_loop = S2Loop() + s2_loop.Init(list(map(self.s2_from_coords, list(geometry.coords)[:-1]))) + return s2_loop + elif isinstance(geometry, LineString): + polyline = S2Polyline() + polyline.InitFromS2Points(list(map(self.s2_from_coords, geometry.coords))) + return polyline + elif isinstance(geometry, (Polygon, MultiPolygon)): + loops = map( + self.s2_from_coords, map(self.orient, self.boundaries(geometry)) + ) + s2_polygon = S2Polygon() + s2_polygon.InitNested(list(loops)) + return s2_polygon + + def s2_approximation( + self, + geometry: S2Point | LinearRing | LineString | Polygon | MultiPolygon, + tolerance: float = config.tolerance, + ) -> S2Point | S2Loop | S2Polyline | S2Polygon: + """ + Returns a corresponding S2 object that approximates the given + Shapely object to a certain value of tolerance. Precisely, a given + Shapely geometry is first segmentized, meaning extra vertices are added + so that the width between any two adjacent vertices never exceeds the + tolerance, and then these new coordinates are used as the vertices + of an S2 object (see s2_from_coords). + Args: + geometry (LinearRing | LineString | Polygon | MultiPolygon): a Shapely geometry + tolerance (float, optional): a maximal segment width Defaults to 1e-2. + Returns: + S2Point | S2Loop | S2Polyline | S2Polygon: an S2 geometry object + """ + return self.s2_from_coords(geometry.segmentize(tolerance)) + + def orient( + self, geometry: LinearRing | Polygon | MultiPolygon, sign: float = 1.0 + ) -> LinearRing | Polygon | MultiPolygon: + """ + Returns a copy of the geometry with the specified orientation + Args: + geometry (LinearRing | Polygon | MultiPolygon): a ring or (multi)polygon + sign (float, optional): an orientation. Defaults to 1.0. + Returns: + LinearRing | Polygon | MultiPolygon: a copy of the geometry with specified orientation + """ + sign = float(sign) + if isinstance(geometry, LinearRing): + return geometry if signed_area(geometry) / sign >= 0 else geometry.reverse() + elif isinstance(geometry, Polygon): + exterior = self.orient(geometry.exterior, sign) + oppositely_orient = partial(self.orient, sign=-sign) + interiors = list(map(oppositely_orient, geometry.interiors)) + return Polygon(exterior, interiors) + elif isinstance(geometry, MultiPolygon): + orient_with_sign = partial(self.orient, sign=sign) + return MultiPolygon(list(map(orient_with_sign, geometry.geoms))) + + @staticmethod + def boundaries( + geometry: Polygon | MultiPolygon, + ) -> Generator[LinearRing, None, None]: + """ + Yields the boundary rings of a geometry with boundaries + Args: + geometry (Polygon | MultiPolygon): a shapely geometry with boundaries + Yields: + Generator[LinearRing, None, None]: a generator through the boundary rings + """ + polygons = [] + if isinstance(geometry, Polygon): + polygons = [geometry] + elif isinstance(geometry, MultiPolygon): + polygons = geometry.geoms + for polygon in polygons: + yield polygon.exterior + for interior in polygon.interiors: + yield interior + + def filling( + self, + polygon: Polygon | MultiPolygon, + coverer: S2RegionCoverer, + tolerance: float = 1e-2, + ) -> list[S2CellId]: + """ + Returns a list of cell IDs that constitute a filling + of a 2-dimensional geometry that is saturated to the 13th level + within a certain value of tolerance + + Args: + polygon (Polygon | MultiPolygon): a 2-dimensional geometry + coverer (S2RegionCoverer): + tolerance (float, optional): maximal segment width. Defaults to 1e-2. + + Returns: + list[S2CellId]: a list of s2 cell IDs in a saturated fill + """ + filling = [] + s2_obj = self.s2_approximation(polygon, tolerance) + for exponent in range(4, 9): + max_cells = 10**exponent + coverer.set_max_cells(max_cells) + filling = coverer.GetInteriorCovering(s2_obj) + num_cells = len(filling) + if num_cells < 10 ** (exponent - 1): + break # iterate until the filling is saturated + return filling + + +def yield_geometric_features(path: Path) -> Generator[GeometricFeature, None, None]: + print("====") + if os.path.isfile(path): + graph = Graph() + with open(path, "r") as read_stream: + graph.parse(read_stream) + result = graph.query( + """ + PREFIX geo: + SELECT ?feature_iri ?wkt + WHERE { + ?feature_iri geo:hasGeometry ?geometry . + ?geometry geo:asWKT ?wkt . + } + """ + ) + for query_solution in result: + geometric_feature = GeometricFeature(query_solution) + yield geometric_feature + elif os.path.isdir(path): + for file_path in yield_file_paths(path): + for feature in yield_geometric_features(file_path): + yield feature + + +def yield_file_paths(input_dir: Path) -> Generator[Path, None, None]: + """ + Yields file_paths in input_dir + + Args: + input_dir (Path): the name of the directory hosting graphical data + + Yields: + Generator[str, None, None]: a generator of file paths + """ + for path, _, files in os.walk(input_dir): + for file in files: + if not file.startswith("."): + file_path = Path(os.path.join(path, file)) + yield file_path diff --git a/src/lib/integrator.py b/src/lib/integrator.py new file mode 100644 index 0000000..6b30580 --- /dev/null +++ b/src/lib/integrator.py @@ -0,0 +1,78 @@ +from __future__ import annotations + +import os +from functools import partial +from multiprocessing import Pool +from pathlib import Path + +from s2geometry import S2CellId, S2RegionCoverer +from shapely.geometry import MultiPolygon, Polygon + +from .config import config +from .geometric_feature import GeometricFeature, yield_geometric_features +from .kwg_ont import namespace_prefix + + +class Integrator: + """ + Abstraction over the process for integrating s2 cells together with spatial relations. + """ + + def __init__(self, compressed: bool): + if compressed: + print( + "Compression is on. Relations will be compressed using the S2 hierarchy..." + ) + data_path = Path("./output/") + output_folder = f"./output/{data_path.stem}" + + if compressed: + output_folder += "_compressed" + + os.makedirs(output_folder, exist_ok=True) + + write = partial( + self.write_all_relations, + output_folder=output_folder, + is_compressed=compressed, + ) + + with Pool() as pool: + pool.map(write, enumerate(yield_geometric_features(data_path))) + + print(f"Done! \nRelations written in path '{output_folder}'.") + + @staticmethod + def homogeneous_covering( + geometry: Polygon | MultiPolygon, + level: int, + tolerance: float = config.tolerance, + ) -> list[S2CellId]: + homogeneous_coverer = S2RegionCoverer() + homogeneous_coverer.set_min_level(level) + homogeneous_coverer.set_max_level(level) + return GeometricFeature().covering( + geometry=geometry, coverer=homogeneous_coverer, tolerance=tolerance + ) + + @staticmethod + def write_all_relations( + indexed_feature: tuple[int, GeometricFeature], + output_folder: str, + is_compressed: bool, + tolerance: float = config.tolerance, + ) -> None: + idx, feature = indexed_feature + coverer = S2RegionCoverer() + coverer.set_max_level(config.max_level) + if not is_compressed: + coverer.set_min_level(config.min_level) + else: + coverer.set_min_level(0) + graph = feature.s2_graph(coverer, tolerance=tolerance) + if graph: + for pfx in namespace_prefix: + graph.bind(pfx, namespace_prefix[pfx]) + file_name = f"{idx}.ttl" + destination = os.path.join(output_folder, file_name) + graph.serialize(destination=destination, format="ttl") diff --git a/src/lib/kwg_ont.py b/src/lib/kwg_ont.py index d0d2ce5..4f1460d 100644 --- a/src/lib/kwg_ont.py +++ b/src/lib/kwg_ont.py @@ -1,5 +1,6 @@ -from rdflib import URIRef +from rdflib import RDF, RDFS, XSD, URIRef from rdflib.namespace import DefinedNamespace, Namespace +from s2geometry import S2Cell, S2CellId class KWGOnt(DefinedNamespace): @@ -32,4 +33,29 @@ class KWGOnt(DefinedNamespace): sfCrosses: URIRef vertexPolygon: URIRef - _NS = Namespace(f"{kwg_endpoint}lod/ontology/") \ No newline at end of file + _NS = Namespace(f"{kwg_endpoint}lod/ontology/") + + +def generate_cell_iri(cell_id: S2CellId) -> URIRef: + """ + Creates an IRI for an individual cell, with a KnowWhereGraph domain + + Args: + cell_id: The ID of the s2 cell + Returns: + A URI of the s2 cell + """ + level = cell_id.level() + id_str = cell_id.id() + return KWGOnt.KWGR[f"{'s2.level'}{level}.{id_str}"] + + +namespace_prefix = { + "kwgr": KWGOnt.KWGR, + "kwg-ont": KWGOnt._NS, + "geo": Namespace("http://www.opengis.net/ont/geosparql#"), + "sf": Namespace("http://www.opengis.net/ont/sf#"), + "rdf": RDF, + "rdfs": RDFS, + "xsd": XSD, +} diff --git a/src/s2.py b/src/s2.py index 823875a..0a6ba24 100644 --- a/src/s2.py +++ b/src/s2.py @@ -1,238 +1,67 @@ from __future__ import annotations -from pathlib import Path -from typing import Generator -import os + import argparse +import os from functools import partial from multiprocessing import Pool +from pathlib import Path -from rdflib import Graph, Namespace, Literal, URIRef, RDF, RDFS, XSD +from rdflib import RDF, RDFS, XSD, Graph, Literal from rdflib.namespace._GEO import GEO +from s2geometry import (S2Cell, S2CellId, S2LatLng, S2Loop, S2Point, S2Polygon, + S2Polyline, S2RegionCoverer) +from shapely.geometry import Polygon -from shapely.geometry import Point, LinearRing, Polygon, MultiPolygon, LineString -from shapely.geometry.polygon import signed_area -from s2geometry import S2CellId, S2RegionCoverer, S2Point, S2LatLng, S2Loop, S2Polyline, S2Polygon, S2Cell - -from lib.geometric_feature import GeometricFeature -from lib.kwg_ont import KWGOnt - -TOLERANCE = 1e-2 - -_PREFIX = { - "kwgr": KWGOnt.KWGR, - "kwg-ont": KWGOnt._NS, - "geo": Namespace("http://www.opengis.net/ont/geosparql#"), - "sf": Namespace("http://www.opengis.net/ont/sf#"), - "rdf": RDF, - "rdfs": RDFS, - "xsd": XSD, -} - +from lib.integrator import Integrator +from lib.kwg_ont import KWGOnt, generate_cell_iri, namespace_prefix -def create_cell_iri(cell_id: S2CellId) -> URIRef: - """ - Creates an IRI for an individual cell, with a KnowWhereGraph domain - - Args: - cell_id: The ID of the s2 cell - Returns: - A URI of the s2 cell - """ - level = cell_id.level() - id_str = cell_id.id() - return KWGOnt.KWGR[f"{'s2.level'}{level}.{id_str}"] - -def s2_from_coords( - geometry: tuple | Point | LinearRing | LineString | Polygon | MultiPolygon -) -> S2Point | S2Loop | S2Polyline | S2Polygon: - """ - Returns a corresponding S2 object whose vertices - are obtained from the coordinates of the given geometry. - Note that the returned object lives on the sphere and any - linear edges are replaced by geodesic curves. As such, the resulting - S2 object is an APPROXIMATION of the given geometry living on - the sphere, and the approximation is worse when vertices are farther apart - (because geodesics will curve more in that case). - Args: - geometry (tuple | Point | LinearRing | LineString | Polygon | MultiPolygon): a geometry - Returns: - S2Point | S2Loop | S2Polyline | S2Polygon: an S2 geometric object +def write_to_rdf(cell_id_int: int, out_path: str, rdf_format: str) -> None: """ - if isinstance(geometry, tuple): - return S2LatLng.FromDegrees(*geometry[::-1]).ToPoint() - elif isinstance(geometry, Point): - return S2LatLng.FromDegrees(*geometry.coords[0][::-1]).ToPoint() - elif isinstance(geometry, LinearRing): - s2_loop = S2Loop() - s2_loop.Init(list(map(s2_from_coords, list(geometry.coords)[:-1]))) - return s2_loop - elif isinstance(geometry, LineString): - polyline = S2Polyline() - polyline.InitFromS2Points(list(map(s2_from_coords, geometry.coords))) - return polyline - elif isinstance(geometry, (Polygon, MultiPolygon)): - loops = map(s2_from_coords, map(orient, boundaries(geometry))) - s2_polygon = S2Polygon() - s2_polygon.InitNested(list(loops)) - return s2_polygon - - -def s2_approximation( - geometry: S2Point | LinearRing | LineString | Polygon | MultiPolygon, - tolerance: float = TOLERANCE -) -> S2Point | S2Loop | S2Polyline | S2Polygon: - """ - Returns a corresponding S2 object that approximates the given - Shapely object to a certain value of tolerance. Precisely, a given - Shapely geometry is first segmentized, meaning extra vertices are added - so that the width between any two adjacent vertices never exceeds the - tolerance, and then these new coordinates are used as the vertices - of an S2 object (see s2_from_coords). - Args: - geometry (LinearRing | LineString | Polygon | MultiPolygon): a Shapely geometry - tolerance (float, optional): a maximal segment width Defaults to 1e-2. - Returns: - S2Point | S2Loop | S2Polyline | S2Polygon: an S2 geometry object - """ - return s2_from_coords(geometry.segmentize(tolerance)) - - -def orient( - geometry: LinearRing | Polygon | MultiPolygon, - sign: float = 1.0 -) -> LinearRing | Polygon | MultiPolygon: + Writes a s2 cell to disk + + Args + cell_id_int: ID of the cell being written + out_path: The location where the cell will be written + rdf_format: Format of the RDF. Depends on the formats rdflib supports + returns + None """ - Returns a copy of the geometry with the specified orientation - Args: - geometry (LinearRing | Polygon | MultiPolygon): a ring or (multi)polygon - sign (float, optional): an orientation. Defaults to 1.0. - Returns: - LinearRing | Polygon | MultiPolygon: a copy of the geometry with specified orientation - """ - sign = float(sign) - if isinstance(geometry, LinearRing): - return geometry if signed_area(geometry) / sign >= 0 else geometry.reverse() - elif isinstance(geometry, Polygon): - exterior = orient(geometry.exterior, sign) - oppositely_orient = partial(orient, sign=-sign) - interiors = list(map(oppositely_orient, geometry.interiors)) - return Polygon(exterior, interiors) - elif isinstance(geometry, MultiPolygon): - orient_with_sign = partial(orient, sign=sign) - return MultiPolygon(list(map(orient_with_sign, geometry.geoms))) - - -def boundaries( - geometry: Polygon | MultiPolygon, -) -> Generator[LinearRing, None, None]: - """ - Yields the boundary rings of a geometry with boundaries - Args: - geometry (Polygon | MultiPolygon): a shapely geometry with boundaries - Yields: - Generator[LinearRing, None, None]: a generator through the boundary rings - """ - if isinstance(geometry, Polygon): - polygons = [geometry] - elif isinstance(geometry, MultiPolygon): - polygons = geometry.geoms - for polygon in polygons: - yield polygon.exterior - for interior in polygon.interiors: - yield interior - - -def covering( - geometry: Polygon | MultiPolygon, - coverer: S2RegionCoverer, - tolerance: float = TOLERANCE -) -> list[S2CellId]: - """Returns a list of s2 cell IDs appearing in a homogeneous - covering of a 2-dimensional geometry by level 13 cells to a - certain value of tolerance - Args: - polygon (Polygon): a 2 dimensional geometry - tolerance (float, optional): maximal segment width. Defaults to 1e-2. - Returns: - list[S2CellId]: a list of cell IDs in a level 13 covering - """ - s2_obj = s2_approximation(geometry, tolerance) - covering = coverer.GetCovering(s2_obj) - return covering - - -def yield_file_paths(input_dir: str) -> Generator[str, None, None]: - """yields those file_paths in input_dir except for .DS_Store, - which is a file created by walking - - Args: - input_dir (str): the name of the directory hosting graphical data - - Yields: - Generator[str, None, None]: a generator of file paths - """ - for (path, _, files) in os.walk(input_dir): - for file in files: - if not file == ".DS_Store": - file_path = os.path.join(path, file) - yield file_path - - -def yield_geometric_features(path: str) -> Generator[GeometricFeature, None, None]: - if os.path.isfile(path): - graph = Graph() - with open(path, 'r') as read_stream: - graph.parse(read_stream) - result = graph.query(""" - PREFIX geo: - SELECT ?feature_iri ?wkt - WHERE { - ?feature_iri geo:hasGeometry ?geometry . - ?geometry geo:asWKT ?wkt . - } - """) - for query_solution in result: - geometric_feature = GeometricFeature(query_solution) - yield geometric_feature - elif os.path.isdir(path): - for file_path in yield_file_paths(path): - for feature in yield_geometric_features(file_path): - yield feature - - -def get_parents(cell_ids: list[S2CellId]) -> list[S2CellId]: - if cell_ids[0].level() == 0: - return - parents = set() - for cell_id in cell_ids: - parents.add(cell_id.parent()) - return list(parents) - - -def write_to_rdf(cell_id_int: int, output_path: str) -> None: cell_id = S2CellId(cell_id_int) graph = graphify(cell_id) if graph: - for prefix in _PREFIX: - graph.bind(prefix, _PREFIX[prefix]) - file_name = str(cell_id.id()) + ".ttl" - destination = os.path.join(output_path, file_name) - graph.serialize(destination=destination, format="ttl") + for pfx in namespace_prefix: + print(pfx) + graph.bind(pfx, namespace_prefix[pfx]) + file_extensions = { + "ttl": ".ttl", + "turtle": ".ttl", + "xml": ".xml", + "nq": ".nq", + "n3": "n3", + "nt": ".nt", + "trix": ".trix", + "trig": ".trig", + "nquads": ".nq", + "json-ld": ".jsonld", + } + file_name = str(cell_id.id()) + file_extensions[rdf_format] + destination = os.path.join(out_path, file_name) + graph.serialize(destination=destination, format=rdf_format) def graphify(cell_id: S2CellId) -> Graph: graph = Graph() - level = cell_id.level() + cell_level = cell_id.level() id_int = cell_id.id() - cell_iri = create_cell_iri(cell_id) + cell_iri = generate_cell_iri(cell_id) p = RDF.type - o = KWGOnt[f"S2Cell_Level{level}"] + o = KWGOnt[f"S2Cell_Level{cell_level}"] graph.add((cell_iri, p, o)) - label = f"S2 Cell at level {level} with ID {id_int}" + label = f"S2 Cell at level {cell_level} with ID {id_int}" p = RDFS.label o = Literal(label, datatype=XSD.string) graph.add((cell_iri, p, o)) @@ -241,34 +70,30 @@ def graphify(cell_id: S2CellId) -> Graph: o = Literal(id_int, datatype=XSD.integer) graph.add((cell_iri, p, o)) - # p = KWG_ONT.cellLevel - # o = Literal(level, datatype=XSD.integer) - # graph.add((cell_iri, p, o)) - cell = S2Cell(cell_id) area_on_sphere = cell.ApproxArea() - area_on_earth = area_on_sphere * (6.3781e6) * (6.3781e6) + area_on_earth = area_on_sphere * 6.3781e6 * 6.3781e6 - p = _PREFIX["geo"]["hasMetricArea"] + p = namespace_prefix["geo"]["hasMetricArea"] o = Literal(area_on_earth, datatype=XSD.float) graph.add((cell_iri, p, o)) geometry = get_vertex_polygon(cell=cell) - geometry_iri = KWGOnt.KWGR[f"geometry.polygon.s2.level{level}.{id_int}"] + geometry_iri = KWGOnt.KWGR[f"geometry.polygon.s2.level{cell_level}.{id_int}"] p = GEO.hasGeometry graph.add((cell_iri, p, geometry_iri)) - p = _PREFIX["geo"]["hasDefaultGeometry"] + p = namespace_prefix["geo"]["hasDefaultGeometry"] graph.add((cell_iri, p, geometry_iri)) p = RDF.type o = GEO.Geometry graph.add((geometry_iri, p, o)) - o = _PREFIX["sf"]["Polygon"] + o = namespace_prefix["sf"]["Polygon"] graph.add((geometry_iri, p, o)) - label = f"Geometry of the polygon formed from the vertices of the S2 Cell at level {level} with ID {id_int}" + label = f"Geometry of the polygon formed from the vertices of the S2 Cell at level {cell_level} with ID {id_int}" p = RDFS.label o = Literal(label, datatype=XSD.string) graph.add((geometry_iri, p, o)) @@ -278,16 +103,16 @@ def graphify(cell_id: S2CellId) -> Graph: o = Literal(wkt, datatype=GEO.wktLiteral) graph.add((geometry_iri, p, o)) - neighbors = cell_id.GetAllNeighbors(level) + neighbors = cell_id.GetAllNeighbors(cell_level) for neighbor in neighbors: p = KWGOnt.sfTouches - neighbor_iri = create_cell_iri(neighbor) + neighbor_iri = generate_cell_iri(neighbor) graph.add((cell_iri, p, neighbor_iri)) graph.add((neighbor_iri, p, cell_iri)) - if level > 0: + if cell_level > 0: parent = cell_id.parent() - parent_iri = create_cell_iri(parent) + parent_iri = generate_cell_iri(parent) p = KWGOnt.sfWithin graph.add((cell_iri, p, parent_iri)) @@ -300,14 +125,40 @@ def graphify(cell_id: S2CellId) -> Graph: def get_vertex_polygon(cell: S2Cell) -> Polygon: vertices = map(cell.GetVertex, range(4)) lat_lngs = [S2LatLng(vertex) for vertex in vertices] - coords = [[lat_lng.lng().degrees(), lat_lng.lat().degrees()] for lat_lng in lat_lngs] + coords = [ + [lat_lng.lng().degrees(), lat_lng.lat().degrees()] for lat_lng in lat_lngs + ] vertex_polygon = Polygon(coords) return vertex_polygon if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument("level", type=int, help="Level at which the s2 cells are generated for") + parser.add_argument( + "--level", type=int, help="Level at which the s2 cells are generated for" + ) + parser.add_argument( + "--format", + help="The format to write the RDF in. Options are xml, n3, turtle, nt, pretty-xml, trix, trig, nquads, " + "json-ld, hext", + type=str, + nargs="?", + default="ttl", + ) + parser.add_argument( + "--ni", + help="When used, s2 integration is disabled", + nargs="?", + const=1, + type=int, + ) + parser.add_argument( + "--compressed", + help="use the S2 hierarchy to write a compressed collection of relations at various levels", + type=bool, + nargs="?", + default=True, + ) args = parser.parse_args() level = args.level @@ -322,7 +173,9 @@ def get_vertex_polygon(cell: S2Cell) -> Polygon: current_id = beginning_id cell_id_integers = [] while True: - id_as_integer = current_id.id() # note that current_id is an instance of S2CellId + id_as_integer = ( + current_id.id() + ) # note that current_id is an instance of S2CellId print(id_as_integer) cell_id_integers.append(id_as_integer) current_id = current_id.next() @@ -330,10 +183,17 @@ def get_vertex_polygon(cell: S2Cell) -> Polygon: break parents = set() print(f"Writing data for cells at level {level}...") - write = partial(write_to_rdf, output_path=output_path) + write = partial(write_to_rdf, out_path=output_path, rdf_format=args.format) with Pool() as pool: pool.map(write, cell_id_integers) if level > 0: - parents = set(S2CellId(cell_id_integer).parent() for cell_id_integer in cell_id_integers) + parents = set( + S2CellId(cell_id_integer).parent() for cell_id_integer in cell_id_integers + ) cell_id_integers = [parent.id() for parent in parents] - level -= 1 \ No newline at end of file + level -= 1 + + # Handle integration + if not args.ni: + print("ni not present") + Integrator(args.compressed)