Skip to content

Commit

Permalink
[Core-257] Catalog: Don't pass default image cutline for single image…
Browse files Browse the repository at this point in the history
…s (#12400)

GitOrigin-RevId: b283618892f78f0db5eff8282f25cb59a482c98a
  • Loading branch information
stephencpope authored and Descartes Labs Build committed Jan 18, 2024
1 parent c413b71 commit acd67bc
Show file tree
Hide file tree
Showing 6 changed files with 45 additions and 150 deletions.
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,11 @@ Changelog
- The `Blob.get_or_create` method didn't allow supplying `storage_type`, `namespace`, or `name` parameters.
Now it works as expected, either returning a saved Blob from the Catalog, or an unsaved blob that
you can use to upload and save its data.
- Image methods `ndarray` and `download` no longer pass the image's default context geometry as a cutline.
This is to avoid problems when trying to raster a complete single image in its native CRS and resolution
where imperfect geometries (due to a simplistic projection to EPSG:4326) can cause some boundary pixels
to be masked. When passing in an explicit `GeoContext` to these methods, consider whether any cutline
geometry is required or not, to avoid these issues.

### Compute

Expand All @@ -54,6 +59,7 @@ Changelog
- *Breaking Change*: The deprecated `Scenes` client API has been removed.
- *Breaking Change*: The deprecated `Metadata` client API has been removed.
- The minimum required version of `urllib3` has been bumped to 1.26.18 to address a security vulnerability.
- The minimum required version of `shapely` has been bumped to 2.0.0 to address thread safety issues.

## [2.1.2] - 2023-10-31

Expand Down
10 changes: 8 additions & 2 deletions descarteslabs/core/catalog/image.py
Original file line number Diff line number Diff line change
Expand Up @@ -1089,7 +1089,10 @@ def ndarray(
If the Descartes Labs Platform is given invalid parameters
"""
if geocontext is None:
geocontext = self.geocontext
# Lose the image's geometry (which is only used as a cutline),
# as it might cause some unexpected clipping when rasterizing, due
# to imperfect simplified geometries used when native image CRS is not WGS84.
geocontext = self.geocontext.assign(geometry=None)
if crs is not None or resolution is not None:
try:
params = {}
Expand Down Expand Up @@ -1334,7 +1337,10 @@ def download(
If the Descartes Labs Platform is given invalid parameters
"""
if geocontext is None:
geocontext = self.geocontext
# Lose the image's geometry (which is only used as a cutline),
# as it might cause some unexpected clipping when rasterizing, due
# to imperfect simplified geometries used when native image CRS is not WGS84.
geocontext = self.geocontext.assign(geometry=None)
if crs is not None or resolution is not None:
try:
params = {}
Expand Down
21 changes: 21 additions & 0 deletions descarteslabs/core/catalog/tests/test_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
# limitations under the License.

import datetime
import functools
import json
import os.path
import textwrap
Expand Down Expand Up @@ -971,6 +972,23 @@ def test_load_one_band(self):
with pytest.raises(TypeError):
image.ndarray("blue", invalid_argument=True)

@patch.object(Image, "get", _image_get)
@patch.object(
image_module,
"cached_bands_by_product",
_cached_bands_by_product,
)
@patch.object(image_module.Raster, "ndarray")
def test_ndarray_geocontext(self, mock_raster):
mock_raster.side_effect = functools.partial(_raster_ndarray, None)
image = Image.get("landsat:LC08:PRE:TOAR:meta_LC80270312016188_v1")
arr, info = image.ndarray("red", resolution=1000, raster_info=True)

assert mock_raster.called_once()
assert image.geocontext.geometry is not None
print(mock_raster.call_args)
assert mock_raster.call_args[1]["cutline"] is None

@patch.object(Image, "get", _image_get)
@patch.object(
image_module,
Expand Down Expand Up @@ -1197,6 +1215,9 @@ def test_download(self, mock_download):
image = Image.get("landsat:LC08:PRE:TOAR:meta_LC80270312016188_v1")
image.download("red green blue", resolution=120.0)
mock_download.assert_called_once()
# verify we nulled out the cutline
assert image.geocontext.geometry is not None
assert mock_download.call_args[1]["geocontext"].geometry is None

@patch.object(Image, "get", _image_get)
@patch.object(
Expand Down
43 changes: 9 additions & 34 deletions descarteslabs/core/common/geo/geocontext.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@


import copy
import threading
import warnings
import math

Expand Down Expand Up @@ -55,10 +54,7 @@ class GeoContext(object):
GeoContexts are immutable.
"""

__slots__ = (
"_geometry_lock_",
"_all_touched",
)
__slots__ = ("_all_touched",)
# slots *suffixed* with an underscore will be ignored by `__eq__` and `__repr__`.
# a double-underscore prefix would be more conventional, but that actually breaks as a slot name.

Expand All @@ -74,27 +70,18 @@ def __init__(self, all_touched=False):
situations will return no result at all (i.e. entirely masked).
"""

# Shapely objects are not thread-safe, due to the way the underlying GEOS library is used.
# Specifically, accessing `__geo_interface__` on the same geometry across threads
# can cause bizzare exceptions. This makes `raster_params` and `__geo_interface__` thread-unsafe.
# Subclasses of GeoContext can use this lock to ensure `self._geometry.__geo_interface__`
# is accessed from at most 1 thread at a time.
self._geometry_lock_ = threading.Lock()
self._all_touched = bool(all_touched)

def __getstate__(self):
# Lock objects shouldn't be pickled or deepcopied, but recursively get all the other slots
return {
attr: getattr(self, attr)
for s in self.__class__.__mro__
for attr in getattr(s, "__slots__", [])
if not attr.endswith("_")
for attr in getattr(s, "__slots__", tuple())
}

def __setstate__(self, state):
for attr, val in state.items():
setattr(self, attr, val)
self._geometry_lock_ = threading.Lock()

@property
def all_touched(self):
Expand Down Expand Up @@ -130,8 +117,6 @@ def __eq__(self, other):
if not isinstance(other, self.__class__):
return False
for attr in self.__slots__:
if attr.endswith("_"):
continue
if getattr(self, attr) != getattr(other, attr):
return False
return True
Expand All @@ -142,8 +127,7 @@ def __repr__(self):
props = delim.join(
"{}={}".format(attr.lstrip("_"), reprlib.repr(getattr(self, attr)))
for s in self.__class__.__mro__
for attr in getattr(s, "__slots__", [])
if not attr.endswith("_")
for attr in getattr(s, "__slots__", tuple())
)
return "{}({})".format(classname, props)

Expand Down Expand Up @@ -251,10 +235,10 @@ def __init__(

super(AOI, self).__init__(all_touched=all_touched)

# If no bounds were given, use the bounds of the geometry
if bounds is None and geometry is not None:
bounds = "update"

# If no bounds were given, use the bounds of the geometry
self._assign(
geometry,
resolution,
Expand Down Expand Up @@ -380,12 +364,9 @@ def raster_params(self):
# align_pixels will always be True or False based on resolution
# all_touched doesn't affect the spatial equivalence

with self._geometry_lock_:
# see comment in `GeoContext.__init__` for why we need to prevent
# parallel access to `self._geometry.__geo_interface__`
cutline = (
self._geometry.__geo_interface__ if self._geometry is not None else None
)
cutline = (
self._geometry.__geo_interface__ if self._geometry is not None else None
)

dimensions = (
(self._shape[1], self._shape[0]) if self._shape is not None else None
Expand Down Expand Up @@ -417,10 +398,7 @@ def __geo_interface__(self):
"""

if self._geometry is not None:
with self._geometry_lock_:
# see comment in `GeoContext.__init__` for why we need to prevent
# parallel access to `self._geometry.__geo_interface__`
return self._geometry.__geo_interface__
return self._geometry.__geo_interface__
elif self._bounds is not None and is_wgs84_crs(self._bounds_crs):
return polygon_from_bounds(self._bounds)
else:
Expand Down Expand Up @@ -1315,10 +1293,7 @@ def wkt(self):
def __geo_interface__(self):
"""dict: :py:attr:`~descarteslabs.common.geo.geocontext.DLTile.geometry` as a GeoJSON Polygon"""

with self._geometry_lock_:
# see comment in `GeoContext.__init__` for why we need to prevent
# parallel access to `self._geometry.__geo_interface__`
return self._geometry.__geo_interface__
return self._geometry.__geo_interface__


class XYZTile(GeoContext):
Expand Down
113 changes: 0 additions & 113 deletions descarteslabs/core/common/geo/tests/test_geocontext.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,6 @@

import pytest
import unittest
import multiprocessing
import platform
import concurrent.futures
import copy
import warnings

Expand All @@ -33,10 +30,6 @@
from ..geocontext import EARTH_CIRCUMFERENCE_WGS84


if platform.system() == "Darwin":
multiprocessing.set_start_method("fork")


class SimpleContext(geocontext.GeoContext):
__slots__ = ("foo", "_bar")

Expand Down Expand Up @@ -68,7 +61,6 @@ def test_eq(self):
def test_deepcopy(self):
simple = SimpleContext(1, False)
simple_copy = copy.deepcopy(simple)
assert simple._geometry_lock_ is not simple_copy._geometry_lock_
assert simple == simple_copy


Expand Down Expand Up @@ -483,108 +475,3 @@ def test_raster_params(self):
def test_children_parent(self):
tile = geocontext.XYZTile(1, 1, 2)
assert tile == tile.children()[0].parent()


# can't use the word `test` in the function name otherwise nose tries to run it...
def run_threadsafe_experiment(geoctx_factory, property, n=80000):
"In a subprocess, test whether parallel access to a property on a GeoContext fails (due to Shapely thread-unsafety)"
conn_ours, conn_theirs = multiprocessing.Pipe(duplex=False)

# Run actual test in a separate process, because unsafe use of Shapely objects
# across threads can occasionally cause segfaults, so we want to check the exit
# code of the process doing the testing.
def threadsafe_test(geoctx_factory, property, conn, n):
ctx = geoctx_factory()
with concurrent.futures.ThreadPoolExecutor(
max_workers=multiprocessing.cpu_count()
) as executor:
futures = [
executor.submit(lambda: getattr(ctx, property)) for i in range(n)
]

errors = []
for future in concurrent.futures.as_completed(futures):
if future.exception() is not None:
errors.append("exception: {}".format(future.exception()))
conn.send(errors)

p = multiprocessing.Process(
target=threadsafe_test, args=(geoctx_factory, property, conn_theirs, n)
)
p.start()
p.join()
if p.exitcode < 0:
errors = ["failed with exit code {}".format(p.exitcode)]
else:
errors = conn_ours.recv()
return errors


@unittest.skip(
"Slow test. Un-skip this and run manually if touching any code related to `_geometry_lock_`!"
)
class TestShapelyThreadSafe(unittest.TestCase):
@staticmethod
def aoi_factory():
return geocontext.AOI(
{
"coordinates": [
[
[-93.52300099792355, 41.241436141055345],
[-93.7138666, 40.703737],
[-94.37053769704536, 40.83098709945576],
[-94.2036617, 41.3717716],
[-93.52300099792355, 41.241436141055345],
]
],
"type": "Polygon",
},
crs="EPSG:3857",
resolution=10,
)

@staticmethod
def dltile_factory():
return geocontext.DLTile(
{
"geometry": {
"coordinates": [
[
[-94.64171754779824, 40.9202359006794],
[-92.81755164322226, 40.93177944075989],
[-92.81360932958779, 42.31528732533928],
[-94.6771717075502, 42.303172487087394],
[-94.64171754779824, 40.9202359006794],
]
],
"type": "Polygon",
},
"properties": {
"cs_code": "EPSG:32615",
"key": "128:16:960.0:15:-1:37",
"outputBounds": [361760.0, 4531200.0, 515360.0, 4684800.0],
"pad": 16,
"resolution": 960.0,
"ti": -1,
"tilesize": 128,
"tj": 37,
"zone": 15,
"geotrans": [361760.0, 960.0, 0, 4684800.0, 0, -960.0],
"proj4": "+proj=utm +zone=15 +datum=WGS84 +units=m +no_defs ",
"wkt": 'PROJCS["WGS 84 / UTM zone 15N",GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","4326"]],PROJECTION["Transverse_Mercator"],PARAMETER["latitude_of_origin",0],PARAMETER["central_meridian",-93],PARAMETER["scale_factor",0.9996],PARAMETER["false_easting",500000],PARAMETER["false_northing",0],UNIT["metre",1,AUTHORITY["EPSG","9001"]],AXIS["Easting",EAST],AXIS["Northing",NORTH],AUTHORITY["EPSG","32615"]]', # noqa
},
"type": "Feature",
}
)

def test_aoi_raster_params_threadsafe(self):
errors = run_threadsafe_experiment(self.aoi_factory, "raster_params")
assert errors == []

def test_aoi_geo_interface_threadsafe(self):
errors = run_threadsafe_experiment(self.aoi_factory, "__geo_interface__")
assert errors == []

def test_dltile_geo_interface_threadsafe(self):
errors = run_threadsafe_experiment(self.dltile_factory, "__geo_interface__")
assert errors == []
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ def do_setup():
"requests>=2.28.1,<3",
# It is not obvious but dynaconf requires pkg_resources from setuptools.
"setuptools>=65.6.3",
"shapely>=1.8.1",
"shapely>=2.0.0",
"strenum>=0.4.8",
"tifffile==2023.4.12;python_version=='3.8'",
"tifffile>=2023.9.26;python_version>='3.9'",
Expand Down

0 comments on commit acd67bc

Please sign in to comment.