From 7dddd15af9af283edcbf5dc02a0b4271afe2596d Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sat, 16 Jan 2021 18:29:22 +0100 Subject: [PATCH 01/18] Support spatialpandas DaskGeoDataFrame --- holoviews/core/data/spatialpandas.py | 63 ++++++++++++++++++++-------- 1 file changed, 45 insertions(+), 18 deletions(-) diff --git a/holoviews/core/data/spatialpandas.py b/holoviews/core/data/spatialpandas.py index 9426811071..8015471055 100644 --- a/holoviews/core/data/spatialpandas.py +++ b/holoviews/core/data/spatialpandas.py @@ -30,20 +30,51 @@ def loaded(cls): def applies(cls, obj): if not cls.loaded(): return False - from spatialpandas import GeoDataFrame, GeoSeries - is_sdf = isinstance(obj, (GeoDataFrame, GeoSeries)) - if 'geopandas' in sys.modules and not 'geoviews' in sys.modules: + is_sdf = isinstance(obj, cls.types()) + if 'geopandas' in sys.modules and not 'geoviews' in sys.modules and geopandas: import geopandas as gpd is_sdf |= isinstance(obj, (gpd.GeoDataFrame, gpd.GeoSeries)) return is_sdf @classmethod - def geo_column(cls, data): + def types(cls): + from spatialpandas import GeoDataFrame, GeoSeries + stypes = (GeoDataFrame, GeoSeries) + if 'spatialpandas.dask' in sys.modules: + from spatialpandas.dask import DaskGeoDataFrame, DaskGeoSeries + stypes = stypes + (DaskGeoDataFrame, DaskGeoSeries) + return stypes + + @classmethod + def series_types(cls): from spatialpandas import GeoSeries + if 'spatialpandas.dask' in sys.modules: + from spatialpandas.dask import DaskGeoSeries + return (GeoSeries, DaskGeoSeries) + return (GeoSeries,) + + @classmethod + def frame_types(cls): + from spatialpandas import GeoDataFrame + if 'spatialpandas.dask' in sys.modules: + from spatialpandas.dask import DaskGeoDataFrame + return (GeoDataFrame, DaskGeoDataFrame) + return (GeoDataFrame,) + + @classmethod + def dask_types(cls): + if 'spatialpandas.dask' in sys.modules: + from spatialpandas.dask import DaskGeoDataFrame, DaskGeoSeries + return (DaskGeoSeries, DaskGeoDataFrame) + return () + + @classmethod + def geo_column(cls, data): col = 'geometry' - if col in data and isinstance(data[col], GeoSeries): + stypes = cls.series_types() + if col in data and isinstance(data[col], stypes): return col - cols = [c for c in data.columns if isinstance(data[c], GeoSeries)] + cols = [c for c in data.columns if isinstance(data[c], stypes)] if not cols: raise ValueError('No geometry column found in spatialpandas.GeoDataFrame, ' 'use the PandasInterface instead.') @@ -52,7 +83,6 @@ def geo_column(cls, data): @classmethod def init(cls, eltype, data, kdims, vdims): import pandas as pd - from spatialpandas import GeoDataFrame, GeoSeries if kdims is None: kdims = eltype.kdims @@ -60,7 +90,7 @@ def init(cls, eltype, data, kdims, vdims): if vdims is None: vdims = eltype.vdims - if isinstance(data, GeoSeries): + if isinstance(data, cls.series_types()): data = data.to_frame() if 'geopandas' in sys.modules: @@ -74,7 +104,7 @@ def init(cls, eltype, data, kdims, vdims): data = from_shapely(data) if isinstance(data, list): data = from_multi(eltype, data, kdims, vdims) - elif not isinstance(data, GeoDataFrame): + elif not isinstance(data, cls.frame_types()): raise ValueError("SpatialPandasInterface only support spatialpandas DataFrames.") elif 'geometry' not in data: cls.geo_column(data) @@ -116,7 +146,7 @@ def dtype(cls, dataset, dimension): dim = dataset.get_dimension(dimension, strict=True) if dim in cls.geom_dims(dataset): col = cls.geo_column(dataset.data) - return dataset.data[col].values.numpy_dtype + return dataset.data[col].dtype.subtype return dataset.data[dim.name].dtype @classmethod @@ -203,13 +233,7 @@ def geom_dims(cls, dataset): @classmethod def dimension_type(cls, dataset, dim): dim = dataset.get_dimension(dim) - col = cls.geo_column(dataset.data) - if dim in cls.geom_dims(dataset) and len(dataset.data): - arr = geom_to_array(dataset.data[col].iloc[0]) - ds = dataset.clone(arr, datatype=cls.subtypes, vdims=[]) - return ds.interface.dimension_type(ds, dim) - else: - return cls.dtype(dataset, dim).type + return cls.dtype(dataset, dim).type @classmethod def isscalar(cls, dataset, dim, per_geom=False): @@ -392,7 +416,10 @@ def values(cls, dataset, dimension, expanded=True, flat=True, compute=True, keep geom_type = cls.geom_type(dataset) index = geom_dims.index(dimension) - return geom_array_to_array(data[geom_col].values, index, expanded, geom_type) + geom_series = data[geom_col] + if compute and isinstance(geom_series, cls.dask_types()): + geom_series = geom_series.compute() + return geom_array_to_array(geom_series.values, index, expanded, geom_type) @classmethod def split(cls, dataset, start, end, datatype, **kwargs): From f7b61966373192656d05bb65a89c5dafcc1adfd7 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sat, 16 Jan 2021 18:33:38 +0100 Subject: [PATCH 02/18] Allow using spatialpandas for Point datashading --- holoviews/operation/datashader.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/holoviews/operation/datashader.py b/holoviews/operation/datashader.py index ec5d17cc15..f975b2d8dc 100644 --- a/holoviews/operation/datashader.py +++ b/holoviews/operation/datashader.py @@ -1429,8 +1429,7 @@ class rasterize(AggregationOperation): _transforms = [(Image, regrid), (Polygons, geometry_rasterize), - (lambda x: (isinstance(x, Path) and - x.interface.datatype == 'spatialpandas'), + (lambda x: (x.interface.datatype == 'spatialpandas'), geometry_rasterize), (TriMesh, trimesh_rasterize), (QuadMesh, quadmesh_rasterize), From 0328053c65608c42ed7e0ef20910c678a24864da Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sat, 16 Jan 2021 18:46:56 +0100 Subject: [PATCH 03/18] Implement more methods --- holoviews/core/data/spatialpandas.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/holoviews/core/data/spatialpandas.py b/holoviews/core/data/spatialpandas.py index 8015471055..f226a63bd0 100644 --- a/holoviews/core/data/spatialpandas.py +++ b/holoviews/core/data/spatialpandas.py @@ -9,6 +9,7 @@ from ..dimension import dimension_name from ..util import isscalar, unique_iterator, pd, unique_array +from .dask import DaskInterface from .interface import DataError, Interface from .multipath import MultiInterface, ensure_ring from .pandas import PandasInterface @@ -261,8 +262,9 @@ def range(cls, dataset, dim): return (bounds[0], bounds[2]) else: return (bounds[1], bounds[3]) - else: - return Interface.range(dataset, dim) + elif isinstance(dataset.data, cls.dask_types()): + return DaskInterface.range(dataset, dim) + return PandasInterface.range(dataset, dim) @classmethod def groupby(cls, dataset, dimensions, container_type, group_type, **kwargs): @@ -270,6 +272,8 @@ def groupby(cls, dataset, dimensions, container_type, group_type, **kwargs): if any(d in geo_dims for d in dimensions): raise DataError("SpatialPandasInterface does not allow grouping " "by geometry dimension.", cls) + elif isinstance(dataset.data, cls.dask_types()): + return DaskInterface.groupby(dataset, dimensions, container_type, group_type, **kwargs) return PandasInterface.groupby(dataset, dimensions, container_type, group_type, **kwargs) @classmethod From 82c5c0834704a0a3ba975f7ff2ca576c77759aae Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sat, 16 Jan 2021 19:25:24 +0100 Subject: [PATCH 04/18] Fix flakes --- holoviews/core/data/spatialpandas.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/holoviews/core/data/spatialpandas.py b/holoviews/core/data/spatialpandas.py index f226a63bd0..11cd52dca4 100644 --- a/holoviews/core/data/spatialpandas.py +++ b/holoviews/core/data/spatialpandas.py @@ -32,7 +32,7 @@ def applies(cls, obj): if not cls.loaded(): return False is_sdf = isinstance(obj, cls.types()) - if 'geopandas' in sys.modules and not 'geoviews' in sys.modules and geopandas: + if 'geopandas' in sys.modules and not 'geoviews' in sys.modules: import geopandas as gpd is_sdf |= isinstance(obj, (gpd.GeoDataFrame, gpd.GeoSeries)) return is_sdf @@ -84,6 +84,7 @@ def geo_column(cls, data): @classmethod def init(cls, eltype, data, kdims, vdims): import pandas as pd + from spatialpandas import GeoDataFrame if kdims is None: kdims = eltype.kdims From 6b8a62d601d8a3b76d9ccaa92b06d607d660dde7 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sat, 16 Jan 2021 19:41:43 +0100 Subject: [PATCH 05/18] Fix Spatialpandas.types --- holoviews/core/data/spatialpandas.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/holoviews/core/data/spatialpandas.py b/holoviews/core/data/spatialpandas.py index 11cd52dca4..a7e14a3db9 100644 --- a/holoviews/core/data/spatialpandas.py +++ b/holoviews/core/data/spatialpandas.py @@ -31,14 +31,14 @@ def loaded(cls): def applies(cls, obj): if not cls.loaded(): return False - is_sdf = isinstance(obj, cls.types()) + is_sdf = isinstance(obj, cls.data_types()) if 'geopandas' in sys.modules and not 'geoviews' in sys.modules: import geopandas as gpd is_sdf |= isinstance(obj, (gpd.GeoDataFrame, gpd.GeoSeries)) return is_sdf @classmethod - def types(cls): + def data_types(cls): from spatialpandas import GeoDataFrame, GeoSeries stypes = (GeoDataFrame, GeoSeries) if 'spatialpandas.dask' in sys.modules: From 89a4cea5f79f7363b93bdad1775b25338e1ce57b Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Mon, 18 Jan 2021 18:13:03 +0100 Subject: [PATCH 06/18] Speed up spatialpandas length --- holoviews/core/data/spatialpandas.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/holoviews/core/data/spatialpandas.py b/holoviews/core/data/spatialpandas.py index a7e14a3db9..20c7c11d4b 100644 --- a/holoviews/core/data/spatialpandas.py +++ b/holoviews/core/data/spatialpandas.py @@ -319,7 +319,7 @@ def length(cls, dataset): @classmethod def nonzero(cls, dataset): - return bool(cls.length(dataset)) + return bool(len(dataset.data.head(1))) @classmethod def redim(cls, dataset, dimensions): From 70a328559166ebf2b386e251076cb5b6f5876919 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Mon, 18 Jan 2021 18:13:19 +0100 Subject: [PATCH 07/18] Give spatialpandas precedence --- holoviews/core/data/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/holoviews/core/data/__init__.py b/holoviews/core/data/__init__.py index d4b5377e05..dc7d32a99c 100644 --- a/holoviews/core/data/__init__.py +++ b/holoviews/core/data/__init__.py @@ -41,8 +41,8 @@ default_datatype = 'dataframe' -datatypes = ['dataframe', 'dictionary', 'grid', 'xarray', 'dask', - 'cuDF', 'spatialpandas', 'array', 'multitabular', 'ibis'] +datatypes = ['dataframe', 'dictionary', 'grid', 'xarray', 'spatialpandas', + 'dask', 'cuDF', 'array', 'multitabular', 'ibis'] def concat(datasets, datatype=None): From cac870396d7226296b489b0d2bbc5fad44a962f5 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Tue, 19 Jan 2021 12:24:34 +0100 Subject: [PATCH 08/18] Handle Points/Paths using spatialpandas datashader path --- holoviews/operation/datashader.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/holoviews/operation/datashader.py b/holoviews/operation/datashader.py index f975b2d8dc..5b5f28db1a 100644 --- a/holoviews/operation/datashader.py +++ b/holoviews/operation/datashader.py @@ -1429,7 +1429,8 @@ class rasterize(AggregationOperation): _transforms = [(Image, regrid), (Polygons, geometry_rasterize), - (lambda x: (x.interface.datatype == 'spatialpandas'), + (lambda x: (isinstance(x, Path, Points) and + x.interface.datatype == 'spatialpandas'), geometry_rasterize), (TriMesh, trimesh_rasterize), (QuadMesh, quadmesh_rasterize), From 8888966fc1ba21d39b4a184bff570a6d900a544f Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Tue, 19 Jan 2021 12:46:27 +0100 Subject: [PATCH 09/18] Fix typo --- holoviews/operation/datashader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/holoviews/operation/datashader.py b/holoviews/operation/datashader.py index 5b5f28db1a..41c5562675 100644 --- a/holoviews/operation/datashader.py +++ b/holoviews/operation/datashader.py @@ -1429,7 +1429,7 @@ class rasterize(AggregationOperation): _transforms = [(Image, regrid), (Polygons, geometry_rasterize), - (lambda x: (isinstance(x, Path, Points) and + (lambda x: (isinstance(x, (Path, Points)) and x.interface.datatype == 'spatialpandas'), geometry_rasterize), (TriMesh, trimesh_rasterize), From 2e7deeb626ecd2ea8560d8f0f68635a69bc4fb49 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Wed, 20 Jan 2021 16:55:40 +0100 Subject: [PATCH 10/18] Fix values if points --- holoviews/core/data/spatialpandas.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/holoviews/core/data/spatialpandas.py b/holoviews/core/data/spatialpandas.py index 20c7c11d4b..4e915eb2a4 100644 --- a/holoviews/core/data/spatialpandas.py +++ b/holoviews/core/data/spatialpandas.py @@ -415,6 +415,8 @@ def values(cls, dataset, dimension, expanded=True, flat=True, compute=True, keep if isgeom and keep_index: return data[geom_col] elif not isgeom: + if is_points: + return data[dimension.name].values return get_value_array(data, dimension, expanded, keep_index, geom_col, is_points) elif not len(data): return np.array([]) @@ -636,7 +638,7 @@ def get_value_array(data, dimension, expanded, keep_index, geom_col, all_scalar = True arrays, scalars = [], [] for i, geom in enumerate(data[geom_col]): - length = geom_length(geom) + length = 1 if is_points else geom_length(geom) val = column.iloc[i] scalar = isscalar(val) if scalar: From 4c4e72e904cfee760897d3de1c0937b5a0fd3ddb Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Thu, 21 Jan 2021 12:38:11 +0100 Subject: [PATCH 11/18] Split out DaskSpatialPandasInterface --- holoviews/core/data/__init__.py | 6 +- holoviews/core/data/spatialpandas.py | 59 ++++++--------- holoviews/core/data/spatialpandas_dask.py | 75 +++++++++++++++++++ .../tests/core/data/testspatialpandas.py | 16 +++- 4 files changed, 115 insertions(+), 41 deletions(-) create mode 100644 holoviews/core/data/spatialpandas_dask.py diff --git a/holoviews/core/data/__init__.py b/holoviews/core/data/__init__.py index dc7d32a99c..1288b36a46 100644 --- a/holoviews/core/data/__init__.py +++ b/holoviews/core/data/__init__.py @@ -36,13 +36,15 @@ from .multipath import MultiInterface # noqa (API import) from .image import ImageInterface # noqa (API import) from .pandas import PandasInterface # noqa (API import) -from .spatialpandas import SpatialPandasInterface # noqa (API import) +from .spatialpandas import SpatialPandasInterface # noqa (API import) +from .spatialpandas_dask import DaskSpatialPandasInterface # noqa (API import) from .xarray import XArrayInterface # noqa (API import) default_datatype = 'dataframe' datatypes = ['dataframe', 'dictionary', 'grid', 'xarray', 'spatialpandas', - 'dask', 'cuDF', 'array', 'multitabular', 'ibis'] + 'dask', 'cuDF', 'array', 'multitabular', 'dask_spatialpandas', + 'ibis'] def concat(datasets, datatype=None): diff --git a/holoviews/core/data/spatialpandas.py b/holoviews/core/data/spatialpandas.py index 4e915eb2a4..8c93abc4b3 100644 --- a/holoviews/core/data/spatialpandas.py +++ b/holoviews/core/data/spatialpandas.py @@ -6,6 +6,7 @@ from collections import defaultdict import numpy as np +import pandas as pd from ..dimension import dimension_name from ..util import isscalar, unique_iterator, pd, unique_array @@ -17,12 +18,14 @@ class SpatialPandasInterface(MultiInterface): - types = () + base_interface = PandasInterface datatype = 'spatialpandas' multi = True + types = () + @classmethod def loaded(cls): return 'spatialpandas' in sys.modules @@ -40,39 +43,23 @@ def applies(cls, obj): @classmethod def data_types(cls): from spatialpandas import GeoDataFrame, GeoSeries - stypes = (GeoDataFrame, GeoSeries) - if 'spatialpandas.dask' in sys.modules: - from spatialpandas.dask import DaskGeoDataFrame, DaskGeoSeries - stypes = stypes + (DaskGeoDataFrame, DaskGeoSeries) - return stypes + return (GeoDataFrame, GeoSeries) @classmethod - def series_types(cls): + def series_type(cls): from spatialpandas import GeoSeries - if 'spatialpandas.dask' in sys.modules: - from spatialpandas.dask import DaskGeoSeries - return (GeoSeries, DaskGeoSeries) - return (GeoSeries,) + return GeoSeries @classmethod - def frame_types(cls): + def frame_type(cls): from spatialpandas import GeoDataFrame - if 'spatialpandas.dask' in sys.modules: - from spatialpandas.dask import DaskGeoDataFrame - return (GeoDataFrame, DaskGeoDataFrame) - return (GeoDataFrame,) - - @classmethod - def dask_types(cls): - if 'spatialpandas.dask' in sys.modules: - from spatialpandas.dask import DaskGeoDataFrame, DaskGeoSeries - return (DaskGeoSeries, DaskGeoDataFrame) - return () + return GeoDataFrame @classmethod def geo_column(cls, data): col = 'geometry' - stypes = cls.series_types() + stypes = cls.series_type() + print(data[col], stypes) if col in data and isinstance(data[col], stypes): return col cols = [c for c in data.columns if isinstance(data[c], stypes)] @@ -83,7 +70,6 @@ def geo_column(cls, data): @classmethod def init(cls, eltype, data, kdims, vdims): - import pandas as pd from spatialpandas import GeoDataFrame if kdims is None: @@ -92,7 +78,7 @@ def init(cls, eltype, data, kdims, vdims): if vdims is None: vdims = eltype.vdims - if isinstance(data, cls.series_types()): + if isinstance(data, cls.series_type()): data = data.to_frame() if 'geopandas' in sys.modules: @@ -106,8 +92,8 @@ def init(cls, eltype, data, kdims, vdims): data = from_shapely(data) if isinstance(data, list): data = from_multi(eltype, data, kdims, vdims) - elif not isinstance(data, cls.frame_types()): - raise ValueError("SpatialPandasInterface only support spatialpandas DataFrames.") + elif not isinstance(data, cls.frame_type()): + raise ValueError("%s only support spatialpandas DataFrames." % cls.__name__) elif 'geometry' not in data: cls.geo_column(data) @@ -263,9 +249,8 @@ def range(cls, dataset, dim): return (bounds[0], bounds[2]) else: return (bounds[1], bounds[3]) - elif isinstance(dataset.data, cls.dask_types()): - return DaskInterface.range(dataset, dim) - return PandasInterface.range(dataset, dim) + else: + return cls.base_interface.range(dataset, dim) @classmethod def groupby(cls, dataset, dimensions, container_type, group_type, **kwargs): @@ -273,9 +258,7 @@ def groupby(cls, dataset, dimensions, container_type, group_type, **kwargs): if any(d in geo_dims for d in dimensions): raise DataError("SpatialPandasInterface does not allow grouping " "by geometry dimension.", cls) - elif isinstance(dataset.data, cls.dask_types()): - return DaskInterface.groupby(dataset, dimensions, container_type, group_type, **kwargs) - return PandasInterface.groupby(dataset, dimensions, container_type, group_type, **kwargs) + return cls.base_interface.groupby(dataset, dimensions, container_type, group_type, **kwargs) @classmethod def aggregate(cls, columns, dimensions, function, **kwargs): @@ -299,7 +282,7 @@ def sort(cls, dataset, by=[], reverse=False): if any(d in geo_dims for d in by): raise DataError("SpatialPandasInterface does not allow sorting " "by geometry dimension.", cls) - return PandasInterface.sort(dataset, by, reverse) + return cls.base_interface.sort(dataset, by, reverse) @classmethod def length(cls, dataset): @@ -308,7 +291,7 @@ def length(cls, dataset): column = dataset.data[col_name] geom_type = cls.geom_type(dataset) if not isinstance(column.dtype, MultiPointDtype) and geom_type != 'Point': - return PandasInterface.length(dataset) + return cls.base_interface.length(dataset) length = 0 for i, geom in enumerate(column): if isinstance(geom, Point): @@ -323,7 +306,7 @@ def nonzero(cls, dataset): @classmethod def redim(cls, dataset, dimensions): - return PandasInterface.redim(dataset, dimensions) + return cls.base_interface.redim(dataset, dimensions) @classmethod def add_dimension(cls, dataset, dimension, dim_pos, values, vdim): @@ -424,7 +407,7 @@ def values(cls, dataset, dimension, expanded=True, flat=True, compute=True, keep geom_type = cls.geom_type(dataset) index = geom_dims.index(dimension) geom_series = data[geom_col] - if compute and isinstance(geom_series, cls.dask_types()): + if compute and hasattr(geom_series, 'compute'): geom_series = geom_series.compute() return geom_array_to_array(geom_series.values, index, expanded, geom_type) diff --git a/holoviews/core/data/spatialpandas_dask.py b/holoviews/core/data/spatialpandas_dask.py new file mode 100644 index 0000000000..92d90233c3 --- /dev/null +++ b/holoviews/core/data/spatialpandas_dask.py @@ -0,0 +1,75 @@ +import sys + +import numpy as np + +from .dask import DaskInterface +from .interface import Interface +from .spatialpandas import SpatialPandasInterface + + +class DaskSpatialPandasInterface(SpatialPandasInterface): + + base_interface = DaskInterface + + datatype = 'dask_spatialpandas' + + @classmethod + def loaded(cls): + return 'spatialpandas.dask' in sys.modules + + @classmethod + def data_types(cls): + from spatialpandas.dask import DaskGeoDataFrame, DaskGeoSeries + return (DaskGeoDataFrame, DaskGeoSeries) + + @classmethod + def series_type(cls): + from spatialpandas.dask import DaskGeoSeries + return DaskGeoSeries + + @classmethod + def frame_type(cls): + from spatialpandas.dask import DaskGeoDataFrame + return DaskGeoDataFrame + + @classmethod + def init(cls, eltype, data, kdims, vdims): + import dask.dataframe as dd + data, dims, params = super(DaskSpatialPandasInterface, cls).init( + eltype, data, kdims, vdims + ) + if not isinstance(data, cls.frame_type()): + data = dd.from_pandas(data, npartitions=1) + return data, dims, params + + @classmethod + def partition_values(cls, df, dataset, dimension, expanded, flat): + ds = dataset.clone(df, datatype=['spatialpandas']) + return ds.interface.values(ds, dimension, expanded, flat) + + @classmethod + def values(cls, dataset, dimension, expanded=True, flat=True, compute=True, keep_index=False): + if compute and not keep_index: + meta = np.array([], dtype=cls.dtype(dataset, dimension)) + return dataset.data.map_partitions( + cls.partition_values, meta=meta, dataset=dataset, + dimension=dimension, expanded=expanded, flat=flat + ).compute() + values = super(DaskSpatialPandasInterface, cls).values( + dataset, dimension, expanded, flat, compute, keep_index + ) + if compute and not keep_index and hasattr(values, 'compute'): + return values.compute() + return values + + @classmethod + def split(cls, dataset, start, end, datatype, **kwargs): + if datatype is None: + raise NotImplementedError + ds = dataset.clone(dataset.data.compute(), datatype=['spatialpandas']) + return ds.interface.split(ds, start, end, datatype, **kwargs) + + + + +Interface.register(DaskSpatialPandasInterface) diff --git a/holoviews/tests/core/data/testspatialpandas.py b/holoviews/tests/core/data/testspatialpandas.py index f251b63226..aecf2ca13c 100644 --- a/holoviews/tests/core/data/testspatialpandas.py +++ b/holoviews/tests/core/data/testspatialpandas.py @@ -16,7 +16,9 @@ except: spatialpandas = None -from holoviews.core.data import Dataset, SpatialPandasInterface +from holoviews.core.data import ( + Dataset, SpatialPandasInterface, DaskSpatialPandasInterface +) from holoviews.core.data.interface import DataError from holoviews.element import Path, Points, Polygons from holoviews.element.comparison import ComparisonTestCase @@ -217,3 +219,15 @@ def test_multi_polygon_constructor(self): 2., 7., 5., 6., 7. ])) self.assertEqual(path.data.iloc[1, 0].buffer_values, np.array([3, 2, 7, 5, 6, 7, 3, 2, 3, 7, 1, 2, 2, 0, 3, 7])) + + +class DaskSpatialPandasTest(GeomTests, RoundTripTests): + """ + Test of the DaskSpatialPandasInterface. + """ + + datatype = 'dask_spatialpandas' + + interface = DaskSpatialPandasInterface + + __test__ = True From 4de3eb643230b48a88e494cd87ef08166d7202b6 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Thu, 21 Jan 2021 13:00:15 +0100 Subject: [PATCH 12/18] Get DaskSpatialPandasInterface test suite passing --- holoviews/core/data/spatialpandas.py | 33 +--------------- holoviews/core/data/spatialpandas_dask.py | 13 +++++-- .../tests/core/data/testspatialpandas.py | 39 +++++++++++++++++++ 3 files changed, 51 insertions(+), 34 deletions(-) diff --git a/holoviews/core/data/spatialpandas.py b/holoviews/core/data/spatialpandas.py index 8c93abc4b3..e1d9e817cc 100644 --- a/holoviews/core/data/spatialpandas.py +++ b/holoviews/core/data/spatialpandas.py @@ -175,43 +175,14 @@ def select(cls, dataset, selection_mask=None, **selection): elif selection_mask is None: selection_mask = cls.select_mask(dataset, selection) indexed = cls.indexed(dataset, selection) - df = df.iloc[selection_mask] + df = df[selection_mask] if indexed and len(df) == 1 and len(dataset.vdims) == 1: return df[dataset.vdims[0].name].iloc[0] return df @classmethod def select_mask(cls, dataset, selection): - mask = np.ones(len(dataset.data), dtype=np.bool) - for dim, k in selection.items(): - if isinstance(k, tuple): - k = slice(*k) - arr = dataset.data[dim].values - if isinstance(k, slice): - with warnings.catch_warnings(): - warnings.filterwarnings('ignore', r'invalid value encountered') - if k.start is not None: - mask &= k.start <= arr - if k.stop is not None: - mask &= arr < k.stop - elif isinstance(k, (set, list)): - iter_slcs = [] - for ik in k: - with warnings.catch_warnings(): - warnings.filterwarnings('ignore', r'invalid value encountered') - iter_slcs.append(arr == ik) - mask &= np.logical_or.reduce(iter_slcs) - elif callable(k): - mask &= k(arr) - else: - index_mask = arr == k - if dataset.ndims == 1 and np.sum(index_mask) == 0: - data_index = np.argmin(np.abs(arr - k)) - mask = np.zeros(len(dataset), dtype=np.bool) - mask[data_index] = True - else: - mask &= index_mask - return mask + return cls.base_interface.select_mask(dataset, selection) @classmethod def geom_dims(cls, dataset): diff --git a/holoviews/core/data/spatialpandas_dask.py b/holoviews/core/data/spatialpandas_dask.py index 92d90233c3..aa293b65f0 100644 --- a/holoviews/core/data/spatialpandas_dask.py +++ b/holoviews/core/data/spatialpandas_dask.py @@ -64,12 +64,19 @@ def values(cls, dataset, dimension, expanded=True, flat=True, compute=True, keep @classmethod def split(cls, dataset, start, end, datatype, **kwargs): - if datatype is None: - raise NotImplementedError ds = dataset.clone(dataset.data.compute(), datatype=['spatialpandas']) return ds.interface.split(ds, start, end, datatype, **kwargs) - + @classmethod + def iloc(cls, dataset, index): + rows, cols = index + if rows is not None: + raise NotImplementedError + return super(DaskSpatialPandasInterface, cls).iloc(dataset, index) + + @classmethod + def add_dimension(cls, dataset, dimension, dim_pos, values, vdim): + return cls.base_interface.add_dimension(dataset, dimension, dim_pos, values, vdim) Interface.register(DaskSpatialPandasInterface) diff --git a/holoviews/tests/core/data/testspatialpandas.py b/holoviews/tests/core/data/testspatialpandas.py index aecf2ca13c..5013020b36 100644 --- a/holoviews/tests/core/data/testspatialpandas.py +++ b/holoviews/tests/core/data/testspatialpandas.py @@ -231,3 +231,42 @@ class DaskSpatialPandasTest(GeomTests, RoundTripTests): interface = DaskSpatialPandasInterface __test__ = True + + def test_array_dataset_add_dimension_scalar(self): + raise SkipTest("Not supported") + + def test_array_points_iloc_index_row(self): + raise SkipTest("Not supported") + + def test_array_points_iloc_index_rows(self): + raise SkipTest("Not supported") + + def test_array_points_iloc_index_rows_index_cols(self): + raise SkipTest("Not supported") + + def test_array_points_iloc_slice_rows(self): + raise SkipTest("Not supported") + + def test_array_points_iloc_slice_rows_no_start(self): + raise SkipTest("Not supported") + + def test_array_points_iloc_slice_rows_no_end(self): + raise SkipTest("Not supported") + + def test_array_points_iloc_slice_rows_no_stop(self): + raise SkipTest("Not supported") + + def test_multi_polygon_iloc_index_row(self): + raise SkipTest("Not supported") + + def test_multi_polygon_iloc_index_rows(self): + raise SkipTest("Not supported") + + def test_multi_polygon_iloc_slice_rows(self): + raise SkipTest("Not supported") + + def test_dict_dataset_add_dimension_values(self): + raise SkipTest("Not supported") + + def test_sort_by_value(self): + raise SkipTest("Not supported") From 40b5ef486f933c398b96b149f491dfc93d7aea1c Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Thu, 21 Jan 2021 13:01:37 +0100 Subject: [PATCH 13/18] Add test --- holoviews/tests/core/data/testspatialpandas.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/holoviews/tests/core/data/testspatialpandas.py b/holoviews/tests/core/data/testspatialpandas.py index 5013020b36..03617625d0 100644 --- a/holoviews/tests/core/data/testspatialpandas.py +++ b/holoviews/tests/core/data/testspatialpandas.py @@ -232,9 +232,6 @@ class DaskSpatialPandasTest(GeomTests, RoundTripTests): __test__ = True - def test_array_dataset_add_dimension_scalar(self): - raise SkipTest("Not supported") - def test_array_points_iloc_index_row(self): raise SkipTest("Not supported") From acc9359d44b9235353d07b9b697cc751f7f8308b Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Thu, 21 Jan 2021 14:11:41 +0100 Subject: [PATCH 14/18] Fix flakes --- holoviews/core/data/spatialpandas.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/holoviews/core/data/spatialpandas.py b/holoviews/core/data/spatialpandas.py index e1d9e817cc..a287435b3d 100644 --- a/holoviews/core/data/spatialpandas.py +++ b/holoviews/core/data/spatialpandas.py @@ -1,16 +1,13 @@ from __future__ import absolute_import, division import sys -import warnings from collections import defaultdict import numpy as np -import pandas as pd from ..dimension import dimension_name from ..util import isscalar, unique_iterator, pd, unique_array -from .dask import DaskInterface from .interface import DataError, Interface from .multipath import MultiInterface, ensure_ring from .pandas import PandasInterface From f8071eafb5c917676c0ceda2d4ee8c2a945553fe Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Thu, 21 Jan 2021 14:25:36 +0100 Subject: [PATCH 15/18] Fix absolute import --- holoviews/core/data/spatialpandas_dask.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/holoviews/core/data/spatialpandas_dask.py b/holoviews/core/data/spatialpandas_dask.py index aa293b65f0..664852a808 100644 --- a/holoviews/core/data/spatialpandas_dask.py +++ b/holoviews/core/data/spatialpandas_dask.py @@ -1,3 +1,5 @@ +from __future__ import absolute_import + import sys import numpy as np From 8b0e06e06245476769b11529158bc35d954126a6 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Thu, 21 Jan 2021 14:50:39 +0100 Subject: [PATCH 16/18] Skip tests on py2 --- holoviews/tests/core/data/testspatialpandas.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/holoviews/tests/core/data/testspatialpandas.py b/holoviews/tests/core/data/testspatialpandas.py index 03617625d0..3a1a16d51d 100644 --- a/holoviews/tests/core/data/testspatialpandas.py +++ b/holoviews/tests/core/data/testspatialpandas.py @@ -13,9 +13,14 @@ LineDtype, PointDtype, PolygonDtype, MultiLineDtype, MultiPointDtype, MultiPolygonDtype ) -except: +except Exception: spatialpandas = None +try: + import dask.dataframe as dd +except Exception: + dd = None + from holoviews.core.data import ( Dataset, SpatialPandasInterface, DaskSpatialPandasInterface ) @@ -232,6 +237,13 @@ class DaskSpatialPandasTest(GeomTests, RoundTripTests): __test__ = True + def setUp(self): + if spatialpandas is None: + raise SkipTest('DaskSpatialPandasInterface requires spatialpandas, skipping tests') + elif dd is None: + raise SkipTest('DaskSpatialPandasInterface requires dask, skipping tests') + super(GeomTests, self).setUp() + def test_array_points_iloc_index_row(self): raise SkipTest("Not supported") From 9374b59544b4e0f74efeb103ed3ffa55dea88dd6 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Thu, 21 Jan 2021 15:42:58 +0100 Subject: [PATCH 17/18] Fixes --- holoviews/core/data/__init__.py | 4 ++-- holoviews/core/data/spatialpandas.py | 1 - holoviews/operation/datashader.py | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/holoviews/core/data/__init__.py b/holoviews/core/data/__init__.py index 1288b36a46..5971c7b2fd 100644 --- a/holoviews/core/data/__init__.py +++ b/holoviews/core/data/__init__.py @@ -42,8 +42,8 @@ default_datatype = 'dataframe' -datatypes = ['dataframe', 'dictionary', 'grid', 'xarray', 'spatialpandas', - 'dask', 'cuDF', 'array', 'multitabular', 'dask_spatialpandas', +datatypes = ['dataframe', 'dictionary', 'grid', 'xarray', 'multitabular', + 'spatialpandas', 'dask_spatialpandas', 'dask', 'cuDF', 'array', 'ibis'] diff --git a/holoviews/core/data/spatialpandas.py b/holoviews/core/data/spatialpandas.py index a287435b3d..7717fa6c14 100644 --- a/holoviews/core/data/spatialpandas.py +++ b/holoviews/core/data/spatialpandas.py @@ -56,7 +56,6 @@ def frame_type(cls): def geo_column(cls, data): col = 'geometry' stypes = cls.series_type() - print(data[col], stypes) if col in data and isinstance(data[col], stypes): return col cols = [c for c in data.columns if isinstance(data[c], stypes)] diff --git a/holoviews/operation/datashader.py b/holoviews/operation/datashader.py index 41c5562675..ae8882be1b 100644 --- a/holoviews/operation/datashader.py +++ b/holoviews/operation/datashader.py @@ -1430,7 +1430,7 @@ class rasterize(AggregationOperation): _transforms = [(Image, regrid), (Polygons, geometry_rasterize), (lambda x: (isinstance(x, (Path, Points)) and - x.interface.datatype == 'spatialpandas'), + 'spatialpandas' in x.interface.datatype), geometry_rasterize), (TriMesh, trimesh_rasterize), (QuadMesh, quadmesh_rasterize), From bd9b8300775923b5fa92b90260213908231f3723 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Thu, 21 Jan 2021 15:44:21 +0100 Subject: [PATCH 18/18] Datashade fix --- holoviews/operation/datashader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/holoviews/operation/datashader.py b/holoviews/operation/datashader.py index ae8882be1b..f2770fb7c5 100644 --- a/holoviews/operation/datashader.py +++ b/holoviews/operation/datashader.py @@ -1366,7 +1366,7 @@ def _process(self, element, key=None): if element._plot_id in self._precomputed: data, col = self._precomputed[element._plot_id] else: - if element.interface.datatype != 'spatialpandas': + if 'spatialpandas' not in element.interface.datatype: element = element.clone(datatype=['spatialpandas']) data = element.data col = element.interface.geo_column(data)