Mark slow tests, update __eq__ methods, pin reqs #60

Merged 32 commits on Feb 9, 2021
Changes from 23 commits
Commits (32)
35c834e
Typing for read parquet
brl0 Jan 11, 2021
2e0e00f
Ignore monkeytype
brl0 Jan 31, 2021
94347e9
Fix for pandas update, mark slow tests
brl0 Jan 31, 2021
9227303
Merge branch 'master' into fix_fillna
brl0 Jan 31, 2021
abc9d6e
Fix duplicate import
brl0 Jan 31, 2021
50ec3b8
Pin numpy
brl0 Feb 2, 2021
f8b5970
Pin pyarrow
brl0 Feb 2, 2021
d37f138
Pin pyarrow < 2
brl0 Feb 3, 2021
79bf0ba
Remove pins for numpy and pyarrow
brl0 Feb 6, 2021
c400e36
Suppress hypothesis health check causing failures
brl0 Feb 6, 2021
1334731
Temporarily skip slow tests, try 3.8 on MacOS
brl0 Feb 6, 2021
82bd02f
Try pinning numpy on MacOS
brl0 Feb 6, 2021
7627f5d
Check platform for numpy requirement
brl0 Feb 6, 2021
afbc75c
Fix warnings about np.object
brl0 Feb 6, 2021
ca1ece6
Run slow tests by default again
brl0 Feb 6, 2021
e6b8cf3
Merge branch 'remove_pins' into initial_typing
brl0 Feb 6, 2021
065ebff
Sort imports, add more type info
brl0 Feb 6, 2021
d6e26cb
Unmark dask test, update pins, skip slow
brl0 Feb 6, 2021
da5cc87
Pin pyarrow < 2 on Mac, try conda-forge for others
brl0 Feb 6, 2021
498e7fc
Add snappy, swap channels, pin 3.6
brl0 Feb 6, 2021
72f4995
Correct channels, try pyarrow 3
brl0 Feb 6, 2021
351c5c9
Unpin pyarrow except Mac
brl0 Feb 6, 2021
292c46d
Fix pyarrow req, enable slow tests
brl0 Feb 6, 2021
b93a406
Relative imports, isort, cleanup
brl0 Feb 7, 2021
a5ca5eb
Fix pin
brl0 Feb 7, 2021
9b33a12
Try pyarrow from conda-forge on Mac
brl0 Feb 7, 2021
7140e33
Try pinning Dask on Mac
brl0 Feb 8, 2021
70553e3
Try lower Dask version on Mac
brl0 Feb 8, 2021
3abb8d5
Try lower Dask version on Mac, skip slow tests
brl0 Feb 8, 2021
60715f0
Undo skip slow tests
brl0 Feb 8, 2021
fc14db0
Revert pyarrow pin
brl0 Feb 8, 2021
2f16777
Update change log
brl0 Feb 8, 2021
4 changes: 2 additions & 2 deletions .github/workflows/test.yml
@@ -25,7 +25,7 @@ jobs:
     timeout-minutes: 40
     defaults:
       run:
-        shell: bash -l {0}
+        shell: bash -l {0}
     env:
       PYTHON_VERSION: ${{ matrix.python-version }}
       CHANS_DEV: "-c pyviz/label/dev"
@@ -64,7 +64,7 @@ jobs:
           conda activate test-environment
           conda list
           doit develop_install ${{ env.CHANS_DEV }} -o tests
-          pip install hilbertcurve
+          pip install hilbertcurve
       - name: doit env_capture
         run: |
           eval "$(conda shell.bash hook)"
6 changes: 6 additions & 0 deletions .gitignore
@@ -106,3 +106,9 @@ venv.bak/
 *.parq
 .idea/
 spatialpandas/.version
+
+.vscode/
+
+monkeytype.sqlite3
+
+.doit.db
33 changes: 33 additions & 0 deletions conftest.py
@@ -0,0 +1,33 @@
"""Configuration for pytest."""
import pytest


_DEFAULT_SKIPSLOW = False


def pytest_addoption(parser):
"""Add command-line flags for pytest."""
parser.addoption(
"--skip-slow",
action="store_true",
help="skips slow tests",
default=_DEFAULT_SKIPSLOW,
)
parser.addoption(
"--runslow",
action="store_true",
default=False, # Only used for cli override
help="run slow tests",
)


def pytest_configure(config):
config.addinivalue_line("markers", "slow: mark test as slow to run")


def pytest_collection_modifyitems(config, items):
if not config.getoption("--runslow") and config.getoption("--skip-slow"):
skip_slow = pytest.mark.skip(reason="Skipping slow tests")
for item in items:
if "slow" in item.keywords:
item.add_marker(skip_slow)
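
With these hooks in place, a test opts in by carrying the new marker; `pytest --skip-slow` then skips it at collection time, and `--runslow` overrides the skip even when `--skip-slow` is also set. A minimal sketch of the intended usage (this test is hypothetical, not part of the PR):

```python
import time

import pytest


@pytest.mark.slow  # marker registered by pytest_configure in conftest.py
def test_expensive_roundtrip():
    # Stand-in for a long-running test: skipped under `pytest --skip-slow`,
    # collected and run again when `--runslow` is passed as a CLI override.
    time.sleep(5)
```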
44 changes: 30 additions & 14 deletions setup.py
@@ -1,41 +1,58 @@
-from setuptools import setup, find_packages
+import sys
 
 import param
+from setuptools import find_packages, setup
 
 extras_require = {
     'tests': [
-        'pytest',
         'codecov',
-        'pytest-cov',
         'flake8',
+        'geopandas',
         'hypothesis',
+        'pytest-cov',
+        'pytest',
         'scipy',
         'shapely',
-        'geopandas',
     ],
     'examples': [
-        'geopandas',
-        'matplotlib',
-        'descartes',
         'datashader',
+        'descartes',
+        'geopandas',
         'holoviews',
+        'matplotlib',
     ]
 }
 
 install_requires = [
-    'pandas>=0.25',
     'dask[complete] >=2.0',
+    'fsspec',
     'numba',
-    'numpy',
-    'pyarrow>=0.15',
+    'pandas>=0.25',
     'param',
-    'fsspec',
     'retrying',
     'snappy',
 ]
 
+# Checking for platform explicitly because
+# pyctdev does not handle dependency conditions
+# such as 'numpy<1.20;platform_system=="Darwin"'
+if sys.platform == 'darwin':
+    install_requires.extend([
+        'numpy<1.20',
+        'pyarrow>=0.15,<2',
+    ])
+else:
+    install_requires.extend([
+        'numpy',
+        'pyarrow>=0.15',
+    ])
 
 setup_args = dict(
     name='spatialpandas',
     version=param.version.get_setup_version(
-        __file__, "spatialpandas", archive_commit="$Format:%h$"
+        __file__,
+        "spatialpandas",
+        archive_commit="$Format:%h$",
     ),
     description='Pandas extension arrays for spatial/geometric operations',
     long_description=open("README.md").read(),
@@ -49,9 +66,8 @@
     tests_require=extras_require['tests'],
     license='BSD-2-Clause',
     packages=find_packages(exclude=('tests', 'tests.*')),
-    include_package_data=True
+    include_package_data=True,
 )
 
 if __name__ == '__main__':
     setup(**setup_args)
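
The platform branch above is the workaround for the limitation named in its comment: pyctdev cannot parse PEP 508 environment markers. For comparison, a hypothetical declarative equivalent under plain setuptools/pip would collapse the branch into markers (not usable here for the reason given):

```python
# Hypothetical PEP 508 formulation of the same pins; understood by pip and
# setuptools, but not by pyctdev, hence the sys.platform check above.
install_requires += [
    'numpy<1.20; platform_system == "Darwin"',
    'numpy; platform_system != "Darwin"',
    'pyarrow>=0.15,<2; platform_system == "Darwin"',
    'pyarrow>=0.15; platform_system != "Darwin"',
]
```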

30 changes: 18 additions & 12 deletions spatialpandas/geometry/base.py
@@ -1,18 +1,17 @@
+import re
+from collections.abc import Container, Iterable
 from numbers import Integral
-from collections.abc import Iterable
 
 import numpy as np
 import pandas as pd
 import pyarrow as pa
 from pandas.api.extensions import ExtensionArray, ExtensionDtype
 from pandas.api.types import is_array_like
 
 from spatialpandas.spatialindex import HilbertRtree
 from spatialpandas.spatialindex.rtree import _distances_from_bounds
-import re
-
 from spatialpandas.utils import ngjit
-from .._optional_imports import sg, gp
+
+from .._optional_imports import gp, sg
 
 
 def _unwrap_geometry(a, element_dtype):
@@ -144,6 +143,8 @@ def __hash__(self):
         return hash((self.__class__, np.array(self.data.as_py()).tobytes()))
 
     def __eq__(self, other):
+        if isinstance(other, Container):
+            return other == self
         if type(other) is not type(self):
             return False
         return self.data == other.data
@@ -336,11 +337,18 @@ def __eq__(self, other):
             for i in range(len(self)):
                 result[i] = self[i] == other[i]
             return result
-        else:
-            raise ValueError("""
+        if isinstance(other, (self.dtype.type, type(None))):
+            result = np.zeros(len(self), dtype=np.bool_)
+            for i in range(len(self)):
+                result[i] = self[i] == other
+            return result
+        raise ValueError("""
 Cannot check equality of {typ} of length {a_len} with:
     {other}""".format(typ=type(self).__name__, a_len=len(self), other=repr(other)))
 
+    def __contains__(self, item) -> bool:
+        raise NotImplementedError
+
     def __len__(self):
         return len(self.data)
 
@@ -499,8 +507,8 @@ def _concat_same_type(cls, to_concat):

     def fillna(self, value=None, method=None, limit=None):
         from pandas.api.types import is_array_like
-        from pandas.util._validators import validate_fillna_kwargs
         from pandas.core.missing import get_fill_func
+        from pandas.util._validators import validate_fillna_kwargs
 
         value, method = validate_fillna_kwargs(value, method)
@@ -766,10 +774,8 @@ def is_geometry_array(data):


 def to_geometry_array(data, dtype=None):
-    from . import (
-        PointArray, MultiPointArray, LineArray, RingArray,
-        MultiLineArray, PolygonArray, MultiPolygonArray
-    )
+    from . import (LineArray, MultiLineArray, MultiPointArray,
+                   MultiPolygonArray, PointArray, PolygonArray, RingArray)
     if sg is not None:
         shapely_to_spatialpandas = {
             sg.Point: PointArray,
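
The new branch in the array-level `__eq__` makes scalar comparisons broadcast element-wise instead of raising, while the scalar `Geometry.__eq__` now defers to containers so that `point == array` and `array == point` agree. A rough sketch of the resulting behavior (the array values here are illustrative):

```python
from spatialpandas.geometry import PointArray

points = PointArray([[0, 0], [1, 1], [0, 0]])

# Comparing against a single scalar geometry (an instance of
# points.dtype.type) now yields one boolean per element instead of
# raising ValueError; comparisons against None are handled the same way.
mask = points == points[0]
print(list(mask))  # expected: [True, False, True]
```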
8 changes: 3 additions & 5 deletions spatialpandas/geometry/baselist.py
@@ -273,18 +273,18 @@ def _lexographic_lt0(a1, a2):


 def _lexographic_lt(a1, a2):
-    if a1.dtype != np.object and a1.dtype != np.object:
+    if a1.dtype != np.dtype(object) and a1.dtype != np.dtype(object):
         # a1 and a2 primitive
         return _lexographic_lt0(a1, a2)
-    elif a1.dtype == np.object and a1.dtype == np.object:
+    elif a1.dtype == np.dtype(object) and a1.dtype == np.dtype(object):
         # a1 and a2 object, process recursively
         for e1, e2 in zip(a1, a2):
             if _lexographic_lt(e1, e2):
                 return True
             elif _lexographic_lt(e2, e1):
                 return False
         return len(a1) < len(a2)
-    elif a1.dtype != np.object:
+    elif a1.dtype != np.dtype(object):
         # a2 is object array, a1 primitive
         return True
     else:
@@ -333,5 +333,3 @@ def _geometry_map_nested3(
         start = value_offsets1[value_offsets0[i]]
         stop = value_offsets1[value_offsets0[i + 1]]
         result[i] = fn(values, value_offsets2[start:stop + 1])
-
-
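
These dtype checks previously used the `np.object` alias, which NumPy 1.20 deprecated (the same release the earlier numpy pins were working around); comparing against `np.dtype(object)` is the forward-compatible spelling. A quick standalone illustration:

```python
import numpy as np

primitive = np.array([1.0, 2.0, 3.0])
ragged = np.array([np.array([1.0]), np.array([1.0, 2.0])], dtype=object)

# Same checks as in _lexographic_lt, without the deprecated np.object alias.
print(primitive.dtype != np.dtype(object))  # True: primitive float64 array
print(ragged.dtype == np.dtype(object))     # True: object (ragged) array
```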
87 changes: 55 additions & 32 deletions spatialpandas/io/parquet.py
@@ -2,29 +2,27 @@
 import json
 import pathlib
 from functools import reduce
+from numbers import Number
+from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
 
 import fsspec
 import pandas as pd
+import pyarrow as pa
 from dask import delayed
-from dask.dataframe import (  # noqa
-    to_parquet as dd_to_parquet, read_parquet as dd_read_parquet,
-    from_delayed, from_pandas,
-)
+from dask.dataframe import from_delayed, from_pandas
+from dask.dataframe import read_parquet as dd_read_parquet
+from dask.dataframe import to_parquet as dd_to_parquet  # noqa
 from dask.utils import natural_sort_key
-
-from pandas.io.parquet import (
-    to_parquet as pd_to_parquet,
-)
-
-import pyarrow as pa
+from pandas.io.parquet import to_parquet as pd_to_parquet
 from pyarrow import parquet as pq
-from spatialpandas.io.utils import validate_coerce_filesystem
 
 from spatialpandas import GeoDataFrame
 from spatialpandas.dask import DaskGeoDataFrame
+from spatialpandas.geometry import (GeometryDtype, LineDtype, MultiLineDtype,
+                                    MultiPointDtype, MultiPolygonDtype,
+                                    PointDtype, PolygonDtype, RingDtype)
 from spatialpandas.geometry.base import to_geometry_array
-from spatialpandas.geometry import (
-    PointDtype, MultiPointDtype, RingDtype, LineDtype,
-    MultiLineDtype, PolygonDtype, MultiPolygonDtype, GeometryDtype
-)
+from spatialpandas.io.utils import validate_coerce_filesystem
 
 
 _geometry_dtypes = [
     PointDtype, MultiPointDtype, RingDtype, LineDtype,


 def to_parquet(
-    df,
+    df: GeoDataFrame,
     fname,
-    compression="snappy",
-    index=None,
-    **kwargs
-):
+    compression: Optional[str] = "snappy",
+    index: Optional[bool] = None,
+    **kwargs,
+) -> None:
     # Standard pandas to_parquet with pyarrow engine
     pd_to_parquet(
-        df, fname, engine="pyarrow", compression=compression, index=index, **kwargs
+        df,
+        fname,
+        engine="pyarrow",
+        compression=compression,
+        index=index,
+        **kwargs,
     )
 
 
-def read_parquet(path, columns=None, filesystem=None):
+def read_parquet(
+    path: str,
+    columns: Optional[Iterable[str]] = None,
+    filesystem: Optional[fsspec.spec.AbstractFileSystem] = None,
+) -> GeoDataFrame:
     filesystem = validate_coerce_filesystem(path, filesystem)
 
     # Load pandas parquet metadata
@@ -136,16 +143,25 @@ def read_parquet(path, columns=None, filesystem=None):


 def to_parquet_dask(
-    ddf, path, compression="snappy", filesystem=None, storage_options=None, **kwargs
-):
+    ddf: DaskGeoDataFrame,
+    path,
+    compression: Optional[str] = "snappy",
+    filesystem: Optional[fsspec.spec.AbstractFileSystem] = None,
+    storage_options: Optional[Dict[str, Any]] = None,
+    **kwargs,
+) -> None:
     assert isinstance(ddf, DaskGeoDataFrame)
     filesystem = validate_coerce_filesystem(path, filesystem)
     if path and filesystem.isdir(path):
         filesystem.rm(path, recursive=True)
 
     dd_to_parquet(
-        ddf, path, engine="pyarrow", compression=compression,
-        storage_options=storage_options, **kwargs
+        ddf,
+        path,
+        engine="pyarrow",
+        compression=compression,
+        storage_options=storage_options,
+        **kwargs,
     )
 
     # Write partition bounding boxes to the _metadata file
@@ -178,12 +194,19 @@ def to_parquet_dask(


 def read_parquet_dask(
-    path, columns=None, filesystem=None, load_divisions=False,
-    geometry=None, bounds=None, categories=None, build_sindex=False
-):
-    """
-    Read spatialpandas parquet dataset(s) as DaskGeoDataFrame. Datasets are assumed to
-    have been written with the DaskGeoDataFrame.to_parquet or
+    path: str,
+    columns: Optional[Iterable[str]] = None,
+    filesystem: Optional[fsspec.spec.AbstractFileSystem] = None,
+    load_divisions: Optional[bool] = False,
+    geometry: Optional[str] = None,
+    bounds: Optional[Tuple[Number, Number, Number, Number]] = None,
+    categories: Optional[Union[List[str], Dict[str, str]]] = None,
+    build_sindex: Optional[bool] = False,
+) -> DaskGeoDataFrame:
+    """Read spatialpandas parquet dataset(s) as DaskGeoDataFrame.
+
+    Datasets are assumed to have been written with the
+    DaskGeoDataFrame.to_parquet or
     DaskGeoDataFrame.pack_partitions_to_parquet methods.
 
     Args:
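
Taken together, the annotated signatures document the I/O round trip. A minimal sketch of how the typed entry points are used (the file name and geometry values are illustrative):

```python
import pandas as pd

from spatialpandas import GeoDataFrame
from spatialpandas.geometry import PointArray
from spatialpandas.io import read_parquet, to_parquet

# Write a small GeoDataFrame with the pyarrow engine, then read it back.
df = GeoDataFrame({
    'geometry': PointArray([[0, 0], [1, 1]]),
    'value': pd.Series([10, 20]),
})
to_parquet(df, 'example.parq')
result = read_parquet('example.parq', columns=['geometry', 'value'])
assert isinstance(result, GeoDataFrame)
```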