Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

integrate smaup into tobler #139

Closed
wants to merge 9 commits into from
Closed
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .ci/37.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,4 @@ dependencies:
- pygeos
- h3-py
- joblib
- esda
1 change: 1 addition & 0 deletions .ci/38.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,4 @@ dependencies:
- pygeos
- h3-py
- joblib
- esda
1 change: 1 addition & 0 deletions .ci/39.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,4 @@ dependencies:
- numpydoc
- nbsphinx
- joblib
- esda
1 change: 1 addition & 0 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,4 @@ dependencies:
- mapclassify
- descartes
- joblib
- esda
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@ libpysal
tqdm
pygeos
joblib
esda
3 changes: 2 additions & 1 deletion requirements_tests.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,5 @@ pytest-cov
twine
xgboost
shap >=0.33
quilt3 >=3.1.11
quilt3 >=3.1.11
esda
1 change: 1 addition & 0 deletions tobler/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@
from . import dasymetric
from . import model
from . import util
from . import diagnostics
19 changes: 17 additions & 2 deletions tobler/area_weighted/area_interpolate.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,13 @@
import numpy as np
import geopandas as gpd
from ._vectorized_raster_interpolation import _fast_append_profile_in_gdf
import warnings
from warnings import warn
from scipy.sparse import dok_matrix, diags, coo_matrix
import pandas as pd
import os

from tobler.util.util import _check_crs, _nan_check, _inf_check, _check_presence_of_crs
from tobler.diagnostics import _smaup


def _chunk_dfs(geoms_to_chunk, geoms_full, n_jobs):
Expand Down Expand Up @@ -268,6 +269,7 @@ def _area_interpolate_binning(
spatial_index="auto",
n_jobs=1,
categorical_variables=None,
smaup_kwds=None
):
"""
Area interpolation for extensive, intensive and categorical variables.
Expand Down Expand Up @@ -307,6 +309,11 @@ def _area_interpolate_binning(
of `pygeos` and `geopandas`.
categorical_variables : list
[Optional. Default=None] Columns in dataframes for categorical variables
smaup_kwds : dict
[Optional. Default = None] Keyword arguments for tobler's smaup wrapper
Requires the following values:
int: 'k' for number of regions to be tested and
libpysal.weights: 'w' to calculate Moran's I.

Returns
-------
Expand Down Expand Up @@ -348,6 +355,14 @@ def _area_interpolate_binning(
source_df = source_df.copy()
target_df = target_df.copy()

if smaup_kwds is not None:
for var in intensive_variables,extensive_variables:
stat = _smaup(smaup_kwds["k"], source_df[var].to_numpy(), smaup_kwds["w"])
if stat.summary.find('H0 is rejected'):
warn(f"{var} is affected by the MAUP. Interpolations of this variable may not be accourate!")
else:
print(f"{var} is not affected by the MAUP.")

if _check_crs(source_df, target_df):
pass
else:
Expand Down Expand Up @@ -614,7 +629,7 @@ def _area_tables_raster(
res_union_pre = gpd.overlay(source_df, target_df, how="union")

# Establishing a CRS for the generated union
warnings.warn(
warn(
"The CRS for the generated union will be set to be the same as source_df."
)
res_union_pre.crs = source_df.crs
Expand Down
17 changes: 17 additions & 0 deletions tobler/dasymetric/masked_area_interpolate.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

from ..area_weighted._vectorized_raster_interpolation import *

from tobler.diagnostics import _smaup
from warnings import warn


def masked_area_interpolate(
source_df,
Expand All @@ -13,6 +16,7 @@ def masked_area_interpolate(
intensive_variables=None,
allocate_total=True,
tables=None,
smaup_kwds=None,
):
"""Interpolate data between two polygonal datasets using an auxiliary raster to mask out uninhabited land.

Expand All @@ -38,6 +42,11 @@ def masked_area_interpolate(
whether to allocate the total from the source geometries (the default is True).
tables : tuple of two numpy.array (optional)
As generated from `tobler.area_weighted.area_tables_raster` (the default is None).
smaup_kwds : dict
[Optional. Default = None] Keyword arguments for tobler's smaup wrapper
Requires the following values:
int: 'k' for number of regions to be tested and
libpysal.weights: 'w' to calculate Moran's I.

Returns
-------
Expand All @@ -53,6 +62,14 @@ def masked_area_interpolate(
"You must pass the path to a raster that can be read with rasterio"
)

if smaup_kwds is not None:
for var in intensive_variables,extensive_variables:
stat = _smaup(smaup_kwds["k"], source_df[var].to_numpy(), smaup_kwds["w"])
if stat.summary.find('H0 is rejected'):
warn(f"{var} is affected by the MAUP. Interpolations of this variable may not be accourate!")
else:
print(f"{var} is not affected by the MAUP.")

if not tables:
tables = _area_tables_raster(
source_df,
Expand Down
1 change: 1 addition & 0 deletions tobler/diagnostics/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .smaup import _smaup
35 changes: 35 additions & 0 deletions tobler/diagnostics/smaup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
"""
A wrapper for using the S-maup statistical test in tobler interpolation

"""

from esda.smaup import Smaup
from esda.moran import Moran

def _smaup(k, y, w,):
"""
A function that wraps esda's smaup function and automates some of the process of calculating smaup.
For more about smaup read here: https://pysal.org/esda/generated/esda.Smaup.html#esda.Smaup

Parameters
----------

k : int
number of regions
y : array
data for autocorellation calculation
w : libpysal.weights object
pysal spatial weights object for autocorellation calculation

Returns
-------
esda.smaup.Smaup:
statistic that contains information regarding the extent to which the variable is affected by the MAUP.


"""
rho = Moran(y, w).I
n = len(y)
stat = Smaup(n,k,rho)
return stat

15 changes: 15 additions & 0 deletions tobler/model/glm.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
_return_weights_from_regression,
)
from tobler.util import project_gdf
from tobler.diagnostics import _smaup


def glm_pixel_adjusted(
Expand Down Expand Up @@ -100,6 +101,7 @@ def glm(
likelihood="poisson",
force_crs_match=True,
return_model=False,
smaup_kwds=None
):
"""Estimate interpolated values using raster data as input to a generalized linear model.

Expand Down Expand Up @@ -130,6 +132,11 @@ def glm(
return model : bool
whether to return the fitted model in addition to the interpolated geodataframe.
If true, this will return (geodataframe, model)
smaup_kwds : dict
[Optional. Default = None] Keyword arguments for tobler's smaup wrapper
Requires the following values:
int: 'k' for number of regions to be tested and
libpysal.weights: 'w' to calculate Moran's I.

Returns
--------
Expand All @@ -143,6 +150,14 @@ def glm(
source_df = source_df.copy()
target_df = target_df.copy()
_check_presence_of_crs(source_df)

if smaup_kwds is not None:
stat = _smaup(smaup_kwds["k"], source_df[variable].to_numpy(), smaup_kwds["w"])
if stat.summary.find('H0 is rejected'):
warn(f"{variable} is affected by the MAUP. Interpolations of this variable may not be accourate!")
else:
print(f"{variable} is not affected by the MAUP.")

liks = {"poisson": Poisson, "gaussian": Gaussian, "neg_binomial": NegativeBinomial}

if likelihood not in liks.keys():
Expand Down
17 changes: 17 additions & 0 deletions tobler/tests/test_diagnostics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
"""test diagnostics funtions"""

import geopandas
from tobler.diagnostics import _smaup
from libpysal.weights import Queen
from libpysal.examples import load_example


sac1 = load_example("Sacramento1")
sac1 = geopandas.read_file(sac1.get_path("sacramentot2.shp"))
sac1["pct_poverty"] = sac1.POV_POP / sac1.POV_TOT


def test_smaup():
queen = Queen.from_dataframe(sac1)
s = _smaup(1,sac1["pct_poverty"].to_numpy(),queen)
AnGWar26 marked this conversation as resolved.
Show resolved Hide resolved
assert s.summary