-
Notifications
You must be signed in to change notification settings - Fork 97
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Imbalanced covariates metrics and sklearn scorer wrapper (#59)
* Add arbitrary `kwargs` to metrics to more easily align signatures
  Signed-off-by: Ehud-Karavani <[email protected]>
* Add count/fraction of imbalanced covariates metric+scorer
  Signed-off-by: Ehud-Karavani <[email protected]>
* Add scikit-learn scorer wrapper for propensity models
  Signed-off-by: Ehud-Karavani <[email protected]>
* Add name to time-variable (pd.Series) in NHEFS survival data
  Signed-off-by: Ehud-Karavani <[email protected]>
* Bump version: 0.9.5
  Signed-off-by: Ehud-Karavani <[email protected]>
---------
Signed-off-by: Ehud-Karavani <[email protected]>
- Loading branch information
Showing 12 changed files with 178 additions and 5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
__version__ = "0.9.4" | ||
__version__ = "0.9.5" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
from .sklearn_scorer_wrapper import SKLearnScorerWrapper |
28 changes: 28 additions & 0 deletions
28
causallib/contrib/sklearn_scorer_wrapper/sklearn_scorer_wrapper.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
from causallib.metrics.scorers import PropensityScorerBase | ||
|
||
|
||
class SKLearnScorerWrapper(PropensityScorerBase):
    """Adapter that lets a scikit-learn scorer evaluate a causallib propensity model.

    The wrapped scorer is applied to the underlying learner held by the
    causallib estimator (its ``learner`` attribute), using the treatment
    assignment ``a`` as the prediction target.
    """

    def __init__(self, score_func, sign=None, **kwargs):
        """Initialize the wrapper.

        Args:
            score_func: Scorer callable, invoked as
                ``score_func(learner, X, a, sample_weight=...)``
                (e.g. the object returned by ``sklearn.metrics.get_scorer``).
            sign: Accepted for signature compatibility only; the value is
                ignored and ``sign=1`` is always passed to the base class.
            **kwargs: Forwarded unchanged to ``PropensityScorerBase.__init__``.
        """
        super().__init__(
            score_func=score_func,
            sign=1,  # This keeps original scorer sign
            **kwargs
        )

    def _score(self, estimator, X, a, y=None, sample_weight=None, **kwargs):
        """Score the learner inside `estimator` against the treatment assignment.

        Args:
            estimator: Object exposing either ``learner`` or
                ``best_estimator_.learner``.
            X: Covariates passed to the scorer.
            a: Treatment assignment, used as the scorer's target.
            y: Unused; present to align with the scorer call signature.
            sample_weight: Forwarded to the scorer as ``sample_weight``.
            **kwargs: Unused.

        Returns:
            Whatever the wrapped scorer returns.
        """
        # NOTE(review): `_score_func` is presumably stored by
        # PropensityScorerBase.__init__ -- confirm against the base class.
        learner = self._extract_sklearn_estimator(estimator)
        return self._score_func(learner, X, a, sample_weight=sample_weight)

    @staticmethod
    def _extract_sklearn_estimator(estimator):
        """Return the learner held by `estimator`.

        Args:
            estimator: Object exposing ``best_estimator_.learner`` (checked
                first) or ``learner``.

        Returns:
            The ``learner`` attribute found on `estimator`.

        Raises:
            AttributeError: If neither attribute is present.
        """
        if hasattr(estimator, "best_estimator_"):
            # Causallib's wrapper around GridSearchCV
            return estimator.best_estimator_.learner
        if hasattr(estimator, "learner"):
            return estimator.learner
        # Objects without an instance `__dict__` (e.g. slotted classes) must
        # still produce the intended error message, so fall back to no attributes.
        attribute_names = list(getattr(estimator, "__dict__", {}))
        raise AttributeError(
            f"Could not extract an sklearn estimator from {estimator}, "
            f"which has the following attributes:\n"
            f"{attribute_names}"
        )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
import unittest | ||
|
||
import pandas as pd | ||
|
||
from sklearn.linear_model import LogisticRegression | ||
from sklearn.datasets import make_classification | ||
from sklearn.utils import Bunch | ||
from sklearn.metrics import get_scorer | ||
|
||
from causallib.estimation import IPW | ||
from causallib.model_selection import GridSearchCV | ||
|
||
from causallib.contrib.sklearn_scorer_wrapper import SKLearnScorerWrapper | ||
|
||
|
||
class TestSKLearnScorerWrapper(unittest.TestCase):
    """Tests for `SKLearnScorerWrapper` using an IPW model fitted on synthetic data."""

    @classmethod
    def setUpClass(cls):
        """Generate a 500-sample classification dataset and fit an IPW model on it."""
        n_samples = 500
        features, treatment = make_classification(
            n_samples=n_samples,
            n_features=5,
            n_informative=5,
            n_redundant=0,
            random_state=42,
        )
        features = pd.DataFrame(features)
        treatment = pd.Series(treatment)
        # The treatment series doubles as the outcome `y`.
        cls.data = Bunch(X=features, a=treatment, y=treatment)

        propensity_model = IPW(LogisticRegression())
        propensity_model.fit(features, treatment)
        cls.estimator = propensity_model

    def test_agreement_with_sklearn(self):
        """The wrapped scorer should return the same value as the plain sklearn scorer."""
        scorer_names = [
            "accuracy",
            "average_precision",
            "neg_brier_score",
            "f1",
            "neg_log_loss",
            "precision",
            "recall",
            "roc_auc",
        ]
        data = self.data
        for scorer_name in scorer_names:
            with self.subTest(f"Test scorer {scorer_name}"):
                sklearn_scorer = get_scorer(scorer_name)
                # Reference value: sklearn scorer applied directly to the learner.
                expected = sklearn_scorer(self.estimator.learner, data.X, data.a)

                wrapped_scorer = SKLearnScorerWrapper(sklearn_scorer)
                actual = wrapped_scorer(self.estimator, data.X, data.a, data.y)

                self.assertAlmostEqual(actual, expected)

    def test_hyperparameter_search_model(self):
        """The wrapper should work as `scoring` in GridSearchCV and on the fitted search."""
        scorer = SKLearnScorerWrapper(get_scorer("roc_auc"))
        param_grid = {
            "clip_min": [0.2, 0.3],
            "learner__C": [0.1, 1],
        }
        model = GridSearchCV(
            self.estimator,
            param_grid=param_grid,
            scoring=scorer,
            cv=3,
        )
        model.fit(self.data.X, self.data.a, self.data.y)

        # Score of the fitted search object on the full data vs. its best CV score.
        full_data_score = scorer(model, self.data.X, self.data.a, self.data.y)
        self.assertGreaterEqual(full_data_score, model.best_score_)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters