Skip to content

Commit

Permalink
V0.18.5 (#633)
Browse files Browse the repository at this point in the history
* v0.18.5

* docs fix kmf and naf plotting

* bump version

* lint

* better image

* fix plotting test
  • Loading branch information
CamDavidsonPilon authored Feb 11, 2019
1 parent 5a09f11 commit a43f62d
Show file tree
Hide file tree
Showing 17 changed files with 523 additions and 190 deletions.
2 changes: 0 additions & 2 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,6 @@ script:
- make test
after_success:
- coveralls
# run linter but don't fail for errors
- make lint
# Don't want notifications
notifications:
email: false
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
### Changelogs

### 0.18.5
- added new plotting methods to parametric univariate models: `plot_survival_function`, `plot_hazard` and `plot_cumulative_hazard`. The last one is an alias for `plot`.
- added new properties to parametric univariate models: `confidence_interval_survival_function_`, `confidence_interval_hazard_`, `confidence_interval_cumulative_hazard_`. The last one is an alias for `confidence_interval_`.
- Fixed some overflow issues with `AalenJohansenFitter`'s variance calculations when using large datasets.
- Fixed an edge case in `AalenJohansenFitter` that caused some datasets to be jittered too often.
- Added a new kwarg to `AalenJohansenFitter`, `calculate_variance`, that can be used to turn off variance calculations since these can take a long time for large datasets. Thanks @pzivich!

### 0.18.4
- fixed confidence intervals in cumulative hazards for parametric univariate models. They were previously
severely depressed.
Expand Down
2 changes: 2 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ lint:
ifeq ($(TRAVIS_PYTHON_VERSION), 2.7)
echo "Skip linting for Python2.7"
else
black lifelines/ -l 120 --fast
black tests/ -l 120 --fast
prospector --output-format grouped
endif

Expand Down
40 changes: 34 additions & 6 deletions docs/Survival analysis with lifelines.rst
Original file line number Diff line number Diff line change
Expand Up @@ -570,17 +570,45 @@ Similarly, there are other parametric models in *lifelines*. Generally, which pa
llf = LogLogisticFitter().fit(T, E, label='LogLogisticFitter')
pwf = PiecewiseExponentialFitter([40, 60]).fit(T, E, label='PiecewiseExponentialFitter')
wbf.plot(ax=axes[0][0])
exf.plot(ax=axes[0][1])
lnf.plot(ax=axes[0][2])
naf.plot(ax=axes[1][0])
llf.plot(ax=axes[1][1])
pwf.plot(ax=axes[1][2])
wbf.plot_cumulative_hazard(ax=axes[0][0])
exf.plot_cumulative_hazard(ax=axes[0][1])
lnf.plot_cumulative_hazard(ax=axes[0][2])
naf.plot_cumulative_hazard(ax=axes[1][0])
llf.plot_cumulative_hazard(ax=axes[1][1])
pwf.plot_cumulative_hazard(ax=axes[1][2])
.. image:: images/waltons_cumulative_hazard.png

*lifelines* can also be used to define your own parametric model. There is a tutorial on this available, see `Piecewise Exponential Models and Creating Custom Models`_.

Parametric models can also be used to create and plot the survival function, too. Below we compare the parametric models versus the non-parametric Kaplan-Meier estimate:

.. code:: python
from lifelines import KaplanMeierFitter
fig, axes = plt.subplots(2, 3, figsize=(9, 5))
T = data['T']
E = data['E']
kmf = KaplanMeierFitter().fit(T, E, label='KaplanMeierFitter')
wbf = WeibullFitter().fit(T, E, label='WeibullFitter')
exf = ExponentialFitter().fit(T, E, label='ExponentialFitter')
lnf = LogNormalFitter().fit(T, E, label='LogNormalFitter')
llf = LogLogisticFitter().fit(T, E, label='LogLogisticFitter')
pwf = PiecewiseExponentialFitter([40, 60]).fit(T, E, label='PiecewiseExponentialFitter')
wbf.plot_survival_function(ax=axes[0][0])
exf.plot_survival_function(ax=axes[0][1])
lnf.plot_survival_function(ax=axes[0][2])
kmf.plot_survival_function(ax=axes[1][0])
llf.plot_survival_function(ax=axes[1][1])
pwf.plot_survival_function(ax=axes[1][2])
.. image:: images/waltons_survival_function.png


Other types of censoring
''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''

Expand Down
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@
#
# The short X.Y version.

version = "0.18.4"
version = "0.18.5"
# The full version, including dev info
release = version

Expand Down
Binary file added docs/images/waltons_survival_function.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
234 changes: 192 additions & 42 deletions docs/jupyter_notebooks/Modelling time-lagged conversion rates.ipynb

Large diffs are not rendered by default.

Large diffs are not rendered by default.

128 changes: 101 additions & 27 deletions lifelines/fitters/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from numpy.linalg import inv, pinv


from lifelines.plotting import plot_estimate
from lifelines.plotting import _plot_estimate
from lifelines.utils import (
qth_survival_times,
_to_array,
Expand Down Expand Up @@ -84,18 +84,20 @@ def _update_docstrings(self):
self._estimate_name, self.__class__.__name__
)
self.__class__.predict.__func__.__doc__ = self.predict.__doc__.format(self.__class__.__name__)
self.__class__.plot.__func__.__doc__ = plot_estimate.__doc__.format(
self.__class__.plot.__func__.__doc__ = _plot_estimate.__doc__.format(
self.__class__.__name__, self._estimate_name
)
elif PY3:
self.__class__.subtract.__doc__ = self.subtract.__doc__.format(self._estimate_name, self.__class__.__name__)
self.__class__.divide.__doc__ = self.divide.__doc__.format(self._estimate_name, self.__class__.__name__)
self.__class__.predict.__doc__ = self.predict.__doc__.format(self.__class__.__name__)
self.__class__.plot.__doc__ = plot_estimate.__doc__.format(self.__class__.__name__, self._estimate_name)
self.__class__.plot.__doc__ = _plot_estimate.__doc__.format(self.__class__.__name__, self._estimate_name)

@_must_call_fit_first
def plot(self, *args, **kwargs):
return plot_estimate(self, *args, **kwargs)
def plot(self, **kwargs):
return _plot_estimate(
self, estimate=getattr(self, self._estimate_name), confidence_intervals=self.confidence_interval_, **kwargs
)

@_must_call_fit_first
def subtract(self, other):
Expand Down Expand Up @@ -204,17 +206,29 @@ def _conditional_time_to_event_(self):
)

@_must_call_fit_first
def hazard_at_times(self, times):
def hazard_at_times(self, times, label=None):
raise NotImplementedError

@_must_call_fit_first
def survival_function_at_times(self, times):
def survival_function_at_times(self, times, label=None):
raise NotImplementedError

@_must_call_fit_first
def cumulative_hazard_at_times(self, times):
def cumulative_hazard_at_times(self, times, label=None):
raise NotImplementedError

@_must_call_fit_first
def plot_cumulative_hazard(self, **kwargs):
raise NotImplementedError()

@_must_call_fit_first
def plot_survival_function(self, **kwargs):
raise NotImplementedError()

@_must_call_fit_first
def plot_hazard(self, **kwargs):
raise NotImplementedError()


class ParametericUnivariateFitter(UnivariateFitter):
"""
Expand All @@ -228,7 +242,6 @@ class ParametericUnivariateFitter(UnivariateFitter):
def __init__(self, *args, **kwargs):
super(ParametericUnivariateFitter, self).__init__(*args, **kwargs)
self._estimate_name = "cumulative_hazard_"
self.plot_cumulative_hazard = self.plot
if not hasattr(self, "_hazard"):
# pylint: disable=no-value-for-parameter,unexpected-keyword-arg
self._hazard = egrad(self._cumulative_hazard, argnum=1)
Expand Down Expand Up @@ -302,9 +315,9 @@ def _buffer_bounds(self, bounds):
if lb is None and ub is None:
yield (None, None)
elif lb is None:
yield (None, self._MIN_PARAMETER_VALUE)
yield (None, ub - self._MIN_PARAMETER_VALUE)
elif ub is None:
yield (self._MIN_PARAMETER_VALUE, None)
yield (lb + self._MIN_PARAMETER_VALUE, None)
else:
yield (lb + self._MIN_PARAMETER_VALUE, ub - self._MIN_PARAMETER_VALUE)

Expand All @@ -327,13 +340,31 @@ def _negative_log_likelihood(self, params, T, E, entry):
return -ll / n

def _compute_confidence_bounds_of_cumulative_hazard(self, alpha, ci_labels):
return self._compute_confidence_bounds_of_transform(self._cumulative_hazard, alpha, ci_labels)

def _compute_confidence_bounds_of_transform(self, transform, alpha, ci_labels):
"""
This computes the confidence intervals of a transform of the parameters. Ex: take
the fitted parameters, a function/transform and the variance matrix and give me
back confidence intervals of the transform.
Parameters
-----------
transform: function
must be a function of two parameters:
``params``, an iterable that stores the parameters
``times``, a numpy vector representing some timeline
the function must use autograd imports (scipy and numpy)
alpha: float
confidence level
ci_labels: tuple
"""
alpha2 = inv_normal_cdf((1.0 + alpha) / 2.0)
df = pd.DataFrame(index=self.timeline)

# pylint: disable=no-value-for-parameter
gradient_of_cum_hazard_at_mle = make_jvp_reversemode(self._cumulative_hazard)(
self._fitted_parameters_, self.timeline
)
gradient_of_cum_hazard_at_mle = make_jvp_reversemode(transform)(self._fitted_parameters_, self.timeline)

gradient_at_times = np.vstack(
[gradient_of_cum_hazard_at_mle(basis) for basis in np.eye(len(self._fitted_parameters_))]
Expand All @@ -346,8 +377,9 @@ def _compute_confidence_bounds_of_cumulative_hazard(self, alpha, ci_labels):
if ci_labels is None:
ci_labels = ["%s_upper_%.2f" % (self._label, alpha), "%s_lower_%.2f" % (self._label, alpha)]
assert len(ci_labels) == 2, "ci_labels should be a length 2 array."
df[ci_labels[0]] = self.cumulative_hazard_at_times(self.timeline) + alpha2 * std_cumulative_hazard
df[ci_labels[1]] = self.cumulative_hazard_at_times(self.timeline) - alpha2 * std_cumulative_hazard

df[ci_labels[0]] = transform(self._fitted_parameters_, self.timeline) + alpha2 * std_cumulative_hazard
df[ci_labels[1]] = transform(self._fitted_parameters_, self.timeline) - alpha2 * std_cumulative_hazard
return df

def _fit_model(self, T, E, entry, show_progress=True):
Expand Down Expand Up @@ -538,7 +570,8 @@ def fit(
self.timeline = np.linspace(self.durations.min(), self.durations.max(), self.durations.shape[0])

self._label = label
alpha = alpha if alpha is not None else self.alpha
self._ci_labels = ci_labels
self.alpha = coalesce(alpha, self.alpha)

# estimation
self._fitted_parameters_, self._log_likelihood, self._hessian_ = self._fit_model(
Expand Down Expand Up @@ -576,30 +609,71 @@ def fit(
self._predict_label = label
self._update_docstrings()

self.survival_function_ = self.survival_function_at_times(self.timeline).to_frame(name=self._label)
self.hazard_ = self.hazard_at_times(self.timeline).to_frame(self._label)
self.cumulative_hazard_ = self.cumulative_hazard_at_times(self.timeline).to_frame(self._label)
self.survival_function_ = self.survival_function_at_times(self.timeline).to_frame()
self.hazard_ = self.hazard_at_times(self.timeline).to_frame()
self.cumulative_hazard_ = self.cumulative_hazard_at_times(self.timeline).to_frame()

self.confidence_interval_ = self._compute_confidence_bounds_of_cumulative_hazard(alpha, ci_labels)
return self

@_must_call_fit_first
def survival_function_at_times(self, times):
return pd.Series(self._survival_function(self._fitted_parameters_, times), index=_to_array(times))
def survival_function_at_times(self, times, label=None):
label = coalesce(label, self._label)
return pd.Series(self._survival_function(self._fitted_parameters_, times), index=_to_array(times), name=label)

@_must_call_fit_first
def cumulative_hazard_at_times(self, times):
return pd.Series(self._cumulative_hazard(self._fitted_parameters_, times), index=_to_array(times))
def cumulative_hazard_at_times(self, times, label=None):
label = coalesce(label, self._label)
return pd.Series(self._cumulative_hazard(self._fitted_parameters_, times), index=_to_array(times), name=label)

@_must_call_fit_first
def hazard_at_times(self, times):
return pd.Series(self._hazard(self._fitted_parameters_, times), index=_to_array(times))
def hazard_at_times(self, times, label=None):
label = coalesce(label, self._label)
return pd.Series(self._hazard(self._fitted_parameters_, times), index=_to_array(times), name=label)

@property
@_must_call_fit_first
def median_(self):
return median_survival_times(self.survival_function_)

@property
@_must_call_fit_first
def confidence_interval_(self):
return self._compute_confidence_bounds_of_cumulative_hazard(self.alpha, self._ci_labels)

@property
@_must_call_fit_first
def confidence_interval_cumulative_hazard_(self):
return self.confidence_interval_

@property
@_must_call_fit_first
def confidence_interval_hazard_(self):
return self._compute_confidence_bounds_of_transform(self._hazard, self.alpha, self._ci_labels)

@property
@_must_call_fit_first
def confidence_interval_survival_function_(self):
return self._compute_confidence_bounds_of_transform(self._survival_function, self.alpha, self._ci_labels)

@_must_call_fit_first
def plot_cumulative_hazard(self, **kwargs):
return self.plot(**kwargs)

@_must_call_fit_first
def plot_survival_function(self, **kwargs):
return _plot_estimate(
self,
estimate=getattr(self, "survival_function_"),
confidence_intervals=self.confidence_interval_survival_function_,
**kwargs
)

@_must_call_fit_first
def plot_hazard(self, **kwargs):
return _plot_estimate(
self, estimate=getattr(self, "hazard_"), confidence_intervals=self.confidence_interval_hazard_, **kwargs
)


class KnownModelParametericUnivariateFitter(ParametericUnivariateFitter):

Expand Down
Loading

0 comments on commit a43f62d

Please sign in to comment.