Merge pull request #135 from CamDavidsonPilon/statistics-updates

Version 0.7.0
CamDavidsonPilon · Mar 1, 2015 · 669d292 · 669d292
2 parents e501eea + 7a8ddc2
commit 669d292
Show file tree

Hide file tree

Showing 6 changed files with 193 additions and 167 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,11 @@
 ### Changelogs
 
+#### 0.7.0
+- allow for multiple fitters to be passed into `k_fold_cross_validation`. 
+- statistical tests in `lifelines.statstics`. now return a `StatisticalResult` object with properties like `p_value`, `test_results`, and `summary`.  
+- fixed a bug in how log-rank statistical tests are performed. The covariance matrix was not being correctly calculated. This resulted in slightly different p-values. 
+- `WeibullFitter`, `ExponentialFitter`, `KaplanMeierFitter` and `BreslowFlemingHarringtonFitter` all have a `conditional_time_to_event_` property that measures  the median duration remaining until the death event, given survival up until time t.
+
 #### 0.6.1
 
 - addition of `median_` property to `WeibullFitter` and `ExponentialFitter`. 

diff --git a/lifelines/estimation.py b/lifelines/estimation.py
@@ -57,7 +57,7 @@ def fit(self, durations, event_observed=None, timeline=None, entry=None,
         """
         Parameters:
           duration: an array, or pd.Series, of length n -- duration subject was observed for
-          timeline: return the best estimate at the values in timelines (postively increasing)
+          timeline: return the estimate at the values in timeline (postively increasing)
           event_observed: an array, or pd.Series, of length n -- True if the the death was observed, False if the event
              was lost (right-censored). Defaults all True if event_observed==None
           entry: an array, or pd.Series, of length n -- relative time when a subject entered the study. This is
@@ -102,6 +102,10 @@ def fit(self, durations, event_observed=None, timeline=None, entry=None,
 
         return self
 
+    @property
+    def conditional_time_to_event_(self):
+        return _conditional_time_to_event_(self)
+
     def hazard_at_times(self, times):
         return self.lambda_ * self.rho_ * (self.lambda_ * times) ** (self.rho_ - 1)
 
@@ -245,6 +249,10 @@ def fit(self, durations, event_observed=None, timeline=None, entry=None,
 
         return self
 
+    @property
+    def conditional_time_to_event_(self):
+        return _conditional_time_to_event_(self)
+
     def _bounds(self, alpha, ci_labels):
         alpha2 = inv_normal_cdf((1. + alpha) / 2.)
         df = pd.DataFrame(index=self.timeline)
@@ -469,6 +477,10 @@ def fit(self, durations, event_observed=None, timeline=None, entry=None, label='
         setattr(self, "plot_" + estimate_name, self.plot)
         return self
 
+    @property
+    def conditional_time_to_event_(self):
+        return _conditional_time_to_event_(self)
+
     def _bounds(self, cumulative_sq_, alpha, ci_labels):
         # See http://courses.nus.edu.sg/course/stacar/internet/st3242/handouts/notes2.pdf
         alpha2 = inv_normal_cdf((1. + alpha) / 2.)
@@ -491,23 +503,6 @@ def _additive_var(self, population, deaths):
         np.seterr(divide='ignore')
         return (1. * deaths / (population * (population - deaths))).replace([np.inf], 0)
 
-    def conditional_time_to(self):
-        """
-        Return a DataFrame, with index equal to survival_function_, that estimates the median
-        duration remaining until the death event, given survival up until time t. For example, if an
-        indivual exists until age 1, their expected life remaining *given they lived to time 1*
-        might be 9 years.
-
-        Returns:
-            conditional_time_to_: DataFrame, with index equal to survival_function_
-
-        """
-        age = self.survival_function_.index.values[:, None]
-        columns = ['%s - Conditional time remaining to event' % self._label]
-        return pd.DataFrame(qth_survival_times(self.survival_function_[self._label] * 0.5, self.survival_function_).T.sort(ascending=False).values,
-                            index=self.survival_function_.index,
-                            columns=columns) - age
-
 
 class BreslowFlemingHarringtonFitter(BaseFitter):
 
@@ -570,6 +565,10 @@ def fit(self, durations, event_observed=None, timeline=None, entry=None,
         self.plot_survival_function = self.plot
         return self
 
+    @property
+    def conditional_time_to_event_(self):
+        return _conditional_time_to_event_(self)
+
 
 class AalenAdditiveFitter(BaseFitter):
 
@@ -1264,7 +1263,7 @@ def _compute_confidence_intervals(self):
 
     def _compute_standard_errors(self):
         se = np.sqrt(inv(-self._hessian_).diagonal())
-        return pd.DataFrame(se[None,:],
+        return pd.DataFrame(se[None, :],
                             index=['se'], columns=self.hazards_.columns)
 
     def _compute_z_values(self):
@@ -1425,6 +1424,24 @@ def get_index(X):
     return index
 
 
+def _conditional_time_to_event_(fitter):
+    """
+    Return a DataFrame, with index equal to survival_function_, that estimates the median
+    duration remaining until the death event, given survival up until time t. For example, if an
+    individual exists until age 1, their expected life remaining *given they lived to time 1*
+    might be 9 years.
+
+    Returns:
+        conditional_time_to_: DataFrame, with index equal to survival_function_
+
+    """
+    age = fitter.survival_function_.index.values[:, None]
+    columns = ['%s - Conditional time remaining to event' % fitter._label]
+    return pd.DataFrame(qth_survival_times(fitter.survival_function_[fitter._label] * 0.5, fitter.survival_function_).T.sort(ascending=False).values,
+                        index=fitter.survival_function_.index,
+                        columns=columns) - age
+
+
 def _subtract(fitter, estimate):
     class_name = fitter.__class__.__name__
     doc_string = """
@@ -1578,10 +1595,6 @@ def qth_survival_time(q, survival_function):
 def median_survival_times(survival_functions):
     return qth_survival_times(0.5, survival_functions)
 
-
-def asymmetric_epanechnikov_kernel(q, x):
-    return (64 * (2 - 4 * q + 6 * q * q - 3 * q ** 3) + 240 * (1 - q) ** 2 * x) / ((1 + q) ** 4 * (19 - 18 * q + 3 * q ** 2))
-
 """
 References:
 [1] Aalen, O., Borgan, O., Gjessing, H., 2008. Survival and Event History Analysis

diff --git a/lifelines/statistics.py b/lifelines/statistics.py
@@ -10,7 +10,7 @@
 
 
 def logrank_test(event_times_A, event_times_B, event_observed_A=None, event_observed_B=None,
-                 alpha=0.95, t_0=-1, suppress_print=False, **kwargs):
+                 alpha=0.95, t_0=-1, **kwargs):
     """
     Measures and reports on whether two intensity processes are different. That is, given two
     event series, determines whether the data generating processes are statistically different.
@@ -19,24 +19,18 @@ def logrank_test(event_times_A, event_times_B, event_observed_A=None, event_obse
     H_0: both event series are from the same generating processes
     H_A: the event series are from different generating processes.
 
-    Pre lifelines 0.2.x: this returned a test statistic.
-    Post lifelines 0.2.x: this returns the results of the entire test.
-
-    See Survival and Event Analysis, page 108. This implicitly uses the log-rank weights. The R language
-    equivilant in the `survival` package would use conf.type='log-log'.
+    See Survival and Event Analysis, page 108. This implicitly uses the log-rank weights.
 
     Parameters:
       event_times_foo: a (nx1) array of event durations (birth to death,...) for the population.
       censorship_bar: a (nx1) array of censorship flags, 1 if observed, 0 if not. Default assumes all observed.
       t_0: the period under observation, -1 for all time.
       alpha: the level of signifiance
-      suppress_print: if True, do not print the summary. Default False.
       kwargs: add keywords and meta-data to the experiment summary
 
-    Returns
-      summary: a print-friendly string detailing the results of the test.
-      p: the p-value
-      test result: True if reject the null, (pendantically None if inconclusive)
+    Returns:
+      results: a StatisticalResult object with properties 'p_value', 'summary', 'test_statistic', 'test_result'
+
     """
 
     event_times_A, event_times_B = np.array(event_times_A), np.array(event_times_B)
@@ -46,14 +40,14 @@ def logrank_test(event_times_A, event_times_B, event_observed_A=None, event_obse
         event_observed_B = np.ones(event_times_B.shape[0])
 
     event_times = np.r_[event_times_A, event_times_B]
-    groups = np.r_[np.zeros(event_times_A.shape[0]), np.ones(event_times_B.shape[0])]
+    groups = np.r_[np.zeros(event_times_A.shape[0], dtype=int), np.ones(event_times_B.shape[0], dtype=int)]
     event_observed = np.r_[event_observed_A, event_observed_B]
     return multivariate_logrank_test(event_times, groups, event_observed,
-                                     alpha=alpha, t_0=t_0, suppress_print=suppress_print, **kwargs)
+                                     alpha=alpha, t_0=t_0, **kwargs)
 
 
 def pairwise_logrank_test(event_durations, groups, event_observed=None,
-                          alpha=0.95, t_0=-1, bonferroni=True, suppress_print=False, **kwargs):
+                          alpha=0.95, t_0=-1, bonferroni=True, **kwargs):
     """
     Perform the logrank test pairwise for all n>2 unique groups (use the more appropriate logrank_test for n=2).
     We have to be careful here: if there are n groups, then there are n*(n-1)/2 pairs -- so many pairs increase
@@ -70,26 +64,10 @@ def pairwise_logrank_test(event_durations, groups, event_observed=None,
       t_0: the final time to compare the series' up to. Defaults to all.
       bonferroni: If true, uses the Bonferroni correction to compare the M=n(n-1)/2 pairs, i.e alpha = alpha/M
             See (here)[http://en.wikipedia.org/wiki/Bonferroni_correction].
-      suppress_print: if True, do not print the summary. Default False.
       kwargs: add keywords and meta-data to the experiment summary.
 
     Returns:
-      S: a (n,n) dataframe of print-friendly test summaries (np.nan on the diagonal). Ex:
-      P: a (n,n) dataframe of p-values (np.nan on the diagonal).
-      T: a (n,n) dataframe of test results (True is significant, None if not) (np.nan on the diagonal).
-
-    Example:
-      P:
-                a         b         c
-      a       NaN  0.711136  0.401462
-      b  0.711136       NaN  0.734605
-      c  0.401462  0.734605       NaN
-
-      T:
-            a     b     c
-      a   NaN  None  None
-      b  None   NaN  None
-      c  None  None   NaN
+        R: a (n,n) dataframe of StatisticalResults (None on the diagonal)
 
     """
 
@@ -108,32 +86,25 @@ def pairwise_logrank_test(event_durations, groups, event_observed=None,
         m = 0.5 * n * (n - 1)
         alpha = 1 - (1 - alpha) / m
 
-    P = np.zeros((n, n), dtype=float)
-    T = np.empty((n, n), dtype=object)
-    S = np.empty((n, n), dtype=object)
+    R = np.zeros((n, n), dtype=object)
 
-    np.fill_diagonal(P, np.nan)
-    np.fill_diagonal(T, np.nan)
-    np.fill_diagonal(S, np.nan)
+    np.fill_diagonal(R, None)
 
     for i1, i2 in combinations(np.arange(n), 2):
         g1, g2 = unique_groups[[i1, i2]]
         ix1, ix2 = (groups == g1), (groups == g2)
         test_name = str(g1) + " vs. " + str(g2)
-        summary, p_value, result = logrank_test(event_durations.ix[ix1], event_durations.ix[ix2],
-                                                event_observed.ix[ix1], event_observed.ix[ix2],
-                                                alpha=alpha, t_0=t_0, use_bonferroni=bonferroni,
-                                                test_name=test_name, suppress_print=suppress_print,
-                                                **kwargs)
-        T[i1, i2], T[i2, i1] = result, result
-        P[i1, i2], P[i2, i1] = p_value, p_value
-        S[i1, i2], S[i2, i1] = summary, summary
+        result = logrank_test(event_durations.ix[ix1], event_durations.ix[ix2],
+                              event_observed.ix[ix1], event_observed.ix[ix2],
+                              alpha=alpha, t_0=t_0, use_bonferroni=bonferroni,
+                              test_name=test_name, **kwargs)
+        R[i1, i2], R[i2, i1] = result, result
 
-    return [pd.DataFrame(x, columns=unique_groups, index=unique_groups) for x in [S, P, T]]
+    return pd.DataFrame(R, columns=unique_groups, index=unique_groups)
 
 
 def multivariate_logrank_test(event_durations, groups, event_observed=None,
-                              alpha=0.95, t_0=-1, suppress_print=False, **kwargs):
+                              alpha=0.95, t_0=-1, **kwargs):
     """
     This test is a generalization of the logrank_test: it can deal with n>2 populations (and should
       be equal when n=2):
@@ -148,15 +119,13 @@ def multivariate_logrank_test(event_durations, groups, event_observed=None,
           to all observed.
       alpha: the level of significance desired.
       t_0: the final time to compare the series' up to. Defaults to all.
-      suppress_print: if True, do not print the summary. Default False.
       kwargs: add keywords and meta-data to the experiment summary.
 
-    Returns:
-      summary: a print-friendly summary of the statistical test
-      p_value: the p-value
-      test_result: True if reject the null, (pendantically) None if we can't reject the null.
+    Returns
+      results: a StatisticalResult object with properties 'p_value', 'summary', 'test_statistic', 'test_result'
 
     """
+    event_durations, groups = np.asarray(event_durations), np.asarray(groups)
     if event_observed is None:
         event_observed = np.ones((event_durations.shape[0], 1))
 
@@ -180,23 +149,64 @@ def multivariate_logrank_test(event_durations, groups, event_observed=None,
     assert abs(Z_j.sum()) < 10e-8, "Sum is not zero."  # this should move to a test eventually.
 
     # compute covariance matrix
-    V_ = n_ij.mul(np.sqrt(d_i) / n_i, axis='index').fillna(1)
+    factor = (((n_i - d_i) / (n_i - 1)).replace(np.inf, 1)) * d_i
+    n_ij['_'] = n_i.values
+    V_ = n_ij.mul(np.sqrt(factor) / n_i, axis='index').fillna(1)
     V = -np.dot(V_.T, V_)
     ix = np.arange(n_groups)
-    V[ix, ix] = V[ix, ix] + ev
+    V[ix, ix] = -V[-1, ix] + V[ix, ix]
+    V = V[:-1, :-1]
 
     # take the first n-1 groups
-    U = Z_j.ix[:-1].dot(np.linalg.pinv(V[:-1, :-1]).dot(Z_j.ix[:-1]))  # Z.T*inv(V)*Z
+    U = Z_j.iloc[:-1].dot(np.linalg.pinv(V[:-1, :-1]).dot(Z_j.iloc[:-1]))  # Z.T*inv(V)*Z
 
     # compute the p-values and tests
     test_result, p_value = chisq_test(U, n_groups - 1, alpha)
-    summary = pretty_print_summary(test_result, p_value, U, t_0=t_0, test='logrank',
-                                   alpha=alpha, null_distribution='chi squared',
-                                   df=n_groups - 1, **kwargs)
 
-    if not suppress_print:
-        print(summary)
-    return summary, p_value, test_result
+    return StatisticalResult(test_result, p_value, U, t_0=t_0, test='logrank',
+                             alpha=alpha, null_distribution='chi squared',
+                             df=n_groups - 1, **kwargs)
+
+
+class StatisticalResult(object):
+
+    def __init__(self, test_result, p_value, test_statistic, **kwargs):
+        self.p_value = p_value
+        self.test_statistic = test_statistic
+        self.test_result = "Reject Null" if test_result else "Cannot Reject Null"
+        self.is_significant = test_result is True
+
+        for kw, value in kwargs.items():
+            setattr(self, kw, value)
+
+        self._kwargs = kwargs
+
+    def print_summary(self):
+        print(self.__unicode__())
+
+    @property
+    def summary(self):
+        cols = ['p-value', 'test_statistic', 'test_result', 'is_significant']
+        return pd.DataFrame([[self.p_value, self.test_statistic, self.test_result, self.is_significant]], columns=cols)
+
+    def __repr__(self):
+        return "<lifelines.StatisticalResult: \n%s\n>" % self.__unicode__()
+
+    def __unicode__(self):
+        HEADER = "   __ p-value ___|__ test statistic __|____ test result ____|__ is significant __"
+        meta_data = self._pretty_print_meta_data(self._kwargs)
+        s = ""
+        s += "Results\n"
+        s += meta_data + "\n"
+        s += HEADER + "\n"
+        s += '{:>16.5f} | {:>18.3f} |  {: ^19}| {: ^18}'.format(self.p_value, self.test_statistic, self.test_result, 'True' if self.is_significant else 'False')
+        return s
+
+    def _pretty_print_meta_data(self, dictionary):
+        s = ""
+        for k, v in dictionary.items():
+            s += "   " + str(k).replace('_', ' ') + ": " + str(v) + "\n"
+        return s
 
 
 def chisq_test(U, degrees_freedom, alpha):
@@ -212,24 +222,4 @@ def two_sided_z_test(Z, alpha):
     if p_value < 1 - alpha / 2.:
         return True, p_value
     else:
-        return False, p_value
-
-
-def pretty_print_summary(test_results, p_value, test_statistic, **kwargs):
-    """
-    kwargs are experiment meta-data.
-    """
-    HEADER = "   __ p-value ___|__ test statistic __|__ test results __"
-    s = "Results\n"
-    meta_data = pretty_print_meta_data(kwargs)
-    s += meta_data + "\n"
-    s += HEADER + "\n"
-    s += "         %.5f |              %.3f |     %s   " % (p_value, test_statistic, test_results)
-    return s
-
-
-def pretty_print_meta_data(dictionary):
-    s = ""
-    for k, v in dictionary.items():
-        s = s + "   " + k.__str__().replace('_', ' ') + ": " + v.__str__() + "\n"
-    return s
+        return None, p_value