nt-williams · nt-williams · Apr 26, 2024 · Apr 26, 2024 · Apr 26, 2024 · Apr 26, 2024
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: lmtp
 Title: Non-Parametric Causal Effects of Feasible Interventions Based on Modified Treatment Policies
-Version: 1.4.0
+Version: 1.4.1
 Authors@R: 
     c(person(given = "Nicholas",
              family = "Williams",
@@ -24,7 +24,7 @@ License: AGPL-3
 Encoding: UTF-8
 LazyData: true
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.3.1
+RoxygenNote: 7.3.2
 Imports: 
     stats,
     nnls,
@@ -37,7 +37,7 @@ Imports:
     data.table (>= 1.13.0),
     checkmate (>= 2.1.0),
     SuperLearner,
-    schoolmath
+    isotone
 URL: https://beyondtheate.com/, https://github.com/nt-williams/lmtp
 BugReports: https://github.com/nt-williams/lmtp/issues
 Suggests: 

diff --git a/NAMESPACE b/NAMESPACE
@@ -2,7 +2,9 @@
 
 S3method(print,lmtp)
 S3method(print,lmtp_contrast)
+S3method(print,lmtp_survival)
 S3method(tidy,lmtp)
+S3method(tidy,lmtp_survival)
 export(create_node_list)
 export(event_locf)
 export(ipsi)
@@ -11,6 +13,7 @@ export(lmtp_control)
 export(lmtp_ipw)
 export(lmtp_sdr)
 export(lmtp_sub)
+export(lmtp_survival)
 export(lmtp_tmle)
 export(static_binary_off)
 export(static_binary_on)

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,19 @@
+# lmtp 1.4.1
+
+### New Features
+
+-   Added `lmtp_survival()` function for estimating the entire survival curve. Enforces monotonicity using isotonic regression (see issue \#140).
+-   Bootstrap for TMLE with the `boot` argument using a modified TMLE algorithm (https://arxiv.org/abs/1810.03030).
+
+### Bug Fixes
+
+-   Using fitted values from isotonic regression in `lmtp_survival()` instead of the original values (see issue \#149).
+-   Bootstrap TMLE uses cumulative density ratios (see issue \#151). 
+
+### General
+
+-   Removed dependency on `schoolmath` which used a very slow function for testing if a vector was "decimalish".
+
 # lmtp 1.4.0
 
 ### New Features

diff --git a/R/checks.R b/R/checks.R
@@ -177,7 +177,7 @@ check_trt_type <- function(data, trt, mtp) {
   for (i in seq_along(trt)) {
     a <- data[[trt[i]]]
     if (is.character(a) | is.factor(a)) next
-    is_decimal[i] <- any(schoolmath::is.decimal(a[!is.na(a)]))
+    is_decimal[i] <- any(is_decimal(a[!is.na(a)]))
   }
   if (any(is_decimal) & isFALSE(mtp)) {
       cli::cli_warn("Detected decimalish `trt` values and {.code mtp = FALSE}. Consider setting {.code mtp = TRUE} if getting errors.")

diff --git a/R/estimators.R b/R/estimators.R
@@ -41,6 +41,9 @@
 #' @param mtp \[\code{logical(1)}\]\cr
 #'  Is the intervention of interest a modified treatment policy?
 #'  Default is \code{FALSE}. If treatment variables are continuous this should be \code{TRUE}.
+#' @param boot \[\code{logical(1)}\]\cr
+#'  Compute standard errors using the bootstrap? Default is \code{FALSE}. If \code{FALSE}, standard
+#'  errors will be calculated using the empirical variance of the efficient influence function.
 #' @param outcome_type \[\code{character(1)}\]\cr
 #'  Outcome variable type (i.e., continuous, binomial, survival).
 #' @param id \[\code{character(1)}\]\cr
@@ -77,7 +80,8 @@
 #' \item{standard_error}{The estimated standard error of the LMTP effect.}
 #' \item{low}{Lower bound of the 95% confidence interval of the LMTP effect.}
 #' \item{high}{Upper bound of the 95% confidence interval of the LMTP effect.}
-#' \item{eif}{The estimated, un-centered, influence function of the estimate.}
+#' \item{eif}{The estimated, un-centered, influence function of the estimate,
+#'  \code{NULL} if \code{boot = TRUE}.}
 #' \item{shift}{The shift function specifying the treatment policy of interest.}
 #' \item{outcome_reg}{An n x Tau + 1 matrix of outcome regression predictions.
 #'  The mean of the first column is used for calculating theta.}
@@ -92,7 +96,8 @@
 #' @export
 lmtp_tmle <- function(data, trt, outcome, baseline = NULL, time_vary = NULL,
                       cens = NULL, shift = NULL, shifted = NULL, k = Inf,
-                      mtp = FALSE, outcome_type = c("binomial", "continuous", "survival"),
+                      mtp = FALSE, boot = FALSE,
+                      outcome_type = c("binomial", "continuous", "survival"),
                       id = NULL, bounds = NULL,
                       learners_outcome = "SL.glm",
                       learners_trt = "SL.glm",
@@ -125,6 +130,7 @@ lmtp_tmle <- function(data, trt, outcome, baseline = NULL, time_vary = NULL,
   checkmate::assertNumber(control$.bound)
   checkmate::assertNumber(control$.trim, upper = 1)
   checkmate::assertLogical(control$.return_full_fits, len = 1)
+  checkmate::assertLogical(boot, len = 1)
   check_trt_type(data, unlist(trt), mtp)
 
   task <- lmtp_task$new(
@@ -147,6 +153,33 @@ lmtp_tmle <- function(data, trt, outcome, baseline = NULL, time_vary = NULL,
 
   pb <- progressr::progressor(task$tau*folds*2)
 
+  if (isTRUE(boot)) {
+    ratios <- cf_r(task, learners_trt, mtp, control, pb)
+    Qn <- cf_sub(task, "tmp_lmtp_scaled_outcome", learners_outcome, control, pb)
+    Qnb_eps <- cf_tmle2(task, ratios$ratios, Qn, control)
+
+    ans <- theta_boot(
+      list(
+        estimator = "TMLE",
+        m = Qnb_eps$psi,
+        r = ratios$ratios,
+        boots = Qnb_eps$booted,
+        tau = task$tau,
+        folds = task$folds,
+        id = task$id,
+        outcome_type = task$outcome_type,
+        bounds = task$bounds,
+        weights = task$weights,
+        shift = if (is.null(shifted)) deparse(substitute((shift))) else NULL,
+        fits_m = Qn$fits,
+        fits_r = ratios$fits,
+        outcome_type = task$outcome_type,
+        seed = Qnb_eps$seed
+      )
+    )
+    return(ans)
+  }
+
   ratios <- cf_r(task, learners_trt, mtp, control, pb)
   estims <- cf_tmle(task,
                     "tmp_lmtp_scaled_outcome",
@@ -486,7 +519,7 @@ lmtp_sub <- function(data, trt, outcome, baseline = NULL, time_vary = NULL, cens
 
   theta_sub(
     eta = list(
-      m = estims$m,
+      m = estims$ms,
       outcome_type = task$outcome_type,
       bounds = task$bounds,
       folds = task$folds,

diff --git a/R/gcomp.R b/R/gcomp.R
@@ -23,15 +23,15 @@ cf_sub <- function(task, outcome, learners, control, pb) {
   out <- future::value(out)
 
   list(
-    m = recombine_outcome(out, "m", task$folds),
+    ms = recombine_outcome(out, "ms", task$folds),
+    mn = recombine_outcome(out, "mn", task$folds),
     fits = lapply(out, function(x) x[["fits"]])
   )
 }
 
 estimate_sub <- function(natural, shifted, trt, outcome, node_list, cens, risk,
                          tau, outcome_type, learners, control, pb) {
-
-  m <- matrix(nrow = nrow(natural$valid), ncol = tau)
+  ms <- mn <- matrix(nrow = nrow(natural$valid), ncol = tau)
   fits <- vector("list", length = tau)
 
   for (t in tau:1) {
@@ -79,13 +79,15 @@ estimate_sub <- function(natural, shifted, trt, outcome, node_list, cens, risk,
     under_shift_valid[, trt_t] <- shifted$valid[jv & rv, trt_t]
 
     natural$train[jt & rt, pseudo] <- bound(SL_predict(fit, under_shift_train), 1e-05)
-    m[jv & rv, t] <- bound(SL_predict(fit, under_shift_valid), 1e-05)
+    ms[jv & rv, t] <- bound(SL_predict(fit, under_shift_valid), 1e-05)
+    mn[jv & rv, t] <- bound(SL_predict(fit, natural$valid[jv & rv, vars]), 1e-05)
 
     natural$train[!rt, pseudo] <- 0
-    m[!rv, t] <- 0
+    ms[!rv, t] <- 0
+    mn[!rv, t] <- 0
 
     pb()
   }
 
-  list(m = m, fits = fits)
+  list(ms = ms, mn = mn, fits = fits)
 }
diff --git a/R/lmtp_control.R b/R/lmtp_control.R
@@ -13,6 +13,8 @@
 #'  The number of cross-validation folds for \code{learners_trt}.
 #' @param .return_full_fits \[\code{logical(1)}\]\cr
 #'  Return full SuperLearner fits? Default is \code{FALSE}, return only SuperLearner weights.
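+#' @param .B \[\code{integer(1)}\]\cr
+#'  The number of bootstrap replicates to use for the bootstrap. Default is 1000.
+#' @param .boot_seed \[\code{integer(1)}\]\cr
+#'  An optional random seed to use for the bootstrap. Default is \code{NULL}.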
 #'
 #' @return A list of parameters controlling the estimation procedure.
 #' @export
@@ -23,10 +25,14 @@ lmtp_control <- function(.bound = 1e5,
                          .trim = 0.999,
                          .learners_outcome_folds = 10,
                          .learners_trt_folds = 10,
-                         .return_full_fits = FALSE) {
+                         .return_full_fits = FALSE,
+                         .B = 1000,
+                         .boot_seed = NULL) {
   list(.bound = .bound,
        .trim = .trim,
        .learners_outcome_folds = .learners_outcome_folds,
        .learners_trt_folds = .learners_trt_folds,
-       .return_full_fits = .return_full_fits)
+       .return_full_fits = .return_full_fits,
+       .B = .B,
+       .boot_seed = .boot_seed)
 }
diff --git a/R/lmtp_survival.R b/R/lmtp_survival.R
@@ -0,0 +1,142 @@
+#' LMTP Survival Curve Estimator
+#'
+#' Wrapper around \code{lmtp_tmle} and \code{lmtp_sdr} for survival outcomes to estimate the entire survival curve.
+#' The chosen estimator is fit once per time point, each fit using the treatment, time-varying covariate,
+#' censoring, and outcome nodes up to and including that time point.
+#' Estimates are reconstructed using isotonic regression to enforce monotonicity of the survival curve.
+#' \bold{Confidence intervals correspond to marginal confidence intervals for the survival curve, not simultaneous intervals.}
+#'
+#' @param data \[\code{data.frame}\]\cr
+#'  A \code{data.frame} in wide format containing all necessary variables
+#'  for the estimation problem. Must not be a \code{data.table}.
+#' @param trt \[\code{character}\] or \[\code{list}\]\cr
+#'  A vector containing the column names of treatment variables ordered by time.
+#'  Or, a list of vectors, the same length as the number of time points of observation.
+#'  Vectors should contain column names for the treatment variables at each time point. The list
+#'  should be ordered following the time ordering of the model.
+#'  If of length 1, the same treatment node is used at every time point.
+#' @param outcomes \[\code{character}\]\cr
+#'  A vector containing the column names of intermediate outcome variables and the final
+#'  outcome variable ordered by time. Only numeric values are allowed. Variables should be coded as 0 and 1.
+#'  Must contain at least two unique column names.
+#' @param baseline \[\code{character}\]\cr
+#'  An optional vector containing the column names of baseline covariates to be
+#'  included for adjustment at every time point.
+#' @param time_vary \[\code{list}\]\cr
+#'  A list the same length as the number of time points of observation with
+#'  the column names for new time-varying covariates introduced at each time point. The list
+#'  should be ordered following the time ordering of the model.
+#'  If of length 1, it is passed unchanged to every fit.
+#' @param cens \[\code{character}\]\cr
+#'  An optional vector of column names of censoring indicators the same
+#'  length as the number of time points of observation. If missingness in the outcome is
+#'  present or if time-to-event outcome, must be provided.
+#' @param shift \[\code{closure}\]\cr
+#'  A two argument function that specifies how treatment variables should be shifted.
+#'  See examples for how to specify shift functions for continuous, binary, and categorical exposures.
+#' @param shifted \[\code{data.frame}\]\cr
+#'  An optional data frame, the same as in \code{data}, but modified according
+#'  to the treatment policy of interest. If specified, \code{shift} is ignored.
+#' @param estimator \[\code{character(1)}\]\cr
+#'  The estimator to use. Either \code{"lmtp_tmle"} or \code{"lmtp_sdr"}.
+#' @param k \[\code{integer(1)}\]\cr
+#'  An integer specifying how previous time points should be
+#'  used for estimation at the given time point. Default is \code{Inf},
+#'  all time points.
+#' @param mtp \[\code{logical(1)}\]\cr
+#'  Is the intervention of interest a modified treatment policy?
+#'  Default is \code{FALSE}. If treatment variables are continuous this should be \code{TRUE}.
+#' @param boot \[\code{logical(1)}\]\cr
+#'  Compute standard errors using the bootstrap? Default is \code{FALSE}. If \code{FALSE}, standard
+#'  errors will be calculated using the empirical variance of the efficient influence function.
+#'  Ignored if \code{estimator = "lmtp_sdr"}.
+#' @param id \[\code{character(1)}\]\cr
+#'  An optional column name containing cluster level identifiers.
+#' @param learners_outcome \[\code{character}\]\cr A vector of \code{SuperLearner} algorithms for estimation
+#'  of the outcome regression. Default is \code{"SL.glm"}, a main effects GLM.
+#' @param learners_trt \[\code{character}\]\cr A vector of \code{SuperLearner} algorithms for estimation
+#'  of the exposure mechanism. Default is \code{"SL.glm"}, a main effects GLM.
+#'  \bold{Only include candidate learners capable of binary classification}.
+#' @param folds \[\code{integer(1)}\]\cr
+#'  The number of folds to be used for cross-fitting.
+#' @param weights \[\code{numeric(nrow(data))}\]\cr
+#'  An optional vector containing sampling weights.
+#' @param control \[\code{list()}\]\cr
+#'  Output of \code{lmtp_control()}.
+#'
+#' @return A list of class \code{lmtp_survival} containing \code{lmtp} objects for each time point.
+#'
+#' @example inst/examples/lmtp_survival-ex.R
+#' @export
+lmtp_survival <- function(data, trt, outcomes, baseline = NULL, time_vary = NULL,
+                          cens = NULL, shift = NULL, shifted = NULL,
+                          estimator = c("lmtp_tmle", "lmtp_sdr"),
+                          k = Inf,
+                          mtp = FALSE,
+                          boot = FALSE,
+                          id = NULL,
+                          learners_outcome = "SL.glm",
+                          learners_trt = "SL.glm",
+                          folds = 10,
+                          weights = NULL,
+                          control = lmtp_control()) {
+
+  checkmate::assertCharacter(outcomes, min.len = 2, null.ok = FALSE, unique = TRUE, any.missing = FALSE)
+
+  estimator <- match.arg(estimator)
+  tau <- length(outcomes)
+  estimates <- vector("list", tau)
+
+  # Arguments shared by every per-time-point fit. The time-indexed arguments
+  # (trt, time_vary, outcome, cens, outcome_type) are filled in inside the loop.
+  args <- list(
+    data = data,
+    baseline = baseline,
+    shift = shift,
+    shifted = shifted,
+    k = k,
+    mtp = mtp,
+    id = id,
+    learners_outcome = learners_outcome,
+    learners_trt = learners_trt,
+    folds = folds,
+    weights = weights,
+    control = control
+  )
+
+  # A length-1 treatment / time-varying specification is reused as-is at every time point.
+  if (length(trt) == 1) args$trt <- trt
+  if (length(time_vary) == 1) args$time_vary <- time_vary
+
+  # `boot` is only forwarded to lmtp_tmle(); it is not passed to lmtp_sdr().
+  if (estimator == "lmtp_tmle") {
+    args$boot <- boot
+    expr <- expression(do.call(lmtp_tmle, args))
+  } else {
+    expr <- expression(do.call(lmtp_sdr, args))
+  }
+
+  # `t` must exist before the progress step is created because its message interpolates `{t}`.
+  t <- 1
+  cli::cli_progress_step("Working on time {t}/{tau}...")
+  for (t in seq_len(tau)) {
+    if (length(trt) > 1) args$trt <- trt[seq_len(t)]
+    # Test the user-supplied `time_vary`, not `args$time_vary`: the latter is still unset
+    # when `time_vary` has more than one element, so checking it would silently drop the
+    # time-varying covariates from every fit.
+    if (length(time_vary) > 1) args$time_vary <- time_vary[seq_len(t)]
+    args$outcome <- outcomes[seq_len(t)]
+    args$cens <- cens[seq_len(t)]
+    # With a single outcome node the first time point is fit as a binomial outcome.
+    args$outcome_type <- if (t == 1) "binomial" else "survival"
+
+    estimates[[t]] <- future::future(eval(expr), seed = TRUE)
+    cli::cli_progress_update()
+  }
+
+  cli::cli_progress_done()
+  estimates <- future::value(estimates)
+  estimates <- fix_surv_time1(estimates)
+  estimates <- isotonic_projection(estimates)
+
+  class(estimates) <- "lmtp_survival"
+  estimates
+}
+
+# Replace the point estimates in a list of per-time-point fits with their isotonic
+# regression fit, and recompute the confidence limits around the new estimates.
+#
+# x:     list of fits; each element must have a `standard_error` field and is given
+#        updated `theta`, `low`, and `high` fields.
+# alpha: significance level; intervals are two-sided normal intervals with
+#        coverage 1 - alpha (default 0.05, i.e. 95% intervals).
+#
+# Returns `x` with `theta`, `low`, and `high` overwritten for every element.
+isotonic_projection <- function(x, alpha = 0.05) {
+  # Two-sided normal critical value; previously computed but never used, so `alpha` had no effect.
+  cv <- abs(qnorm(p = alpha / 2))
+  estim <- tidy.lmtp_survival(x)
+  # The regression is run on 1 - estimate and mapped back below; `iso_fit$x` holds the fitted values.
+  iso_fit <- isotone::gpava(seq_along(x), 1 - estim$estimate)
+  for (i in seq_along(x)) {
+    x[[i]]$theta <- (1 - iso_fit$x[i])
+    x[[i]]$low <- x[[i]]$theta - (cv * x[[i]]$standard_error)
+    x[[i]]$high <- x[[i]]$theta + (cv * x[[i]]$standard_error)
+  }
+  x
+}
diff --git a/R/print.R b/R/print.R
@@ -21,3 +21,8 @@ print.lmtp_contrast <- function(x, ...) {
   x$vals$p.value <- format.pval(x$vals$p.value, digits = 3, eps = 0.001)
   print(format(x$vals, digits = 3))
 }
+
+#' @export
+print.lmtp_survival <- function(x, ...) {
+  # Tidy the object, coerce the result to a plain data.frame, and print it.
+  # `...` is part of the S3 print signature and is not used here.
+  survival_table <- as.data.frame(tidy.lmtp_survival(x))
+  print(survival_table)
+}