From 8d4ccd8d17e7e950508e0208576f0d6d57943e44 Mon Sep 17 00:00:00 2001
From: nt-williams <ntwilliams.personal@gmail.com>
Date: Fri, 26 Apr 2024 10:20:46 -0700
Subject: [PATCH 1/7] Modified TMLE for computationally effiecient bootstrap

---
 DESCRIPTION         |  2 +-
 R/estimators.R      | 38 +++++++++++++++++++++++++++---
 R/gcomp.R           | 14 ++++++-----
 R/lmtp_control.R    | 10 ++++++--
 R/theta.R           | 35 ++++++++++++++++++++++++++++
 R/tmle2.R           | 57 +++++++++++++++++++++++++++++++++++++++++++++
 man/lmtp_control.Rd |  8 ++++++-
 man/lmtp_tmle.Rd    |  8 ++++++-
 8 files changed, 158 insertions(+), 14 deletions(-)
 create mode 100644 R/tmle2.R

diff --git a/DESCRIPTION b/DESCRIPTION
index 14c2e99..977052d 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -24,7 +24,7 @@ License: AGPL-3
 Encoding: UTF-8
 LazyData: true
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.2.3
+RoxygenNote: 7.3.1
 Imports: 
     stats,
     nnls,
diff --git a/R/estimators.R b/R/estimators.R
index 8f4b7c5..e3e80fd 100644
--- a/R/estimators.R
+++ b/R/estimators.R
@@ -41,6 +41,9 @@
 #' @param mtp \[\code{logical(1)}\]\cr
 #'  Is the intervention of interest a modified treatment policy?
 #'  Default is \code{FALSE}. If treatment variables are continuous this should be \code{TRUE}.
+#' @param boot \[\code{logical(1)}\]\cr
+#'  Compute standard errors using the bootstrap? Default is \code{TRUE}. If \code{FALSE}, standard
+#'  errors will be calculated using the empirical variance of the efficient influence function.
 #' @param outcome_type \[\code{character(1)}\]\cr
 #'  Outcome variable type (i.e., continuous, binomial, survival).
 #' @param id \[\code{character(1)}\]\cr
@@ -77,7 +80,8 @@
 #' \item{standard_error}{The estimated standard error of the LMTP effect.}
 #' \item{low}{Lower bound of the 95% confidence interval of the LMTP effect.}
 #' \item{high}{Upper bound of the 95% confidence interval of the LMTP effect.}
-#' \item{eif}{The estimated, un-centered, influence function of the estimate.}
+#' \item{eif}{The estimated, un-centered, influence function of the estimate,
+#'  \code{NULL} if \code{boot = TRUE}.}
 #' \item{shift}{The shift function specifying the treatment policy of interest.}
 #' \item{outcome_reg}{An n x Tau + 1 matrix of outcome regression predictions.
 #'  The mean of the first column is used for calculating theta.}
@@ -92,7 +96,8 @@
 #' @export
 lmtp_tmle <- function(data, trt, outcome, baseline = NULL, time_vary = NULL,
                       cens = NULL, shift = NULL, shifted = NULL, k = Inf,
-                      mtp = FALSE, outcome_type = c("binomial", "continuous", "survival"),
+                      mtp = FALSE, boot = TRUE,
+                      outcome_type = c("binomial", "continuous", "survival"),
                       id = NULL, bounds = NULL,
                       learners_outcome = "SL.glm",
                       learners_trt = "SL.glm",
@@ -125,6 +130,7 @@ lmtp_tmle <- function(data, trt, outcome, baseline = NULL, time_vary = NULL,
   checkmate::assertNumber(control$.bound)
   checkmate::assertNumber(control$.trim, upper = 1)
   checkmate::assertLogical(control$.return_full_fits, len = 1)
+  checkmate::assertLogical(boot, len = 1)
 
   task <- lmtp_task$new(
     data = data,
@@ -146,6 +152,32 @@ lmtp_tmle <- function(data, trt, outcome, baseline = NULL, time_vary = NULL,
 
   pb <- progressr::progressor(task$tau*folds*2)
 
+  if (isTRUE(boot)) {
+    ratios <- cf_r(task, learners_trt, mtp, control, pb)
+    Qn <- cf_sub(task, "tmp_lmtp_scaled_outcome", learners_outcome, control, pb)
+    Qnb_eps <- cf_tmle2(task, ratios$ratios, Qn, control)
+
+    ans <- theta_boot(
+      list(
+        estimator = "TMLE",
+        m = Qnb_eps$psi,
+        r = ratios$ratios,
+        boots = Qnb_eps$booted,
+        tau = task$tau,
+        folds = task$folds,
+        id = task$id,
+        outcome_type = task$outcome_type,
+        bounds = task$bounds,
+        weights = task$weights,
+        shift = if (is.null(shifted)) deparse(substitute((shift))) else NULL,
+        fits_m = Qn$fits,
+        fits_r = ratios$fits,
+        outcome_type = task$outcome_type
+      )
+    )
+    return(ans)
+  }
+
   ratios <- cf_r(task, learners_trt, mtp, control, pb)
   estims <- cf_tmle(task,
                     "tmp_lmtp_scaled_outcome",
@@ -484,7 +516,7 @@ lmtp_sub <- function(data, trt, outcome, baseline = NULL, time_vary = NULL, cens
 
   theta_sub(
     eta = list(
-      m = estims$m,
+      m = estims$ms,
       outcome_type = task$outcome_type,
       bounds = task$bounds,
       folds = task$folds,
diff --git a/R/gcomp.R b/R/gcomp.R
index ad0b718..a5feeb9 100644
--- a/R/gcomp.R
+++ b/R/gcomp.R
@@ -23,15 +23,15 @@ cf_sub <- function(task, outcome, learners, control, pb) {
   out <- future::value(out)
 
   list(
-    m = recombine_outcome(out, "m", task$folds),
+    ms = recombine_outcome(out, "ms", task$folds),
+    mn = recombine_outcome(out, "mn", task$folds),
     fits = lapply(out, function(x) x[["fits"]])
   )
 }
 
 estimate_sub <- function(natural, shifted, trt, outcome, node_list, cens, risk,
                          tau, outcome_type, learners, control, pb) {
-
-  m <- matrix(nrow = nrow(natural$valid), ncol = tau)
+  ms <- mn <- matrix(nrow = nrow(natural$valid), ncol = tau)
   fits <- vector("list", length = tau)
 
   for (t in tau:1) {
@@ -79,13 +79,15 @@ estimate_sub <- function(natural, shifted, trt, outcome, node_list, cens, risk,
     under_shift_valid[, trt_t] <- shifted$valid[jv & rv, trt_t]
 
     natural$train[jt & rt, pseudo] <- bound(SL_predict(fit, under_shift_train), 1e-05)
-    m[jv & rv, t] <- bound(SL_predict(fit, under_shift_valid), 1e-05)
+    ms[jv & rv, t] <- bound(SL_predict(fit, under_shift_valid), 1e-05)
+    mn[jv & rv, t] <- bound(SL_predict(fit, natural$valid[jv & rv, vars]), 1e-05)
 
     natural$train[!rt, pseudo] <- 0
-    m[!rv, t] <- 0
+    ms[!rv, t] <- 0
+    mn[!rv, t] <- 0
 
     pb()
   }
 
-  list(m = m, fits = fits)
+  list(ms = ms, mn = mn, fits = fits)
 }
diff --git a/R/lmtp_control.R b/R/lmtp_control.R
index 4c8ad6b..86a3237 100644
--- a/R/lmtp_control.R
+++ b/R/lmtp_control.R
@@ -13,6 +13,8 @@
 #'  The number of cross-validation folds for \code{learners_trt}.
 #' @param .return_full_fits \[\code{logical(1)}\]\cr
 #'  Return full SuperLearner fits? Default is \code{FALSE}, return only SuperLearner weights.
+#' @param .B description
+#' @param .boot_seed description
 #'
 #' @return A list of parameters controlling the estimation procedure.
 #' @export
@@ -23,10 +25,14 @@ lmtp_control <- function(.bound = 1e5,
                          .trim = 0.999,
                          .learners_outcome_folds = 10,
                          .learners_trt_folds = 10,
-                         .return_full_fits = FALSE) {
+                         .return_full_fits = FALSE,
+                         .B = 1000,
+                         .boot_seed = NULL) {
   list(.bound = .bound,
        .trim = .trim,
        .learners_outcome_folds = .learners_outcome_folds,
        .learners_trt_folds = .learners_trt_folds,
-       .return_full_fits = .return_full_fits)
+       .return_full_fits = .return_full_fits,
+       .B = .B,
+       .boot_seed = .boot_seed)
 }
diff --git a/R/theta.R b/R/theta.R
index 66582c5..56a156f 100644
--- a/R/theta.R
+++ b/R/theta.R
@@ -111,3 +111,38 @@ theta_dr <- function(eta, augmented = FALSE) {
   class(out) <- "lmtp"
   out
 }
+
+theta_boot <- function(eta) {
+  theta <- weighted.mean(eta$m[, 1], eta$weights)
+
+  if (eta$outcome_type == "continuous") {
+    theta <- rescale_y_continuous(theta, eta$bounds)
+  }
+
+  # NEED TO FIGURE OUT HOW THIS WOULD WORK WITH CLUSTERING
+  se <- sqrt(var(eta$boots))
+  ci_low  <- theta - (qnorm(0.975) * se)
+  ci_high <- theta + (qnorm(0.975) * se)
+
+  out <- list(
+    estimator = eta$estimator,
+    theta = theta,
+    standard_error = se,
+    low = ci_low,
+    high = ci_high,
+    id = eta$id,
+    shift = eta$shift,
+    outcome_reg = switch(
+      eta$outcome_type,
+      continuous = rescale_y_continuous(eta$m, eta$bounds),
+      binomial = eta$m
+    ),
+    density_ratios = eta$r,
+    fits_m = eta$fits_m,
+    fits_r = eta$fits_r,
+    outcome_type = eta$outcome_type
+  )
+
+  class(out) <- "lmtp"
+  out
+}
diff --git a/R/tmle2.R b/R/tmle2.R
new file mode 100644
index 0000000..87517ff
--- /dev/null
+++ b/R/tmle2.R
@@ -0,0 +1,57 @@
+cf_tmle2 <- function(task, ratios, m_init, control) {
+  psi <- estimate_tmle2(task$natural,
+                        task$cens,
+                        task$risk,
+                        task$tau,
+                        m_init$mn,
+                        m_init$ms,
+                        ratios,
+                        task$weights)
+
+  if (!is.null(control$.boot_seed)) set.seed(control$.boot_seed)
+
+  boots <- replicate(control$.B,
+                     sample(1:nrow(task$natural), nrow(task$natural), replace = TRUE),
+                     simplify = FALSE)
+
+  Qnb <- sapply(boots, function(i) {
+    psis <- estimate_tmle2(task$natural[i, , drop = FALSE],
+                           task$cens,
+                           task$risk,
+                           task$tau,
+                           m_init$mn[i, , drop = FALSE],
+                           m_init$ms[i, , drop = FALSE],
+                           ratios[i, , drop = FALSE],
+                           task$weights[i])
+    weighted.mean(psis[,1], task$weights[i])
+  })
+
+  list(psi = psi, booted = Qnb)
+}
+
+estimate_tmle2 <- function(data, cens, risk, tau, mn, ms, ratios, weights) {
+  m_eps <- matrix(nrow = nrow(data), ncol = tau + 1)
+  m_eps[, tau + 1] <- data$tmp_lmtp_scaled_outcome
+
+  fits <- vector("list", length = tau)
+  for (t in tau:1) {
+    i  <- censored(data, cens, t)$i
+    j <- censored(data, cens, t)$j
+    r <- at_risk(data, risk, t)
+
+    wts <- ratios[i & r, t] * weights[i & r]
+
+    fit <- sw(
+      glm(
+        m_eps[i & r, t + 1] ~ offset(qlogis(mn[i & r, t])),
+        weights = wts,
+        family = "binomial"
+      )
+    )
+
+    m_eps[j & r, t] <- bound(plogis(qlogis(ms[j & r, t]) + coef(fit)))
+    m_eps[!r, t] <- 0
+  }
+
+  m_eps
+}
diff --git a/man/lmtp_control.Rd b/man/lmtp_control.Rd
index 2e1d10a..8172476 100644
--- a/man/lmtp_control.Rd
+++ b/man/lmtp_control.Rd
@@ -9,7 +9,9 @@ lmtp_control(
   .trim = 0.999,
   .learners_outcome_folds = 10,
   .learners_trt_folds = 10,
-  .return_full_fits = FALSE
+  .return_full_fits = FALSE,
+  .B = 1000,
+  .boot_seed = NULL
 )
 }
 \arguments{
@@ -30,6 +32,10 @@ The number of cross-validation folds for \code{learners_trt}.}
 
 \item{.return_full_fits}{[\code{logical(1)}]\cr
 Return full SuperLearner fits? Default is \code{FALSE}, return only SuperLearner weights.}
+
+\item{.B}{description}
+
+\item{.boot_seed}{description}
 }
 \value{
 A list of parameters controlling the estimation procedure.
diff --git a/man/lmtp_tmle.Rd b/man/lmtp_tmle.Rd
index f790bab..1d19454 100644
--- a/man/lmtp_tmle.Rd
+++ b/man/lmtp_tmle.Rd
@@ -15,6 +15,7 @@ lmtp_tmle(
   shifted = NULL,
   k = Inf,
   mtp = FALSE,
+  boot = TRUE,
   outcome_type = c("binomial", "continuous", "survival"),
   id = NULL,
   bounds = NULL,
@@ -73,6 +74,10 @@ all time points.}
 Is the intervention of interest a modified treatment policy?
 Default is \code{FALSE}. If treatment variables are continuous this should be \code{TRUE}.}
 
+\item{boot}{[\code{logical(1)}]\cr
+Compute standard errors using the bootstrap? Default is \code{TRUE}. If \code{FALSE}, standard
+errors will be calculated using the empirical variance of the efficient influence function.}
+
 \item{outcome_type}{[\code{character(1)}]\cr
 Outcome variable type (i.e., continuous, binomial, survival).}
 
@@ -108,7 +113,8 @@ A list of class \code{lmtp} containing the following components:
 \item{standard_error}{The estimated standard error of the LMTP effect.}
 \item{low}{Lower bound of the 95\% confidence interval of the LMTP effect.}
 \item{high}{Upper bound of the 95\% confidence interval of the LMTP effect.}
-\item{eif}{The estimated, un-centered, influence function of the estimate.}
+\item{eif}{The estimated, un-centered, influence function of the estimate,
+\code{NULL} if \code{boot = TRUE}.}
 \item{shift}{The shift function specifying the treatment policy of interest.}
 \item{outcome_reg}{An n x Tau + 1 matrix of outcome regression predictions.
 The mean of the first column is used for calculating theta.}

From a73f4866aa8409b710d34bcddae73d8778fa9255 Mon Sep 17 00:00:00 2001
From: nt-williams <ntwilliams.personal@gmail.com>
Date: Fri, 26 Apr 2024 10:28:03 -0700
Subject: [PATCH 2/7] Pre-allocated for loop with future for bootstrap

---
 R/tmle2.R | 31 ++++++++++++++++++-------------
 1 file changed, 18 insertions(+), 13 deletions(-)

diff --git a/R/tmle2.R b/R/tmle2.R
index 87517ff..75bd45b 100644
--- a/R/tmle2.R
+++ b/R/tmle2.R
@@ -14,19 +14,24 @@ cf_tmle2 <- function(task, ratios, m_init, control) {
                      sample(1:nrow(task$natural), nrow(task$natural), replace = TRUE),
                      simplify = FALSE)
 
-  Qnb <- sapply(boots, function(i) {
-    psis <- estimate_tmle2(task$natural[i, , drop = FALSE],
-                           task$cens,
-                           task$risk,
-                           task$tau,
-                           m_init$mn[i, , drop = FALSE],
-                           m_init$ms[i, , drop = FALSE],
-                           ratios[i, , drop = FALSE],
-                           task$weights[i])
-    weighted.mean(psis[,1], task$weights[i])
-  })
-
-  list(psi = psi, booted = Qnb)
+  Qnb <- vector("list", control$.B)
+  for (i in seq_along(boots)) {
+    ib <- boots[[i]]
+    Qnb[[i]] <- future::future({
+      psis <- estimate_tmle2(task$natural[ib, , drop = FALSE],
+                             task$cens,
+                             task$risk,
+                             task$tau,
+                             m_init$mn[ib, , drop = FALSE],
+                             m_init$ms[ib, , drop = FALSE],
+                             ratios[ib, , drop = FALSE],
+                             task$weights[ib])
+      weighted.mean(psis[, 1], task$weights[ib])
+    },
+    seed = TRUE)
+  }
+
+  list(psi = psi, booted = unlist(future::value(Qnb)))
 }
 
 estimate_tmle2 <- function(data, cens, risk, tau, mn, ms, ratios, weights) {

From 327c0bf1ceff17d7ea4c9e96d282b8ae70797ff8 Mon Sep 17 00:00:00 2001
From: nt-williams <ntwilliams.personal@gmail.com>
Date: Fri, 26 Apr 2024 10:32:35 -0700
Subject: [PATCH 3/7] Updating tests

---
 tests/testthat/test-censoring.R              | 2 +-
 tests/testthat/test-dynamic.R                | 6 +++---
 tests/testthat/test-point_treatment.R        | 2 +-
 tests/testthat/test-shifted.R                | 2 +-
 tests/testthat/test-survey.R                 | 2 +-
 tests/testthat/test-time_varying_treatment.R | 2 +-
 6 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/tests/testthat/test-censoring.R b/tests/testthat/test-censoring.R
index c7b2ae1..3b1547d 100644
--- a/tests/testthat/test-censoring.R
+++ b/tests/testthat/test-censoring.R
@@ -7,7 +7,7 @@ truth <- 0.8
 
 sub <- sw(lmtp_sub(sim_cens, A, "Y", time_vary = L, cens = C, k = 0, shift = NULL, folds = 1))
 ipw <- sw(lmtp_ipw(sim_cens, A, "Y", time_vary = L, cens = C, k = 0, shift = NULL, folds = 1))
-tmle <- sw(lmtp_tmle(sim_cens, A, "Y", time_vary = L, cens = C, k = 0, shift = NULL, folds = 1))
+tmle <- sw(lmtp_tmle(sim_cens, A, "Y", time_vary = L, cens = C, k = 0, shift = NULL, boot = F, folds = 1))
 sdr <- sw(lmtp_sdr(sim_cens, A, "Y", time_vary = L, cens = C, k = 0, shift = NULL, folds = 1))
 
 # tests
diff --git a/tests/testthat/test-dynamic.R b/tests/testthat/test-dynamic.R
index 94bf0c2..27e7e51 100644
--- a/tests/testthat/test-dynamic.R
+++ b/tests/testthat/test-dynamic.R
@@ -32,18 +32,18 @@ cens <- c("C1", "C2")
 nodes <- list(c(NULL), c("L"))
 
 # truth = 0.308
-tml.stc <- sw(lmtp_tmle(sim, a, "Y", baseline, nodes, cens, shift = static_binary_on, folds = 1))
+tml.stc <- sw(lmtp_tmle(sim, a, "Y", baseline, nodes, cens, shift = static_binary_on, folds = 1, boot = F))
 
 # truth = 0.528
 sdr.stc <- sw(lmtp_sdr(sim, a, "Y", baseline, nodes, cens, shift = static_binary_off, folds = 1))
 
 # truth = 0.433
-tml.tv <- sw(lmtp_tmle(sim, a, "Y", baseline, nodes, cens, shift = time_vary_on, folds = 1))
+tml.tv <- sw(lmtp_tmle(sim, a, "Y", baseline, nodes, cens, shift = time_vary_on, folds = 1, boot = F))
 sdr.tv <- sw(lmtp_sdr(sim, a, "Y", baseline, nodes, cens, shift = time_vary_on, folds = 1))
 
 # time varying and covariate dynamic
 # truth = 0.345
-tml.dyn <- sw(lmtp_tmle(sim, a, "Y", baseline, nodes, cens, shift = dynamic_vec, folds = 1))
+tml.dyn <- sw(lmtp_tmle(sim, a, "Y", baseline, nodes, cens, shift = dynamic_vec, folds = 1, boot = F))
 sdr.dyn <- sw(lmtp_sdr(sim, a, "Y", baseline, nodes, cens, shift = dynamic_vec, folds = 1))
 
 test_that("Dynamic intervention fidelity", {
diff --git a/tests/testthat/test-point_treatment.R b/tests/testthat/test-point_treatment.R
index 638e103..5f2b8d3 100644
--- a/tests/testthat/test-point_treatment.R
+++ b/tests/testthat/test-point_treatment.R
@@ -15,7 +15,7 @@ truth <- mean(tmp$Y.1)
 
 sub <- lmtp_sub(tmp, "A", "Y", baseline = c("W1", "W2"), shift = static_binary_on, folds = 1)
 ipw <- lmtp_ipw(tmp, "A", "Y", baseline = c("W1", "W2"), shift = static_binary_on, folds = 1)
-tmle <- lmtp_tmle(tmp, "A", "Y", baseline = c("W1", "W2"), shift = static_binary_on, folds = 1)
+tmle <- lmtp_tmle(tmp, "A", "Y", baseline = c("W1", "W2"), shift = static_binary_on, folds = 1, boot = F)
 sdr <- lmtp_sdr(tmp, "A", "Y", baseline = c("W1", "W2"), shift = static_binary_on, folds = 1)
 
 # tests
diff --git a/tests/testthat/test-shifted.R b/tests/testthat/test-shifted.R
index b85f90b..161af27 100644
--- a/tests/testthat/test-shifted.R
+++ b/tests/testthat/test-shifted.R
@@ -20,7 +20,7 @@ ipw <-
 tmle <-
   sw(lmtp_tmle(sim_cens, a, "Y", nodes, baseline = NULL,
                cens, k = 0, shifted = sc,
-               outcome_type = "binomial", folds = 2, mtp = TRUE))
+               outcome_type = "binomial", folds = 2, mtp = TRUE, boot = F))
 
 sdr <-
   sw(lmtp_sdr(sim_cens, a, "Y", nodes, baseline = NULL,
diff --git a/tests/testthat/test-survey.R b/tests/testthat/test-survey.R
index 2cdd6d5..b8021bf 100644
--- a/tests/testthat/test-survey.R
+++ b/tests/testthat/test-survey.R
@@ -25,7 +25,7 @@ ipw <- lmtp_ipw(tmp, "A", "Y", baseline = c("W1", "W2"), shift = static_binary_o
                 weights = wts, folds = 2)
 
 tmle <- lmtp_tmle(tmp, "A", "Y", baseline = c("W1", "W2"), shift = static_binary_on,
-                  weights = wts, folds = 2)
+                  weights = wts, folds = 2, boot = F)
 
 sdr <- lmtp_sdr(tmp, "A", "Y", baseline = c("W1", "W2"), shift = static_binary_on,
                 weights = wts, folds = 2)
diff --git a/tests/testthat/test-time_varying_treatment.R b/tests/testthat/test-time_varying_treatment.R
index 7e7cd1e..4b289e0 100644
--- a/tests/testthat/test-time_varying_treatment.R
+++ b/tests/testthat/test-time_varying_treatment.R
@@ -25,7 +25,7 @@ truth <- 0.305
 
 sub <- sw(lmtp_sub(tmp, a, "Y", time_vary = time_varying, shift = d, folds = 1))
 ipw <- sw(lmtp_ipw(tmp, a, "Y", time_vary = time_varying, shift = d, mtp = T, folds = 1))
-tmle <- sw(lmtp_tmle(tmp, a, "Y", time_vary = time_varying, shift = d, mtp = T, folds = 1))
+tmle <- sw(lmtp_tmle(tmp, a, "Y", time_vary = time_varying, shift = d, mtp = T, folds = 1, boot = F))
 sdr <- sw(lmtp_sdr(tmp, a, "Y", time_vary = time_varying, shift = d, mtp = T, folds = 1))
 
 test_that("time varying treatment fidelity, t = 4", {

From 6e63f60b70db9a27570cebcd3e21d75a5dfdb0c6 Mon Sep 17 00:00:00 2001
From: nt-williams <ntwilliams.personal@gmail.com>
Date: Fri, 26 Apr 2024 10:33:47 -0700
Subject: [PATCH 4/7] Updated NEWS

---
 NEWS.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/NEWS.md b/NEWS.md
index f512846..99c17a0 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -5,6 +5,7 @@
 -   Can now estimate the effects of simultaneous interventions on multiple variables. 
 -   New pre-packaged shift function, `ipsi()` for estimating IPSI effects using the risk ratio.
 -   `lmtp_control()` now replaces extra estimator arguments.
+-   Bootstrap for TMLE with the `boot` argument using a modified TMLE algorithm (https://arxiv.org/abs/1810.03030).
 
 ### Buf Fixes
 

From 2a72e6640fbd0dc8300cf2b05618a223791682ee Mon Sep 17 00:00:00 2001
From: nt-williams <ntwilliams.personal@gmail.com>
Date: Fri, 26 Apr 2024 10:37:31 -0700
Subject: [PATCH 5/7] Todo notes

---
 R/theta.R | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/R/theta.R b/R/theta.R
index 56a156f..a0065c8 100644
--- a/R/theta.R
+++ b/R/theta.R
@@ -112,6 +112,7 @@ theta_dr <- function(eta, augmented = FALSE) {
   out
 }
 
+# TODO: NEED TO SAVE THE SEED FOR THE REPLICATES AND THE BOOTED ESTIMATES FOR ESTIMATNG CONTRASTS
 theta_boot <- function(eta) {
   theta <- weighted.mean(eta$m[, 1], eta$weights)
 
@@ -119,7 +120,7 @@ theta_boot <- function(eta) {
     theta <- rescale_y_continuous(theta, eta$bounds)
   }
 
-  # NEED TO FIGURE OUT HOW THIS WOULD WORK WITH CLUSTERING
+  # TODO: NEED TO FIGURE OUT HOW THIS WOULD WORK WITH CLUSTERING
   se <- sqrt(var(eta$boots))
   ci_low  <- theta - (qnorm(0.975) * se)
   ci_high <- theta + (qnorm(0.975) * se)

From b3c826e07b71fda971679613c42e94819d17416b Mon Sep 17 00:00:00 2001
From: nt-williams <ntwilliams.personal@gmail.com>
Date: Sat, 29 Jun 2024 16:44:58 -0700
Subject: [PATCH 6/7] Capturing boot call in lmtp_survival

---
 R/lmtp_survival.R    | 11 +++++++++--
 man/lmtp_survival.Rd |  6 ++++++
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/R/lmtp_survival.R b/R/lmtp_survival.R
index 42c985d..ffaac84 100644
--- a/R/lmtp_survival.R
+++ b/R/lmtp_survival.R
@@ -41,6 +41,10 @@
 #' @param mtp \[\code{logical(1)}\]\cr
 #'  Is the intervention of interest a modified treatment policy?
 #'  Default is \code{FALSE}. If treatment variables are continuous this should be \code{TRUE}.
+#' @param boot \[\code{logical(1)}\]\cr
+#'  Compute standard errors using the bootstrap? Default is \code{TRUE}. If \code{FALSE}, standard
+#'  errors will be calculated using the empirical variance of the efficient influence function.
+#'  Ignored if \code{estimator = "lmtp_sdr"}.
 #' @param id \[\code{character(1)}\]\cr
 #'  An optional column name containing cluster level identifiers.
 #' @param learners_outcome \[\code{character}\]\cr A vector of \code{SuperLearner} algorithms for estimation
@@ -64,6 +68,7 @@ lmtp_survival <- function(data, trt, outcomes, baseline = NULL, time_vary = NULL
                           estimator = c("lmtp_tmle", "lmtp_sdr"),
                           k = Inf,
                           mtp = FALSE,
+                          boot = TRUE,
                           id = NULL,
                           learners_outcome = "SL.glm",
                           learners_trt = "SL.glm",
@@ -94,8 +99,10 @@ lmtp_survival <- function(data, trt, outcomes, baseline = NULL, time_vary = NULL
     control = control
   )
 
-  if (estimator == "lmtp_tmle") expr <- expression(do.call(lmtp_tmle, args))
-  else expr <- expression(do.call(lmtp_sdr, args))
+  if (estimator == "lmtp_tmle") {
+    args$boot <- boot
+    expr <- expression(do.call(lmtp_tmle, args))
+  } else expr <- expression(do.call(lmtp_sdr, args))
 
   t <- 1
   cli::cli_progress_step("Working on time {t}/{tau}...")
diff --git a/man/lmtp_survival.Rd b/man/lmtp_survival.Rd
index 9f693bd..906c8c1 100644
--- a/man/lmtp_survival.Rd
+++ b/man/lmtp_survival.Rd
@@ -16,6 +16,7 @@ lmtp_survival(
   estimator = c("lmtp_tmle", "lmtp_sdr"),
   k = Inf,
   mtp = FALSE,
+  boot = TRUE,
   id = NULL,
   learners_outcome = "SL.glm",
   learners_trt = "SL.glm",
@@ -73,6 +74,11 @@ all time points.}
 Is the intervention of interest a modified treatment policy?
 Default is \code{FALSE}. If treatment variables are continuous this should be \code{TRUE}.}
 
+\item{boot}{[\code{logical(1)}]\cr
+Compute standard errors using the bootstrap? Default is \code{TRUE}. If \code{FALSE}, standard
+errors will be calculated using the empirical variance of the efficient influence function.
+Ignored if \code{estimator = "lmtp_sdr"}.}
+
 \item{id}{[\code{character(1)}]\cr
 An optional column name containing cluster level identifiers.}
 

From 98fd3833c40a19323ce363d1490d65e7cabbad78 Mon Sep 17 00:00:00 2001
From: Nicholas Williams <ntwilliams.personal@gmail.com>
Date: Tue, 17 Sep 2024 10:35:47 -0700
Subject: [PATCH 7/7] Changing default for boot to false

---
 R/estimators.R       | 4 ++--
 R/lmtp_survival.R    | 4 ++--
 man/lmtp_survival.Rd | 4 ++--
 man/lmtp_tmle.Rd     | 4 ++--
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/R/estimators.R b/R/estimators.R
index 2d13277..5d8c8d3 100644
--- a/R/estimators.R
+++ b/R/estimators.R
@@ -42,7 +42,7 @@
 #'  Is the intervention of interest a modified treatment policy?
 #'  Default is \code{FALSE}. If treatment variables are continuous this should be \code{TRUE}.
 #' @param boot \[\code{logical(1)}\]\cr
-#'  Compute standard errors using the bootstrap? Default is \code{TRUE}. If \code{FALSE}, standard
+#'  Compute standard errors using the bootstrap? Default is \code{FALSE}. If \code{FALSE}, standard
 #'  errors will be calculated using the empirical variance of the efficient influence function.
 #' @param outcome_type \[\code{character(1)}\]\cr
 #'  Outcome variable type (i.e., continuous, binomial, survival).
@@ -96,7 +96,7 @@
 #' @export
 lmtp_tmle <- function(data, trt, outcome, baseline = NULL, time_vary = NULL,
                       cens = NULL, shift = NULL, shifted = NULL, k = Inf,
-                      mtp = FALSE, boot = TRUE,
+                      mtp = FALSE, boot = FALSE,
                       outcome_type = c("binomial", "continuous", "survival"),
                       id = NULL, bounds = NULL,
                       learners_outcome = "SL.glm",
diff --git a/R/lmtp_survival.R b/R/lmtp_survival.R
index ac20e83..5f12759 100644
--- a/R/lmtp_survival.R
+++ b/R/lmtp_survival.R
@@ -42,7 +42,7 @@
 #'  Is the intervention of interest a modified treatment policy?
 #'  Default is \code{FALSE}. If treatment variables are continuous this should be \code{TRUE}.
 #' @param boot \[\code{logical(1)}\]\cr
-#'  Compute standard errors using the bootstrap? Default is \code{TRUE}. If \code{FALSE}, standard
+#'  Compute standard errors using the bootstrap? Default is \code{FALSE}. If \code{FALSE}, standard
 #'  errors will be calculated using the empirical variance of the efficient influence function.
 #'  Ignored if \code{estimator = "lmtp_sdr"}.
 #' @param id \[\code{character(1)}\]\cr
@@ -68,7 +68,7 @@ lmtp_survival <- function(data, trt, outcomes, baseline = NULL, time_vary = NULL
                           estimator = c("lmtp_tmle", "lmtp_sdr"),
                           k = Inf,
                           mtp = FALSE,
-                          boot = TRUE,
+                          boot = FALSE,
                           id = NULL,
                           learners_outcome = "SL.glm",
                           learners_trt = "SL.glm",
diff --git a/man/lmtp_survival.Rd b/man/lmtp_survival.Rd
index 906c8c1..5baf4e0 100644
--- a/man/lmtp_survival.Rd
+++ b/man/lmtp_survival.Rd
@@ -16,7 +16,7 @@ lmtp_survival(
   estimator = c("lmtp_tmle", "lmtp_sdr"),
   k = Inf,
   mtp = FALSE,
-  boot = TRUE,
+  boot = FALSE,
   id = NULL,
   learners_outcome = "SL.glm",
   learners_trt = "SL.glm",
@@ -75,7 +75,7 @@ Is the intervention of interest a modified treatment policy?
 Default is \code{FALSE}. If treatment variables are continuous this should be \code{TRUE}.}
 
 \item{boot}{[\code{logical(1)}]\cr
-Compute standard errors using the bootstrap? Default is \code{TRUE}. If \code{FALSE}, standard
+Compute standard errors using the bootstrap? Default is \code{FALSE}. If \code{FALSE}, standard
 errors will be calculated using the empirical variance of the efficient influence function.
 Ignored if \code{estimator = "lmtp_sdr"}.}
 
diff --git a/man/lmtp_tmle.Rd b/man/lmtp_tmle.Rd
index 1d19454..e8dcc50 100644
--- a/man/lmtp_tmle.Rd
+++ b/man/lmtp_tmle.Rd
@@ -15,7 +15,7 @@ lmtp_tmle(
   shifted = NULL,
   k = Inf,
   mtp = FALSE,
-  boot = TRUE,
+  boot = FALSE,
   outcome_type = c("binomial", "continuous", "survival"),
   id = NULL,
   bounds = NULL,
@@ -75,7 +75,7 @@ Is the intervention of interest a modified treatment policy?
 Default is \code{FALSE}. If treatment variables are continuous this should be \code{TRUE}.}
 
 \item{boot}{[\code{logical(1)}]\cr
-Compute standard errors using the bootstrap? Default is \code{TRUE}. If \code{FALSE}, standard
+Compute standard errors using the bootstrap? Default is \code{FALSE}. If \code{FALSE}, standard
 errors will be calculated using the empirical variance of the efficient influence function.}
 
 \item{outcome_type}{[\code{character(1)}]\cr