diff --git a/DESCRIPTION b/DESCRIPTION index d0403f7..09a318b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: lmtp Title: Non-Parametric Causal Effects of Feasible Interventions Based on Modified Treatment Policies -Version: 1.4.0 +Version: 1.4.1 Authors@R: c(person(given = "Nicholas", family = "Williams", @@ -24,7 +24,7 @@ License: AGPL-3 Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.3.1 +RoxygenNote: 7.3.2 Imports: stats, nnls, @@ -37,7 +37,7 @@ Imports: data.table (>= 1.13.0), checkmate (>= 2.1.0), SuperLearner, - schoolmath + isotone URL: https://beyondtheate.com/, https://github.com/nt-williams/lmtp BugReports: https://github.com/nt-williams/lmtp/issues Suggests: diff --git a/NAMESPACE b/NAMESPACE index 102394e..bbeee75 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,7 +2,9 @@ S3method(print,lmtp) S3method(print,lmtp_contrast) +S3method(print,lmtp_survival) S3method(tidy,lmtp) +S3method(tidy,lmtp_survival) export(create_node_list) export(event_locf) export(ipsi) @@ -11,6 +13,7 @@ export(lmtp_control) export(lmtp_ipw) export(lmtp_sdr) export(lmtp_sub) +export(lmtp_survival) export(lmtp_tmle) export(static_binary_off) export(static_binary_on) diff --git a/NEWS.md b/NEWS.md index 10cdb3a..92942cd 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,19 @@ +# lmtp 1.4.1 + +### New Features + +- Added `lmtp_survival()` function for estimating the entire survival curve. Enforces monotonicity using isotonic regression (see issue \#140). +- Bootstrap for TMLE with the `boot` argument using a modified TMLE algorithm (https://arxiv.org/abs/1810.03030). + +### Bug Fixes + +- Using fitted values from isotonic regression in `lmtp_survival()` instead of the original values (see issue \#149). +- Bootstrap TMLE uses cumulative density ratios (see issue \#151). + +### General + +- Removed dependency on `schoolmath` which used a very slow function for testing if a vector was "decimalish". + # lmtp 1.4.0 ### New Features diff --git a/R/checks.R b/R/checks.R index 10e7649..f6ae1ce 100644 --- a/R/checks.R +++ b/R/checks.R @@ -177,7 +177,7 @@ check_trt_type <- function(data, trt, mtp) { for (i in seq_along(trt)) { a <- data[[trt[i]]] if (is.character(a) | is.factor(a)) next - is_decimal[i] <- any(schoolmath::is.decimal(a[!is.na(a)])) + is_decimal[i] <- any(is_decimal(a[!is.na(a)])) } if (any(is_decimal) & isFALSE(mtp)) { cli::cli_warn("Detected decimalish `trt` values and {.code mtp = FALSE}. Consider setting {.code mtp = TRUE} if getting errors.") diff --git a/R/estimators.R b/R/estimators.R index e3f956a..5d8c8d3 100644 --- a/R/estimators.R +++ b/R/estimators.R @@ -41,6 +41,9 @@ #' @param mtp \[\code{logical(1)}\]\cr #' Is the intervention of interest a modified treatment policy? #' Default is \code{FALSE}. If treatment variables are continuous this should be \code{TRUE}. +#' @param boot \[\code{logical(1)}\]\cr +#' Compute standard errors using the bootstrap? Default is \code{FALSE}. If \code{FALSE}, standard +#' errors will be calculated using the empirical variance of the efficient influence function. #' @param outcome_type \[\code{character(1)}\]\cr #' Outcome variable type (i.e., continuous, binomial, survival). #' @param id \[\code{character(1)}\]\cr @@ -77,7 +80,8 @@ #' \item{standard_error}{The estimated standard error of the LMTP effect.} #' \item{low}{Lower bound of the 95% confidence interval of the LMTP effect.} #' \item{high}{Upper bound of the 95% confidence interval of the LMTP effect.} -#' \item{eif}{The estimated, un-centered, influence function of the estimate.} +#' \item{eif}{The estimated, un-centered, influence function of the estimate, +#' \code{NULL} if \code{boot = TRUE}.} #' \item{shift}{The shift function specifying the treatment policy of interest.} #' \item{outcome_reg}{An n x Tau + 1 matrix of outcome regression predictions. #' The mean of the first column is used for calculating theta.} @@ -92,7 +96,8 @@ #' @export lmtp_tmle <- function(data, trt, outcome, baseline = NULL, time_vary = NULL, cens = NULL, shift = NULL, shifted = NULL, k = Inf, - mtp = FALSE, outcome_type = c("binomial", "continuous", "survival"), + mtp = FALSE, boot = FALSE, + outcome_type = c("binomial", "continuous", "survival"), id = NULL, bounds = NULL, learners_outcome = "SL.glm", learners_trt = "SL.glm", @@ -125,6 +130,7 @@ lmtp_tmle <- function(data, trt, outcome, baseline = NULL, time_vary = NULL, checkmate::assertNumber(control$.bound) checkmate::assertNumber(control$.trim, upper = 1) checkmate::assertLogical(control$.return_full_fits, len = 1) + checkmate::assertLogical(boot, len = 1) check_trt_type(data, unlist(trt), mtp) task <- lmtp_task$new( @@ -147,6 +153,33 @@ lmtp_tmle <- function(data, trt, outcome, baseline = NULL, time_vary = NULL, pb <- progressr::progressor(task$tau*folds*2) + if (isTRUE(boot)) { + ratios <- cf_r(task, learners_trt, mtp, control, pb) + Qn <- cf_sub(task, "tmp_lmtp_scaled_outcome", learners_outcome, control, pb) + Qnb_eps <- cf_tmle2(task, ratios$ratios, Qn, control) + + ans <- theta_boot( + list( + estimator = "TMLE", + m = Qnb_eps$psi, + r = ratios$ratios, + boots = Qnb_eps$booted, + tau = task$tau, + folds = task$folds, + id = task$id, + outcome_type = task$outcome_type, + bounds = task$bounds, + weights = task$weights, + shift = if (is.null(shifted)) deparse(substitute((shift))) else NULL, + fits_m = Qn$fits, + fits_r = ratios$fits, + outcome_type = task$outcome_type, + seed = Qnb_eps$seed + ) + ) + return(ans) + } + ratios <- cf_r(task, learners_trt, mtp, control, pb) estims <- cf_tmle(task, "tmp_lmtp_scaled_outcome", @@ -486,7 +519,7 @@ lmtp_sub <- function(data, trt, outcome, baseline = NULL, time_vary = NULL, cens theta_sub( eta = list( - m = estims$m, + m = estims$ms, outcome_type = task$outcome_type, bounds = task$bounds, folds = task$folds, diff --git a/R/gcomp.R b/R/gcomp.R index ad0b718..a5feeb9 100644 --- a/R/gcomp.R +++ b/R/gcomp.R @@ -23,15 +23,15 @@ cf_sub <- function(task, outcome, learners, control, pb) { out <- future::value(out) list( - m = recombine_outcome(out, "m", task$folds), + ms = recombine_outcome(out, "ms", task$folds), + mn = recombine_outcome(out, "mn", task$folds), fits = lapply(out, function(x) x[["fits"]]) ) } estimate_sub <- function(natural, shifted, trt, outcome, node_list, cens, risk, tau, outcome_type, learners, control, pb) { - - m <- matrix(nrow = nrow(natural$valid), ncol = tau) + ms <- mn <- matrix(nrow = nrow(natural$valid), ncol = tau) fits <- vector("list", length = tau) for (t in tau:1) { @@ -79,13 +79,15 @@ estimate_sub <- function(natural, shifted, trt, outcome, node_list, cens, risk, under_shift_valid[, trt_t] <- shifted$valid[jv & rv, trt_t] natural$train[jt & rt, pseudo] <- bound(SL_predict(fit, under_shift_train), 1e-05) - m[jv & rv, t] <- bound(SL_predict(fit, under_shift_valid), 1e-05) + ms[jv & rv, t] <- bound(SL_predict(fit, under_shift_valid), 1e-05) + mn[jv & rv, t] <- bound(SL_predict(fit, natural$valid[jv & rv, vars]), 1e-05) natural$train[!rt, pseudo] <- 0 - m[!rv, t] <- 0 + ms[!rv, t] <- 0 + mn[!rv, t] <- 0 pb() } - list(m = m, fits = fits) + list(ms = ms, mn = mn, fits = fits) } diff --git a/R/lmtp_control.R b/R/lmtp_control.R index 4c8ad6b..86a3237 100644 --- a/R/lmtp_control.R +++ b/R/lmtp_control.R @@ -13,6 +13,8 @@ #' The number of cross-validation folds for \code{learners_trt}. #' @param .return_full_fits \[\code{logical(1)}\]\cr #' Return full SuperLearner fits? Default is \code{FALSE}, return only SuperLearner weights. +#' @param .B description +#' @param .boot_seed description #' #' @return A list of parameters controlling the estimation procedure. #' @export @@ -23,10 +25,14 @@ lmtp_control <- function(.bound = 1e5, .trim = 0.999, .learners_outcome_folds = 10, .learners_trt_folds = 10, - .return_full_fits = FALSE) { + .return_full_fits = FALSE, + .B = 1000, + .boot_seed = NULL) { list(.bound = .bound, .trim = .trim, .learners_outcome_folds = .learners_outcome_folds, .learners_trt_folds = .learners_trt_folds, - .return_full_fits = .return_full_fits) + .return_full_fits = .return_full_fits, + .B = .B, + .boot_seed = .boot_seed) } diff --git a/R/lmtp_survival.R b/R/lmtp_survival.R new file mode 100644 index 0000000..afb484c --- /dev/null +++ b/R/lmtp_survival.R @@ -0,0 +1,142 @@ +#' LMTP Survival Curve Estimator +#' +#' Wrapper around \code{lmtp_tmle} and \code{lmtp_sdr} for survival outcomes to estimate the entire survival curve. +#' Estimates are reconstructed using isotonic regression to enforce monotonicity of the survival curve. +#' \bold{Confidence intervals correspond to marginal confidence intervals for the survival curve, not simultaneous intervals.} +#' +#' @param data \[\code{data.frame}\]\cr +#' A \code{data.frame} in wide format containing all necessary variables +#' for the estimation problem. Must not be a \code{data.table}. +#' @param trt \[\code{character}\] or \[\code{list}\]\cr +#' A vector containing the column names of treatment variables ordered by time. +#' Or, a list of vectors, the same length as the number of time points of observation. +#' Vectors should contain column names for the treatment variables at each time point. The list +#' should be ordered following the time ordering of the model. +#' @param outcomes \[\code{character}\]\cr +#' A vector containing the columns names of intermediate outcome variables and the final +#' outcome variable ordered by time. Only numeric values are allowed. Variables should be coded as 0 and 1. +#' @param baseline \[\code{character}\]\cr +#' An optional vector containing the column names of baseline covariates to be +#' included for adjustment at every time point. +#' @param time_vary \[\code{list}\]\cr +#' A list the same length as the number of time points of observation with +#' the column names for new time-varying covariates introduced at each time point. The list +#' should be ordered following the time ordering of the model. +#' @param cens \[\code{character}\]\cr +#' An optional vector of column names of censoring indicators the same +#' length as the number of time points of observation. If missingness in the outcome is +#' present or if time-to-event outcome, must be provided. +#' @param shift \[\code{closure}\]\cr +#' A two argument function that specifies how treatment variables should be shifted. +#' See examples for how to specify shift functions for continuous, binary, and categorical exposures. +#' @param shifted \[\code{data.frame}\]\cr +#' An optional data frame, the same as in \code{data}, but modified according +#' to the treatment policy of interest. If specified, \code{shift} is ignored. +#' @param estimator \[\code{character(1)}\]\cr +#' The estimator to use. Either \code{"lmtp_tmle"} or \code{"lmtp_sdr"}. +#' @param k \[\code{integer(1)}\]\cr +#' An integer specifying how previous time points should be +#' used for estimation at the given time point. Default is \code{Inf}, +#' all time points. +#' @param mtp \[\code{logical(1)}\]\cr +#' Is the intervention of interest a modified treatment policy? +#' Default is \code{FALSE}. If treatment variables are continuous this should be \code{TRUE}. +#' @param boot \[\code{logical(1)}\]\cr +#' Compute standard errors using the bootstrap? Default is \code{FALSE}. If \code{FALSE}, standard +#' errors will be calculated using the empirical variance of the efficient influence function. +#' Ignored if \code{estimator = "lmtp_sdr"}. +#' @param id \[\code{character(1)}\]\cr +#' An optional column name containing cluster level identifiers. +#' @param learners_outcome \[\code{character}\]\cr A vector of \code{SuperLearner} algorithms for estimation +#' of the outcome regression. Default is \code{"SL.glm"}, a main effects GLM. +#' @param learners_trt \[\code{character}\]\cr A vector of \code{SuperLearner} algorithms for estimation +#' of the exposure mechanism. Default is \code{"SL.glm"}, a main effects GLM. +#' \bold{Only include candidate learners capable of binary classification}. +#' @param folds \[\code{integer(1)}\]\cr +#' The number of folds to be used for cross-fitting. +#' @param weights \[\code{numeric(nrow(data))}\]\cr +#' An optional vector containing sampling weights. +#' @param control \[\code{list()}\]\cr +#' Output of \code{lmtp_control()}. +#' +#' @return A list of class \code{lmtp_survival} containing \code{lmtp} objects for each time point. +#' +#' @example inst/examples/lmtp_survival-ex.R +#' @export +lmtp_survival <- function(data, trt, outcomes, baseline = NULL, time_vary = NULL, + cens = NULL, shift = NULL, shifted = NULL, + estimator = c("lmtp_tmle", "lmtp_sdr"), + k = Inf, + mtp = FALSE, + boot = FALSE, + id = NULL, + learners_outcome = "SL.glm", + learners_trt = "SL.glm", + folds = 10, + weights = NULL, + control = lmtp_control()) { + + checkmate::assertCharacter(outcomes, min.len = 2, null.ok = FALSE, unique = TRUE, any.missing = FALSE) + + estimator <- match.arg(estimator) + tau <- length(outcomes) + estimates <- vector("list", tau) + + args <- list( + data = data, + baseline = baseline, + shift = shift, + shifted = shifted, + k = k, + mtp = mtp, + id = id, + learners_outcome = learners_outcome, + learners_trt = learners_trt, + folds = folds, + weights = weights, + control = control + ) + + if (length(trt) == 1) args$trt <- trt + if (length(time_vary) == 1) args$time_vary <- time_vary + + if (estimator == "lmtp_tmle") { + args$boot <- boot + expr <- expression(do.call(lmtp_tmle, args)) + } else { + expr <- expression(do.call(lmtp_sdr, args)) + } + + t <- 1 + cli::cli_progress_step("Working on time {t}/{tau}...") + for (t in 1:tau) { + if (length(trt) > 1) args$trt <- trt[1:t] + if (length(args$time_vary) > 1) args$time_vary <- time_vary[1:t] + args$outcome <- outcomes[1:t] + args$cens <- cens[1:t] + args$outcome_type <- ifelse(t == 1, "binomial", "survival") + + estimates[[t]] <- future::future(eval(expr), seed = TRUE) + cli::cli_progress_update() + } + + cli::cli_progress_done() + estimates <- future::value(estimates) + estimates <- fix_surv_time1(estimates) + estimates <- isotonic_projection(estimates) + + class(estimates) <- "lmtp_survival" + estimates +} + +isotonic_projection <- function(x, alpha = 0.05) { + cv <- abs(qnorm(p = alpha / 2)) + estim <- tidy.lmtp_survival(x) + iso_fit <- isotone::gpava(1:length(x), 1 - estim$estimate) + for (i in seq_along(x)) { + x[[i]]$theta <- (1 - iso_fit$x[i]) + x[[i]]$low <- x[[i]]$theta - (qnorm(0.975) * x[[i]]$standard_error) + x[[i]]$high <- x[[i]]$theta + (qnorm(0.975) * x[[i]]$standard_error) + } + x +} diff --git a/R/print.R b/R/print.R index 0fac95f..7f931f1 100644 --- a/R/print.R +++ b/R/print.R @@ -21,3 +21,8 @@ print.lmtp_contrast <- function(x, ...) { x$vals$p.value <- format.pval(x$vals$p.value, digits = 3, eps = 0.001) print(format(x$vals, digits = 3)) } + +#' @export +print.lmtp_survival <- function(x, ...) { + print(as.data.frame(tidy.lmtp_survival(x))) +} diff --git a/R/theta.R b/R/theta.R index 7df2f8f..6c38fbc 100644 --- a/R/theta.R +++ b/R/theta.R @@ -112,3 +112,41 @@ theta_dr <- function(eta, augmented = FALSE) { class(out) <- "lmtp" out } + +# TODO: NEED TO SAVE THE SEED FOR THE REPLICATES AND THE BOOTED ESTIMATES FOR ESTIMATNG CONTRASTS +theta_boot <- function(eta) { + theta <- weighted.mean(eta$m[, 1], eta$weights) + + if (eta$outcome_type == "continuous") { + theta <- rescale_y_continuous(theta, eta$bounds) + } + + # TODO: NEED TO FIGURE OUT HOW THIS WOULD WORK WITH CLUSTERING + se <- sqrt(var(eta$boots)) + ci_low <- theta - (qnorm(0.975) * se) + ci_high <- theta + (qnorm(0.975) * se) + + out <- list( + estimator = eta$estimator, + theta = theta, + standard_error = se, + low = ci_low, + high = ci_high, + boots = eta$boots, + id = eta$id, + shift = eta$shift, + outcome_reg = switch( + eta$outcome_type, + continuous = rescale_y_continuous(eta$m, eta$bounds), + binomial = eta$m + ), + density_ratios = eta$r, + fits_m = eta$fits_m, + fits_r = eta$fits_r, + outcome_type = eta$outcome_type, + seed = eta$seed + ) + + class(out) <- "lmtp" + out +} diff --git a/R/tidy.R b/R/tidy.R index 8ff520b..12f0e96 100644 --- a/R/tidy.R +++ b/R/tidy.R @@ -28,3 +28,17 @@ tidy.lmtp <- function(x, ...) { class(out) <- c("tbl_df", "tbl", "data.frame") out } + +#' Tidy a(n) lmtp_survival object +#' +#' @param x A `lmtp_survival` object produced by a call to [lmtp::lmtp_survival()]. +#' @param ... Unused, included for generic consistency only. +#' +#' @example inst/examples/lmtp_survival-ex.R +#' +#' @export +tidy.lmtp_survival <- function(x, ...) { + out <- do.call("rbind", lapply(x, tidy)) + out$t <- 1:length(x) + out[, c(ncol(out), 1:ncol(out) - 1)] +} diff --git a/R/tmle2.R b/R/tmle2.R new file mode 100644 index 0000000..c4c81e5 --- /dev/null +++ b/R/tmle2.R @@ -0,0 +1,71 @@ +cf_tmle2 <- function(task, ratios, m_init, control) { + ratios <- matrix(t(apply(ratios, 1, cumprod)), + nrow = nrow(ratios), + ncol = ncol(ratios)) + + psi <- estimate_tmle2(task$natural, + task$cens, + task$risk, + task$tau, + m_init$mn, + m_init$ms, + ratios, + task$weights) + + if (!is.null(control$.boot_seed)) { + seed <- control$.boot_seed + } else { + seed <- .Random.seed[1] + } + + set.seed(seed) + boots <- replicate(control$.B, + sample(1:nrow(task$natural), nrow(task$natural), replace = TRUE), + simplify = FALSE) + + Qnb <- vector("list", control$.B) + for (i in seq_along(boots)) { + ib <- boots[[i]] + Qnb[[i]] <- future::future({ + psis <- estimate_tmle2(task$natural[ib, , drop = FALSE], + task$cens, + task$risk, + task$tau, + m_init$mn[ib, , drop = FALSE], + m_init$ms[ib, , drop = FALSE], + ratios[ib, , drop = FALSE], + task$weights[ib]) + weighted.mean(psis[, 1], task$weights[ib]) + }, + seed = TRUE) + } + + list(psi = psi, booted = unlist(future::value(Qnb)), seed = seed) +} + +estimate_tmle2 <- function(data, cens, risk, tau, mn, ms, ratios, weights) { + m_eps <- matrix(nrow = nrow(data), ncol = tau + 1) + m_eps[, tau + 1] <- data$tmp_lmtp_scaled_outcome + + fits <- vector("list", length = tau) + for (t in tau:1) { + i <- censored(data, cens, t)$i + j <- censored(data, cens, t)$j + r <- at_risk(data, risk, t) + + wts <- ratios[i & r, t] * weights[i & r] + + fit <- sw( + glm( + m_eps[i & r, t + 1] ~ offset(qlogis(mn[i & r, t])), + weights = wts, + family = "binomial" + ) + ) + + m_eps[j & r, t] <- bound(plogis(qlogis(ms[j & r, t]) + coef(fit))) + m_eps[!r, t] <- 0 + } + + m_eps +} diff --git a/R/utils.R b/R/utils.R index 92becb1..a2889b8 100644 --- a/R/utils.R +++ b/R/utils.R @@ -227,3 +227,18 @@ is_normalized <- function(x, tolerance = .Machine$double.eps^0.5) { # Check if the mean is approximately 1 within the given tolerance abs(mean(x) - 1) < tolerance } + +fix_surv_time1 <- function(x) { + x[[1]]$theta <- 1 - x[[1]]$theta + high <- x[[1]]$high + low <- x[[1]]$low + x[[1]]$high <- 1 - low + x[[1]]$low <- 1 - high + x[[1]]$eif <- 1 - x[[1]]$eif + x +} + +is_decimal <- function(x) { + test <- floor(x) + !(x == test) +} diff --git a/inst/examples/lmtp_survival-ex.R b/inst/examples/lmtp_survival-ex.R new file mode 100644 index 0000000..8a01cd7 --- /dev/null +++ b/inst/examples/lmtp_survival-ex.R @@ -0,0 +1,14 @@ +\donttest{ +# Time-to-event analysis with a binary time-invariant exposure. Interested in +# the effect of treatment being given to all observations on the cumulative +# incidence of the outcome. +A <- "trt" +Y <- paste0("Y.", 1:6) +C <- paste0("C.", 0:5) +W <- c("W1", "W2") + +curve <- lmtp_survival(sim_point_surv, A, Y, W, cens = C, folds = 1, + shift = static_binary_on, estimator = "lmtp_tmle") + +tidy(curve) +} diff --git a/man/lmtp_control.Rd b/man/lmtp_control.Rd index 2e1d10a..8172476 100644 --- a/man/lmtp_control.Rd +++ b/man/lmtp_control.Rd @@ -9,7 +9,9 @@ lmtp_control( .trim = 0.999, .learners_outcome_folds = 10, .learners_trt_folds = 10, - .return_full_fits = FALSE + .return_full_fits = FALSE, + .B = 1000, + .boot_seed = NULL ) } \arguments{ @@ -30,6 +32,10 @@ The number of cross-validation folds for \code{learners_trt}.} \item{.return_full_fits}{[\code{logical(1)}]\cr Return full SuperLearner fits? Default is \code{FALSE}, return only SuperLearner weights.} + +\item{.B}{description} + +\item{.boot_seed}{description} } \value{ A list of parameters controlling the estimation procedure. diff --git a/man/lmtp_survival.Rd b/man/lmtp_survival.Rd new file mode 100644 index 0000000..5baf4e0 --- /dev/null +++ b/man/lmtp_survival.Rd @@ -0,0 +1,124 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lmtp_survival.R +\name{lmtp_survival} +\alias{lmtp_survival} +\title{LMTP Survival Curve Estimator} +\usage{ +lmtp_survival( + data, + trt, + outcomes, + baseline = NULL, + time_vary = NULL, + cens = NULL, + shift = NULL, + shifted = NULL, + estimator = c("lmtp_tmle", "lmtp_sdr"), + k = Inf, + mtp = FALSE, + boot = FALSE, + id = NULL, + learners_outcome = "SL.glm", + learners_trt = "SL.glm", + folds = 10, + weights = NULL, + control = lmtp_control() +) +} +\arguments{ +\item{data}{[\code{data.frame}]\cr +A \code{data.frame} in wide format containing all necessary variables +for the estimation problem. Must not be a \code{data.table}.} + +\item{trt}{[\code{character}] or [\code{list}]\cr +A vector containing the column names of treatment variables ordered by time. +Or, a list of vectors, the same length as the number of time points of observation. +Vectors should contain column names for the treatment variables at each time point. The list +should be ordered following the time ordering of the model.} + +\item{outcomes}{[\code{character}]\cr +A vector containing the columns names of intermediate outcome variables and the final +outcome variable ordered by time. Only numeric values are allowed. Variables should be coded as 0 and 1.} + +\item{baseline}{[\code{character}]\cr +An optional vector containing the column names of baseline covariates to be +included for adjustment at every time point.} + +\item{time_vary}{[\code{list}]\cr +A list the same length as the number of time points of observation with +the column names for new time-varying covariates introduced at each time point. The list +should be ordered following the time ordering of the model.} + +\item{cens}{[\code{character}]\cr +An optional vector of column names of censoring indicators the same +length as the number of time points of observation. If missingness in the outcome is +present or if time-to-event outcome, must be provided.} + +\item{shift}{[\code{closure}]\cr +A two argument function that specifies how treatment variables should be shifted. +See examples for how to specify shift functions for continuous, binary, and categorical exposures.} + +\item{shifted}{[\code{data.frame}]\cr +An optional data frame, the same as in \code{data}, but modified according +to the treatment policy of interest. If specified, \code{shift} is ignored.} + +\item{estimator}{[\code{character(1)}]\cr +The estimator to use. Either \code{"lmtp_tmle"} or \code{"lmtp_sdr"}.} + +\item{k}{[\code{integer(1)}]\cr +An integer specifying how previous time points should be +used for estimation at the given time point. Default is \code{Inf}, +all time points.} + +\item{mtp}{[\code{logical(1)}]\cr +Is the intervention of interest a modified treatment policy? +Default is \code{FALSE}. If treatment variables are continuous this should be \code{TRUE}.} + +\item{boot}{[\code{logical(1)}]\cr +Compute standard errors using the bootstrap? Default is \code{FALSE}. If \code{FALSE}, standard +errors will be calculated using the empirical variance of the efficient influence function. +Ignored if \code{estimator = "lmtp_sdr"}.} + +\item{id}{[\code{character(1)}]\cr +An optional column name containing cluster level identifiers.} + +\item{learners_outcome}{[\code{character}]\cr A vector of \code{SuperLearner} algorithms for estimation +of the outcome regression. Default is \code{"SL.glm"}, a main effects GLM.} + +\item{learners_trt}{[\code{character}]\cr A vector of \code{SuperLearner} algorithms for estimation +of the exposure mechanism. Default is \code{"SL.glm"}, a main effects GLM. +\bold{Only include candidate learners capable of binary classification}.} + +\item{folds}{[\code{integer(1)}]\cr +The number of folds to be used for cross-fitting.} + +\item{weights}{[\code{numeric(nrow(data))}]\cr +An optional vector containing sampling weights.} + +\item{control}{[\code{list()}]\cr +Output of \code{lmtp_control()}.} +} +\value{ +A list of class \code{lmtp_survival} containing \code{lmtp} objects for each time point. +} +\description{ +Wrapper around \code{lmtp_tmle} and \code{lmtp_sdr} for survival outcomes to estimate the entire survival curve. +Estimates are reconstructed using isotonic regression to enforce monotonicity of the survival curve. +\bold{Confidence intervals correspond to marginal confidence intervals for the survival curve, not simultaneous intervals.} +} +\examples{ +\donttest{ +# Time-to-event analysis with a binary time-invariant exposure. Interested in +# the effect of treatment being given to all observations on the cumulative +# incidence of the outcome. +A <- "trt" +Y <- paste0("Y.", 1:6) +C <- paste0("C.", 0:5) +W <- c("W1", "W2") + +curve <- lmtp_survival(sim_point_surv, A, Y, W, cens = C, folds = 1, + shift = static_binary_on, estimator = "lmtp_tmle") + +tidy(curve) +} +} diff --git a/man/lmtp_tmle.Rd b/man/lmtp_tmle.Rd index f790bab..e8dcc50 100644 --- a/man/lmtp_tmle.Rd +++ b/man/lmtp_tmle.Rd @@ -15,6 +15,7 @@ lmtp_tmle( shifted = NULL, k = Inf, mtp = FALSE, + boot = FALSE, outcome_type = c("binomial", "continuous", "survival"), id = NULL, bounds = NULL, @@ -73,6 +74,10 @@ all time points.} Is the intervention of interest a modified treatment policy? Default is \code{FALSE}. If treatment variables are continuous this should be \code{TRUE}.} +\item{boot}{[\code{logical(1)}]\cr +Compute standard errors using the bootstrap? Default is \code{FALSE}. If \code{FALSE}, standard +errors will be calculated using the empirical variance of the efficient influence function.} + \item{outcome_type}{[\code{character(1)}]\cr Outcome variable type (i.e., continuous, binomial, survival).} @@ -108,7 +113,8 @@ A list of class \code{lmtp} containing the following components: \item{standard_error}{The estimated standard error of the LMTP effect.} \item{low}{Lower bound of the 95\% confidence interval of the LMTP effect.} \item{high}{Upper bound of the 95\% confidence interval of the LMTP effect.} -\item{eif}{The estimated, un-centered, influence function of the estimate.} +\item{eif}{The estimated, un-centered, influence function of the estimate, +\code{NULL} if \code{boot = TRUE}.} \item{shift}{The shift function specifying the treatment policy of interest.} \item{outcome_reg}{An n x Tau + 1 matrix of outcome regression predictions. The mean of the first column is used for calculating theta.} diff --git a/man/tidy.lmtp_survival.Rd b/man/tidy.lmtp_survival.Rd new file mode 100644 index 0000000..a356f0c --- /dev/null +++ b/man/tidy.lmtp_survival.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tidy.R +\name{tidy.lmtp_survival} +\alias{tidy.lmtp_survival} +\title{Tidy a(n) lmtp_survival object} +\usage{ +\method{tidy}{lmtp_survival}(x, ...) +} +\arguments{ +\item{x}{A \code{lmtp_survival} object produced by a call to \code{\link[=lmtp_survival]{lmtp_survival()}}.} + +\item{...}{Unused, included for generic consistency only.} +} +\description{ +Tidy a(n) lmtp_survival object +} +\examples{ +\donttest{ +# Time-to-event analysis with a binary time-invariant exposure. Interested in +# the effect of treatment being given to all observations on the cumulative +# incidence of the outcome. +A <- "trt" +Y <- paste0("Y.", 1:6) +C <- paste0("C.", 0:5) +W <- c("W1", "W2") + +curve <- lmtp_survival(sim_point_surv, A, Y, W, cens = C, folds = 1, + shift = static_binary_on, estimator = "lmtp_tmle") + +tidy(curve) +} +} diff --git a/tests/testthat/test-censoring.R b/tests/testthat/test-censoring.R index c7b2ae1..3b1547d 100644 --- a/tests/testthat/test-censoring.R +++ b/tests/testthat/test-censoring.R @@ -7,7 +7,7 @@ truth <- 0.8 sub <- sw(lmtp_sub(sim_cens, A, "Y", time_vary = L, cens = C, k = 0, shift = NULL, folds = 1)) ipw <- sw(lmtp_ipw(sim_cens, A, "Y", time_vary = L, cens = C, k = 0, shift = NULL, folds = 1)) -tmle <- sw(lmtp_tmle(sim_cens, A, "Y", time_vary = L, cens = C, k = 0, shift = NULL, folds = 1)) +tmle <- sw(lmtp_tmle(sim_cens, A, "Y", time_vary = L, cens = C, k = 0, shift = NULL, boot = F, folds = 1)) sdr <- sw(lmtp_sdr(sim_cens, A, "Y", time_vary = L, cens = C, k = 0, shift = NULL, folds = 1)) # tests diff --git a/tests/testthat/test-dynamic.R b/tests/testthat/test-dynamic.R index 94bf0c2..27e7e51 100644 --- a/tests/testthat/test-dynamic.R +++ b/tests/testthat/test-dynamic.R @@ -32,18 +32,18 @@ cens <- c("C1", "C2") nodes <- list(c(NULL), c("L")) # truth = 0.308 -tml.stc <- sw(lmtp_tmle(sim, a, "Y", baseline, nodes, cens, shift = static_binary_on, folds = 1)) +tml.stc <- sw(lmtp_tmle(sim, a, "Y", baseline, nodes, cens, shift = static_binary_on, folds = 1, boot = F)) # truth = 0.528 sdr.stc <- sw(lmtp_sdr(sim, a, "Y", baseline, nodes, cens, shift = static_binary_off, folds = 1)) # truth = 0.433 -tml.tv <- sw(lmtp_tmle(sim, a, "Y", baseline, nodes, cens, shift = time_vary_on, folds = 1)) +tml.tv <- sw(lmtp_tmle(sim, a, "Y", baseline, nodes, cens, shift = time_vary_on, folds = 1, boot = F)) sdr.tv <- sw(lmtp_sdr(sim, a, "Y", baseline, nodes, cens, shift = time_vary_on, folds = 1)) # time varying and covariate dynamic # truth = 0.345 -tml.dyn <- sw(lmtp_tmle(sim, a, "Y", baseline, nodes, cens, shift = dynamic_vec, folds = 1)) +tml.dyn <- sw(lmtp_tmle(sim, a, "Y", baseline, nodes, cens, shift = dynamic_vec, folds = 1, boot = F)) sdr.dyn <- sw(lmtp_sdr(sim, a, "Y", baseline, nodes, cens, shift = dynamic_vec, folds = 1)) test_that("Dynamic intervention fidelity", { diff --git a/tests/testthat/test-point_treatment.R b/tests/testthat/test-point_treatment.R index 638e103..5f2b8d3 100644 --- a/tests/testthat/test-point_treatment.R +++ b/tests/testthat/test-point_treatment.R @@ -15,7 +15,7 @@ truth <- mean(tmp$Y.1) sub <- lmtp_sub(tmp, "A", "Y", baseline = c("W1", "W2"), shift = static_binary_on, folds = 1) ipw <- lmtp_ipw(tmp, "A", "Y", baseline = c("W1", "W2"), shift = static_binary_on, folds = 1) -tmle <- lmtp_tmle(tmp, "A", "Y", baseline = c("W1", "W2"), shift = static_binary_on, folds = 1) +tmle <- lmtp_tmle(tmp, "A", "Y", baseline = c("W1", "W2"), shift = static_binary_on, folds = 1, boot = F) sdr <- lmtp_sdr(tmp, "A", "Y", baseline = c("W1", "W2"), shift = static_binary_on, folds = 1) # tests diff --git a/tests/testthat/test-shifted.R b/tests/testthat/test-shifted.R index b85f90b..161af27 100644 --- a/tests/testthat/test-shifted.R +++ b/tests/testthat/test-shifted.R @@ -20,7 +20,7 @@ ipw <- tmle <- sw(lmtp_tmle(sim_cens, a, "Y", nodes, baseline = NULL, cens, k = 0, shifted = sc, - outcome_type = "binomial", folds = 2, mtp = TRUE)) + outcome_type = "binomial", folds = 2, mtp = TRUE, boot = F)) sdr <- sw(lmtp_sdr(sim_cens, a, "Y", nodes, baseline = NULL, diff --git a/tests/testthat/test-survey.R b/tests/testthat/test-survey.R index 2cdd6d5..b8021bf 100644 --- a/tests/testthat/test-survey.R +++ b/tests/testthat/test-survey.R @@ -25,7 +25,7 @@ ipw <- lmtp_ipw(tmp, "A", "Y", baseline = c("W1", "W2"), shift = static_binary_o weights = wts, folds = 2) tmle <- lmtp_tmle(tmp, "A", "Y", baseline = c("W1", "W2"), shift = static_binary_on, - weights = wts, folds = 2) + weights = wts, folds = 2, boot = F) sdr <- lmtp_sdr(tmp, "A", "Y", baseline = c("W1", "W2"), shift = static_binary_on, weights = wts, folds = 2) diff --git a/tests/testthat/test-time_varying_treatment.R b/tests/testthat/test-time_varying_treatment.R index 7e7cd1e..4b289e0 100644 --- a/tests/testthat/test-time_varying_treatment.R +++ b/tests/testthat/test-time_varying_treatment.R @@ -25,7 +25,7 @@ truth <- 0.305 sub <- sw(lmtp_sub(tmp, a, "Y", time_vary = time_varying, shift = d, folds = 1)) ipw <- sw(lmtp_ipw(tmp, a, "Y", time_vary = time_varying, shift = d, mtp = T, folds = 1)) -tmle <- sw(lmtp_tmle(tmp, a, "Y", time_vary = time_varying, shift = d, mtp = T, folds = 1)) +tmle <- sw(lmtp_tmle(tmp, a, "Y", time_vary = time_varying, shift = d, mtp = T, folds = 1, boot = F)) sdr <- sw(lmtp_sdr(tmp, a, "Y", time_vary = time_varying, shift = d, mtp = T, folds = 1)) test_that("time varying treatment fidelity, t = 4", {