From ded6a9b2f820f3650cccfdd05c7943045e0e83fa Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 3 Mar 2024 13:17:32 +0100 Subject: [PATCH 01/19] Draft new `data_summary()` function --- NAMESPACE | 5 ++ R/data_summary.R | 207 ++++++++++++++++++++++++++++++++++++++++++++ man/data_summary.Rd | 51 +++++++++++ 3 files changed, 263 insertions(+) create mode 100644 R/data_summary.R create mode 100644 man/data_summary.Rd diff --git a/NAMESPACE b/NAMESPACE index e89863807..4e924d76b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -52,6 +52,10 @@ S3method(data_modify,data.frame) S3method(data_modify,default) S3method(data_modify,grouped_df) S3method(data_peek,data.frame) +S3method(data_summary,data.frame) +S3method(data_summary,default) +S3method(data_summary,grouped_df) +S3method(data_summary,matrix) S3method(data_tabulate,data.frame) S3method(data_tabulate,default) S3method(data_tabulate,grouped_df) @@ -249,6 +253,7 @@ export(data_rotate) export(data_seek) export(data_select) export(data_separate) +export(data_summary) export(data_tabulate) export(data_to_long) export(data_to_wide) diff --git a/R/data_summary.R b/R/data_summary.R new file mode 100644 index 000000000..ae37c2707 --- /dev/null +++ b/R/data_summary.R @@ -0,0 +1,207 @@ +#' @title Summarize data +#' @name data_summary +#' +#' @description This function can be used to compute summary statistics for a +#' data frame or a matrix. +#' +#' @param x A (grouped) data frame. +#' @param by Optional character string, indicating the name of a variable in `x`. +#' If supplied, the data will be split by this variable and summary statistics +#' will be computed for each group. +#' @param ... One or more named expressions that define the new variable name +#' and the function to compute the summary statistic. Example: +#' `mean_sepal_width = mean(Sepal.Width)`. The expression can also be provided +#' as a character string, e.g. `"mean_sepal_width = mean(Sepal.Width)"`. +#' +#' @return A data frame with the requested summary statistics. +#' +#' @examples +#' data(iris) +#' data_summary(iris, MW = mean(Sepal.Width), SD = sd(Sepal.Width)) +#' data_summary( +#' iris, +#' MW = mean(Sepal.Width), +#' SD = sd(Sepal.Width), +#' by = "Species" +#' ) +#' +#' # same as +#' d <- data_group(iris, "Species") +#' data_summary(d, MW = mean(Sepal.Width), SD = sd(Sepal.Width)) +#' +#' # multiple groups +#' data(mtcars) +#' data_summary(mtcars, MW = mean(mpg), SD = sd(mpg), by = c("am", "gear")) +#' +#' # expressions can also be supplied as character strings +#' data_summary(mtcars, "MW = mean(mpg)", "SD = sd(mpg)", by = c("am", "gear")) +#' @export +data_summary <- function(x, ...) { + UseMethod("data_summary") +} + + +#' @export +data_summary.matrix <- function(x, ..., by = NULL) { + data_summary(as.data.frame(x), ..., by = by) +} + + +#' @export +data_summary.default <- function(x, ...) { + insight::format_error("`data_summary()` only works for (grouped) data frames and matrices.") +} + + +#' @rdname data_summary +#' @export +data_summary.data.frame <- function(x, ..., by = NULL) { + dots <- eval(substitute(alist(...))) + + if (is.null(by)) { + # when we have no grouping, just compute a one-row summary + summarise <- .process_datasummary_dots(dots, x) + out <- data.frame(summarise) + colnames(out) <- vapply(summarise, names, character(1)) + } else { + # split data + splitted_data <- split(x, x[by]) + out <- lapply(splitted_data, function(s) { + # no data for combination? Return NULL + if (nrow(s) == 0) { + return(NULL) + } + # summarize data + summarise <- .process_datasummary_dots(dots, s) + # coerce to data frame + summarised_data <- data.frame(summarise) + # bind grouping-variables and values + summarised_data <- cbind(s[1, by], summarised_data) + # make sure we have proper column names + colnames(summarised_data) <- c(by, vapply(summarise, names, character(1))) + summarised_data + }) + out <- do.call(rbind, out) + } + class(out) <- "data.frame" + rownames(out) <- NULL + out +} + + +#' @export +data_summary.grouped_df <- function(x, ..., by = NULL) { + # extract group variables + grps <- attr(x, "groups", exact = TRUE) + group_variables <- data_remove(grps, ".rows") + # if "by" is not supplied, use group variables + if (is.null(by)) { + by <- colnames_to_row(group_variables) + } + attr(x, "groups") <- NULL + data_summary(x, ..., by = by) +} + + +# helper ----------------------------------------------------------------------- + +.process_datasummary_dots <- function(dots, data) { + out <- NULL + if (length(dots)) { + # we check for character vector of expressions, in which case + # "dots" should be unnamed + if (is.null(names(dots))) { + # if we have multiple strings, concatenate them to a character vector + # and put it into a list... + if (length(dots) > 1) { + if (all(vapply(dots, is.character, logical(1)))) { + dots <- list(unlist(dots)) + } else { + insight::format_error("You cannot mix string and literal representation of expressions.") + } + } + # expression is given as character string, e.g. + # a <- "double_SepWidth = 2 * Sepal.Width" + # data_modify(iris, a) + # or as character vector, e.g. + # data_modify(iris, c("var_a = Sepal.Width / 10", "var_b = Sepal.Width * 10")) + character_symbol <- tryCatch(.dynEval(dots[[1]]), error = function(e) NULL) + # do we have a character vector? Then we can proceed + if (is.character(character_symbol)) { + dots <- lapply(character_symbol, function(s) { + # turn value from character vector into expression + str2lang(.dynEval(s)) + }) + names(dots) <- vapply(dots, function(n) insight::safe_deparse(n[[2]]), character(1)) + } + } + + out <- lapply(seq_along(dots), function(i) { + new_variable <- .get_new_dots_variable(dots, i, data) + stats::setNames(new_variable, names(dots)[i]) + }) + } + + out +} + + +.get_new_dots_variable <- function(dots, i, data) { + # iterate expressions for new variables + symbol <- dots[[i]] + + # expression is given as character string in a variable, but named, e.g. + # a <- "2 * Sepal.Width" + # data_modify(iris, double_SepWidth = a) + # we reconstruct the symbol as if it were provided as literal expression. + # However, we need to check that we don't have a character vector, + # like: data_modify(iris, new_var = "a") + # this one should be recycled instead. + if (!is.character(symbol)) { + eval_symbol <- .dynEval(symbol, ifnotfound = NULL) + if (is.character(eval_symbol)) { + symbol <- try(str2lang(paste0(names(dots)[i], " = ", eval_symbol)), silent = TRUE) + # we may have the edge-case of having a function that returns a character + # vector, like "new_var = sample(letters[1:3])". In this case, "eval_symbol" + # is of type character, but no symbol, thus str2lang() above creates a + # wrong pattern. We then take "eval_symbol" as character input. + if (inherits(symbol, "try-error")) { + symbol <- str2lang(paste0( + names(dots)[i], + " = c(", paste0("\"", eval_symbol, "\"", collapse = ","), ")" + )) + } + } + } + + # finally, we can evaluate expression and get values for new variables + new_variable <- try(with(data, eval(symbol)), silent = TRUE) + + # successful, or any errors, like misspelled variable name? + if (inherits(new_variable, "try-error")) { + # in which step did error happen? + step_number <- switch(as.character(i), + "1" = "the first expression", + "2" = "the second expression", + "3" = "the third expression", + paste("expression", i) + ) + step_msg <- paste0("There was an error in ", step_number, ".") + # try to find out which variable was the cause for the error + error_msg <- attributes(new_variable)$condition$message + if (grepl("object '(.*)' not found", error_msg)) { + error_var <- gsub("object '(.*)' not found", "\\1", error_msg) + insight::format_error( + paste0(step_msg, " Variable \"", error_var, "\" was not found in the dataset or in the environment."), + .misspelled_string(colnames(data), error_var, "Possibly misspelled or not yet defined?") + ) + } else { + insight::format_error(paste0( + step_msg, " ", insight::format_capitalize(error_msg), + ". Possibly misspelled or not yet defined?" + )) + } + } + + new_variable +} diff --git a/man/data_summary.Rd b/man/data_summary.Rd new file mode 100644 index 000000000..6c0e89744 --- /dev/null +++ b/man/data_summary.Rd @@ -0,0 +1,51 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data_summary.R +\name{data_summary} +\alias{data_summary} +\alias{data_summary.data.frame} +\title{Summarize data} +\usage{ +data_summary(x, ...) + +\method{data_summary}{data.frame}(x, ..., by = NULL) +} +\arguments{ +\item{x}{A (grouped) data frame.} + +\item{...}{One or more named expressions that define the new variable name +and the function to compute the summary statistic. Example: +\code{mean_sepal_width = mean(Sepal.Width)}. The expression can also be provided +as a character string, e.g. \code{"mean_sepal_width = mean(Sepal.Width)"}.} + +\item{by}{Optional character string, indicating the name of a variable in \code{x}. +If supplied, the data will be split by this variable and summary statistics +will be computed for each group.} +} +\value{ +A data frame with the requested summary statistics. +} +\description{ +This function can be used to compute summary statistics for a +data frame or a matrix. +} +\examples{ +data(iris) +data_summary(iris, MW = mean(Sepal.Width), SD = sd(Sepal.Width)) +data_summary( + iris, + MW = mean(Sepal.Width), + SD = sd(Sepal.Width), + by = "Species" +) + +# same as +d <- data_group(iris, "Species") +data_summary(d, MW = mean(Sepal.Width), SD = sd(Sepal.Width)) + +# multiple groups +data(mtcars) +data_summary(mtcars, MW = mean(mpg), SD = sd(mpg), by = c("am", "gear")) + +# expressions can also be supplied as character strings +data_summary(mtcars, "MW = mean(mpg)", "SD = sd(mpg)", by = c("am", "gear")) +} From 872c6d000bc0945d08bad7309ea6ecbda39bb5bc Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 3 Mar 2024 13:19:48 +0100 Subject: [PATCH 02/19] check if we can avoid duplicated code --- R/data_modify.R | 58 ++----------------------------------------------- 1 file changed, 2 insertions(+), 56 deletions(-) diff --git a/R/data_modify.R b/R/data_modify.R index 40a186736..c3cb0f521 100644 --- a/R/data_modify.R +++ b/R/data_modify.R @@ -177,62 +177,8 @@ data_modify.data.frame <- function(data, ..., .if = NULL, .at = NULL, .modify = } for (i in seq_along(dots)) { - # iterate expressions for new variables - symbol <- dots[[i]] - - # expression is given as character string in a variable, but named, e.g. - # a <- "2 * Sepal.Width" - # data_modify(iris, double_SepWidth = a) - # we reconstruct the symbol as if it were provided as literal expression. - # However, we need to check that we don't have a character vector, - # like: data_modify(iris, new_var = "a") - # this one should be recycled instead. - if (!is.character(symbol)) { - eval_symbol <- .dynEval(symbol, ifnotfound = NULL) - if (is.character(eval_symbol)) { - symbol <- try(str2lang(paste0(names(dots)[i], " = ", eval_symbol)), silent = TRUE) - # we may have the edge-case of having a function that returns a character - # vector, like "new_var = sample(letters[1:3])". In this case, "eval_symbol" - # is of type character, but no symbol, thus str2lang() above creates a - # wrong pattern. We then take "eval_symbol" as character input. - if (inherits(symbol, "try-error")) { - symbol <- str2lang(paste0( - names(dots)[i], - " = c(", paste0("\"", eval_symbol, "\"", collapse = ","), ")" - )) - } - } - } - - # finally, we can evaluate expression and get values for new variables - new_variable <- try(with(data, eval(symbol)), silent = TRUE) - - # successful, or any errors, like misspelled variable name? - if (inherits(new_variable, "try-error")) { - # in which step did error happen? - step_number <- switch(as.character(i), - "1" = "the first expression", - "2" = "the second expression", - "3" = "the third expression", - paste("expression", i) - ) - step_msg <- paste0("There was an error in ", step_number, ".") - # try to find out which variable was the cause for the error - error_msg <- attributes(new_variable)$condition$message - if (grepl("object '(.*)' not found", error_msg)) { - error_var <- gsub("object '(.*)' not found", "\\1", error_msg) - insight::format_error( - paste0(step_msg, " Variable \"", error_var, "\" was not found in the dataset or in the environment."), - .misspelled_string(colnames(data), error_var, "Possibly misspelled or not yet defined?") - ) - } else { - insight::format_error(paste0( - step_msg, " ", insight::format_capitalize(error_msg), - ". Possibly misspelled or not yet defined?" - )) - } - } - + # create new variable + new_variable <- .get_new_dots_variable(dots, i, data) # give informative error when new variable doesn't match number of rows if (!is.null(new_variable) && length(new_variable) != nrow(data) && (nrow(data) %% length(new_variable)) != 0) { insight::format_error( From af2ce1830c6d7ac5b09989229a1d327638cc755d Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 3 Mar 2024 13:20:17 +0100 Subject: [PATCH 03/19] pkgdown --- _pkgdown.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/_pkgdown.yaml b/_pkgdown.yaml index 2adc0768c..0062b8a5a 100644 --- a/_pkgdown.yaml +++ b/_pkgdown.yaml @@ -61,6 +61,7 @@ reference: Functions to compute statistical summaries of data properties and distributions contents: - data_codebook + - data_summary - data_tabulate - data_peek - data_seek From 123bb562a56002a63e338d0aa61ec6e232989842 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 3 Mar 2024 13:22:32 +0100 Subject: [PATCH 04/19] fix --- R/data_summary.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/data_summary.R b/R/data_summary.R index ae37c2707..4f80bd2cd 100644 --- a/R/data_summary.R +++ b/R/data_summary.R @@ -96,7 +96,7 @@ data_summary.grouped_df <- function(x, ..., by = NULL) { group_variables <- data_remove(grps, ".rows") # if "by" is not supplied, use group variables if (is.null(by)) { - by <- colnames_to_row(group_variables) + by <- colnames(group_variables) } attr(x, "groups") <- NULL data_summary(x, ..., by = by) From 2876044cb6d54fa96ec51d798104c406d69c2af2 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 3 Mar 2024 13:25:16 +0100 Subject: [PATCH 05/19] fixes --- R/data_summary.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R/data_summary.R b/R/data_summary.R index 4f80bd2cd..af1ec65a4 100644 --- a/R/data_summary.R +++ b/R/data_summary.R @@ -98,7 +98,9 @@ data_summary.grouped_df <- function(x, ..., by = NULL) { if (is.null(by)) { by <- colnames(group_variables) } + # remove information specific to grouped df's attr(x, "groups") <- NULL + class(x) <- "data.frame" data_summary(x, ..., by = by) } From f672e679e6efddf9726ec6344562cbd37dbddef4 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 3 Mar 2024 18:00:20 +0100 Subject: [PATCH 06/19] lintr --- tests/testthat/test-data_reorder.R | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tests/testthat/test-data_reorder.R b/tests/testthat/test-data_reorder.R index 34a8f5420..463f36040 100644 --- a/tests/testthat/test-data_reorder.R +++ b/tests/testthat/test-data_reorder.R @@ -1,11 +1,11 @@ test_that("data_reorder works as expected", { - expect_equal( - names(data_reorder(iris, c("Species", "Sepal.Length"))), + expect_named( + data_reorder(iris, c("Species", "Sepal.Length")), c("Species", "Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width") ) - expect_warning(expect_equal( - names(data_reorder(iris, c("Species", "dupa"))), + expect_warning(expect_named( + data_reorder(iris, c("Species", "dupa")), c("Species", "Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width") )) }) @@ -24,5 +24,6 @@ test_that("data_reorder preserves attributes", { a2 <- attributes(out2) # attributes may not be in the same order - expect_true(all(names(a1) %in% names(a2)) && length(a1) == length(a2)) + expect_true(all(names(a1) %in% names(a2))) + expect_length(a1, length(a2)) }) From 3ebab822af97d5da64cdc10ced4e870fec2bb546 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 3 Mar 2024 18:22:48 +0100 Subject: [PATCH 07/19] add tests --- R/data_summary.R | 12 +++ tests/testthat/test-data_summary.R | 134 +++++++++++++++++++++++++++++ 2 files changed, 146 insertions(+) create mode 100644 tests/testthat/test-data_summary.R diff --git a/R/data_summary.R b/R/data_summary.R index af1ec65a4..5e3627c54 100644 --- a/R/data_summary.R +++ b/R/data_summary.R @@ -64,6 +64,18 @@ data_summary.data.frame <- function(x, ..., by = NULL) { out <- data.frame(summarise) colnames(out) <- vapply(summarise, names, character(1)) } else { + # sanity check - is "by" a character string? + if (!is.character(by)) { + insight::format_error("Argument `by` must be a character string, indicating the name of a variable in the data.") + } + # is "by" in the data? + if (!all(by %in% colnames(x))) { + by_not_found <- by[!by %in% colnames(x)] + insight::format_error( + paste0("Variable \"", by_not_found, "\" not found in the data."), + .misspelled_string(colnames(x), by_not_found, "Possibly misspelled?") + ) + } # split data splitted_data <- split(x, x[by]) out <- lapply(splitted_data, function(s) { diff --git a/tests/testthat/test-data_summary.R b/tests/testthat/test-data_summary.R new file mode 100644 index 000000000..5d48ee84d --- /dev/null +++ b/tests/testthat/test-data_summary.R @@ -0,0 +1,134 @@ +test_that("data_summary, single row summary", { + data(iris) + out <- data_summary(iris, MW = mean(Sepal.Width), SD = sd(Sepal.Width)) + expect_equal(out$MW, mean(iris$Sepal.Width), tolerance = 1e-4) + expect_equal(out$SD, sd(iris$Sepal.Width), tolerance = 1e-4) +}) + + +test_that("data_summary, single row summary, string expression", { + data(iris) + out <- data_summary(iris, "MW = mean(Sepal.Width)", "SD = sd(Sepal.Width)") + expect_equal(out$MW, mean(iris$Sepal.Width), tolerance = 1e-4) + expect_equal(out$SD, sd(iris$Sepal.Width), tolerance = 1e-4) +}) + + +test_that("data_summary, summary for groups", { + data(iris) + out <- data_summary(iris, MW = mean(Sepal.Width), SD = sd(Sepal.Width), by = "Species") + expect_equal( + out$MW, + aggregate(iris["Sepal.Width"], list(iris$Species), mean)$Sepal.Width, + tolerance = 1e-4 + ) + expect_equal( + out$SD, + aggregate(iris["Sepal.Width"], list(iris$Species), sd)$Sepal.Width, + tolerance = 1e-4 + ) +}) + + +test_that("data_summary, summary for groups, string expression", { + data(iris) + out <- data_summary( + iris, + "MW = mean(Sepal.Width)", + "SD = sd(Sepal.Width)", + by = "Species" + ) + expect_equal( + out$MW, + aggregate(iris["Sepal.Width"], list(iris$Species), mean)$Sepal.Width, + tolerance = 1e-4 + ) + expect_equal( + out$SD, + aggregate(iris["Sepal.Width"], list(iris$Species), sd)$Sepal.Width, + tolerance = 1e-4 + ) +}) + + +test_that("data_summary, grouped data frames", { + data(iris) + d <- data_group(iris, "Species") + out <- data_summary(d, MW = mean(Sepal.Width), SD = sd(Sepal.Width)) + expect_equal( + out$MW, + aggregate(iris["Sepal.Width"], list(iris$Species), mean)$Sepal.Width, + tolerance = 1e-4 + ) + expect_equal( + out$SD, + aggregate(iris["Sepal.Width"], list(iris$Species), sd)$Sepal.Width, + tolerance = 1e-4 + ) + # "by" overrides groups + data(mtcars) + d <- data_group(mtcars, "gear") + out <- data_summary(d, MW = mean(mpg), SD = sd(mpg), by = "am") + expect_identical( + out$MW, + aggregate(mtcars["mpg"], list(mtcars$am), mean)$mpg + ) +}) + + +test_that("data_summary, summary for multiple groups", { + data(mtcars) + out <- data_summary(mtcars, MW = mean(mpg), SD = sd(mpg), by = c("am", "gear")) + expect_equal( + out$MW, + aggregate(mtcars["mpg"], list(mtcars$am, mtcars$gear), mean)$mpg, + tolerance = 1e-4 + ) + expect_equal( + out$SD, + aggregate(mtcars["mpg"], list(mtcars$am, mtcars$gear), sd)$mpg, + tolerance = 1e-4 + ) + x <- data_group(mtcars, c("am", "gear")) + out <- data_summary(x, MW = mean(mpg), SD = sd(mpg)) + expect_equal( + out$MW, + aggregate(mtcars["mpg"], list(mtcars$am, mtcars$gear), mean)$mpg, + tolerance = 1e-4 + ) + expect_equal( + out$SD, + aggregate(mtcars["mpg"], list(mtcars$am, mtcars$gear), sd)$mpg, + tolerance = 1e-4 + ) +}) + + +test_that("data_summary, errors", { + data(iris) + data(mtcars) + # "by" must be character + expect_error( + data_summary(iris, MW = mean(Sepal.Width), SD = sd(Sepal.Width), by = 5), + regex = "Argument `by` must be a character string" + ) + # "by" must be in data + expect_error( + data_summary(iris, MW = mean(Sepal.Width), SD = sd(Sepal.Width), by = "Speceis"), + regex = "Variable \"Speceis\" not" + ) + # by for multiple variables + expect_error( + data_summary(mtcars, MW = mean(mpg), SD = sd(mpg), by = c("bam", "gear")), + regex = "Variable \"bam\" not" + ) + expect_error( + data_summary(mtcars, MW = mean(mpg), SD = sd(mpg), by = c("bam", "geas")), + regex = "Did you mean one of \"am\" or \"gear\"?" + ) + # not a data frame + expect_error( + data_summary(iris$Sepal.Width, MW = mean(Sepal.Width), SD = sd(Sepal.Width)), + regex = "only works for (grouped) data frames" + ) +}) From f0f832d088a50ec971842a6fabd26d5899302da9 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 3 Mar 2024 18:23:54 +0100 Subject: [PATCH 08/19] code style --- R/data_modify.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/data_modify.R b/R/data_modify.R index c3cb0f521..4621590c8 100644 --- a/R/data_modify.R +++ b/R/data_modify.R @@ -185,7 +185,6 @@ data_modify.data.frame <- function(data, ..., .if = NULL, .at = NULL, .modify = "New variable has not the same length as the other variables in the data frame and cannot be recycled." ) } - data[[names(dots)[i]]] <- new_variable } } From 9faba9f5220780d98d2f5e46f7b7deb38eeedafb Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 3 Mar 2024 18:24:41 +0100 Subject: [PATCH 09/19] desc, news --- DESCRIPTION | 2 +- NEWS.md | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 048d15065..eeee80702 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: datawizard Title: Easy Data Wrangling and Statistical Transformations -Version: 0.9.1.4 +Version: 0.9.1.5 Authors@R: c( person("Indrajeet", "Patil", , "patilindrajeet.science@gmail.com", role = "aut", comment = c(ORCID = "0000-0003-1995-6531", Twitter = "@patilindrajeets")), diff --git a/NEWS.md b/NEWS.md index 968c9d417..8b6ba88da 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,9 @@ # datawizard 0.9.2 +NEW FUNCTIONS + +* `data_summary()`, to compute summary statistics of (grouped) data frames. + CHANGES * `data_modify()` gets three new arguments, `.at`, `.if` and `.modify`, to modify From 6abbf8ae321b80bc89f0b42ece1f944395949d91 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 3 Mar 2024 18:45:27 +0100 Subject: [PATCH 10/19] fix --- tests/testthat/test-data_summary.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-data_summary.R b/tests/testthat/test-data_summary.R index 5d48ee84d..f94811a85 100644 --- a/tests/testthat/test-data_summary.R +++ b/tests/testthat/test-data_summary.R @@ -129,6 +129,6 @@ test_that("data_summary, errors", { # not a data frame expect_error( data_summary(iris$Sepal.Width, MW = mean(Sepal.Width), SD = sd(Sepal.Width)), - regex = "only works for (grouped) data frames" + regex = "only works for" ) }) From 30cb27069625c4cb3682a8425fc1fa29f64ffe60 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 3 Mar 2024 19:52:17 +0100 Subject: [PATCH 11/19] add print method and snapshot test --- NAMESPACE | 1 + R/data_modify.R | 60 +++++++++++++++++++++++++ R/data_summary.R | 65 ++++----------------------- tests/testthat/_snaps/data_summary.md | 12 +++++ tests/testthat/test-data_summary.R | 7 +++ 5 files changed, 88 insertions(+), 57 deletions(-) create mode 100644 tests/testthat/_snaps/data_summary.md diff --git a/NAMESPACE b/NAMESPACE index 4e924d76b..d10d1884b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -94,6 +94,7 @@ S3method(plot,visualisation_recipe) S3method(print,data_codebook) S3method(print,data_seek) S3method(print,dw_data_peek) +S3method(print,dw_data_summary) S3method(print,dw_data_tabulate) S3method(print,dw_data_tabulates) S3method(print,dw_data_xtabulate) diff --git a/R/data_modify.R b/R/data_modify.R index 4621590c8..6234255fd 100644 --- a/R/data_modify.R +++ b/R/data_modify.R @@ -320,3 +320,63 @@ data_modify.grouped_df <- function(data, ..., .if = NULL, .at = NULL, .modify = data } + +.get_new_dots_variable <- function(dots, i, data) { + # iterate expressions for new variables + symbol <- dots[[i]] + + # expression is given as character string in a variable, but named, e.g. + # a <- "2 * Sepal.Width" + # data_modify(iris, double_SepWidth = a) + # we reconstruct the symbol as if it were provided as literal expression. + # However, we need to check that we don't have a character vector, + # like: data_modify(iris, new_var = "a") + # this one should be recycled instead. + if (!is.character(symbol)) { + eval_symbol <- .dynEval(symbol, ifnotfound = NULL) + if (is.character(eval_symbol)) { + symbol <- try(str2lang(paste0(names(dots)[i], " = ", eval_symbol)), silent = TRUE) + # we may have the edge-case of having a function that returns a character + # vector, like "new_var = sample(letters[1:3])". In this case, "eval_symbol" + # is of type character, but no symbol, thus str2lang() above creates a + # wrong pattern. We then take "eval_symbol" as character input. + if (inherits(symbol, "try-error")) { + symbol <- str2lang(paste0( + names(dots)[i], + " = c(", paste0("\"", eval_symbol, "\"", collapse = ","), ")" + )) + } + } + } + + # finally, we can evaluate expression and get values for new variables + new_variable <- try(with(data, eval(symbol)), silent = TRUE) + + # successful, or any errors, like misspelled variable name? + if (inherits(new_variable, "try-error")) { + # in which step did error happen? + step_number <- switch(as.character(i), + "1" = "the first expression", + "2" = "the second expression", + "3" = "the third expression", + paste("expression", i) + ) + step_msg <- paste0("There was an error in ", step_number, ".") + # try to find out which variable was the cause for the error + error_msg <- attributes(new_variable)$condition$message + if (grepl("object '(.*)' not found", error_msg)) { + error_var <- gsub("object '(.*)' not found", "\\1", error_msg) + insight::format_error( + paste0(step_msg, " Variable \"", error_var, "\" was not found in the dataset or in the environment."), + .misspelled_string(colnames(data), error_var, "Possibly misspelled or not yet defined?") + ) + } else { + insight::format_error(paste0( + step_msg, " ", insight::format_capitalize(error_msg), + ". Possibly misspelled or not yet defined?" + )) + } + } + + new_variable +} diff --git a/R/data_summary.R b/R/data_summary.R index 5e3627c54..d4474eead 100644 --- a/R/data_summary.R +++ b/R/data_summary.R @@ -95,7 +95,7 @@ data_summary.data.frame <- function(x, ..., by = NULL) { }) out <- do.call(rbind, out) } - class(out) <- "data.frame" + class(out) <- c("dw_data_summary", "data.frame") rownames(out) <- NULL out } @@ -160,62 +160,13 @@ data_summary.grouped_df <- function(x, ..., by = NULL) { } -.get_new_dots_variable <- function(dots, i, data) { - # iterate expressions for new variables - symbol <- dots[[i]] - - # expression is given as character string in a variable, but named, e.g. - # a <- "2 * Sepal.Width" - # data_modify(iris, double_SepWidth = a) - # we reconstruct the symbol as if it were provided as literal expression. - # However, we need to check that we don't have a character vector, - # like: data_modify(iris, new_var = "a") - # this one should be recycled instead. - if (!is.character(symbol)) { - eval_symbol <- .dynEval(symbol, ifnotfound = NULL) - if (is.character(eval_symbol)) { - symbol <- try(str2lang(paste0(names(dots)[i], " = ", eval_symbol)), silent = TRUE) - # we may have the edge-case of having a function that returns a character - # vector, like "new_var = sample(letters[1:3])". In this case, "eval_symbol" - # is of type character, but no symbol, thus str2lang() above creates a - # wrong pattern. We then take "eval_symbol" as character input. - if (inherits(symbol, "try-error")) { - symbol <- str2lang(paste0( - names(dots)[i], - " = c(", paste0("\"", eval_symbol, "\"", collapse = ","), ")" - )) - } - } - } +# methods ---------------------------------------------------------------------- - # finally, we can evaluate expression and get values for new variables - new_variable <- try(with(data, eval(symbol)), silent = TRUE) - - # successful, or any errors, like misspelled variable name? - if (inherits(new_variable, "try-error")) { - # in which step did error happen? - step_number <- switch(as.character(i), - "1" = "the first expression", - "2" = "the second expression", - "3" = "the third expression", - paste("expression", i) - ) - step_msg <- paste0("There was an error in ", step_number, ".") - # try to find out which variable was the cause for the error - error_msg <- attributes(new_variable)$condition$message - if (grepl("object '(.*)' not found", error_msg)) { - error_var <- gsub("object '(.*)' not found", "\\1", error_msg) - insight::format_error( - paste0(step_msg, " Variable \"", error_var, "\" was not found in the dataset or in the environment."), - .misspelled_string(colnames(data), error_var, "Possibly misspelled or not yet defined?") - ) - } else { - insight::format_error(paste0( - step_msg, " ", insight::format_capitalize(error_msg), - ". Possibly misspelled or not yet defined?" - )) - } +#' @export +print.dw_data_summary <- function(x, ...) { + if (nrow(x) == 0) { + cat("No matches found.\n") + } else { + cat(insight::export_table(x, ...)) } - - new_variable } diff --git a/tests/testthat/_snaps/data_summary.md b/tests/testthat/_snaps/data_summary.md new file mode 100644 index 000000000..65d9e82f2 --- /dev/null +++ b/tests/testthat/_snaps/data_summary.md @@ -0,0 +1,12 @@ +# data_summary, print + + Code + print(out) + Output + am | gear | MW | SD + ------------------------ + 0 | 3 | 16.11 | 3.37 + 0 | 4 | 21.05 | 3.07 + 1 | 4 | 26.27 | 5.41 + 1 | 5 | 21.38 | 6.66 + diff --git a/tests/testthat/test-data_summary.R b/tests/testthat/test-data_summary.R index f94811a85..16cecb809 100644 --- a/tests/testthat/test-data_summary.R +++ b/tests/testthat/test-data_summary.R @@ -132,3 +132,10 @@ test_that("data_summary, errors", { regex = "only works for" ) }) + + +test_that("data_summary, print", { + data(mtcars) + out <- data_summary(mtcars, MW = mean(mpg), SD = sd(mpg), by = c("am", "gear")) + expect_snapshot(print(out)) +}) From 465a9ffe0cd4503cd44e7f01f685cf161e877d6a Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 3 Mar 2024 20:10:55 +0100 Subject: [PATCH 12/19] add test --- tests/testthat/test-data_summary.R | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tests/testthat/test-data_summary.R b/tests/testthat/test-data_summary.R index 16cecb809..e057cc35a 100644 --- a/tests/testthat/test-data_summary.R +++ b/tests/testthat/test-data_summary.R @@ -139,3 +139,25 @@ test_that("data_summary, print", { out <- data_summary(mtcars, MW = mean(mpg), SD = sd(mpg), by = c("am", "gear")) expect_snapshot(print(out)) }) + + +test_that("data_summary, inside functions", { + foo1 <- function(x, ...) { + datawizard::data_summary(x, ..., by = "Species") + } + + foo2 <- function(x, by, ...) { + datawizard::data_summary(x, ..., by = by) + } + + foo3 <- function(x, by) { + datawizard::data_summary(x, MW = mean(Sepal.Width), by = by) + } + + data(iris) + out1 <- foo1(iris, MW = mean(Sepal.Width)) + out2 <- foo2(iris, by = "Species", MW = mean(Sepal.Width)) + out3 <- foo3(iris, "Species") + expect_equal(out1$MW, out2$MW, tolerance = 1e-4) + expect_equal(out1$MW, out3$MW, tolerance = 1e-4) +}) From 973417b90cd9252b62129ce3c6556992cb857c7e Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 3 Mar 2024 20:13:30 +0100 Subject: [PATCH 13/19] correct english form --- R/data_summary.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/data_summary.R b/R/data_summary.R index d4474eead..8bb4f3e09 100644 --- a/R/data_summary.R +++ b/R/data_summary.R @@ -77,8 +77,8 @@ data_summary.data.frame <- function(x, ..., by = NULL) { ) } # split data - splitted_data <- split(x, x[by]) - out <- lapply(splitted_data, function(s) { + split_data <- split(x, x[by]) + out <- lapply(split_data, function(s) { # no data for combination? Return NULL if (nrow(s) == 0) { return(NULL) From b3c7628fb00fdf70b098ffa9a4e73433e775a7f8 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 3 Mar 2024 20:15:53 +0100 Subject: [PATCH 14/19] test --- R/data_summary.R | 5 +++++ tests/testthat/test-data_summary.R | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/R/data_summary.R b/R/data_summary.R index 8bb4f3e09..3b02762ec 100644 --- a/R/data_summary.R +++ b/R/data_summary.R @@ -58,6 +58,11 @@ data_summary.default <- function(x, ...) { data_summary.data.frame <- function(x, ..., by = NULL) { dots <- eval(substitute(alist(...))) + # do we have any expression at all? + if (length(dots) == 0) { + insight::format_error("No expressions for calculating summary statistics provided.") + } + if (is.null(by)) { # when we have no grouping, just compute a one-row summary summarise <- .process_datasummary_dots(dots, x) diff --git a/tests/testthat/test-data_summary.R b/tests/testthat/test-data_summary.R index e057cc35a..b82d78095 100644 --- a/tests/testthat/test-data_summary.R +++ b/tests/testthat/test-data_summary.R @@ -131,6 +131,11 @@ test_that("data_summary, errors", { data_summary(iris$Sepal.Width, MW = mean(Sepal.Width), SD = sd(Sepal.Width)), regex = "only works for" ) + # no expressions + expect_error( + data_summary(iris, by = "Species"), + regex = "No expressions for calculating" + ) }) From ce578d54712cde4b9bfc904d08621b9b23163238 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 3 Mar 2024 20:37:08 +0100 Subject: [PATCH 15/19] include NA, sort output --- R/data_summary.R | 29 +++++++++++++----- man/data_summary.Rd | 6 +++- tests/testthat/_snaps/data_summary.md | 43 +++++++++++++++++++++++++++ tests/testthat/test-data_summary.R | 12 ++++++++ 4 files changed, 81 insertions(+), 9 deletions(-) diff --git a/R/data_summary.R b/R/data_summary.R index 3b02762ec..8d89b93ae 100644 --- a/R/data_summary.R +++ b/R/data_summary.R @@ -8,6 +8,9 @@ #' @param by Optional character string, indicating the name of a variable in `x`. #' If supplied, the data will be split by this variable and summary statistics #' will be computed for each group. +#' @param include_na Logical, if `TRUE`, missing values are included as a level +#' in the grouping variable. If `FALSE`, missing values are omitted from the +#' grouping variable. #' @param ... One or more named expressions that define the new variable name #' and the function to compute the summary statistic. Example: #' `mean_sepal_width = mean(Sepal.Width)`. The expression can also be provided @@ -42,8 +45,8 @@ data_summary <- function(x, ...) { #' @export -data_summary.matrix <- function(x, ..., by = NULL) { - data_summary(as.data.frame(x), ..., by = by) +data_summary.matrix <- function(x, ..., by = NULL, include_na = TRUE) { + data_summary(as.data.frame(x), ..., by = by, include_na = include_na) } @@ -55,7 +58,7 @@ data_summary.default <- function(x, ...) { #' @rdname data_summary #' @export -data_summary.data.frame <- function(x, ..., by = NULL) { +data_summary.data.frame <- function(x, ..., by = NULL, include_na = TRUE) { dots <- eval(substitute(alist(...))) # do we have any expression at all? @@ -81,8 +84,15 @@ data_summary.data.frame <- function(x, ..., by = NULL) { .misspelled_string(colnames(x), by_not_found, "Possibly misspelled?") ) } - # split data - split_data <- split(x, x[by]) + # split data, add NA levels, if requested + l <- lapply(x[by], function(i) { + if (include_na) { + addNA(i) + } else { + i + } + }) + split_data <- split(x, l, drop = TRUE) out <- lapply(split_data, function(s) { # no data for combination? Return NULL if (nrow(s) == 0) { @@ -100,6 +110,9 @@ data_summary.data.frame <- function(x, ..., by = NULL) { }) out <- do.call(rbind, out) } + # sort data + out <- data_arrange(out, select = by) + # data attributes class(out) <- c("dw_data_summary", "data.frame") rownames(out) <- NULL out @@ -107,7 +120,7 @@ data_summary.data.frame <- function(x, ..., by = NULL) { #' @export -data_summary.grouped_df <- function(x, ..., by = NULL) { +data_summary.grouped_df <- function(x, ..., by = NULL, include_na = TRUE) { # extract group variables grps <- attr(x, "groups", exact = TRUE) group_variables <- data_remove(grps, ".rows") @@ -118,7 +131,7 @@ data_summary.grouped_df <- function(x, ..., by = NULL) { # remove information specific to grouped df's attr(x, "groups") <- NULL class(x) <- "data.frame" - data_summary(x, ..., by = by) + data_summary(x, ..., by = by, include_na = include_na) } @@ -172,6 +185,6 @@ print.dw_data_summary <- function(x, ...) { if (nrow(x) == 0) { cat("No matches found.\n") } else { - cat(insight::export_table(x, ...)) + cat(insight::export_table(x, missing = "", ...)) } } diff --git a/man/data_summary.Rd b/man/data_summary.Rd index 6c0e89744..0602dc7da 100644 --- a/man/data_summary.Rd +++ b/man/data_summary.Rd @@ -7,7 +7,7 @@ \usage{ data_summary(x, ...) -\method{data_summary}{data.frame}(x, ..., by = NULL) +\method{data_summary}{data.frame}(x, ..., by = NULL, include_na = TRUE) } \arguments{ \item{x}{A (grouped) data frame.} @@ -20,6 +20,10 @@ as a character string, e.g. \code{"mean_sepal_width = mean(Sepal.Width)"}.} \item{by}{Optional character string, indicating the name of a variable in \code{x}. If supplied, the data will be split by this variable and summary statistics will be computed for each group.} + +\item{include_na}{Logical, if \code{TRUE}, missing values are included as a level +in the grouping variable. If \code{FALSE}, missing values are omitted from the +grouping variable.} } \value{ A data frame with the requested summary statistics. diff --git a/tests/testthat/_snaps/data_summary.md b/tests/testthat/_snaps/data_summary.md index 65d9e82f2..44b7f3f4c 100644 --- a/tests/testthat/_snaps/data_summary.md +++ b/tests/testthat/_snaps/data_summary.md @@ -10,3 +10,46 @@ 1 | 4 | 26.27 | 5.41 1 | 5 | 21.38 | 6.66 +# data_summary, with NA + + Code + print(out) + Output + c172code | MW + ---------------- + 1 | 87.12 + 2 | 94.05 + 3 | 75.00 + | 47.80 + +--- + + Code + print(out) + Output + c172code | MW + ---------------- + 1 | 87.12 + 2 | 94.05 + 3 | 75.00 + +--- + + Code + print(out) + Output + e42dep | c172code | MW + -------------------------- + 1 | 2 | 17.00 + 2 | 2 | 34.25 + 3 | 1 | 39.50 + 3 | 2 | 52.44 + 3 | 3 | 52.00 + 3 | | 84.00 + 4 | 1 | 134.75 + 4 | 2 | 119.26 + 4 | 3 | 88.80 + 4 | | 43.29 + | 2 | + | | 7.00 + diff --git a/tests/testthat/test-data_summary.R b/tests/testthat/test-data_summary.R index b82d78095..23eb02a7c 100644 --- a/tests/testthat/test-data_summary.R +++ b/tests/testthat/test-data_summary.R @@ -146,6 +146,18 @@ test_that("data_summary, print", { }) +test_that("data_summary, with NA", { + data(efc, package = "datawizard") + out <- data_summary(efc, MW = mean(c12hour, na.rm = TRUE), by = "c172code") + expect_snapshot(print(out)) + out <- data_summary(efc, MW = mean(c12hour, na.rm = TRUE), by = "c172code", include_na = FALSE) + expect_snapshot(print(out)) + # sorting for multiple groups + out <- data_summary(efc, MW = mean(c12hour, na.rm = TRUE), by = c("e42dep", "c172code")) + expect_snapshot(print(out)) +}) + + test_that("data_summary, inside functions", { foo1 <- function(x, ...) { datawizard::data_summary(x, ..., by = "Species") From 2bd8cd042148401ca3181aaed170f2e43c5d5b71 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 3 Mar 2024 21:39:27 +0100 Subject: [PATCH 16/19] add test --- tests/testthat/test-data_summary.R | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tests/testthat/test-data_summary.R b/tests/testthat/test-data_summary.R index 23eb02a7c..16adf9ddb 100644 --- a/tests/testthat/test-data_summary.R +++ b/tests/testthat/test-data_summary.R @@ -178,3 +178,20 @@ test_that("data_summary, inside functions", { expect_equal(out1$MW, out2$MW, tolerance = 1e-4) expect_equal(out1$MW, out3$MW, tolerance = 1e-4) }) + + +test_that("data_summary, expression as variable", { + data(mtcars) + a <- "MW = mean(mpg)" + b <- "SD = sd(mpg)" + out <- data_summary(mtcars, a, by = c("am", "gear")) + expect_named(out, c("am", "gear", "MW")) + expect_equal(out$MW, aggregate(mtcars["mpg"], list(mtcars$am, mtcars$gear), mean)$mpg, tolerance = 1e-4) + expect_error( + data_summary(mtcars, a, b, by = c("am", "gear")), + regex = "You cannot mix" + ) + out <- data_summary(mtcars, c(a, b), by = c("am", "gear")) + expect_named(out, c("am", "gear", "MW", "SD")) + expect_equal(out$SD, aggregate(mtcars["mpg"], list(mtcars$am, mtcars$gear), sd)$mpg, tolerance = 1e-4) +}) From 502df98aeff78a7ee08b9efa57d818ff1355382b Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 3 Mar 2024 21:41:32 +0100 Subject: [PATCH 17/19] meaningful code comments --- R/data_summary.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/data_summary.R b/R/data_summary.R index 8d89b93ae..90f3e8ae3 100644 --- a/R/data_summary.R +++ b/R/data_summary.R @@ -153,10 +153,10 @@ data_summary.grouped_df <- function(x, ..., by = NULL, include_na = TRUE) { } } # expression is given as character string, e.g. - # a <- "double_SepWidth = 2 * Sepal.Width" - # data_modify(iris, a) + # a <- "mean_sepwid = mean(Sepal.Width)" + # data_summary(iris, a, by = "Species") # or as character vector, e.g. - # data_modify(iris, c("var_a = Sepal.Width / 10", "var_b = Sepal.Width * 10")) + # data_summary(iris, c("var_a = mean(Sepal.Width)", "var_b = sd(Sepal.Width)")) character_symbol <- tryCatch(.dynEval(dots[[1]]), error = function(e) NULL) # do we have a character vector? Then we can proceed if (is.character(character_symbol)) { From 410090e38f43df0005a826633cbac21f69fdf4bb Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 3 Mar 2024 23:44:18 +0100 Subject: [PATCH 18/19] add test --- tests/testthat/test-data_summary.R | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/testthat/test-data_summary.R b/tests/testthat/test-data_summary.R index 16adf9ddb..aede5c042 100644 --- a/tests/testthat/test-data_summary.R +++ b/tests/testthat/test-data_summary.R @@ -136,6 +136,11 @@ test_that("data_summary, errors", { data_summary(iris, by = "Species"), regex = "No expressions for calculating" ) + # wrong expression + expect_error( + data_summary(mtcars, mw = mesn(mpg), by = "am"), + regex = "There was an error" + ) }) From 26f70f208e97fbd4e038badfc4ee2bc6c24199d0 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 3 Mar 2024 23:57:41 +0100 Subject: [PATCH 19/19] Update data_summary.R --- R/data_summary.R | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/R/data_summary.R b/R/data_summary.R index 90f3e8ae3..5f8b0ee08 100644 --- a/R/data_summary.R +++ b/R/data_summary.R @@ -105,7 +105,7 @@ data_summary.data.frame <- function(x, ..., by = NULL, include_na = TRUE) { # bind grouping-variables and values summarised_data <- cbind(s[1, by], summarised_data) # make sure we have proper column names - colnames(summarised_data) <- c(by, vapply(summarise, names, character(1))) + colnames(summarised_data) <- c(by, unlist(lapply(summarise, names))) summarised_data }) out <- do.call(rbind, out) @@ -170,7 +170,11 @@ data_summary.grouped_df <- function(x, ..., by = NULL, include_na = TRUE) { out <- lapply(seq_along(dots), function(i) { new_variable <- .get_new_dots_variable(dots, i, data) - stats::setNames(new_variable, names(dots)[i]) + if (inherits(new_variable, c("bayestestR_ci", "bayestestR_eti"))) { + stats::setNames(new_variable, c("CI", "CI_low", "CI_high")) + } else { + stats::setNames(new_variable, names(dots)[i]) + } }) } @@ -185,6 +189,11 @@ print.dw_data_summary <- function(x, ...) { if (nrow(x) == 0) { cat("No matches found.\n") } else { + if (all(c("CI", "CI_low", "CI_high") %in% colnames(x))) { + ci <- insight::format_table(x[c("CI", "CI_low", "CI_high")], ...) + x$CI <- x$CI_low <- x$CI_high <- NULL + x <- cbind(x, ci) + } cat(insight::export_table(x, missing = "", ...)) } }