Skip to content

Commit

Permalink
Merge pull request #1383 from tidymodels/add-snapshots
Browse files Browse the repository at this point in the history
Add missing snapshots - part 1
  • Loading branch information
EmilHvitfeldt authored Oct 25, 2024
2 parents cfd1166 + e4ee075 commit 5b29dc6
Show file tree
Hide file tree
Showing 67 changed files with 778 additions and 86 deletions.
2 changes: 1 addition & 1 deletion R/BoxCox.R
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ bc_trans <- function(x, lambda, eps = .001) {
if (any(x <= 0)) {
cli::cli_warn(
"Applying Box-Cox transformation to non-positive data in column \\
{names(lambda)}"
{.field {names(lambda)}}."
)
}

Expand Down
8 changes: 4 additions & 4 deletions R/case_weights.R
Original file line number Diff line number Diff line change
Expand Up @@ -88,10 +88,10 @@ get_case_weights <- function(info, .data, call = rlang::caller_env()) {
if (!is.numeric(res)) {
cli::cli_abort(
c(
"x" = "{.arg {wt_col}} has a {.code case_weights} role,\\
but is not numeric.",
"i" = "{.arg {wt_col}} is {.obj_type_friendly {wt_col}}."
),
x = "{.field {wt_col}} has a {.code case_weights} role and should be
numeric, but is {.obj_type_friendly {wt_col}}.",
i = "Only numeric case weights are supported in recipes."
),
call = call
)
}
Expand Down
3 changes: 3 additions & 0 deletions R/colcheck.R
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,9 @@ bake.check_cols <- function(object, new_data, ...) {
new_cols <- names(new_data)
missing <- setdiff(original_cols, new_cols)
if (length(missing) > 0) {
# This is functionally not reachable after we added ptype checking in
# https://github.com/tidymodels/recipes/pull/1330
# but it feels too harsh to deprecate this check function.
cli::cli_abort(c(
x = "{cli::qty(length(missing))}The following column{?s} {?is/are} \\
missing from {.arg new_data}:",
Expand Down
8 changes: 0 additions & 8 deletions R/count.R
Original file line number Diff line number Diff line change
Expand Up @@ -129,14 +129,6 @@ prep.step_count <- function(x, training, info = NULL, ...) {
col_name <- recipes_eval_select(x$terms, training, info)
check_type(training[, col_name], types = c("string", "factor", "ordered"))

if (length(col_name) > 1) {
cli::cli_abort(c(
x = "The selector should select at most a single variable.",
i = "The following {length(col_name)} were selected: \\
{.and {.var {col_name}}}."
))
}

step_count_new(
terms = x$terms,
role = x$role,
Expand Down
26 changes: 7 additions & 19 deletions R/cut.R
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,13 @@ prep.step_cut <- function(x, training, info = NULL, ...) {
col_names <- recipes_eval_select(x$terms, training, info)
check_type(training[, col_names], types = c("double", "integer"))

if (!is.numeric(x$breaks)) {
cli::cli_abort(
"{.arg breaks} must be a numeric vector, \\
not {.obj_type_friendly {breaks}}."
)
}

all_breaks <- vector("list", length(col_names))
names(all_breaks) <- col_names
for (col_name in col_names) {
Expand All @@ -133,19 +140,6 @@ prep.step_cut <- function(x, training, info = NULL, ...) {
}

create_full_breaks <- function(var, breaks) {
if (!is.numeric(var)) {
cli::cli_abort(
"{.arg var} must be a numeric vector, not {.obj_type_friendly {var}}."
)
}

if (!is.numeric(breaks)) {
cli::cli_abort(
"{.arg breaks} must be a numeric vector, \\
not {.obj_type_friendly {breaks}}."
)
}

if (min(var) < min(breaks)) {
breaks <- c(min(var), breaks)
}
Expand Down Expand Up @@ -204,12 +198,6 @@ cut_var <- function(var, breaks, include_outside_range) {
# the levels when bake.recipe itself is called. Moreover,
# it is cleaner to show it in this way.
adjust_levels_min_max <- function(x) {
if (!is.factor(x)) {
cli::cli_abort(
"{.arg x} must be a factor, not {.obj_type_friendly {x}}.",
.internal = TRUE
)
}
levs <- levels(x)
if (length(levs) == 1) {
return(factor(rep("[min,max]", length(x))))
Expand Down
3 changes: 1 addition & 2 deletions R/date.R
Original file line number Diff line number Diff line change
Expand Up @@ -111,8 +111,7 @@ step_date <-
offenders <- features[!features %in% feat]

cli::cli_abort(c(
x = "Possible values of {.arg features} should include:",
"*" = "{.or {.val {feat}}}.",
x = "Possible values of {.arg features} are {.or {.val {feat}}}.",
i = "Invalid values were: {.val {offenders}}."
))
}
Expand Down
6 changes: 1 addition & 5 deletions R/dummy.R
Original file line number Diff line number Diff line change
Expand Up @@ -271,13 +271,9 @@ bake.step_dummy <- function(object, new_data, ...) {
# the original (see the note above)
is_ordered <- attr(levels, "dataClasses") == "ordered"

if (is.null(levels_values)) {
cli::cli_abort("Factor level values not recorded in {.var col_name}.")
}

if (length(levels_values) == 1) {
cli::cli_abort(
"Only one factor level in {.var col_name}: {levels_values}."
"Only one factor level in {.var {col_name}}: {.val {levels_values}}."
)
}

Expand Down
19 changes: 0 additions & 19 deletions R/dummy_multi_choice.R
Original file line number Diff line number Diff line change
Expand Up @@ -142,8 +142,6 @@ prep.step_dummy_multi_choice <- function(x, training, info = NULL, ...) {
col_names <- recipes_eval_select(x$terms, training, info)
check_type(training[, col_names], types = c("nominal", "logical"))

multi_dummy_check_type(training[, col_names])

levels <- purrr::map(training[, col_names], levels)
levels <- vctrs::list_unchop(levels, ptype = character(), name_spec = rlang::zap())
levels <- levels[!is.na(levels)]
Expand All @@ -165,23 +163,6 @@ prep.step_dummy_multi_choice <- function(x, training, info = NULL, ...) {
)
}

multi_dummy_check_type <- function(dat, call = rlang::caller_env()) {
is_good <- function(x) {
is.factor(x) | is.character(x) | all(is.na(x))
}

all_good <- vapply(dat, is_good, logical(1))
if (!all(all_good)) {
offenders <- names(dat)[!all_good]
cli::cli_abort(c(
"x" = "All columns selected for the step should be \\
factor, character, or NA. The following were not:",
"*" = "{.var {offenders}}."
), call = call)
}
invisible(all_good)
}

#' @export
bake.step_dummy_multi_choice <- function(object, new_data, ...) {
col_names <- object$input
Expand Down
2 changes: 1 addition & 1 deletion R/impute_knn.R
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ step_impute_knn <-
if (length(options) > 0) {
if (any(!(opt_nms %in% c("eps", "nthread")))) {
cli::cli_abort(
"Valid values for {.arg options} are {.val eps}, and {.val nthread}."
"Valid values for {.arg options} are {.val eps} and {.val nthread}."
)
}
if (all(opt_nms != "nthread")) {
Expand Down
6 changes: 0 additions & 6 deletions R/impute_mean.R
Original file line number Diff line number Diff line change
Expand Up @@ -113,14 +113,8 @@ trim <- function(x, trim) {
# Adapted from mean.default
x <- sort(x, na.last = TRUE)
na_ind <- is.na(x)
if (!is.numeric(trim) || length(trim) != 1L) {
cli::cli_abort("{.arg trim} must be numeric of length one.")
}
n <- length(x[!na_ind])
if (trim > 0 && n) {
if (is.complex(x)) {
cli::cli_abort("Trimmed means are not defined for complex data.")
}
if (trim >= 0.5)
return(stats::median(x[!na_ind], na.rm = FALSE))
lo <- floor(n * trim) + 1
Expand Down
15 changes: 11 additions & 4 deletions R/interact.R
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,8 @@ prep.step_interact <- function(x, training, info = NULL, ...) {
)
if (!is_formula(tmp_terms)) {
cli::cli_abort(
"{.arg terms} must be a formula. Not {.obj_type_friendly {term}}."
"{.arg terms} must be a formula, not {.obj_type_friendly {term}}.",
.internal = TRUE
)
}

Expand Down Expand Up @@ -209,7 +210,7 @@ prep.step_interact <- function(x, training, info = NULL, ...) {
vars <-
unique(unlist(lapply(make_new_formula(int_terms), all.vars)))
var_check <- info[info$variable %in% vars, ]
if (any(var_check$type == "nominal")) {
if (any(vapply(var_check$type, function(x) "nominal" %in% x, logical(1)))) {
cli::cli_warn(
"Categorical variables used in {.fn step_interact} should probably be \\
avoided; This can lead to differences in dummy variable values that \\
Expand Down Expand Up @@ -401,7 +402,10 @@ find_selectors <- function(f) {
list()
} else {
# User supplied incorrect input
cli::cli_abort("Don't know how to handle type {typeof(f)}.")
cli::cli_abort(
"Don't know how to handle type {.code {typeof(f)}}.",
.internal = TRUE
)
}
}

Expand All @@ -418,7 +422,10 @@ replace_selectors <- function(x, elem, value) {
map_pairlist(x, replace_selectors, elem, value)
} else {
# User supplied incorrect input
cli::cli_abort("Don't know how to handle type {typeof(f)}.")
cli::cli_abort(
"Don't know how to handle type {.code {typeof(f)}}.",
.internal = TRUE
)
}
}

Expand Down
2 changes: 1 addition & 1 deletion R/kpca_rbf.R
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ prep.step_kpca_rbf <- function(x, training, info = NULL, ...) {
cli::cli_abort(c(
x = "Failed with error:",
i = as.character(kprc)
))
), .internal = TRUE)
}
} else {
kprc <- NULL
Expand Down
7 changes: 4 additions & 3 deletions R/nnmf_sparse.R
Original file line number Diff line number Diff line change
Expand Up @@ -170,9 +170,10 @@ prep.step_nnmf_sparse <- function(x, training, info = NULL, ...) {
} else {
na_w <- sum(is.na(nnm$w))
if (na_w > 0) {
cli::cli_abort(
"The NNMF loadings are missing. The penalty may have been to high."
)
cli::cli_abort(c(
x = "The NNMF loadings are missing.",
i = "The penalty may have been too high or missing values are present in data."
))
} else {
nnm <- list(x_vars = col_names, w = nnm$w)
rownames(nnm$w) <- col_names
Expand Down
2 changes: 1 addition & 1 deletion R/pls.R
Original file line number Diff line number Diff line change
Expand Up @@ -344,7 +344,7 @@ prep.step_pls <- function(x, training, info = NULL, ...) {
res <- try(rlang::eval_tidy(cl), silent = TRUE)
if (inherits(res, "try-error")) {
cli::cli_warn(
"{.fn step_pls failed with error: {as.character(res))}.",
"{.fn step_pls} failed with error: {as.character(res)}.",
)
res <- list(x_vars = x_names, y_vars = y_names)
} else {
Expand Down
4 changes: 2 additions & 2 deletions R/profile.R
Original file line number Diff line number Diff line change
Expand Up @@ -122,10 +122,10 @@ step_profile <- function(recipe,
i = "See {.help [?step_profile](recipes::step_profile)} for information."
))
}
if (all(sort(names(grid)) == c("len", "ptcl"))) {
if (!identical(sort(names(grid)), c("len", "pctl"))) {
cli::cli_abort(c(
x = "`grid` should have two named elements {.field len} and \\
{.field ptcl}, not {sort(names(grid))}.",
{.field pctl}, not {.val {sort(names(grid))}}.",
i = "See {.help [?step_profile](recipes::step_profile)} for information."
))
}
Expand Down
9 changes: 1 addition & 8 deletions R/relevel.R
Original file line number Diff line number Diff line change
Expand Up @@ -84,19 +84,12 @@ step_relevel_new <-
#' @export
prep.step_relevel <- function(x, training, info = NULL, ...) {
col_names <- recipes_eval_select(x$terms, training, info)
check_type(training[, col_names], types = c("string", "factor", "ordered"))
check_type(training[, col_names], types = c("string", "factor"))

# Get existing levels and their factor type (i.e. ordered)
objects <- lapply(training[, col_names], get_existing_values)
# Check to make sure that no ordered levels are provided
order_check <- map_lgl(objects, attr, "is_ordered")
if (any(order_check)) {
offenders <- names(order_check)[order_check]
cli::cli_abort(
"Columns contain ordered factors (which cannot be releveled) \\
{.val {x$ref_level}}: {offenders}."
)
}

# Check to make sure that the reference level exists in the factor
ref_check <- map_lgl(objects, function(x, y) !y %in% x,
Expand Down
16 changes: 16 additions & 0 deletions tests/testthat/_snaps/BoxCox.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,22 @@
-- Operations
* Box-Cox transformation on: <none> | Trained

---

Code
recipe(~ mpg + disp, data = mtcars) %>% step_BoxCox(mpg, disp) %>% prep() %>%
bake(new_data = tibble(mpg = -1, disp = -1))
Condition
Warning:
Applying Box-Cox transformation to non-positive data in column mpg.
Warning:
Applying Box-Cox transformation to non-positive data in column disp.
Output
# A tibble: 1 x 2
mpg disp
<dbl> <dbl>
1 NaN NaN

# bake method errors when needed non-standard role columns are missing

Code
Expand Down
10 changes: 10 additions & 0 deletions tests/testthat/_snaps/case-weight-functions.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,13 @@
! There should only be a single column with the role `case_weights`.
i In these data, there are 2 columns: `var1` and `var2`.

# get_case_weights() catches non-numeric case weights

Code
recipe(~., data = mtcars) %>% step_normalize(all_predictors()) %>% prep()
Condition
Error in `step_normalize()`:
Caused by error in `prep()`:
x vs has a `case_weights` role and should be numeric, but is a string.
i Only numeric case weights are supported in recipes.

8 changes: 8 additions & 0 deletions tests/testthat/_snaps/corr.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,14 @@
-- Operations
* Correlation filter on: V6, V1, V3 | Trained, ignored weights

# corr_filter() warns on many NA values

Code
tmp <- recipe(~., data = mtcars) %>% step_corr(all_predictors()) %>% prep()
Condition
Warning:
Too many correlations are `NA`; skipping correlation filter.

# empty printing

Code
Expand Down
10 changes: 10 additions & 0 deletions tests/testthat/_snaps/count.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,16 @@
! Name collision occurred. The following variable names already exist:
* `Sepal.Width`

# checks for grepl arguments

Code
recipe(~., data = mtcars) %>% step_count(options = list(not_real_option = TRUE))
Condition
Error in `step_count()`:
x The following elements of `options` are not allowed:
* "not_real_option".
i Valid options are: "ignore.case", "perl", "fixed", and "useBytes".

# bake method errors when needed non-standard role columns are missing

Code
Expand Down
23 changes: 23 additions & 0 deletions tests/testthat/_snaps/cut.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,29 @@
Warning:
This will create a factor with one value only.

# breaks argument are type checked

Code
recipe(~., data = mtcars) %>% step_cut(disp, hp, breaks = TRUE) %>% prep()
Condition
Error in `step_cut()`:
Caused by error:
! Could not evaluate cli `{}` expression: `breaks`.
Caused by error:
! object 'breaks' not found

---

Code
recipe(~., data = mtcars) %>% step_cut(disp, hp, breaks = c("100", "200")) %>%
prep()
Condition
Error in `step_cut()`:
Caused by error:
! Could not evaluate cli `{}` expression: `breaks`.
Caused by error:
! object 'breaks' not found

# bake method errors when needed non-standard role columns are missing

Code
Expand Down
Loading

0 comments on commit 5b29dc6

Please sign in to comment.