Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Type checkers part 2 #1391

Merged
merged 15 commits into from
Nov 9, 2024
8 changes: 8 additions & 0 deletions R/YeoJohnson.R
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,14 @@ step_YeoJohnson_new <-
prep.step_YeoJohnson <- function(x, training, info = NULL, ...) {
col_names <- recipes_eval_select(x$terms, training, info)
check_type(training[, col_names], types = c("double", "integer"))
check_number_whole(x$num_unique, args = "num_unique")
check_bool(x$na_rm, arg = "na_rm")
if (!is.numeric(x$limits) | any(is.na(x$limits)) | length(x$limits) != 2) {
EmilHvitfeldt marked this conversation as resolved.
Show resolved Hide resolved
cli::cli_abort("{.arg limits} should be a numeric vector with two values,
not {.obj_type_friendly {x$limits}}")
}

x$limits <- sort(x$limits)

values <- vapply(
training[, col_names],
Expand Down
1 change: 1 addition & 0 deletions R/lag.R
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ prep.step_lag <- function(x, training, info = NULL, ...) {
not {.obj_type_friendly {lag}}."
)
}
check_string(x$prefix, arg = "prefix")

step_lag_new(
terms = x$terms,
Expand Down
1 change: 1 addition & 0 deletions R/lincomb.R
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ step_lincomb_new <-
prep.step_lincomb <- function(x, training, info = NULL, ...) {
col_names <- recipes_eval_select(x$terms, training, info)
check_type(training[, col_names], types = c("double", "integer"))
check_number_whole(x$max_steps, arg = "max_steps", min = 1)

filter <- iter_lc_rm(
x = training[, col_names],
Expand Down
3 changes: 3 additions & 0 deletions R/log.R
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,9 @@ step_log_new <-
prep.step_log <- function(x, training, info = NULL, ...) {
col_names <- recipes_eval_select(x$terms, training, info)
check_type(training[, col_names], types = c("double", "integer"))
check_number_decimal(x$offset, arg = "offset")
check_bool(x$signed, arg = "signed")
check_number_decimal(x$base, arg = "base", min = 0)

step_log_new(
terms = x$terms,
Expand Down
1 change: 1 addition & 0 deletions R/logit.R
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ step_logit_new <-
prep.step_logit <- function(x, training, info = NULL, ...) {
col_names <- recipes_eval_select(x$terms, training, info)
check_type(training[, col_names], types = c("double", "integer"))
check_number_decimal(x$offset, arg = "offset")

step_logit_new(
terms = x$terms,
Expand Down
1 change: 1 addition & 0 deletions R/newvalues.R
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ new_values_func <- function(x,
#' @export
prep.check_new_values <- function(x, training, info = NULL, ...) {
col_names <- recipes_eval_select(x$terms, training, info)
check_bool(x$ignore_NA, arg = "ignore_NA")

values <- lapply(training[, col_names], unique)

Expand Down
3 changes: 3 additions & 0 deletions R/nnmf_sparse.R
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,9 @@ nnmf_pen_call <- function(x) {
prep.step_nnmf_sparse <- function(x, training, info = NULL, ...) {
col_names <- recipes_eval_select(x$terms, training, info)
check_type(training[, col_names], types = c("double", "integer"))
check_number_whole(x$num_comp, arg = "num_comp", min = 0)
check_number_decimal(x$penalty, arg = "penalty", min = .Machine$double.eps)
check_string(x$prefix, arg = "prefix")

if (x$num_comp > 0 && length(col_names) > 0) {
x$num_comp <- min(x$num_comp, length(col_names))
Expand Down
1 change: 1 addition & 0 deletions R/normalize.R
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ sd_check <- function(x) {
prep.step_normalize <- function(x, training, info = NULL, ...) {
col_names <- recipes_eval_select(x$terms, training, info)
check_type(training[, col_names], types = c("double", "integer"))
check_bool(x$na_rm, arg = "na_rm")

wts <- get_case_weights(info, training)
were_weights_used <- are_weights_used(wts, unsupervised = TRUE)
Expand Down
1 change: 1 addition & 0 deletions R/novel.R
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ get_existing_values <- function(x) {
prep.step_novel <- function(x, training, info = NULL, ...) {
col_names <- recipes_eval_select(x$terms, training, info)
check_type(training[, col_names], types = c("string", "factor", "ordered"))
check_string(x$new_level, arg = "new_level")

# Get existing levels and their factor type (i.e. ordered)
objects <- lapply(training[, col_names], get_existing_values)
Expand Down
1 change: 1 addition & 0 deletions R/num2factor.R
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@ get_ord_lvls_num <- function(x, foo) {
prep.step_num2factor <- function(x, training, info = NULL, ...) {
col_names <- recipes_eval_select(x$terms, training, info)
check_type(training[, col_names], types = c("double", "integer"))
check_function(x$transform, arg = "transform")

res <- lapply(training[, col_names], get_ord_lvls_num, foo = x$transform)
res <- c(res, ..levels = list(x$levels))
Expand Down
4 changes: 3 additions & 1 deletion R/nzv.R
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ step_nzv <-
skip = FALSE,
id = rand_id("nzv")) {
exp_list <- list(freq_cut = 95 / 5, unique_cut = 10)

if (!isTRUE(all.equal(exp_list, options))) {
lifecycle::deprecate_stop(
"0.1.7",
Expand Down Expand Up @@ -140,6 +140,8 @@ step_nzv_new <-
#' @export
prep.step_nzv <- function(x, training, info = NULL, ...) {
col_names <- recipes_eval_select(x$terms, training, info)
check_number_decimal(x$unique_cut, arg = "unique_cut", min = 0, max = 100)
check_number_decimal(x$freq_cut, arg = "freq_cut", min = 0)

wts <- get_case_weights(info, training)
were_weights_used <- are_weights_used(wts, unsupervised = TRUE)
Expand Down
1 change: 1 addition & 0 deletions R/ordinalscore.R
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ step_ordinalscore_new <-
prep.step_ordinalscore <- function(x, training, info = NULL, ...) {
col_names <- recipes_eval_select(x$terms, training, info)
check_type(training[, col_names], types = "ordered")
check_function(x$convert, arg = "convert")

step_ordinalscore_new(
terms = x$terms,
Expand Down
20 changes: 13 additions & 7 deletions R/other.R
Original file line number Diff line number Diff line change
Expand Up @@ -107,13 +107,7 @@ step_other <-
objects = NULL,
skip = FALSE,
id = rand_id("other")) {
if (!is_tune(threshold)) {
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's a lot of moving things here and in #1387. My rationale is that, if a the tuning process inserts a value, we would not check it. For example, finalize_recipe() does use the constructor.

It does defer the checking until prep-time which is later than this but it seems like the only way to really be sure that we are getting corrrect inputs.

if (threshold >= 1) {
check_number_whole(threshold)
} else {
check_number_decimal(threshold, min = 0)
}
}

add_step(
recipe,
step_other_new(
Expand Down Expand Up @@ -152,6 +146,18 @@ prep.step_other <- function(x, training, info = NULL, ...) {
col_names <- recipes_eval_select(x$terms, training, info)
check_type(training[, col_names], types = c("string", "factor", "ordered"))

if (!is.numeric(x$threshold)) {
cli::cli_abort("{.arg threshold} should be a single numeric value
{.obj_type_friendly {x$threshold}}")
}

if (x$threshold >= 1) {
check_number_whole(x$threshold, arg = "threshold", min = 1)
} else {
check_number_decimal(x$threshold, arg = "threshold", min = 0)
}


wts <- get_case_weights(info, training)
were_weights_used <- are_weights_used(wts, unsupervised = TRUE)
if (isFALSE(were_weights_used)) {
Expand Down
11 changes: 6 additions & 5 deletions R/pca.R
Original file line number Diff line number Diff line change
Expand Up @@ -127,9 +127,6 @@ step_pca <- function(recipe,
keep_original_cols = FALSE,
skip = FALSE,
id = rand_id("pca")) {
if (!is_tune(threshold)) {
check_number_decimal(threshold, min = 0, max = 1, allow_na = TRUE)
}

add_step(
recipe,
Expand Down Expand Up @@ -176,6 +173,10 @@ step_pca_new <-
prep.step_pca <- function(x, training, info = NULL, ...) {
col_names <- recipes_eval_select(x$terms, training, info)
check_type(training[, col_names], types = c("double", "integer"))
check_number_decimal(x$threshold, arg = "threshold", min = 0, max = 1,
allow_na = TRUE)
check_string(x$prefix, arg = "prefix")
check_number_whole(x$num_comp, arg = "num_comp", min = 0)

wts <- get_case_weights(info, training)
were_weights_used <- are_weights_used(wts, unsupervised = TRUE)
Expand Down Expand Up @@ -352,8 +353,8 @@ tidy.step_pca <- function(x, type = "coef", ...) {
)
} else {
type <- rlang::arg_match(
type,
c("coef", "variance"),
type,
c("coef", "variance"),
error_call = rlang::caller_env()
)
if (type == "coef") {
Expand Down
3 changes: 3 additions & 0 deletions R/pls.R
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,9 @@ prep.step_pls <- function(x, training, info = NULL, ...) {
y_names <- recipes_eval_select(x$outcome, training, info)

check_type(training[, x_names], types = c("double", "integer"))
check_number_decimal(x$predictor_prop, arg = "predictor_prop", min = 0, max = 1)
check_string(x$prefix, arg = "prefix")
check_number_whole(x$num_comp, arg = "num_comp", min = 0)

if (length(y_names) > 1 && any(!map_lgl(training[y_names], is.numeric))) {
cli::cli_abort(
Expand Down
7 changes: 3 additions & 4 deletions R/poly.R
Original file line number Diff line number Diff line change
Expand Up @@ -73,14 +73,11 @@ step_poly <-
role = "predictor",
trained = FALSE,
objects = NULL,
degree = 2,
degree = 2L,
options = list(),
keep_original_cols = FALSE,
skip = FALSE,
id = rand_id("poly")) {
if (!is_tune(degree)) {
degree <- as.integer(degree)
}

if (any(names(options) == "degree")) {
degree <- options$degree
Expand Down Expand Up @@ -142,6 +139,8 @@ poly_wrapper <- function(x, args) {
prep.step_poly <- function(x, training, info = NULL, ...) {
col_names <- recipes_eval_select(x$terms, training, info)
check_type(training[, col_names], types = c("double", "integer"))
check_number_whole(x$degree, arg = "degree", min = 1)
x$degree <- as.integer(x$degree)

opts <- x$options
opts$degree <- x$degree
Expand Down
2 changes: 2 additions & 0 deletions R/poly_bernstein.R
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,8 @@ step_poly_bernstein_new <-
prep.step_poly_bernstein <- function(x, training, info = NULL, ...) {
col_names <- recipes_eval_select(x$terms, training, info)
check_type(training[, col_names], types = c("double", "integer"))
check_bool(x$complete_set, arg = "complete_set")
check_number_whole(x$degree, arg = "degree", min = 0)

x$options <- c(x$options, degree = x$degree)

Expand Down
9 changes: 5 additions & 4 deletions R/range.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,8 @@
#' numeric data to be within a pre-defined range of values.
#'
#' @inheritParams step_center
#' @param min A single numeric value for the smallest value in the
#' range.
#' @param max A single numeric value for the largest value in the
#' range.
#' @param min,max Single numeric values for the smallest (or largest) value in
#' the transformed data.
#' @param clipping A single logical value for determining whether
#' application of transformation onto new data should be forced
#' to be inside `min` and `max`. Defaults to TRUE.
Expand Down Expand Up @@ -106,6 +104,9 @@ step_range_new <-
prep.step_range <- function(x, training, info = NULL, ...) {
col_names <- recipes_eval_select(x$terms, training, info)
check_type(training[, col_names], types = c("double", "integer"))
check_number_decimal(x$min, arg = "min")
check_number_decimal(x$max, arg = "max")
check_bool(x$clipping, arg = "clipping")

mins <-
vapply(training[, col_names], min, c(min = 0), na.rm = TRUE)
Expand Down
1 change: 1 addition & 0 deletions R/ratio.R
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@ prep.step_ratio <- function(x, training, info = NULL, ...) {
training[, unique(c(col_names$top, col_names$bottom))],
types = c("double", "integer")
)
check_function(x$naming, arg = "naming")

step_ratio_new(
terms = x$terms,
Expand Down
5 changes: 2 additions & 3 deletions R/regex.R
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,7 @@ step_regex <- function(recipe,
keep_original_cols = TRUE,
skip = FALSE,
id = rand_id("regex")) {
if (!is_tune(pattern)) {
check_string(pattern)
}

valid_args <- names(formals(grepl))[-(1:2)]
if (any(!(names(options) %in% valid_args))) {
cli::cli_abort(c(
Expand Down Expand Up @@ -123,6 +121,7 @@ step_regex_new <-
prep.step_regex <- function(x, training, info = NULL, ...) {
col_name <- recipes_eval_select(x$terms, training, info)
check_type(training[, col_name], types = c("string", "factor", "ordered"))
check_string(x$pattern, arg = "pattern", allow_empty = FALSE)

step_regex_new(
terms = x$terms,
Expand Down
1 change: 1 addition & 0 deletions R/relevel.R
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ step_relevel_new <-
prep.step_relevel <- function(x, training, info = NULL, ...) {
col_names <- recipes_eval_select(x$terms, training, info)
check_type(training[, col_names], types = c("string", "factor"))
check_string(x$ref_level, arg = "ref_level", allow_empty = FALSE)

# Get existing levels and their factor type (i.e. ordered)
objects <- lapply(training[, col_names], get_existing_values)
Expand Down
18 changes: 6 additions & 12 deletions R/relu.R
Original file line number Diff line number Diff line change
Expand Up @@ -82,18 +82,6 @@ step_relu <-
columns = NULL,
skip = FALSE,
id = rand_id("relu")) {
if (!is_tune(shift)) {
check_number_decimal(shift)
}
if (!is_tune(reverse)) {
check_bool(reverse)
}
if (!is_tune(smooth)) {
check_bool(smooth)
}
if (reverse & prefix == "right_relu_") {
prefix <- "left_relu_"
}
add_step(
recipe,
step_relu_new(
Expand Down Expand Up @@ -132,6 +120,12 @@ step_relu_new <-
prep.step_relu <- function(x, training, info = NULL, ...) {
columns <- recipes_eval_select(x$terms, training, info)
check_type(training[, columns], types = c("double", "integer"))
check_number_decimal(x$shift, arg = "shift")
check_bool(x$reverse, arg = "reverse")
check_bool(x$smooth, arg = "smooth")
if (x$reverse & x$prefix == "right_relu_") {
x$prefix <- "left_relu_"
}

step_relu_new(
terms = x$terms,
Expand Down
17 changes: 4 additions & 13 deletions R/sample.R
Original file line number Diff line number Diff line change
Expand Up @@ -69,18 +69,6 @@ step_sample <- function(recipe,
cli::cli_warn("Selectors are not used for this step.")
}

if (!is_tune(size)) {
check_number_decimal(size, min = 0, allow_null = TRUE)
}
if (!is_tune(replace)) {
if (!is.logical(replace)) {
cli::cli_abort(
"{.arg replace} should be a single logical, \\
not {.obj_type_friendly {replace}}."
)
}
}

add_step(
recipe,
step_sample_new(
Expand Down Expand Up @@ -113,11 +101,14 @@ step_sample_new <-

#' @export
prep.step_sample <- function(x, training, info = NULL, ...) {

check_number_decimal(x$size, min = 0, allow_null = TRUE, arg = "size")
check_bool(x$replace, arg = "replace")
if (is.null(x$size)) {
x$size <- nrow(training)
}

wts <- get_case_weights(info, training)
wts <- get_case_weights(info, training)
were_weights_used <- are_weights_used(wts, unsupervised = TRUE)
if (isFALSE(were_weights_used)) {
wts <- NULL
Expand Down
13 changes: 7 additions & 6 deletions R/scale.R
Original file line number Diff line number Diff line change
Expand Up @@ -107,19 +107,20 @@ step_scale_new <-
prep.step_scale <- function(x, training, info = NULL, ...) {
col_names <- recipes_eval_select(x$terms, training, info)
check_type(training[, col_names], types = c("double", "integer"))
check_bool(x$na_rm, arg = "na_rm")
if (x$factor != 1 & x$factor != 2) {
cli::cli_warn(
"Scaling {.arg factor} should take either a value of 1 or 2, not
{.obj_type_friendly {x$factor}}."
)
}

wts <- get_case_weights(info, training)
were_weights_used <- are_weights_used(wts, unsupervised = TRUE)
if (isFALSE(were_weights_used)) {
wts <- NULL
}

if (x$factor != 1 & x$factor != 2) {
cli::cli_warn(
"Scaling {.arg factor} should take either a value of 1 or 2, \\
not {x$factor}."
)
}

vars <- variances(training[, col_names], wts, na_rm = x$na_rm)
sds <- sqrt(vars)
Expand Down
1 change: 1 addition & 0 deletions R/spatialsign.R
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ step_spatialsign_new <-
prep.step_spatialsign <- function(x, training, info = NULL, ...) {
col_names <- recipes_eval_select(x$terms, training, info)
check_type(training[, col_names], types = c("double", "integer"))
check_bool(x$na_rm, arg = "na_rm")

wts <- get_case_weights(info, training)
were_weights_used <- are_weights_used(wts, unsupervised = TRUE)
Expand Down
5 changes: 4 additions & 1 deletion R/spline_b.R
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,10 @@ step_spline_b_new <-
prep.step_spline_b <- function(x, training, info = NULL, ...) {
col_names <- recipes_eval_select(x$terms, training, info)
check_type(training[, col_names], types = c("double", "integer"))

check_bool(x$complete_set, arg = "complete_set")
check_number_whole(x$degree, arg = "degree", min = 0)
check_number_whole(x$deg_free, arg = "deg_free", min = 0)

res <- list()

for (col_name in col_names) {
Expand Down
Loading
Loading