From 6d30f61f5fbaccebdcb9c7d8fb33f2f1b71159d9 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Wed, 29 May 2024 11:51:11 -0700 Subject: [PATCH 1/2] add recipes_name_predictors() and recipes_outcomes() --- DESCRIPTION | 2 +- NAMESPACE | 2 ++ NEWS.md | 2 ++ R/developer.R | 4 ++++ R/misc.R | 33 +++++++++++++++++++++++++++++++++ _pkgdown.yml | 1 + man/developer_functions.Rd | 4 ++++ man/recipes-role-indicator.Rd | 26 ++++++++++++++++++++++++++ man/roles.Rd | 13 ++++++------- 9 files changed, 79 insertions(+), 8 deletions(-) create mode 100644 man/recipes-role-indicator.Rd diff --git a/DESCRIPTION b/DESCRIPTION index db8d5f076..d6dbf1605 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -70,6 +70,6 @@ Config/Needs/website: tidyverse/tidytemplate Config/testthat/edition: 3 Encoding: UTF-8 Roxygen: list(markdown = TRUE) -RoxygenNote: 7.3.1 +RoxygenNote: 7.3.1.9000 Remotes: tidymodels/hardhat diff --git a/NAMESPACE b/NAMESPACE index 426a10ff3..7700f05bd 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -599,6 +599,8 @@ export(rand_id) export(recipe) export(recipes_eval_select) export(recipes_extension_check) +export(recipes_names_outcomes) +export(recipes_names_predictors) export(recipes_pkg_check) export(recipes_remove_cols) export(remove_original_cols) diff --git a/NEWS.md b/NEWS.md index c87c4ea95..2dde026f4 100644 --- a/NEWS.md +++ b/NEWS.md @@ -24,6 +24,8 @@ * Improved error message for misspelled argument in step functions. (#1318) +* Developer helper functions `recipes_names_predictors()` and `recipes_names_outcomes()` have been added to aid variable selection in steps. (#1026) + # recipes 1.0.10 ## Bug Fixes diff --git a/R/developer.R b/R/developer.R index d2310eff1..a9f1b841b 100644 --- a/R/developer.R +++ b/R/developer.R @@ -116,6 +116,10 @@ #' [recipes_remove_cols()] should be used in `prep.step_*()` functions, and is #' used to remove columns from the data set, either by using the #' `object$removals` field or by using the `col_names` argument. +#' +#' [recipes_names_predictors()] and [recipes_names_outcomes()] should be used in +#' `prep.step_*()` functions, and are used to get names of predictors and +#' outcomes. #' #' [get_case_weights()] and [are_weights_used()] are functions that help you #' extract case weights and help determine if they are used or not within the diff --git a/R/misc.R b/R/misc.R index 90249a6ba..8e1f4f6ac 100644 --- a/R/misc.R +++ b/R/misc.R @@ -955,3 +955,36 @@ recipes_remove_cols <- function(new_data, object, col_names = character()) { } new_data } + +#' Role indicators +#' +#' This helper function is meant to be used in `prep()` methods to identify +#' predictors and outcomes by names. +#' +#' @param info data.frame with variable information of columns. +#' +#' @return Character vector of column names. +#' @keywords internal +#' +#' @seealso [developer_functions] +#' +#' @name recipes-role-indicator +NULL + +#' @rdname recipes-role-indicator +#' @export +recipes_names_predictors <- function(info) { + get_from_info(info, "predictor") +} + +#' @rdname recipes-role-indicator +#' @export +recipes_names_outcomes <- function(info) { + get_from_info(info, "outcome") +} + +get_from_info <- function(info, role) { + res <- info$variable[info$role == role & !is.na(info$role)] + + res +} diff --git a/_pkgdown.yml b/_pkgdown.yml index 9713e872b..3f8d9a43b 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -165,6 +165,7 @@ reference: - prepper - recipes_eval_select - recipes_extension_check + - recipes-role-indicator - update.step - title: Tidy Methods contents: diff --git a/man/developer_functions.Rd b/man/developer_functions.Rd index d9a396aac..024200535 100644 --- a/man/developer_functions.Rd +++ b/man/developer_functions.Rd @@ -106,6 +106,10 @@ once and when possible should be put outside \code{for} loops. used to remove columns from the data set, either by using the \code{object$removals} field or by using the \code{col_names} argument. +\code{\link[=recipes_names_predictors]{recipes_names_predictors()}} and \code{\link[=recipes_names_outcomes]{recipes_names_outcomes()}} should be used in +\verb{prep.step_*()} functions, and are used to get names of predictors and +outcomes. + \code{\link[=get_case_weights]{get_case_weights()}} and \code{\link[=are_weights_used]{are_weights_used()}} are functions that help you extract case weights and help determine if they are used or not within the step. They will typically be used within the \verb{prep.step_*()} functions if the diff --git a/man/recipes-role-indicator.Rd b/man/recipes-role-indicator.Rd new file mode 100644 index 000000000..289fd2d12 --- /dev/null +++ b/man/recipes-role-indicator.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/misc.R +\name{recipes-role-indicator} +\alias{recipes-role-indicator} +\alias{recipes_names_predictors} +\alias{recipes_names_outcomes} +\title{Role indicators} +\usage{ +recipes_names_predictors(info) + +recipes_names_outcomes(info) +} +\arguments{ +\item{info}{data.frame with variable information of columns.} +} +\value{ +Character vector of column names. +} +\description{ +This helper function is meant to be used in \code{prep()} methods to identify +predictors and outcomes by names. +} +\seealso{ +\link{developer_functions} +} +\keyword{internal} diff --git a/man/roles.Rd b/man/roles.Rd index 1c666259f..8e4a9c770 100644 --- a/man/roles.Rd +++ b/man/roles.Rd @@ -50,9 +50,9 @@ functions for columns that are already present in the original data supplied to \code{recipe()}. See the \code{role} argument in some step functions to update roles for columns created by steps. -Variables can have any arbitrary role (see the examples) but there are two -special standard roles, \code{"predictor"} and \code{"outcome"}. These two roles are -typically required when fitting a model. +Variables can have any arbitrary role (see the examples) but there are three +special standard roles, \code{"predictor"}, \code{"outcome"}, and \code{"case_weights"}. +The first two roles are typically required when fitting a model. \code{update_role()} should be used when a variable doesn't currently have a role in the recipe, or to replace an \code{old_role} with a \code{new_role}. \code{add_role()} @@ -93,10 +93,9 @@ If you really aren't using \code{sample} in your recipe, we recommend that you i bake(rec, biomass_test) #> Error in `bake()`: -#> x The following required columns are missing from `new_data`: -#> `sample`. -#> i These columns have one of the following roles, which are required at -#> `bake()` time: `id variable`. +#> x The following required columns are missing from `new_data`: `sample`. +#> i These columns have one of the following roles, which are required at `bake()` +#> time: `id variable`. #> i If these roles are not required at `bake()` time, use #> `update_role_requirements(role = "your_role", bake = FALSE)`. }\if{html}{\out{}} From c96a690b56e0bd18db20b2c30c27475a0bf0731a Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Wed, 29 May 2024 11:55:20 -0700 Subject: [PATCH 2/2] undo dev roxygen --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index d6dbf1605..db8d5f076 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -70,6 +70,6 @@ Config/Needs/website: tidyverse/tidytemplate Config/testthat/edition: 3 Encoding: UTF-8 Roxygen: list(markdown = TRUE) -RoxygenNote: 7.3.1.9000 +RoxygenNote: 7.3.1 Remotes: tidymodels/hardhat