From 2c2a9066279699786888d3fb8acd0df1b3d9332c Mon Sep 17 00:00:00 2001
From: Daniel <mail@danielluedecke.de>
Date: Mon, 27 May 2024 11:11:31 +0200
Subject: [PATCH 1/2] Fix issue introduced in former commit (#507)

* Fix issue introduced in former commit

* add snapshot
---
 DESCRIPTION                           |  2 +-
 R/data_to_long.R                      |  6 +++-
 tests/testthat/_snaps/data_to_long.md | 43 +++++++++++++++++++++++++++
 tests/testthat/test-data_to_long.R    | 14 +++++++++
 4 files changed, 63 insertions(+), 2 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index f87e27ad4..7f0e48337 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Type: Package
 Package: datawizard
 Title: Easy Data Wrangling and Statistical Transformations
-Version: 0.10.0.5
+Version: 0.10.0.6
 Authors@R: c(
     person("Indrajeet", "Patil", , "patilindrajeet.science@gmail.com", role = "aut",
            comment = c(ORCID = "0000-0003-1995-6531", Twitter = "@patilindrajeets")),
diff --git a/R/data_to_long.R b/R/data_to_long.R
index 3d19e5bc2..e5a8b8ade 100644
--- a/R/data_to_long.R
+++ b/R/data_to_long.R
@@ -223,7 +223,11 @@ data_to_long <- function(data,
   # if columns in data frame have attributes (e.g. labelled data), `cbind()`
   # won't work, so we need to remove them. We'll set them back later
   not_stacked[] <- lapply(not_stacked, function(i) {
-    attributes(i) <- NULL
+    # we can't remove *all* attributes, this will convert factors into integers
+    attr(i, "label") <- NULL
+    attr(i, "labels") <- NULL
+    attr(i, "format.spss") <- NULL
+    class(i) <- setdiff(class(i), c("haven_labelled", "vctrs_vctr"))
     i
   })
 
diff --git a/tests/testthat/_snaps/data_to_long.md b/tests/testthat/_snaps/data_to_long.md
index cd7748df7..c863ccc02 100644
--- a/tests/testthat/_snaps/data_to_long.md
+++ b/tests/testthat/_snaps/data_to_long.md
@@ -11,3 +11,46 @@
        $ Item       : chr  "A1" "A2" "A3" "A4" ...
        $ Score      : int  2 4 3 4 4 2 3 3 4 4 ...
 
+# don't convert factors to integer
+
+    Code
+      print(mtcars_long)
+    Output
+         cyl  hp drat    wt vs am gear carb am_f cyl_f id    g  value
+      1    4  93 3.85 2.320  1  1    4    1    1     4  3  mpg  22.80
+      2    4  93 3.85 2.320  1  1    4    1    1     4  3 qsec  18.61
+      3    4  93 3.85 2.320  1  1    4    1    1     4  3 disp 108.00
+      4    8 245 3.21 3.570  0  0    3    4    0     8  7  mpg  14.30
+      5    8 245 3.21 3.570  0  0    3    4    0     8  7 qsec  15.84
+      6    8 245 3.21 3.570  0  0    3    4    0     8  7 disp 360.00
+      7    4  66 4.08 2.200  1  1    4    1    1     4 10  mpg  32.40
+      8    4  66 4.08 2.200  1  1    4    1    1     4 10 qsec  19.47
+      9    4  66 4.08 2.200  1  1    4    1    1     4 10 disp  78.70
+      10   8 264 4.22 3.170  0  1    5    4    1     8 11  mpg  15.80
+      11   8 264 4.22 3.170  0  1    5    4    1     8 11 qsec  14.50
+      12   8 264 4.22 3.170  0  1    5    4    1     8 11 disp 351.00
+      13   6 110 3.08 3.215  1  0    3    1    0     6  4  mpg  21.40
+      14   6 110 3.08 3.215  1  0    3    1    0     6  4 qsec  19.44
+      15   6 110 3.08 3.215  1  0    3    1    0     6  4 disp 258.00
+      16   8 175 3.15 3.440  0  0    3    2    0     8  5  mpg  18.70
+      17   8 175 3.15 3.440  0  0    3    2    0     8  5 qsec  17.02
+      18   8 175 3.15 3.440  0  0    3    2    0     8  5 disp 360.00
+      19   8 335 3.54 3.570  0  1    5    8    1     8 12  mpg  15.00
+      20   8 335 3.54 3.570  0  1    5    8    1     8 12 qsec  14.60
+      21   8 335 3.54 3.570  0  1    5    8    1     8 12 disp 301.00
+      22   6 110 3.90 2.620  0  1    4    4    1     6  1  mpg  21.00
+      23   6 110 3.90 2.620  0  1    4    4    1     6  1 qsec  16.46
+      24   6 110 3.90 2.620  0  1    4    4    1     6  1 disp 160.00
+      25   6 110 3.90 2.875  0  1    4    4    1     6  2  mpg  21.00
+      26   6 110 3.90 2.875  0  1    4    4    1     6  2 qsec  17.02
+      27   6 110 3.90 2.875  0  1    4    4    1     6  2 disp 160.00
+      28   4  95 3.92 3.150  1  0    4    2    0     4  9  mpg  22.80
+      29   4  95 3.92 3.150  1  0    4    2    0     4  9 qsec  22.90
+      30   4  95 3.92 3.150  1  0    4    2    0     4  9 disp 140.80
+      31   4  62 3.69 3.190  1  0    4    2    0     4  8  mpg  24.40
+      32   4  62 3.69 3.190  1  0    4    2    0     4  8 qsec  20.00
+      33   4  62 3.69 3.190  1  0    4    2    0     4  8 disp 146.70
+      34   6 105 2.76 3.460  1  0    3    1    0     6  6  mpg  18.10
+      35   6 105 2.76 3.460  1  0    3    1    0     6  6 qsec  20.22
+      36   6 105 2.76 3.460  1  0    3    1    0     6  6 disp 225.00
+
diff --git a/tests/testthat/test-data_to_long.R b/tests/testthat/test-data_to_long.R
index 37d926b11..ab8bf1ba2 100644
--- a/tests/testthat/test-data_to_long.R
+++ b/tests/testthat/test-data_to_long.R
@@ -488,3 +488,17 @@ test_that("works with labelled data", {
   expect_identical(nrow(out), 200L)
   expect_identical(attributes(out$e42dep)$label, "elder's dependency")
 })
+
+
+test_that("don't convert factors to integer", {
+  data("mtcars")
+  mtcars <- mtcars[c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 18L, 29L, 31L), ]
+  mtcars$am_f <- factor(mtcars$am)
+  mtcars$cyl_f <- factor(mtcars$cyl)
+
+  mtcars$id <- factor(seq_len(nrow(mtcars)))
+  mtcars_long <- data_to_long(mtcars,
+    select = c("mpg", "qsec", "disp"), names_to = "g"
+  )
+  expect_snapshot(print(mtcars_long))
+})

From a7d3c801701c495b4902aadef00d99756d1a308d Mon Sep 17 00:00:00 2001
From: Daniel <mail@danielluedecke.de>
Date: Fri, 31 May 2024 13:46:58 +0200
Subject: [PATCH 2/2] Improve docs for `data_to_wide()` (#506)

* Improve docs for data_to_wide

* fix

* fix

* lintr

* update docs, deprecate arg, update test

* update

* update readme

* add examples

* also improve data_to_long

* update test

* wordlist

* update docs

* address comments

* apply suggestions

* docs

* update docs

* address suggestions

* address comments

* typo

* Update NEWS.md

* formatting news

* plural

* by -> id_cols

* news

* fix

* fix warning in test

* typo

* lintr, whitespace

* lintr (simplify else)

---------

Co-authored-by: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com>
---
 NEWS.md                            |   2 +
 R/data_read.R                      |  39 ++++----
 R/data_restoretype.R               |   1 +
 R/data_to_long.R                   | 105 +++++++++++++++-----
 R/data_to_wide.R                   | 106 +++++++++++++++++---
 R/data_write.R                     |  22 ++---
 README.md                          |  60 ++++++++++++
 inst/WORDLIST                      |   1 +
 man/data_read.Rd                   |   2 +-
 man/data_restoretype.Rd            |   2 +-
 man/data_to_long.Rd                |  93 ++++++++++++++----
 man/data_to_wide.Rd                | 110 ++++++++++++++++++---
 tests/testthat/test-data_to_wide.R | 149 +++++++++++++++--------------
 13 files changed, 521 insertions(+), 171 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index 7340b179d..7f56b8bc1 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -23,6 +23,8 @@ CHANGES
   If you recode into a numeric variable, and one of the recode values is `NA`,
   you no longer need to use `NA_real_` for numeric `NA` values.
 
+* Improved documentation for some functions.
+
 BUG FIXES
 
 * `data_to_long()` did not work for data frame where columns had attributes
diff --git a/R/data_read.R b/R/data_read.R
index b02b7ca87..5137a7735 100644
--- a/R/data_read.R
+++ b/R/data_read.R
@@ -70,7 +70,7 @@
 #' factors, where imported value labels will be set as factor levels. If a
 #' numeric variable has _no_ value labels or less value labels than values, it
 #' is not converted to factor. In this case, value labels are preserved as
-#' `"labels"` attribute. Character vectors are preserved.  Use
+#' `"labels"` attribute. Character vectors are preserved. Use
 #' `convert_factors = FALSE` to remove the automatic conversion of numeric
 #' variables to factors.
 #'
@@ -105,7 +105,7 @@ data_read <- function(path,
     por = .read_spss(path, encoding, convert_factors, verbose, ...),
     dta = .read_stata(path, encoding, convert_factors, verbose, ...),
     sas7bdat = .read_sas(path, path_catalog, encoding, convert_factors, verbose, ...),
-    .read_unknown(path, convert_factors, verbose, ...)
+    .read_unknown(path, file_type, convert_factors, verbose, ...)
   )
 
   # tell user about empty columns
@@ -178,20 +178,18 @@ data_read <- function(path,
         if (is.character(i)) {
           # we need this to drop haven-specific class attributes
           i <- as.character(i)
-        } else {
+        } else if (!is.null(value_labels) && length(value_labels) == insight::n_unique(i)) {
           # if all values are labelled, we assume factor. Use labels as levels
-          if (!is.null(value_labels) && length(value_labels) == insight::n_unique(i)) {
-            if (is.numeric(i)) {
-              i <- factor(i, labels = names(value_labels))
-            } else {
-              i <- factor(as.character(i), labels = names(value_labels))
-            }
-            value_labels <- NULL
-            attr(i, "converted_to_factor") <- TRUE
+          if (is.numeric(i)) {
+            i <- factor(i, labels = names(value_labels))
           } else {
-            # else, fall back to numeric
-            i <- as.numeric(i)
+            i <- factor(as.character(i), labels = names(value_labels))
           }
+          value_labels <- NULL
+          attr(i, "converted_to_factor") <- TRUE
+        } else {
+          # else, fall back to numeric
+          i <- as.numeric(i)
         }
 
         # drop unused value labels
@@ -290,12 +288,18 @@ data_read <- function(path,
 }
 
 
-.read_unknown <- function(path, convert_factors, verbose, ...) {
-  insight::check_if_installed("rio", reason = paste0("to read files of type '", .file_ext(path), "'"))
+.read_unknown <- function(path, file_type, convert_factors, verbose, ...) {
+  insight::check_if_installed("rio", reason = paste0("to read files of type '", file_type, "'"))
   if (verbose) {
     insight::format_alert("Reading data...")
   }
-  out <- rio::import(file = path, ...)
+  # set up arguments. for RDS, we set trust = TRUE, to avoid warnings
+  rio_args <- list(file = path)
+  # check if we have RDS, and if so, add trust = TRUE
+  if (file_type == "rds") {
+    rio_args$trust <- TRUE
+  }
+  out <- do.call(rio::import, c(rio_args, list(...)))
 
   # for "unknown" data formats (like .RDS), which still can be imported via
   # "rio::import()", we must check whether we actually have a data frame or
@@ -310,9 +314,8 @@ data_read <- function(path,
         )
       }
       return(out)
-    } else {
-      out <- tmp
     }
+    out <- tmp
   }
 
   .post_process_imported_data(out, convert_factors, verbose)
diff --git a/R/data_restoretype.R b/R/data_restoretype.R
index 9b5eb71a9..d4119f340 100644
--- a/R/data_restoretype.R
+++ b/R/data_restoretype.R
@@ -1,5 +1,6 @@
 #' Restore the type of columns according to a reference data frame
 #'
+#' @param data A data frame for which to restore the column types.
 #' @inheritParams data_to_long
 #' @inheritParams data_rename
 #' @param reference A reference data frame from which to find the correct
diff --git a/R/data_to_long.R b/R/data_to_long.R
index e5a8b8ade..deffcc0cb 100644
--- a/R/data_to_long.R
+++ b/R/data_to_long.R
@@ -4,43 +4,101 @@
 #' the number of columns. This is a dependency-free base-R equivalent of
 #' `tidyr::pivot_longer()`.
 #'
-#' @param data A data frame to pivot.
-#' @param names_to The name of the new column that will contain the column
-#'   names.
+#' @param data A data frame to convert to long format, so that it has more
+#' rows and fewer columns after the operation.
+#' @param names_to The name of the new column (variable) that will contain the
+#' _names_ from columns in `select` as values, to identify the source of the
+#' values. `names_to` can be a character vector with more than one column name,
+#' in which case `names_sep` or `names_pattern` must be provided in order to
+#' identify which parts of the column names go into newly created columns.
+#' See also 'Examples'.
 #' @param names_prefix A regular expression used to remove matching text from
 #' the start of each variable name.
 #' @param names_sep,names_pattern If `names_to` contains multiple values, this
-#' argument controls how the column name is broken up.
-#' `names_pattern` takes a regular expression containing matching groups, i.e. "()".
-#' @param values_to The name of the new column that will contain the values of
-#'   the pivoted variables.
+#' argument controls how the column name is broken up. `names_pattern` takes a
+#' regular expression containing matching groups, i.e. "()".
+#' @param values_to The name of the new column that will contain the _values_ of
+#' the columns in `select`.
 #' @param values_drop_na If `TRUE`, will drop rows that contain only `NA` in the
-#'   `values_to` column. This effectively converts explicit missing values to
-#'   implicit missing values, and should generally be used only when missing values
-#'   in data were created by its structure.
+#' `values_to` column. This effectively converts explicit missing values to
+#' implicit missing values, and should generally be used only when missing values
+#' in data were created by its structure.
 #' @param rows_to The name of the column that will contain the row names or row
-#'   numbers from the original data. If `NULL`, will be removed.
+#' numbers from the original data. If `NULL`, will be removed.
 #' @param ... Currently not used.
 #' @inheritParams extract_column_names
 #' @param cols Identical to `select`. This argument is here to ensure compatibility
-#'   with `tidyr::pivot_longer()`. If both `select` and `cols` are provided, `cols`
-#'   is used.
+#' with `tidyr::pivot_longer()`. If both `select` and `cols` are provided, `cols`
+#' is used.
+#'
+#' @details
+#' Reshaping data into long format usually means that the input data frame is
+#' in _wide_ format, where multiple measurements taken on the same subject are
+#' stored in multiple columns (variables). The long format stores the same
+#' information in a single column, with each measurement per subject stored in
+#' a separate row. The values of all variables that are not in `select` will
+#' be repeated.
+#'
+#' The necessary information for `data_to_long()` is:
+#'
+#' - The columns that contain the repeated measurements (`select`).
+#' - The name of the newly created column that will contain the names of the
+#'   columns in `select` (`names_to`), to identify the source of the values.
+#'   `names_to` can also be a character vector with more than one column name,
+#'   in which case `names_sep` or `names_pattern` must be provided to specify
+#'   which parts of the column names go into the newly created columns.
+#' - The name of the newly created column that contains the values of the
+#'   columns in `select` (`values_to`).
+#'
+#' In other words: repeated measurements that are spread across several columns
+#' will be gathered into a single column (`values_to`), with the original column
+#' names, that identify the source of the gathered values, stored in one or more
+#' new columns (`names_to`).
 #'
 #' @return If a tibble was provided as input, `reshape_longer()` also returns a
 #' tibble. Otherwise, it returns a data frame.
 #'
 #' @examplesIf requireNamespace("psych") && requireNamespace("tidyr")
-#' wide_data <- data.frame(replicate(5, rnorm(10)))
+#' wide_data <- setNames(
+#'   data.frame(replicate(2, rnorm(8))),
+#'   c("Time1", "Time2")
+#' )
+#' wide_data$ID <- 1:8
+#' wide_data
 #'
-#' # Default behaviour (equivalent to tidyr::pivot_longer(wide_data, cols = 1:5))
+#' # Default behaviour (equivalent to tidyr::pivot_longer(wide_data, cols = 1:3))
+#' # note: this also pivots "ID", which is rarely what you want
 #' data_to_long(wide_data)
 #'
 #' # Customizing the names
-#' data_to_long(wide_data,
-#'   select = c(1, 2),
-#'   names_to = "Column",
-#'   values_to = "Numbers",
-#'   rows_to = "Row"
+#' data_to_long(
+#'   wide_data,
+#'   select = c("Time1", "Time2"),
+#'   names_to = "Timepoint",
+#'   values_to = "Score"
+#' )
+#'
+#' # Reshape multiple columns into long format.
+#' mydat <- data.frame(
+#'   age = c(20, 30, 40),
+#'   sex = c("Female", "Male", "Male"),
+#'   score_t1 = c(30, 35, 32),
+#'   score_t2 = c(33, 34, 37),
+#'   score_t3 = c(36, 35, 38),
+#'   speed_t1 = c(2, 3, 1),
+#'   speed_t2 = c(3, 4, 5),
+#'   speed_t3 = c(1, 8, 6)
+#' )
+#' # The column names are split into two columns: "type" and "time". The
+#' # pattern for splitting column names is provided in `names_pattern`. Values
+#' # of all "score_*" and "speed_*" columns are gathered into a single column
+#' # named "count".
+#' data_to_long(
+#'   mydat,
+#'   select = 3:8,
+#'   names_to = c("type", "time"),
+#'   names_pattern = "(score|speed)_t(\\d+)",
+#'   values_to = "count"
 #' )
 #'
 #' # Full example
@@ -48,21 +106,22 @@
 #' data <- psych::bfi # Wide format with one row per participant's personality test
 #'
 #' # Pivot long format
-#' data_to_long(data,
+#' very_long_data <- data_to_long(data,
 #'   select = regex("\\d"), # Select all columns that contain a digit
 #'   names_to = "Item",
 #'   values_to = "Score",
 #'   rows_to = "Participant"
 #' )
+#' head(very_long_data)
 #'
-#' data_to_long(
+#' even_longer_data <- data_to_long(
 #'   tidyr::who,
 #'   select = new_sp_m014:newrel_f65,
 #'   names_to = c("diagnosis", "gender", "age"),
 #'   names_pattern = "new_?(.*)_(.)(.*)",
 #'   values_to = "count"
 #' )
-#'
+#' head(even_longer_data)
 #' @inherit data_rename
 #' @export
 data_to_long <- function(data,
diff --git a/R/data_to_wide.R b/R/data_to_wide.R
index e7aa8f7d1..151140490 100644
--- a/R/data_to_wide.R
+++ b/R/data_to_wide.R
@@ -4,11 +4,20 @@
 #' the number of rows. This is a dependency-free base-R equivalent of
 #' `tidyr::pivot_wider()`.
 #'
-#' @param data A data frame to pivot.
-#' @param id_cols The name of the column that identifies the rows. If `NULL`,
-#' it will use all the unique rows.
-#' @param names_from The name of the column that contains the levels to be
-#' used as future column names.
+#' @param data A data frame to convert to wide format, so that it has more
+#' columns and fewer rows post-widening than pre-widening.
+#' @param id_cols The name of the column that identifies the rows in the data
+#' by which observations are grouped and the gathered data is spread into new
+#' columns. Usually, this is a variable containing an ID for observations that
+#' have been repeatedly measured. If `NULL`, it will use all remaining columns
+#' that are not in `names_from` or `values_from` as ID columns. `id_cols` can
+#' also be a character vector with more than one name of identifier columns. See
+#' also 'Details' and 'Examples'.
+#' @param names_from The name of the column in the original data whose values
+#' will be used for naming the new columns created in the widened data. Each
+#' unique value in this column will become the name of one of these new columns.
+#' In case `names_prefix` is provided, column names will be concatenated with
+#' the string given in `names_prefix`.
 #' @param names_prefix String added to the start of every variable name. This is
 #'  particularly useful if `names_from` is a numeric vector and you want to create
 #'  syntactic variable names.
@@ -19,17 +28,37 @@
 #' [glue specification](https://glue.tidyverse.org/index.html) that uses the
 #' `names_from` columns to create custom column names. Note that the only
 #' delimiters supported by `names_glue` are curly brackets, `{` and `}`.
-#' @param values_from The name of the column that contains the values to be used
-#' as future variable values.
+#' @param values_from The name of the column in the original data that contains
+#' the values used to fill the new columns created in the widened data.
 #' @param values_fill Optionally, a (scalar) value that will be used to replace
 #' missing values in the new columns created.
 #' @param verbose Toggle warnings.
 #' @param ... Not used for now.
 #'
-#' @return If a tibble was provided as input, `reshape_wider()` also returns a
+#' @return If a tibble was provided as input, `data_to_wide()` also returns a
 #' tibble. Otherwise, it returns a data frame.
 #'
-#' @examples
+#' @details
+#' Reshaping data into wide format usually means that the input data frame is
+#' in _long_ format, where multiple measurements taken on the same subject are
+#' stored in multiple rows. The wide format stores the same information in a
+#' single row, with each measurement stored in a separate column. Thus, the
+#' necessary information for `data_to_wide()` is:
+#'
+#' - The name of the column(s) that identify the groups or repeated measurements
+#'   (`id_cols`).
+#' - The name of the column whose _values_ will become the new column names
+#'   (`names_from`). Since these values may not necessarily reflect appropriate
+#'   column names, you can use `names_prefix` to add a prefix to each newly
+#'   created column name.
+#' - The name of the column that contains the values (`values_from`) for the
+#'   new columns that are created by `names_from`.
+#'
+#' In other words: repeated measurements, as indicated by `id_cols`, that are
+#' saved into the column `values_from` will be spread into new columns, which
+#' will be named after the values in `names_from`. See also 'Examples'.
+#'
+#' @examplesIf requireNamespace("lme4", quietly = TRUE)
 #' data_long <- read.table(header = TRUE, text = "
 #'  subject sex condition measurement
 #'        1   M   control         7.9
@@ -45,7 +74,7 @@
 #'        4   M     cond1        13.4
 #'        4   M     cond2        12.9")
 #'
-#'
+#' # converting long data into wide format
 #' data_to_wide(
 #'   data_long,
 #'   id_cols = "subject",
@@ -53,6 +82,7 @@
 #'   values_from = "measurement"
 #' )
 #'
+#' # converting long data into wide format with custom column names
 #' data_to_wide(
 #'   data_long,
 #'   id_cols = "subject",
@@ -62,13 +92,13 @@
 #'   names_sep = "."
 #' )
 #'
+#' # converting long data into wide format, combining multiple columns
 #' production <- expand.grid(
 #'   product = c("A", "B"),
 #'   country = c("AI", "EI"),
 #'   year = 2000:2014
 #' )
 #' production <- data_filter(production, (product == "A" & country == "AI") | product == "B")
-#'
 #' production$production <- rnorm(nrow(production))
 #'
 #' data_to_wide(
@@ -78,9 +108,59 @@
 #'   names_glue = "prod_{product}_{country}"
 #' )
 #'
+#' # using the "sleepstudy" dataset
+#' data(sleepstudy, package = "lme4")
+#'
+#' # the sleepstudy data contains repeated measurements of average reaction
+#' # times for each subject over multiple days, in a sleep deprivation study.
+#' # It is in long-format, i.e. each row corresponds to a single measurement.
+#' # The variable "Days" contains the timepoint of the measurement, and
+#' # "Reaction" contains the measurement itself. Converting this data to wide
+#' # format will create a new column for each day, with the reaction time as the
+#' # value.
+#' head(sleepstudy)
+#'
+#' data_to_wide(
+#'   sleepstudy,
+#'   id_cols = "Subject",
+#'   names_from = "Days",
+#'   values_from = "Reaction"
+#' )
+#'
+#' # clearer column names
+#' data_to_wide(
+#'   sleepstudy,
+#'   id_cols = "Subject",
+#'   names_from = "Days",
+#'   values_from = "Reaction",
+#'   names_prefix = "Reaction_Day_"
+#' )
+#'
+#' # For unequal group sizes, missing information is filled with NA
+#' d <- subset(sleepstudy, Days %in% c(0, 1, 2, 3, 4))[c(1:9, 11:13, 16:17, 21), ]
+#'
+#' # long format, different number of "Subjects"
+#' d
+#'
+#' data_to_wide(
+#'   d,
+#'   id_cols = "Subject",
+#'   names_from = "Days",
+#'   values_from = "Reaction",
+#'   names_prefix = "Reaction_Day_"
+#' )
+#'
+#' # filling missing values with 0
+#' data_to_wide(
+#'   d,
+#'   id_cols = "Subject",
+#'   names_from = "Days",
+#'   values_from = "Reaction",
+#'   names_prefix = "Reaction_Day_",
+#'   values_fill = 0
+#' )
 #' @inherit data_rename seealso
 #' @export
-
 data_to_wide <- function(data,
                          id_cols = NULL,
                          values_from = "Value",
@@ -238,7 +318,7 @@ data_to_wide <- function(data,
   # stop if some column names would be duplicated (follow tidyr workflow)
   if (any(unstacked$col_order %in% current_colnames)) {
     insight::format_error(
-      "Some values of the columns specified in 'names_from' are already present as column names.",
+      "Some values of the columns specified in `names_from` are already present as column names.",
       paste0(
         "Either use `names_prefix` or rename the following columns: ",
         text_concatenate(current_colnames[which(current_colnames %in% unstacked$col_order)])
diff --git a/R/data_write.R b/R/data_write.R
index 7e4e543d6..83457d64b 100644
--- a/R/data_write.R
+++ b/R/data_write.R
@@ -244,22 +244,20 @@ data_write <- function(data,
         value_labels <- value_labels[value_labels %in% unique(i)]
 
         # guess variable type
-        if (!is.character(i)) {
+        if (is.character(i)) {
+          # we need this to drop haven-specific class attributes
+          i <- as.character(i)
+        } else if (!is.null(value_labels) && length(value_labels) == insight::n_unique(i)) {
           # if all values are labelled, we assume factor. Use labels as levels
-          if (!is.null(value_labels) && length(value_labels) == insight::n_unique(i)) {
-            if (is.numeric(i)) {
-              i <- factor(i, labels = names(value_labels))
-            } else {
-              i <- factor(as.character(i), labels = names(value_labels))
-            }
-            i <- as.character(i)
+          if (is.numeric(i)) {
+            i <- factor(i, labels = names(value_labels))
           } else {
-            # else, fall back to numeric
-            i <- as.numeric(as.character(i))
+            i <- factor(as.character(i), labels = names(value_labels))
           }
-        } else {
-          # we need this to drop haven-specific class attributes
           i <- as.character(i)
+        } else {
+          # else, fall back to numeric
+          i <- as.numeric(as.character(i))
         }
         # add back variable label
         attr(i, "label") <- variable_labels
diff --git a/README.md b/README.md
index 411ad4c72..54f91794d 100644
--- a/README.md
+++ b/README.md
@@ -137,6 +137,9 @@ columns, can be achieved using `extract_column_names()` or
 # find column names matching a pattern
 extract_column_names(iris, starts_with("Sepal"))
 #> [1] "Sepal.Length" "Sepal.Width"
+```
+
+``` r
 
 # return data columns matching a pattern
 data_select(iris, starts_with("Sepal")) |> head()
@@ -155,6 +158,9 @@ It is also possible to extract one or more variables:
 # single variable
 data_extract(mtcars, "gear")
 #>  [1] 4 4 4 3 3 3 3 4 4 4 4 3 3 3 3 3 3 4 4 4 3 3 3 3 3 4 5 5 5 5 5 4
+```
+
+``` r
 
 # more variables
 head(data_extract(iris, ends_with("Width")))
@@ -215,11 +221,17 @@ x
 #> 1 1 a 5  1
 #> 2 2 b 6  2
 #> 3 3 c 7  3
+```
+
+``` r
 y
 #>   c d   e id
 #> 1 6 f 100  2
 #> 2 7 g 101  3
 #> 3 8 h 102  4
+```
+
+``` r
 
 data_merge(x, y, join = "full")
 #>    a    b c id    d   e
@@ -227,32 +239,50 @@ data_merge(x, y, join = "full")
 #> 1  2    b 6  2    f 100
 #> 2  3    c 7  3    g 101
 #> 4 NA <NA> 8  4    h 102
+```
+
+``` r
 
 data_merge(x, y, join = "left")
 #>   a b c id    d   e
 #> 3 1 a 5  1 <NA>  NA
 #> 1 2 b 6  2    f 100
 #> 2 3 c 7  3    g 101
+```
+
+``` r
 
 data_merge(x, y, join = "right")
 #>    a    b c id d   e
 #> 1  2    b 6  2 f 100
 #> 2  3    c 7  3 g 101
 #> 3 NA <NA> 8  4 h 102
+```
+
+``` r
 
 data_merge(x, y, join = "semi", by = "c")
 #>   a b c id
 #> 2 2 b 6  2
 #> 3 3 c 7  3
+```
+
+``` r
 
 data_merge(x, y, join = "anti", by = "c")
 #>   a b c id
 #> 1 1 a 5  1
+```
+
+``` r
 
 data_merge(x, y, join = "inner")
 #>   a b c id d   e
 #> 1 2 b 6  2 f 100
 #> 2 3 c 7  3 g 101
+```
+
+``` r
 
 data_merge(x, y, join = "bind")
 #>    a    b c id    d   e
@@ -323,13 +353,22 @@ tmp
 #> 3  3  3 NA  3
 #> 4 NA NA NA NA
 #> 5  5  5 NA  5
+```
+
+``` r
 
 # indices of empty columns or rows
 empty_columns(tmp)
 #> c 
 #> 3
+```
+
+``` r
 empty_rows(tmp)
 #> [1] 4
+```
+
+``` r
 
 # remove empty columns or rows
 remove_empty_columns(tmp)
@@ -339,12 +378,18 @@ remove_empty_columns(tmp)
 #> 3  3  3  3
 #> 4 NA NA NA
 #> 5  5  5  5
+```
+
+``` r
 remove_empty_rows(tmp)
 #>   a  b  c  d
 #> 1 1  1 NA  1
 #> 2 2 NA NA NA
 #> 3 3  3 NA  3
 #> 5 5  5 NA  5
+```
+
+``` r
 
 # remove empty columns and rows
 remove_empty(tmp)
@@ -365,6 +410,9 @@ table(x)
 #> x
 #>  1  2  3  4  5  6  7  8  9 10 
 #>  2  3  5  3  7  5  5  2 11  7
+```
+
+``` r
 
 # cut into 3 groups, based on distribution (quantiles)
 table(categorize(x, split = "quantile", n_groups = 3))
@@ -398,6 +446,9 @@ summary(swiss)
 #>  Mean   : 41.144   Mean   :19.94   
 #>  3rd Qu.: 93.125   3rd Qu.:21.70   
 #>  Max.   :100.000   Max.   :26.60
+```
+
+``` r
 
 # after
 summary(standardize(swiss))
@@ -436,6 +487,9 @@ anscombe
 #> 9  12 12 12  8 10.84 9.13  8.15  5.56
 #> 10  7  7  7  8  4.82 7.26  6.42  7.91
 #> 11  5  5  5  8  5.68 4.74  5.73  6.89
+```
+
+``` r
 
 # after
 winsorize(anscombe)
@@ -487,6 +541,9 @@ head(trees)
 #> 4  10.5     72   16.4
 #> 5  10.7     81   18.8
 #> 6  10.8     83   19.7
+```
+
+``` r
 
 # after
 head(ranktransform(trees))
@@ -519,6 +576,9 @@ x
 #> Mazda RX4     21.0   6  160 110
 #> Mazda RX4 Wag 21.0   6  160 110
 #> Datsun 710    22.8   4  108  93
+```
+
+``` r
 
 data_rotate(x)
 #>      Mazda RX4 Mazda RX4 Wag Datsun 710
diff --git a/inst/WORDLIST b/inst/WORDLIST
index 9d8f23406..a3dd80b42 100644
--- a/inst/WORDLIST
+++ b/inst/WORDLIST
@@ -83,6 +83,7 @@ partialization
 patilindrajeets
 platykurtic
 poorman
+pre
 pth
 px
 readr
diff --git a/man/data_read.Rd b/man/data_read.Rd
index 6eda842ae..1ae3cea8a 100644
--- a/man/data_read.Rd
+++ b/man/data_read.Rd
@@ -123,7 +123,7 @@ their most appropriate type. The major difference to \code{rio::import()} is tha
 factors, where imported value labels will be set as factor levels. If a
 numeric variable has \emph{no} value labels or less value labels than values, it
 is not converted to factor. In this case, value labels are preserved as
-\code{"labels"} attribute. Character vectors are preserved.  Use
+\code{"labels"} attribute. Character vectors are preserved. Use
 \code{convert_factors = FALSE} to remove the automatic conversion of numeric
 variables to factors.
 }
diff --git a/man/data_restoretype.Rd b/man/data_restoretype.Rd
index 39a745154..a0ddc5dd0 100644
--- a/man/data_restoretype.Rd
+++ b/man/data_restoretype.Rd
@@ -7,7 +7,7 @@
 data_restoretype(data, reference = NULL, ...)
 }
 \arguments{
-\item{data}{A data frame to pivot.}
+\item{data}{A data frame for which to restore the column types.}
 
 \item{reference}{A reference data frame from which to find the correct
 column types. If \code{NULL}, each column is converted to numeric if it doesn't
diff --git a/man/data_to_long.Rd b/man/data_to_long.Rd
index ea478c545..741725d25 100644
--- a/man/data_to_long.Rd
+++ b/man/data_to_long.Rd
@@ -38,7 +38,8 @@ reshape_longer(
 )
 }
 \arguments{
-\item{data}{A data frame to pivot.}
+\item{data}{A data frame to convert to long format, so that it has more
+rows and fewer columns after the operation.}
 
 \item{select}{Variables that will be included when performing the required
 tasks. Can be either
@@ -72,18 +73,22 @@ If \code{NULL}, selects all columns. Patterns that found no matches are silently
 ignored, e.g. \code{extract_column_names(iris, select = c("Species", "Test"))}
 will just return \code{"Species"}.}
 
-\item{names_to}{The name of the new column that will contain the column
-names.}
+\item{names_to}{The name of the new column (variable) that will contain the
+\emph{names} from columns in \code{select} as values, to identify the source of the
+values. \code{names_to} can be a character vector with more than one column name,
+in which case \code{names_sep} or \code{names_pattern} must be provided in order to
+identify which parts of the column names go into newly created columns.
+See also 'Examples'.}
 
 \item{names_prefix}{A regular expression used to remove matching text from
 the start of each variable name.}
 
 \item{names_sep, names_pattern}{If \code{names_to} contains multiple values, this
-argument controls how the column name is broken up.
-\code{names_pattern} takes a regular expression containing matching groups, i.e. "()".}
+argument controls how the column name is broken up. \code{names_pattern} takes a
+regular expression containing matching groups, i.e. "()".}
 
-\item{values_to}{The name of the new column that will contain the values of
-the pivoted variables.}
+\item{values_to}{The name of the new column that will contain the \emph{values} of
+the columns in \code{select}.}
 
 \item{values_drop_na}{If \code{TRUE}, will drop rows that contain only \code{NA} in the
 \code{values_to} column. This effectively converts explicit missing values to
@@ -121,19 +126,73 @@ This function "lengthens" data, increasing the number of rows and decreasing
 the number of columns. This is a dependency-free base-R equivalent of
 \code{tidyr::pivot_longer()}.
 }
+\details{
+Reshaping data into long format usually means that the input data frame is
+in \emph{wide} format, where multiple measurements taken on the same subject are
+stored in multiple columns (variables). The long format stores the same
+information in a single column, with each measurement per subject stored in
+a separate row. The values of all variables that are not in \code{select} will
+be repeated.
+
+The necessary information for \code{data_to_long()} is:
+\itemize{
+\item The columns that contain the repeated measurements (\code{select}).
+\item The name of the newly created column that will contain the names of the
+columns in \code{select} (\code{names_to}), to identify the source of the values.
+\code{names_to} can also be a character vector with more than one column name,
+in which case \code{names_sep} or \code{names_pattern} must be provided to specify
+which parts of the column names go into the newly created columns.
+\item The name of the newly created column that contains the values of the
+columns in \code{select} (\code{values_to}).
+}
+
+In other words: repeated measurements that are spread across several columns
+will be gathered into a single column (\code{values_to}), with the original column
+names, that identify the source of the gathered values, stored in one or more
+new columns (\code{names_to}).
+}
 \examples{
 \dontshow{if (requireNamespace("psych") && requireNamespace("tidyr")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
-wide_data <- data.frame(replicate(5, rnorm(10)))
+wide_data <- setNames(
+  data.frame(replicate(2, rnorm(8))),
+  c("Time1", "Time2")
+)
+wide_data$ID <- 1:8
+wide_data
 
-# Default behaviour (equivalent to tidyr::pivot_longer(wide_data, cols = 1:5))
+# Default behaviour (equivalent to tidyr::pivot_longer(wide_data, cols = 1:3))
+# probably doesn't make much sense to mix "time" and "id"
 data_to_long(wide_data)
 
 # Customizing the names
-data_to_long(wide_data,
-  select = c(1, 2),
-  names_to = "Column",
-  values_to = "Numbers",
-  rows_to = "Row"
+data_to_long(
+  wide_data,
+  select = c("Time1", "Time2"),
+  names_to = "Timepoint",
+  values_to = "Score"
+)
+
+# Reshape multiple columns into long format.
+mydat <- data.frame(
+  age = c(20, 30, 40),
+  sex = c("Female", "Male", "Male"),
+  score_t1 = c(30, 35, 32),
+  score_t2 = c(33, 34, 37),
+  score_t3 = c(36, 35, 38),
+  speed_t1 = c(2, 3, 1),
+  speed_t2 = c(3, 4, 5),
+  speed_t3 = c(1, 8, 6)
+)
+# The column names are split into two columns: "type" and "time". The
+# pattern for splitting column names is provided in `names_pattern`. Values
+# of all "score_*" and "speed_*" columns are gathered into a single column
+# named "count".
+data_to_long(
+  mydat,
+  select = 3:8,
+  names_to = c("type", "time"),
+  names_pattern = "(score|speed)_t(\\\\d+)",
+  values_to = "count"
 )
 
 # Full example
@@ -141,20 +200,22 @@ data_to_long(wide_data,
 data <- psych::bfi # Wide format with one row per participant's personality test
 
 # Pivot long format
-data_to_long(data,
+very_long_data <- data_to_long(data,
   select = regex("\\\\d"), # Select all columns that contain a digit
   names_to = "Item",
   values_to = "Score",
   rows_to = "Participant"
 )
+head(very_long_data)
 
-data_to_long(
+even_longer_data <- data_to_long(
   tidyr::who,
   select = new_sp_m014:newrel_f65,
   names_to = c("diagnosis", "gender", "age"),
   names_pattern = "new_?(.*)_(.)(.*)",
   values_to = "count"
 )
+head(even_longer_data)
 \dontshow{\}) # examplesIf}
 }
 \seealso{
diff --git a/man/data_to_wide.Rd b/man/data_to_wide.Rd
index e0f36b7e6..8b781fc76 100644
--- a/man/data_to_wide.Rd
+++ b/man/data_to_wide.Rd
@@ -32,16 +32,25 @@ reshape_wider(
 )
 }
 \arguments{
-\item{data}{A data frame to pivot.}
-
-\item{id_cols}{The name of the column that identifies the rows. If \code{NULL},
-it will use all the unique rows.}
-
-\item{values_from}{The name of the column that contains the values to be used
-as future variable values.}
-
-\item{names_from}{The name of the column that contains the levels to be
-used as future column names.}
+\item{data}{A data frame to convert to wide format, so that it has more
+columns and fewer rows post-widening than pre-widening.}
+
+\item{id_cols}{The name of the column that identifies the rows in the data
+by which observations are grouped and the gathered data is spread into new
+columns. Usually, this is a variable containing an ID for observations that
+have been repeatedly measured. If \code{NULL}, it will use all remaining columns
+that are not in \code{names_from} or \code{values_from} as ID columns. \code{id_cols} can
+also be a character vector with more than one name of identifier columns. See
+also 'Details' and 'Examples'.}
+
+\item{values_from}{The name of the column(s) in the original data that contain
+the values used to fill the new columns created in the widened data.}
+
+\item{names_from}{The name of the column in the original data whose values
+will be used for naming the new columns created in the widened data. Each
+unique value in this column will become the name of one of these new columns.
+In case \code{names_prefix} is provided, column names will be concatenated with
+the string given in \code{names_prefix}.}
 
 \item{names_sep}{If \code{names_from} or \code{values_from} contains multiple variables,
 this will be used to join their values together into a single string to use
@@ -64,7 +73,7 @@ missing values in the new columns created.}
 \item{...}{Not used for now.}
 }
 \value{
-If a tibble was provided as input, \code{reshape_wider()} also returns a
+If a tibble was provided as input, \code{data_to_wide()} also returns a
 tibble. Otherwise, it returns a data frame.
 }
 \description{
@@ -72,7 +81,29 @@ This function "widens" data, increasing the number of columns and decreasing
 the number of rows. This is a dependency-free base-R equivalent of
 \code{tidyr::pivot_wider()}.
 }
+\details{
+Reshaping data into wide format usually means that the input data frame is
+in \emph{long} format, where multiple measurements taken on the same subject are
+stored in multiple rows. The wide format stores the same information in a
+single row, with each measurement stored in a separate column. Thus, the
+necessary information for \code{data_to_wide()} is:
+\itemize{
+\item The name of the column(s) that identify the groups or repeated measurements
+(\code{id_cols}).
+\item The name of the column whose \emph{values} will become the new column names
+(\code{names_from}). Since these values may not necessarily reflect appropriate
+column names, you can use \code{names_prefix} to add a prefix to each newly
+created column name.
+\item The name of the column that contains the values (\code{values_from}) for the
+new columns that are created by \code{names_from}.
+}
+
+In other words: repeated measurements, as indicated by \code{id_cols}, that are
+saved into the column \code{values_from} will be spread into new columns, which
+will be named after the values in \code{names_from}. See also 'Examples'.
+}
 \examples{
+\dontshow{if (requireNamespace("lme4", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
 data_long <- read.table(header = TRUE, text = "
  subject sex condition measurement
        1   M   control         7.9
@@ -88,7 +119,7 @@ data_long <- read.table(header = TRUE, text = "
        4   M     cond1        13.4
        4   M     cond2        12.9")
 
-
+# converting long data into wide format
 data_to_wide(
   data_long,
   id_cols = "subject",
@@ -96,6 +127,7 @@ data_to_wide(
   values_from = "measurement"
 )
 
+# converting long data into wide format with custom column names
 data_to_wide(
   data_long,
   id_cols = "subject",
@@ -105,13 +137,13 @@ data_to_wide(
   names_sep = "."
 )
 
+# converting long data into wide format, combining multiple columns
 production <- expand.grid(
   product = c("A", "B"),
   country = c("AI", "EI"),
   year = 2000:2014
 )
 production <- data_filter(production, (product == "A" & country == "AI") | product == "B")
-
 production$production <- rnorm(nrow(production))
 
 data_to_wide(
@@ -121,6 +153,58 @@ data_to_wide(
   names_glue = "prod_{product}_{country}"
 )
 
+# using the "sleepstudy" dataset
+data(sleepstudy, package = "lme4")
+
+# the sleepstudy data contains repeated measurements of average reaction
+# times for each subject over multiple days, in a sleep deprivation study.
+# It is in long-format, i.e. each row corresponds to a single measurement.
+# The variable "Days" contains the timepoint of the measurement, and
+# "Reaction" contains the measurement itself. Converting this data to wide
+# format will create a new column for each day, with the reaction time as the
+# value.
+head(sleepstudy)
+
+data_to_wide(
+  sleepstudy,
+  id_cols = "Subject",
+  names_from = "Days",
+  values_from = "Reaction"
+)
+
+# clearer column names
+data_to_wide(
+  sleepstudy,
+  id_cols = "Subject",
+  names_from = "Days",
+  values_from = "Reaction",
+  names_prefix = "Reaction_Day_"
+)
+
+# For unequal group sizes, missing information is filled with NA
+d <- subset(sleepstudy, Days \%in\% c(0, 1, 2, 3, 4))[c(1:9, 11:13, 16:17, 21), ]
+
+# long format, unequal number of measurements per subject
+d
+
+data_to_wide(
+  d,
+  id_cols = "Subject",
+  names_from = "Days",
+  values_from = "Reaction",
+  names_prefix = "Reaction_Day_"
+)
+
+# filling missing values with 0
+data_to_wide(
+  d,
+  id_cols = "Subject",
+  names_from = "Days",
+  values_from = "Reaction",
+  names_prefix = "Reaction_Day_",
+  values_fill = 0
+)
+\dontshow{\}) # examplesIf}
 }
 \seealso{
 \itemize{
diff --git a/tests/testthat/test-data_to_wide.R b/tests/testthat/test-data_to_wide.R
index 88fbb699e..d716ff222 100644
--- a/tests/testthat/test-data_to_wide.R
+++ b/tests/testthat/test-data_to_wide.R
@@ -35,7 +35,7 @@ test_that("data_to_wide works", {
       values_from = "value",
       id_cols = "Row_ID"
     ),
-    regexp = "Some values of the columns specified in 'names_from'"
+    regexp = "Some values of the columns specified in `names_from`"
   )
 })
 
@@ -44,12 +44,12 @@ test_that("data_to_wide works", {
 test_that("data_to_wide, names_prefix works", {
   skip_if_not_installed("tidyr")
 
-  out <- tidyr::fish_encounters %>%
-    data_to_wide(
-      names_from = "station",
-      values_from = "seen",
-      names_prefix = "foo_"
-    )
+  out <- data_to_wide(
+    tidyr::fish_encounters,
+    names_from = "station",
+    values_from = "seen",
+    names_prefix = "foo_"
+  )
 
   expect_named(
     out,
@@ -67,12 +67,12 @@ test_that("data_to_wide, values_fill works", {
 
   ### Should be numeric
   expect_identical(
-    data %>%
-      data_to_wide(
-        names_from = "station",
-        values_from = "seen",
-        values_fill = 1
-      ),
+    data_to_wide(
+      data,
+      names_from = "station",
+      values_from = "seen",
+      values_fill = 1
+    ),
     tidyr::tibble(
       fish = factor(
         c("4842", "4843", "4844"),
@@ -91,21 +91,21 @@ test_that("data_to_wide, values_fill works", {
     )
   )
   expect_error(
-    data %>%
-      data_to_wide(
-        names_from = "station",
-        values_from = "seen",
-        values_fill = "a"
-      ),
+    data_to_wide(
+      data,
+      names_from = "station",
+      values_from = "seen",
+      values_fill = "a"
+    ),
     regexp = "must be of type numeric"
   )
   expect_error(
-    data %>%
-      data_to_wide(
-        names_from = "station",
-        values_from = "seen",
-        values_fill = factor("a")
-      ),
+    data_to_wide(
+      data,
+      names_from = "station",
+      values_from = "seen",
+      values_fill = factor("a")
+    ),
     regexp = "must be of type numeric"
   )
 
@@ -120,12 +120,12 @@ test_that("data_to_wide, values_fill works", {
   contacts$person_id <- cumsum(contacts$field == "name")
 
   expect_identical(
-    contacts %>%
-      data_to_wide(
-        names_from = "field",
-        values_from = "value",
-        values_fill = "foo"
-      ),
+    data_to_wide(
+      contacts,
+      names_from = "field",
+      values_from = "value",
+      values_fill = "foo"
+    ),
     tidyr::tibble(
       person_id = 1:3,
       name = c("Jiena McLellan", "John Smith", "Huxley Ratcliffe"),
@@ -133,42 +133,42 @@ test_that("data_to_wide, values_fill works", {
     )
   )
   expect_error(
-    contacts %>%
-      data_to_wide(
-        names_from = "field",
-        values_from = "value",
-        values_fill = 1
-      ),
+    data_to_wide(
+      contacts,
+      names_from = "field",
+      values_from = "value",
+      values_fill = 1
+    ),
     regexp = "must be of type character"
   )
   expect_error(
-    contacts %>%
-      data_to_wide(
-        names_from = "field",
-        values_from = "value",
-        values_fill = factor("a")
-      ),
+    data_to_wide(
+      contacts,
+      names_from = "field",
+      values_from = "value",
+      values_fill = factor("a")
+    ),
     regexp = "must be of type character"
   )
 
   ### Should be factor
   contacts$value <- as.factor(contacts$value)
   expect_error(
-    contacts %>%
-      data_to_wide(
-        names_from = "field",
-        values_from = "value",
-        values_fill = "a"
-      ),
+    data_to_wide(
+      contacts,
+      names_from = "field",
+      values_from = "value",
+      values_fill = "a"
+    ),
     regexp = "must be of type factor"
   )
   expect_error(
-    contacts %>%
-      data_to_wide(
-        names_from = "field",
-        values_from = "value",
-        values_fill = 1
-      ),
+    data_to_wide(
+      contacts,
+      names_from = "field",
+      values_from = "value",
+      values_fill = 1
+    ),
     regexp = "must be of type factor"
   )
 })
@@ -177,12 +177,12 @@ test_that("data_to_wide, values_fill errors when length > 1", {
   skip_if_not_installed("tidyr")
 
   expect_error(
-    tidyr::fish_encounters %>%
-      data_to_wide(
-        names_from = "station",
-        values_from = "seen",
-        values_fill = c(1, 2)
-      ),
+    data_to_wide(
+      tidyr::fish_encounters,
+      names_from = "station",
+      values_from = "seen",
+      values_fill = c(1, 2)
+    ),
     regexp = "must be of length 1"
   )
 })
@@ -302,15 +302,19 @@ test_that("data_to_wide, id_cols works correctly, #293", {
 test_that("data_to_wide equivalent to pivot_wider: ex 1", {
   skip_if_not_installed("tidyr")
 
-  x <- tidyr::fish_encounters %>%
-    tidyr::pivot_wider(names_from = "station", values_from = "seen", values_fill = 0)
+  x <- tidyr::pivot_wider(
+    tidyr::fish_encounters,
+    names_from = "station",
+    values_from = "seen",
+    values_fill = 0
+  )
 
-  y <- tidyr::fish_encounters %>%
-    data_to_wide(
-      names_from = "station",
-      values_from = "seen",
-      values_fill = 0
-    )
+  y <- data_to_wide(
+    tidyr::fish_encounters,
+    names_from = "station",
+    values_from = "seen",
+    values_fill = 0
+  )
 
   expect_equal(x, y, ignore_attr = TRUE)
 })
@@ -394,11 +398,8 @@ test_that("data_to_wide equivalent to pivot_wider: ex 5", {
   )
   contacts$person_id <- cumsum(contacts$field == "name")
 
-  x <- contacts %>%
-    tidyr::pivot_wider(names_from = field, values_from = value)
-
-  y <- contacts %>%
-    data_to_wide(names_from = "field", values_from = "value")
+  x <- tidyr::pivot_wider(contacts, names_from = field, values_from = value)
+  y <- data_to_wide(contacts, names_from = "field", values_from = "value")
 
   expect_identical(x, y)
 })