diff --git a/DESCRIPTION b/DESCRIPTION index 3afc557..cc5a67c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -49,7 +49,6 @@ Imports: stringr, base, readr, - lifecycle, huxtable, crayon, data.table, @@ -61,7 +60,8 @@ Imports: sp, withr, cli, - purrr + purrr, + lifecycle RoxygenNote: 7.3.1 Suggests: knitr, diff --git a/NAMESPACE b/NAMESPACE index f63564d..07bc09a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -7,6 +7,7 @@ export(convert_datetime_format) export(convert_long_to_utm) export(convert_utm_to_ll) export(create_datastore_script) +export(document_missing_values) export(fix_utc_offset) export(fuzz_location) export(generate_ll_from_utm) diff --git a/NEWS.md b/NEWS.md index c631921..cedb49b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,8 @@ # QCkit v0.1.8 (not yet released) +2024-07-16 +* Added experimental function `document_missing_values()`, which searches a file for multiple missing value codes, replaces them all with NA, and generates a new column with the missing value codes so that they can be properly documented in EML. This is a work-around for the fact that there is currently not a good way to get multiple missing value codes in a single column via EMLassemblyline. This function is still under development; expect substantial changes and improvements up to and including removing the function entirely. + 2024-07-09 * Added function `get_user_email()`, which accesses NPS active directory via a powershell function to return the user's email address. Probably won't work for non-NPS users and probably won't work for non-windows users. * Updated rest API from legacy v6 to current v7. 
diff --git a/R/replace_blanks.R b/R/replace_blanks.R
index 6b9a835..43f9bc6 100644
--- a/R/replace_blanks.R
+++ b/R/replace_blanks.R
@@ -93,3 +93,91 @@ replace_blanks <- function(directory = here::here(), missing_val_code = NA) {
   }
   return(invisible())
 }
+
+
+#' Handles multiple missing values
+#'
+#' @description
+#' `r lifecycle::badge("experimental")`
+#' `r lifecycle::badge("questioning")`
+#' Given a file name (.csv only) and path, the function will search the
+#' columns for any that contain multiple user-specified missing value codes.
+#' For any column with multiple missing value codes, all the missing values
+#' (including blanks) will be replaced with `replace_value` (NA by default).
+#' A new column will be generated and populated with the given missing value
+#' code from the origin column. Values that were not missing will be
+#' populated with "not_missing". The newly generated column of categorical
+#' variables can be used to describe the various/multiple reasons for why
+#' data is absent in the original column.
+#'
+#' The function will then write the new dataframe to a file, overwriting the
+#' original file. If it is important to keep a copy of the original file, make
+#' a copy prior to running the function.
+#'
+#' WARNING: this function will replace any blank cells in your data with NA!
+#'
+#' @details Blank cells will be treated as NA. The new column is named
+#'   `custom_<column>_MissingValues` and is appended after the existing columns.
+#'
+#' @param file_name String. The name of the file to inspect
+#' @param directory String. Location of file to read/write. Defaults to the
+#'   path returned by `here::here()`.
+#' @param colname `r lifecycle::badge("experimental")` String. The columns to
+#'   inspect. CURRENTLY ONLY WORKS AS SET TO DEFAULT `NA`; with any other value
+#'   no columns are inspected.
+#' @param missing_val_codes Character vector (or list) of strings containing
+#'   the missing value code or codes to search for. `NA` is always included.
+#' @param replace_value String. The value (singular) to replace multiple
+#'   missing values with. Defaults to NA.
+#'
+#' @return Invisibly returns `NULL`; called for its side effect of writing the
+#'   modified dataframe back to the original file.
+#' @export
+#'
+#' @examples
+#' \dontrun{
+#' document_missing_values(file_name = "mydata.csv",
+#'                         directory = here::here(),
+#'                         colname = NA, #do not change during function development
+#'                         missing_val_codes = c("missing", "blank", "no data"),
+#'                         replace_value = NA)
+#'                         }
+document_missing_values <- function(file_name,
+                                    directory = here::here(),
+                                    colname = NA,
+                                    missing_val_codes = NA,
+                                    replace_value = NA) {
+
+  file_path <- file.path(directory, file_name)
+
+  #read in a dataframe:
+  df <- readr::read_csv(file_path, show_col_types = FALSE)
+
+  #generate list of missing values (NA always counts as missing):
+  missing_val_codes <- unique(append(missing_val_codes, NA))
+
+  #colname is not implemented yet; only the default (all columns) does anything.
+  #The length check keeps a multi-element colname from erroring inside if():
+  if (length(colname) == 1 && is.na(colname)) {
+    #iterate over the original column names only; looping over names (not
+    #1:ncol) is also safe for a file with zero columns:
+    for (data_name in colnames(df)) {
+      values <- df[[data_name]]
+      is_missing <- values %in% missing_val_codes
+      #if there are missing value codes other than NA in the column:
+      if (sum(is_missing) > sum(is.na(values))) {
+        #generate new column of data; assigning under its final name (rather
+        #than a temporary "x") cannot clobber a real data column called "x":
+        new_name <- paste0("custom_", data_name, "_MissingValues")
+        df[[new_name]] <- ifelse(is_missing, values, "not_missing")
+        #replace old missing values with replacement value:
+        df[[data_name]] <- ifelse(is_missing, replace_value, values)
+      }
+    }
+  }
+
+  #write the file back out:
+  readr::write_csv(df, file_path)
+
+  return(invisible())
+}
diff --git a/docs/index.html b/docs/index.html
index 5fb3f58..2573281 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -123,7 +123,7 @@

Links

License

diff --git a/docs/news/index.html b/docs/news/index.html index fd274e8..9984134 100644 --- a/docs/news/index.html +++ b/docs/news/index.html @@ -65,6 +65,7 @@

Changelog

+

2024-07-16 * Added experimental function document_missing_values(), which searches a file for multiple missing value codes, replaces them all with NA, and generates a new column with the missing value codes so that they can be properly documented in EML. This is a work-around for the fact that there is currently not a good way to get multiple missing value codes in a single column via EMLassemblyline. This function is still under development; expect substantial changes and improvements up to and including removing the function entirely.

2024-07-09 * Added function get_user_email(), which accesses NPS active directory via a powershell function to return the user’s email address. Probably won’t work for non-NPS users and probably won’t work for non-windows users. * Updated rest API from legacy v6 to current v7.

2024-06-28 * Updated get_park_polygon() to use the new API (had been using a legacy API). Added documentation to specify that the function is getting the convexhull for the park, which may not work particularly well for some parks. 2024-06-27 * bug fixes for generate_ll_from_utm() * add function remove_empty_tables() (and associated unit tests) * update documentation for replace blanks() to indicate it can replace blanks with more than just NA

diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml index 6e19b1b..e07e1b3 100644 --- a/docs/pkgdown.yml +++ b/docs/pkgdown.yml @@ -5,5 +5,5 @@ articles: DRR_Purpose_and_Scope: DRR_Purpose_and_Scope.html Starting-a-DRR: Starting-a-DRR.html Using-the-DRR-Template: Using-the-DRR-Template.html -last_built: 2024-07-09T14:49Z +last_built: 2024-07-16T15:01Z diff --git a/docs/reference/document_missing_values.html b/docs/reference/document_missing_values.html new file mode 100644 index 0000000..c86f3d0 --- /dev/null +++ b/docs/reference/document_missing_values.html @@ -0,0 +1,174 @@ + +Handles multiple missing values — document_missing_values • QCkit + + +
+
+ + + +
+
+ + +
+

[Experimental] +[Questioning] +Given a file name (.csv only) and path, the function will search the +columns for any that contain multiple user-specified missing value codes. +For any column with multiple missing value codes, all the missing values +(including blanks) will be replaced with NA. A new column will be generated +and populated with the given missing value code from the origin column. +Values that were not missing will be populated with "not_missing". The +newly generated column of categorical variables can be used to describe +the various/multiple reasons for why data is absent in the original column.

+

The function will then write the new dataframe to a file, overwriting the +original file. If it is important to keep a copy of the original file, make +a copy prior to running the function.

+

WARNING: this function will replace any blank cells in your data with NA!

+
+ +
+
document_missing_values(
+  file_name,
+  directory = here::here(),
+  colname = NA,
+  missing_val_codes = NA,
+  replace_value = NA
+)
+
+ +
+

Arguments

+
file_name
+

String. The name of the file to inspect

+ + +
directory
+

String. Location of file to read/write. Defaults to the current working directory.

+ + +
colname
+

[Experimental] String. The columns to inspect. CURRENTLY ONLY WORKS AS SET TO DEFAULT "NA".

+ + +
missing_val_codes
+

List. A list of strings containing the missing value code or codes to search for.

+ + +
replace_value
+

String. The value (singular) to replace multiple missing values with. Defaults to NA.

+ +
+
+

Value

+ + +

writes a new dataframe to file. Return invisible.

+
+
+

Details

+

Blank cells will be treated as NA.

+
+ +
+

Examples

+
if (FALSE) {
+document_missing_values(file_name = "mydata.csv",
+                        directory = here::here(),
+                        colname = NA, #do not change during function development
+                        missing_val_codes = c("missing", "blank", "no data"),
+                        replace_value = NA)
+                        }
+
+
+
+ +
+ + +
+ + + + + + + + diff --git a/docs/reference/figures/lifecycle-archived.svg b/docs/reference/figures/lifecycle-archived.svg index 48f72a6..745ab0c 100644 --- a/docs/reference/figures/lifecycle-archived.svg +++ b/docs/reference/figures/lifecycle-archived.svg @@ -1 +1,21 @@ - lifecyclelifecyclearchivedarchived \ No newline at end of file + + lifecycle: archived + + + + + + + + + + + + + + + lifecycle + + archived + + diff --git a/docs/reference/figures/lifecycle-defunct.svg b/docs/reference/figures/lifecycle-defunct.svg index 01452e5..d5c9559 100644 --- a/docs/reference/figures/lifecycle-defunct.svg +++ b/docs/reference/figures/lifecycle-defunct.svg @@ -1 +1,21 @@ -lifecyclelifecycledefunctdefunct \ No newline at end of file + + lifecycle: defunct + + + + + + + + + + + + + + + lifecycle + + defunct + + diff --git a/docs/reference/figures/lifecycle-deprecated.svg b/docs/reference/figures/lifecycle-deprecated.svg index 4baaee0..b61c57c 100644 --- a/docs/reference/figures/lifecycle-deprecated.svg +++ b/docs/reference/figures/lifecycle-deprecated.svg @@ -1 +1,21 @@ -lifecyclelifecycledeprecateddeprecated \ No newline at end of file + + lifecycle: deprecated + + + + + + + + + + + + + + + lifecycle + + deprecated + + diff --git a/docs/reference/figures/lifecycle-experimental.svg b/docs/reference/figures/lifecycle-experimental.svg index d1d060e..5d88fc2 100644 --- a/docs/reference/figures/lifecycle-experimental.svg +++ b/docs/reference/figures/lifecycle-experimental.svg @@ -1 +1,21 @@ -lifecyclelifecycleexperimentalexperimental \ No newline at end of file + + lifecycle: experimental + + + + + + + + + + + + + + + lifecycle + + experimental + + diff --git a/docs/reference/figures/lifecycle-maturing.svg b/docs/reference/figures/lifecycle-maturing.svg index df71310..897370e 100644 --- a/docs/reference/figures/lifecycle-maturing.svg +++ b/docs/reference/figures/lifecycle-maturing.svg @@ -1 +1,21 @@ -lifecyclelifecyclematuringmaturing \ No newline at end of file + + lifecycle: 
maturing + + + + + + + + + + + + + + + lifecycle + + maturing + + diff --git a/docs/reference/figures/lifecycle-questioning.svg b/docs/reference/figures/lifecycle-questioning.svg index 08ee0c9..7c1721d 100644 --- a/docs/reference/figures/lifecycle-questioning.svg +++ b/docs/reference/figures/lifecycle-questioning.svg @@ -1 +1,21 @@ -lifecyclelifecyclequestioningquestioning \ No newline at end of file + + lifecycle: questioning + + + + + + + + + + + + + + + lifecycle + + questioning + + diff --git a/docs/reference/figures/lifecycle-soft-deprecated.svg b/docs/reference/figures/lifecycle-soft-deprecated.svg new file mode 100644 index 0000000..9c166ff --- /dev/null +++ b/docs/reference/figures/lifecycle-soft-deprecated.svg @@ -0,0 +1,21 @@ + + lifecycle: soft-deprecated + + + + + + + + + + + + + + + lifecycle + + soft-deprecated + + diff --git a/docs/reference/figures/lifecycle-stable.svg b/docs/reference/figures/lifecycle-stable.svg index e015dc8..9bf21e7 100644 --- a/docs/reference/figures/lifecycle-stable.svg +++ b/docs/reference/figures/lifecycle-stable.svg @@ -1 +1,29 @@ -lifecyclelifecyclestablestable \ No newline at end of file + + lifecycle: stable + + + + + + + + + + + + + + + + lifecycle + + + + stable + + + diff --git a/docs/reference/figures/lifecycle-superseded.svg b/docs/reference/figures/lifecycle-superseded.svg index 75f24f5..db8d757 100644 --- a/docs/reference/figures/lifecycle-superseded.svg +++ b/docs/reference/figures/lifecycle-superseded.svg @@ -1 +1,21 @@ - lifecyclelifecyclesupersededsuperseded \ No newline at end of file + + lifecycle: superseded + + + + + + + + + + + + + + + lifecycle + + superseded + + diff --git a/docs/reference/index.html b/docs/reference/index.html index 268af56..b7d29f1 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -90,6 +90,10 @@

All functions create_datastore_script()

Turn a GitHub release into a DataStore Script Reference

+ +

document_missing_values()

+ +

Handles multiple missing values

.get_unit_boundary()

diff --git a/docs/sitemap.xml b/docs/sitemap.xml index 31e79b0..ec11d5b 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -51,6 +51,9 @@ /reference/DC_col_check.html + + /reference/document_missing_values.html + /reference/dot-get_unit_boundary.html diff --git a/man/document_missing_values.Rd b/man/document_missing_values.Rd new file mode 100644 index 0000000..62aa6c2 --- /dev/null +++ b/man/document_missing_values.Rd @@ -0,0 +1,58 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/replace_blanks.R +\name{document_missing_values} +\alias{document_missing_values} +\title{Handles multiple missing values} +\usage{ +document_missing_values( + file_name, + directory = here::here(), + colname = NA, + missing_val_codes = NA, + replace_value = NA +) +} +\arguments{ +\item{file_name}{String. The name of the file to inspect} + +\item{directory}{String. Location of file to read/write. Defaults to the current working directory.} + +\item{colname}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} String. The columns to inspect. CURRENTLY ONLY WORKS AS SET TO DEFAULT "NA".} + +\item{missing_val_codes}{List. A list of strings containing the missing value code or codes to search for.} + +\item{replace_value}{String. The value (singular) to replace multiple missing values with. Defaults to NA.} +} +\value{ +writes a new dataframe to file. Return invisible. 
+} +\description{ +\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} +\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#questioning}{\figure{lifecycle-questioning.svg}{options: alt='[Questioning]'}}}{\strong{[Questioning]}} +Given a file name (.csv only) and path, the function will search the +columns for any that contain multiple user-specified missing value codes. +For any column with multiple missing value codes, all the missing values +(including blanks) will be replaced with NA. A new column will be generated +and populated with the given missing value code from the origin column. +Values that were not missing will be populated with "not_missing". The +newly generated column of categorical variables can be used to describe +the various/multiple reasons for why data is absent in the original column. + +The function will then write the new dataframe to a file, overwriting the +original file. If it is important to keep a copy of the original file, make +a copy prior to running the function. + +WARNING: this function will replace any blank cells in your data with NA! +} +\details{ +Blank cells will be treated as NA. 
+} +\examples{ +\dontrun{ +document_missing_values(file_name = "mydata.csv", + directory = here::here(), + colname = NA, #do not change during function development + missing_val_codes = c("missing", "blank", "no data"), + replace_value = NA) + } +} diff --git a/man/figures/lifecycle-archived.svg b/man/figures/lifecycle-archived.svg index 48f72a6..745ab0c 100644 --- a/man/figures/lifecycle-archived.svg +++ b/man/figures/lifecycle-archived.svg @@ -1 +1,21 @@ - lifecyclelifecyclearchivedarchived \ No newline at end of file + + lifecycle: archived + + + + + + + + + + + + + + + lifecycle + + archived + + diff --git a/man/figures/lifecycle-defunct.svg b/man/figures/lifecycle-defunct.svg index 01452e5..d5c9559 100644 --- a/man/figures/lifecycle-defunct.svg +++ b/man/figures/lifecycle-defunct.svg @@ -1 +1,21 @@ -lifecyclelifecycledefunctdefunct \ No newline at end of file + + lifecycle: defunct + + + + + + + + + + + + + + + lifecycle + + defunct + + diff --git a/man/figures/lifecycle-deprecated.svg b/man/figures/lifecycle-deprecated.svg index 4baaee0..b61c57c 100644 --- a/man/figures/lifecycle-deprecated.svg +++ b/man/figures/lifecycle-deprecated.svg @@ -1 +1,21 @@ -lifecyclelifecycledeprecateddeprecated \ No newline at end of file + + lifecycle: deprecated + + + + + + + + + + + + + + + lifecycle + + deprecated + + diff --git a/man/figures/lifecycle-experimental.svg b/man/figures/lifecycle-experimental.svg index d1d060e..5d88fc2 100644 --- a/man/figures/lifecycle-experimental.svg +++ b/man/figures/lifecycle-experimental.svg @@ -1 +1,21 @@ -lifecyclelifecycleexperimentalexperimental \ No newline at end of file + + lifecycle: experimental + + + + + + + + + + + + + + + lifecycle + + experimental + + diff --git a/man/figures/lifecycle-maturing.svg b/man/figures/lifecycle-maturing.svg index df71310..897370e 100644 --- a/man/figures/lifecycle-maturing.svg +++ b/man/figures/lifecycle-maturing.svg @@ -1 +1,21 @@ -lifecyclelifecyclematuringmaturing \ No newline at end of file + + 
lifecycle: maturing + + + + + + + + + + + + + + + lifecycle + + maturing + + diff --git a/man/figures/lifecycle-questioning.svg b/man/figures/lifecycle-questioning.svg index 08ee0c9..7c1721d 100644 --- a/man/figures/lifecycle-questioning.svg +++ b/man/figures/lifecycle-questioning.svg @@ -1 +1,21 @@ -lifecyclelifecyclequestioningquestioning \ No newline at end of file + + lifecycle: questioning + + + + + + + + + + + + + + + lifecycle + + questioning + + diff --git a/man/figures/lifecycle-soft-deprecated.svg b/man/figures/lifecycle-soft-deprecated.svg new file mode 100644 index 0000000..9c166ff --- /dev/null +++ b/man/figures/lifecycle-soft-deprecated.svg @@ -0,0 +1,21 @@ + + lifecycle: soft-deprecated + + + + + + + + + + + + + + + lifecycle + + soft-deprecated + + diff --git a/man/figures/lifecycle-stable.svg b/man/figures/lifecycle-stable.svg index e015dc8..9bf21e7 100644 --- a/man/figures/lifecycle-stable.svg +++ b/man/figures/lifecycle-stable.svg @@ -1 +1,29 @@ -lifecyclelifecyclestablestable \ No newline at end of file + + lifecycle: stable + + + + + + + + + + + + + + + + lifecycle + + + + stable + + + diff --git a/man/figures/lifecycle-superseded.svg b/man/figures/lifecycle-superseded.svg index 75f24f5..db8d757 100644 --- a/man/figures/lifecycle-superseded.svg +++ b/man/figures/lifecycle-superseded.svg @@ -1 +1,21 @@ - lifecyclelifecyclesupersededsuperseded \ No newline at end of file + + lifecycle: superseded + + + + + + + + + + + + + + + lifecycle + + superseded + +