AEstebanMar · AEstebanMar · Nov 14, 2023 · Nov 20, 2023 · Nov 20, 2023 · Nov 20, 2023
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -1,2 +1,4 @@
 ^data-raw$
 ^LICENSE\.md$
+^README\.Rmd$
+^\.github$
diff --git a/.github/.gitignore b/.github/.gitignore
@@ -0,0 +1 @@
+*.html
diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml
@@ -0,0 +1,29 @@
+# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
+# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
+on:
+  push:
+    branches: [main, master]
+  pull_request:
+    branches: [main, master]
+
+name: R-CMD-check
+
+jobs:
+  R-CMD-check:
+    runs-on: ubuntu-latest
+    env:
+      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
+      R_KEEP_PKG_SOURCE: yes
+    steps:
+      - uses: actions/checkout@v3
+
+      - uses: r-lib/actions/setup-r@v2
+        with:
+          use-public-rspm: true
+
+      - uses: r-lib/actions/setup-r-dependencies@v2
+        with:
+          extra-packages: any::rcmdcheck
+          needs: check
+
+      - uses: r-lib/actions/check-r-package@v2
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: DAGGER
 Title: Drug repositioning by Analysis of GWAS and Gene Expression in R
-Version: 1.0
+Version: 1.1
 Authors@R: person("Alvaro", "Esteban-Martos", , "[email protected]", role = c("aut", "cre"),
   comment = c(ORCID = "ORCID: 0000-0001-9791-7779"))
 Description: Tools to perform drug repositioning analysis in R by merging
@@ -11,10 +11,11 @@ Roxygen: list(markdown = TRUE)
 RoxygenNote: 7.2.3
 Suggests: 
     knitr,
-    rmarkdown
+    rmarkdown,
+    testthat (>= 3.0.0)
 VignetteBuilder: knitr
 Depends: 
-    R (>= 2.10)
+    R (>= 4.3.1)
 Imports:
     ggplot2,
     plyr
@@ -23,3 +24,4 @@ LazyDataCompression: xz
 URL: https://github.com/AEstebanMar/DAGGER
 BugReports: https://github.com/AEstebanMar/DAGGER/issues
 License: MIT + file LICENSE
+Config/testthat/edition: 3
diff --git a/R/input_filter.R b/R/input_filter.R
@@ -55,9 +55,9 @@ remove_duplicate_rs <- function(df) {
 
 filter_significance <- function(df, value = 0.05) {
 	if(is.null(df$p_value)) {
-		message("No statistical significance column found in input or improperly
+		warning("No statistical significance column found in input or improperly
 			parsed. You might want to run it through DAGGER::parse_column_names
-			first. Returning it as-is")
+			first. Returning it as-is", immediate. = TRUE)
 		return(df)
 	}
 	res <- df[as.numeric(df$p_value) <= value, ]

diff --git a/R/merge_gene_var_drug.R b/R/merge_gene_var_drug.R
@@ -21,16 +21,29 @@ merge_gene_var_drug <- function(GWAS, GTEx, DGIdb) {
 	GWAS <- remove_duplicate_rs(
 				filter_significance(parse_column_names(GWAS), 0.05)
 				)
+	GWAS_p_val_column <- grep("p_value", colnames(GWAS))
+	if (length(GWAS_p_val_column) > 0) {
+		colnames(GWAS)[GWAS_p_val_column] <- "p_val_variant"
+	}
 	message('\n\nParsing expression data\n\n')
 	GTEx <- filter_significance(parse_column_names(GTEx), 0.05)
+	GTEx_p_val_column <- grep("p_value", colnames(GTEx))
+	if (length(GTEx_p_val_column) > 0) {
+		colnames(GTEx)[GTEx_p_val_column] <- "p_val_nominal"
+	}
 	message('\n\nParsing drug-gene data\n\n')
 	DGIdb <- parse_column_names(DGIdb)
 
 	message('Merging genes and variants')
 	gene_variants <- merge(GWAS, GTEx, by = "rs_id")
-	colnames(gene_variants)[c(2, 10)] <- c("p_val_variant", "p_val_nominal")
 
 	message('Merging with drug database')
 	res <- merge(gene_variants, DGIdb, by = "gene_symbol")
+	if (!is.null(res$beta_number)) {
+		res$beta_number <- as.numeric(res$beta_number)
+	}
+	if (!is.null(res$slope)) {
+		res$slope <- as.numeric(res$slope)
+	}
 	return(res)
 }
diff --git a/R/repositioning.R b/R/repositioning.R
@@ -25,10 +25,9 @@
 #' @export
 
 predict_effect <- function(gene_variant_df) {
-
 	message('Predicting beneficial drug effect')
-	betas <- gene_variant_df$beta_number < 0
-	slopes <- gene_variant_df$slope > 0
+	betas <- as.numeric(gene_variant_df$beta_number) < 0
+	slopes <- as.numeric(gene_variant_df$slope) > 0
 	prediction <- betas == slopes
 	prediction[prediction == TRUE] <- "activator"
 	prediction[prediction == FALSE] <- "inhibitor"

diff --git a/README.Rmd b/README.Rmd
@@ -0,0 +1,56 @@
+---
+output: github_document
+---
+
+<!-- README.md is generated from README.Rmd. Please edit that file -->
+
+```{r, include = FALSE}
+knitr::opts_chunk$set(
+  collapse = TRUE,
+  comment = "#>",
+  fig.path = "man/figures/README-",
+  out.width = "100%"
+)
+```
+
+# DAGGER
+
+<!-- badges: start -->
+[![R-CMD-check](https://github.com/AEstebanMar/DAGGER/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/AEstebanMar/DAGGER/actions/workflows/R-CMD-check.yaml)
+<!-- badges: end -->
+
+The goal of DAGGER is to support drug repositioning analysis in R by combining GWAS and gene expression data.
+
+## Installation
+
+You can install the development version of DAGGER from [GitHub](https://github.com/) with:
+
+``` r
+# install.packages("devtools")
+devtools::install_github("AEstebanMar/DAGGER")
+```
+
+## Example
+
+This is a basic example which shows you how to solve a common problem:
+
+```{r example}
+library(DAGGER)
+## basic example code
+```
+
+What is special about using `README.Rmd` instead of just `README.md`? You can include R chunks like so:
+
+```{r cars}
+summary(cars)
+```
+
+You'll still need to render `README.Rmd` regularly, to keep `README.md` up-to-date. `devtools::build_readme()` is handy for this.
+
+You can also embed plots, for example:
+
+```{r pressure, echo = FALSE}
+plot(pressure)
+```
+
+In that case, don't forget to commit and push the resulting figure files, so they display on GitHub and CRAN.
diff --git a/README.md b/README.md
@@ -0,0 +1,54 @@
+
+<!-- README.md is generated from README.Rmd. Please edit that file -->
+
+# DAGGER
+
+<!-- badges: start -->
+
+[![R-CMD-check](https://github.com/AEstebanMar/DAGGER/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/AEstebanMar/DAGGER/actions/workflows/R-CMD-check.yaml)
+<!-- badges: end -->
+
+The goal of DAGGER is to support drug repositioning analysis in R by combining GWAS and gene expression data.
+
+## Installation
+
+You can install the development version of DAGGER from
+[GitHub](https://github.com/) with:
+
+``` r
+# install.packages("devtools")
+devtools::install_github("AEstebanMar/DAGGER")
+```
+
+## Example
+
+This is a basic example which shows you how to solve a common problem:
+
+``` r
+library(DAGGER)
+## basic example code
+```
+
+What is special about using `README.Rmd` instead of just `README.md`?
+You can include R chunks like so:
+
+``` r
+summary(cars)
+#>      speed           dist       
+#>  Min.   : 4.0   Min.   :  2.00  
+#>  1st Qu.:12.0   1st Qu.: 26.00  
+#>  Median :15.0   Median : 36.00  
+#>  Mean   :15.4   Mean   : 42.98  
+#>  3rd Qu.:19.0   3rd Qu.: 56.00  
+#>  Max.   :25.0   Max.   :120.00
+```
+
+You’ll still need to render `README.Rmd` regularly, to keep `README.md`
+up-to-date. `devtools::build_readme()` is handy for this.
+
+You can also embed plots, for example:
+
+<img src="man/figures/README-pressure-1.png" width="100%" />
+
+In that case, don’t forget to commit and push the resulting figure
+files, so they display on GitHub and CRAN.
diff --git a/data-raw/GTEx.R b/data-raw/GTEx.R
@@ -9,6 +9,7 @@ GTEx <- read.table ("Merged_eQTL.txt", header=TRUE,
 					"gene_name", "strand", "variant_id", "tss_distance",
 					"rs_id_dbSNP151_GRCh38p7","pval_nominal","slope")]
 GTEx$pval_nominal <- as.numeric(GTEx$pval_nominal)
+GTEx$slope <- as.numeric(GTEx$slope)
 colnames(GTEx)[4] <- "variant_information"
 
 

diff --git a/data/GTEx.rda b/data/GTEx.rda
diff --git a/tests/testthat.R b/tests/testthat.R
@@ -0,0 +1,12 @@
+# This file is part of the standard setup for testthat.
+# It is recommended that you do not modify it.
+#
+# Where should you do additional test configuration?
+# Learn more about the roles of various files in:
+# * https://r-pkgs.org/testing-design.html#sec-tests-files-overview
+# * https://testthat.r-lib.org/articles/special-files.html
+
+library(testthat)
+library(DAGGER)
+
+test_check("DAGGER")
diff --git a/tests/testthat/test-DAGGER.R b/tests/testthat/test-DAGGER.R
@@ -0,0 +1,30 @@
+test_that("DAGGER basic pipeline works", {
+  test_GWAS <- data.frame(snp = c("rs01", "rs02", "rs03", "rs04"),  # "snp" is expected back as rs_id (see expected_results)
+                          beta_number = c(1, -1, 1, -1),
+                          p_value = rep(2e-7, 4))
+  test_GTEx <- data.frame(rs = c("rs01", "rs02", "rs03", "rs04"),  # "rs" is likewise expected to line up with rs_id
+                          gene_symbol = c("gene01", "gene02", "gene03", "gene04"),
+                          p_value = rep(1e-08, 4),
+                          slope = c(1, 1, -1, -1))
+  test_DGIdb <- data.frame(drug_name = c("drug01", "drug02",
+                                         "drug03", "drug04"),
+                           interaction_types = c("activator", "agonist",
+                                                 "antagonist", "inhibitor"),
+                           gene_symbol = c("gene01", "gene02", "gene03", "gene04"))
+  test_results <- suppressWarnings(DAGGER(test_GWAS, test_GTEx, test_DGIdb))  # warnings raised by the pipeline are silenced
+  expected_results <- data.frame(gene_symbol = c("gene01", "gene02",
+                                                 "gene03", "gene04"),
+                                 rs_id = c("rs01", "rs02", "rs03", "rs04"),
+                                 beta_number = c(1, -1, 1, -1),
+                                 p_val_variant = rep(2e-7, 4), 
+                                 p_val_nominal = rep(1e-08, 4),
+                                 slope = c(1, 1, -1, -1),
+                                 drug_name = c("drug01", "drug02",
+                                               "drug03", "drug04"),
+                                 interaction_types = c("activator", "agonist",
+                                                    "antagonist", "inhibitor"),
+                                 prediction = c("inhibitor", "activator",  # "activator" where (beta_number < 0) == (slope > 0)
+                                                "activator", "inhibitor"),
+                                 candidate = c(FALSE, TRUE, FALSE, TRUE))
+  expect_equal(test_results, expected_results)
+})
diff --git a/tests/testthat/test-input_filter.R b/tests/testthat/test-input_filter.R
@@ -0,0 +1,33 @@
+test_that("remove_duplicate_rs fails if no rs_id column exists", {
+  test_df <- data.frame(kingdoms=c("Daggerfall", "Sentinel",  # fixture deliberately has no rs_id column
+                                    "Wayrest", "Orsinium"))
+  expect_error(remove_duplicate_rs(test_df), "column not found")  # error message must match this pattern
+})
+
+test_that("remove_duplicate_rs sorts by statistical significance if possible", {
+  # Input is in descending p_value order, so the result must come back reversed.
+  test_df <- data.frame(rs_id = c("rs4", "rs3", "rs2", "rs1"),
+                        p_value = 4:1)
+  expected_df <- data.frame(rs_id = c("rs1", "rs2", "rs3", "rs4"),
+                            p_value = 1:4)
+  rownames(expected_df) <- 4:1  # expected rows keep their original row names
+  expect_equal(remove_duplicate_rs(test_df), expected_df)
+})
+
+test_that("remove_duplicate_rs works as intended and chooses most significant one", {
+  test_df <- data.frame(rs_id = c("rs1", "rs2", "rs1", "rs3"), p_value = 4:1)  # rs1 is duplicated (p = 4 and p = 2)
+  expected_df <- data.frame(rs_id = c("rs3", "rs1", "rs2"), p_value = 1:3)  # only the p = 2 copy of rs1 is expected
+  rownames(expected_df) <- 4:2  # original row names of the retained rows
+  expect_equal(remove_duplicate_rs(test_df), expected_df)
+  })
+
+test_that("filter_significance filters correctly by p-value", {
+  test_df <- data.frame(foo = rep("bar", 50), p_value = 1:50)
+  expected_df <- data.frame(foo = rep("bar", 25), p_value = 1:25)  # cutoff is inclusive: p_value == 25 is kept
+  expect_equal(filter_significance(test_df, 25), expected_df)  # 25 is passed as the `value` cutoff
+  })
+
+test_that("filter_significance fails if no p-value column exists", {
+  test_df <- data.frame(not_p_value = integer(0))  # no p_value column, so the warning path is taken
+  expect_warning(filter_significance(test_df), "No statistical significance")  # pattern goes to expect_warning, not to `value`
+})
diff --git a/tests/testthat/test-merge_gene_var_drug.R b/tests/testthat/test-merge_gene_var_drug.R
@@ -0,0 +1,71 @@
+test_that("merge_gene_var_drug works as intended", {
+  dummy_GWAS <- data.frame(rs_id = c("rs01", "rs02", "rs03", "rs04"),
+                           p_value = rep(0, 4))
+  dummy_GWAS$p_value[3] <- 1  # rs03 is above the 0.05 cutoff used by merge_gene_var_drug
+  dummy_GTEx <- data.frame(rs_id = c("rs01", "rs02", "rs03"),  # rs04 has no expression entry
+                           gene_symbol = c("gene1", "gene2", "gene3"),
+                           p_value = rep(0, 3))
+  dummy_DGIdb <- data.frame(gene_symbol = c("gene2", "gene3"),  # gene1 has no drug entry
+                            drug_name = c("drug2", "drug3"))
+  expected_df <- data.frame(gene_symbol = c("gene2"),  # only rs02/gene2 is present in all three inputs after filtering
+                            rs_id = c("rs02"),
+                            p_val_variant = 0,
+                            p_val_nominal = 0,
+                            drug_name = c("drug2"))
+  expect_equal(merge_gene_var_drug(dummy_GWAS, dummy_GTEx, dummy_DGIdb),
+               expected_df)
+})
+
+test_that("merge_gene_var_drug returns nothing if no matches are found", {
+  lonely_GWAS <- data.frame(rs_id = c("rslonely"), p_value = 0)
+  lonely_GTEx <- data.frame(rs_id = c("rsalsolonely"),  # rs_id differs from the GWAS one
+                            gene_symbol = "lonelygene", p_value = 0)
+  lonely_DGIdb <- data.frame(gene_symbol = "noneyouknow", drug_name = "lembas")  # gene_symbol differs from the GTEx one
+  expected_df <- data.frame(gene_symbol = character(0), rs_id = character(0),  # zero rows, but the merged column layout
+                            p_val_variant = numeric(0),
+                            p_val_nominal = numeric(0),
+                            drug_name = character(0))
+  expect_equal(merge_gene_var_drug(lonely_GWAS, lonely_GTEx, lonely_DGIdb),
+               expected_df)
+  })
+
+test_that("merge_gene_var_drug with missing GWAS p-value", {
+  test_GWAS <- data.frame(rs_id = "rs01",  # no p_value column on the GWAS side
+                          beta_value = 1)
+  test_GTEx <- data.frame(rs = "rs01",
+                          gene_symbol = "gene01",
+                          p_value = 1e-08,
+                          slope = 1)
+  test_DGIdb <- data.frame(drug_name = "drug01",
+                           interaction_types = "activator",
+                           gene_symbol = "gene01")
+  expect_no_error(suppressWarnings(merge_gene_var_drug(test_GWAS,  # warnings are silenced; only an error fails the test
+                                                      test_GTEx, test_DGIdb)))
+  })
+
+test_that("merge_gene_var_drug with missing GTEx p-value", {
+  test_GWAS <- data.frame(rs_id = "rs01",
+                          p_value = 1e-08,
+                          beta_value = 1)
+  test_GTEx <- data.frame(rs = "rs01",  # no p_value column on the GTEx side
+                          gene_symbol = "gene01",
+                          slope = 1)
+  test_DGIdb <- data.frame(drug_name = "drug01",
+                           interaction_types = "activator",
+                           gene_symbol = "gene01")
+  expect_no_error(suppressWarnings(merge_gene_var_drug(test_GWAS,  # warnings are silenced; only an error fails the test
+                                                      test_GTEx, test_DGIdb)))
+  })
+
+test_that("merge_gene_var_drug with no p-values", {
+    test_GWAS <- data.frame(rs_id = "rs01",  # no p_value column on the GWAS side
+                          beta_value = 1)
+  test_GTEx <- data.frame(rs = "rs01",  # no p_value column on the GTEx side either
+                          gene_symbol = "gene01",
+                          slope = 1)
+  test_DGIdb <- data.frame(drug_name = "drug01",
+                           interaction_types = "activator",
+                           gene_symbol = "gene01")
+  expect_no_error(suppressWarnings(merge_gene_var_drug(test_GWAS,  # warnings are silenced; only an error fails the test
+                                                      test_GTEx, test_DGIdb)))
+  })