Skip to content

Commit

Permalink
Changes for new variable types (#358)
Browse files Browse the repository at this point in the history
* changes for new variable types when reading options from a jasp file

* start on fixing unit tests

* fix some more tests
  • Loading branch information
vandenman authored Jul 11, 2024
1 parent 4fd6ed6 commit 1d2842e
Show file tree
Hide file tree
Showing 25 changed files with 127 additions and 53 deletions.
6 changes: 3 additions & 3 deletions R/commonMachineLearningClustering.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
predictors <- unlist(options[["predictors"]])
predictors <- predictors[predictors != ""]
if (is.null(dataset)) {
dataset <- .readAndAddCompleteRowIndices(dataset, predictors)
dataset <- .readAndAddCompleteRowIndices(options, "predictors")
}
if (options[["scaleVariables"]] && length(unlist(options[["predictors"]])) > 0) {
dataset <- .scaleNumericData(dataset)
Expand Down Expand Up @@ -374,8 +374,8 @@
ggplot2::scale_fill_manual(name = gettext("Cluster"), values = .mlColorScheme(ncolors)) +
jaspGraphs::geom_rangeframe() +
jaspGraphs::themeJaspRaw(legend.position = if (options[["tsneClusterPlotLegend"]]) "right" else "none") +
ggplot2::theme(axis.ticks = ggplot2::element_blank(),
axis.text.x = ggplot2::element_blank(),
ggplot2::theme(axis.ticks = ggplot2::element_blank(),
axis.text.x = ggplot2::element_blank(),
axis.text.y = ggplot2::element_blank())
if (options[["tsneClusterPlotLabels"]]) {
p <- p + ggrepel::geom_text_repel(ggplot2::aes(label = rownames(dataset), x = x, y = y), hjust = -1, vjust = 1, data = plotData, seed = 1)
Expand Down
28 changes: 14 additions & 14 deletions R/commonMachineLearningRegression.R
Original file line number Diff line number Diff line change
Expand Up @@ -55,23 +55,23 @@
}

.readDataClassificationRegressionAnalyses <- function(dataset, options) {
target <- NULL
if (options[["target"]] != "") {
target <- options[["target"]]
}
predictors <- NULL
if (length(options[["predictors"]]) > 0) {
predictors <- unlist(options[["predictors"]])
}

testSetIndicator <- NULL
if (options[["testSetIndicatorVariable"]] != "" && options[["holdoutData"]] == "testSetIndicator") {
testSetIndicator <- options[["testSetIndicatorVariable"]]
}
return(.readAndAddCompleteRowIndices(dataset, columns = c(target, predictors), columnsAsNumeric = testSetIndicator))
if (options[["testSetIndicatorVariable"]] != "" && options[["holdoutData"]] == "testSetIndicator")
testSetIndicator <- "testSetIndicatorVariable"

return(.readAndAddCompleteRowIndices(options, c("target", "predictors"), testSetIndicator))
}

.readAndAddCompleteRowIndices <- function(dataset, columns = NULL, columnsAsNumeric = NULL) {
dataset <- .readDataSetToEnd(columns = columns, columns.as.numeric = columnsAsNumeric)
.readAndAddCompleteRowIndices <- function(options, optionNames = NULL, optionNamesAsNumeric = NULL) {

if (!is.null(optionNamesAsNumeric))
for (name in optionNamesAsNumeric) {
name2 <- paste(name, ".types")
if (is.null(options[[name]]))
options[[name2]] <- rep("scale", length(options[[name]]))
}
dataset <- jaspBase::readDataSetByVariableTypes(options, c(optionNames, optionNamesAsNumeric))
complete.index <- which(complete.cases(dataset))
dataset <- na.omit(dataset)
rownames(dataset) <- as.character(complete.index)
Expand Down
8 changes: 4 additions & 4 deletions R/mlRegressionRegularized.R
Original file line number Diff line number Diff line change
Expand Up @@ -69,13 +69,13 @@ mlRegressionRegularized <- function(jaspResults, dataset, options, ...) {
if (options[["weights"]] != "") {
weights <- options[["weights"]]
}
if (options[["testSetIndicatorVariable"]] != "" && options[["holdoutData"]] == "testSetIndicator") {
testSetIndicator <- options[["testSetIndicatorVariable"]]
}
if (options[["testSetIndicatorVariable"]] != "" && options[["holdoutData"]] == "testSetIndicator")
testSetIndicator <- "testSetIndicatorVariable"

predictors <- unlist(options["predictors"])
predictors <- predictors[predictors != ""]
if (is.null(dataset)) {
dataset <- .readAndAddCompleteRowIndices(dataset, columns = predictors, columnsAsNumeric = c(target, weights, testSetIndicator))
dataset <- .readAndAddCompleteRowIndices(options, c("target", "predictors", "weights"), testSetIndicator)
}
if (length(unlist(options[["predictors"]])) > 0 && options[["scaleVariables"]]) {
dataset[, options[["predictors"]]] <- .scaleNumericData(dataset[, options[["predictors"]], drop = FALSE])
Expand Down
8 changes: 6 additions & 2 deletions tests/testthat/test-mlclassificationboosting.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,11 @@ options$modelOptimization <- "manual"
options$modelValid <- "validationManual"
options$predictionsColumn <- ""
options$predictors <- c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width")
options$predictors.types <- rep("scale", 4)
options$savePath <- ""
options$setSeed <- TRUE
options$target <- "Species"
options$target.types <- "nominal"
options$testIndicatorColumn <- ""
options$testSetIndicatorVariable <- ""
options$dataSplitPlot <- FALSE
Expand Down Expand Up @@ -38,12 +40,14 @@ options$noOfFolds <- 5
options$deviancePlot <- TRUE
options$outOfBagImprovementPlot <- TRUE
options$relativeInfluencePlot <- TRUE
options$predictors <- list("Alcohol", "Malic", "Ash", "Alcalinity", "Magnesium", "Phenols",
"Flavanoids", "Nonflavanoids", "Proanthocyanins", "Color",
options$predictors <- list("Alcohol", "Malic", "Ash", "Alcalinity", "Magnesium", "Phenols",
"Flavanoids", "Nonflavanoids", "Proanthocyanins", "Color",
"Hue", "Dilution", "Proline")
options$predictors.types <- rep("scale", length(options$predictors))
options$rocCurve <- TRUE
options$setSeed <- TRUE
options$target <- "Type"
options$target.types <- "nominal"
options$testDataManual <- 0.2
options$testIndicatorColumn <- ""
options$testSetIndicatorVariable <- ""
Expand Down
4 changes: 4 additions & 0 deletions tests/testthat/test-mlclassificationdecisiontree.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,11 @@ options$modelOptimization <- "manual"
options$modelValid <- "validationManual"
options$predictionsColumn <- ""
options$predictors <- c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width")
options$predictors.types <- rep("scale", 4)
options$savePath <- ""
options$setSeed <- TRUE
options$target <- "Species"
options$target.types <- "nominal"
options$testIndicatorColumn <- ""
options$testSetIndicatorVariable <- ""
options$dataSplitPlot <- FALSE
Expand All @@ -33,11 +35,13 @@ options$noOfFolds <- 5
options$decisionTreePlot <- TRUE
options$predictionsColumn <- ""
options$predictors <- c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width")
options$predictors.types <- rep("scale", 4)
options$saveModel <- FALSE
options$savePath <- ""
options$setSeed <- TRUE
options$featureImportanceTable <- TRUE
options$target <- "Species"
options$target.types <- "nominal"
options$testDataManual <- 0.2
options$testIndicatorColumn <- ""
options$testSetIndicatorVariable <- ""
Expand Down
8 changes: 6 additions & 2 deletions tests/testthat/test-mlclassificationknn.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,11 @@ options$modelOptimization <- "manual"
options$modelValid <- "validationManual"
options$predictionsColumn <- ""
options$predictors <- c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width")
options$predictors.types <- rep("scale", 4)
options$savePath <- ""
options$setSeed <- TRUE
options$target <- "Species"
options$target.types <- "nominal"
options$testIndicatorColumn <- ""
options$testSetIndicatorVariable <- ""
options$dataSplitPlot <- FALSE
Expand All @@ -36,12 +38,14 @@ options$modelValid <- "validationManual"
options$noOfFolds <- 5
options$errorVsKPlot <- TRUE
options$weightsPlot <- TRUE
options$predictors <- list("Alcohol", "Malic", "Ash", "Alcalinity", "Magnesium", "Phenols",
"Flavanoids", "Nonflavanoids", "Proanthocyanins", "Color",
options$predictors <- list("Alcohol", "Malic", "Ash", "Alcalinity", "Magnesium", "Phenols",
"Flavanoids", "Nonflavanoids", "Proanthocyanins", "Color",
"Hue", "Dilution", "Proline")
options$predictors.types <- rep("scale", length(options$predictors))
options$rocCurve <- TRUE
options$setSeed <- TRUE
options$target <- "Type"
options$target.types <- "nominal"
options$testDataManual <- 0.2
options$testIndicatorColumn <- ""
options$testSetIndicatorVariable <- ""
Expand Down
6 changes: 4 additions & 2 deletions tests/testthat/test-mlclassificationlda.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,15 @@ options$modelOptimization <- "manual"
options$modelValid <- "validationManual"
options$multicolTable <- TRUE
options$noOfFolds <- 5
options$predictors <- list("Alcohol", "Malic", "Ash", "Alcalinity", "Magnesium", "Phenols",
"Flavanoids", "Nonflavanoids", "Proanthocyanins", "Color",
options$predictors <- list("Alcohol", "Malic", "Ash", "Alcalinity", "Magnesium", "Phenols",
"Flavanoids", "Nonflavanoids", "Proanthocyanins", "Color",
"Hue", "Dilution", "Proline")
options$predictors.types <- rep("scale", length(options$predictors))
options$priorTable <- TRUE
options$rocCurve <- TRUE
options$setSeed <- TRUE
options$target <- "Type"
options$target.types <- "nominal"
options$testDataManual <- 0.2
options$testIndicatorColumn <- ""
options$testSetIndicatorVariable <- ""
Expand Down
2 changes: 2 additions & 0 deletions tests/testthat/test-mlclassificationnaivebayes.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,13 @@ options$modelValid <- "validationManual"
options$noOfFolds <- 5
options$predictionsColumn <- ""
options$predictors <- c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width")
options$predictors.types <- rep("scale", 4)
options$saveModel <- FALSE
options$savePath <- ""
options$setSeed <- TRUE
options$supportVectorsTable <- TRUE
options$target <- "Species"
options$target.types <- "nominal"
options$testDataManual <- 0.2
options$testIndicatorColumn <- ""
options$testSetIndicatorVariable <- ""
Expand Down
2 changes: 2 additions & 0 deletions tests/testthat/test-mlclassificationneuralnetwork.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,12 @@ options$modelOptimization <- "manual"
options$modelValid <- "validationManual"
options$predictionsColumn <- ""
options$predictors <- c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width")
options$predictors.types <- rep("scale", 4)
options$saveModel <- FALSE
options$savePath <- ""
options$setSeed <- TRUE
options$target <- "Species"
options$target.types <- "nominal"
options$testDataManual <- 0.2
options$testIndicatorColumn <- ""
options$testSetIndicatorVariable <- ""
Expand Down
8 changes: 6 additions & 2 deletions tests/testthat/test-mlclassificationrandomforest.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,11 @@ options$modelOptimization <- "manual"
options$modelValid <- "validationManual"
options$predictionsColumn <- ""
options$predictors <- c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width")
options$predictors.types <- rep("scale", 4)
options$savePath <- ""
options$setSeed <- TRUE
options$target <- "Species"
options$target.types <- "nominal"
options$testIndicatorColumn <- ""
options$testSetIndicatorVariable <- ""
options$dataSplitPlot <- FALSE
Expand Down Expand Up @@ -37,13 +39,15 @@ options$noOfFolds <- 5
options$accuracyDecreasePlot <- TRUE
options$purityIncreasePlot <- TRUE
options$treesVsModelErrorPlot <- TRUE
options$predictors <- list("Alcohol", "Malic", "Ash", "Alcalinity", "Magnesium", "Phenols",
"Flavanoids", "Nonflavanoids", "Proanthocyanins", "Color",
options$predictors <- list("Alcohol", "Malic", "Ash", "Alcalinity", "Magnesium", "Phenols",
"Flavanoids", "Nonflavanoids", "Proanthocyanins", "Color",
"Hue", "Dilution", "Proline")
options$predictors.types <- rep("scale", length(options$predictors))
options$rocCurve <- TRUE
options$setSeed <- TRUE
options$featureImportanceTable <- TRUE
options$target <- "Type"
options$target.types <- "scale"
options$testDataManual <- 0.2
options$testIndicatorColumn <- ""
options$testSetIndicatorVariable <- ""
Expand Down
4 changes: 4 additions & 0 deletions tests/testthat/test-mlclassificationsvm.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,11 @@ options$modelOptimization <- "manual"
options$modelValid <- "validationManual"
options$predictionsColumn <- ""
options$predictors <- c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width")
options$predictors.types <- rep("scale", 4)
options$savePath <- ""
options$setSeed <- TRUE
options$target <- "Species"
options$target.types <- "nominal"
options$testIndicatorColumn <- ""
options$testSetIndicatorVariable <- ""
options$dataSplitPlot <- FALSE
Expand All @@ -32,11 +34,13 @@ options$modelValid <- "validationManual"
options$noOfFolds <- 5
options$predictionsColumn <- ""
options$predictors <- c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width")
options$predictors.types <- rep("scale", 4)
options$saveModel <- FALSE
options$savePath <- ""
options$setSeed <- TRUE
options$supportVectorsTable <- TRUE
options$target <- "Species"
options$target.types <- "nominal"
options$testDataManual <- 0.2
options$testIndicatorColumn <- ""
options$testSetIndicatorVariable <- ""
Expand Down
5 changes: 3 additions & 2 deletions tests/testthat/test-mlclusteringdensitybased.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,10 @@ options$distance <- "normalDensities"
options[["kDistancePlot"]] <- TRUE
options$modelOptimization <- "manual"
options$tsneClusterPlot <- TRUE
options$predictors <- list("Alcohol", "Malic", "Ash", "Alcalinity", "Magnesium", "Phenols",
"Flavanoids", "Nonflavanoids", "Proanthocyanins", "Color",
options$predictors <- list("Alcohol", "Malic", "Ash", "Alcalinity", "Magnesium", "Phenols",
"Flavanoids", "Nonflavanoids", "Proanthocyanins", "Color",
"Hue", "Dilution", "Proline")
options$predictors.types <- rep("scale", length(options$predictors))
options$setSeed <- TRUE
options$tableClusterInformationBetweenSumOfSquares <- TRUE
options$tableClusterInformationSilhouetteScore <- TRUE
Expand Down
6 changes: 4 additions & 2 deletions tests/testthat/test-mlclusteringfuzzycmeans.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ context("Machine Learning Fuzzy C-Means Clustering")
# Test fixed model #############################################################
options <- initMlOptions("mlClusteringFuzzyCMeans")
options$predictors <- list("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width")
options$predictors.types <- rep("scale", length(options$predictors))
options$modelOptimization <- "manual"
options$predictionsColumn <- ""
options$setSeed <- TRUE
Expand All @@ -21,9 +22,10 @@ options$predictionsColumn <- ""
options$validationMeasures <- TRUE
options$modelOptimization <- "optimized"
options$tsneClusterPlot <- TRUE
options$predictors <- list("Alcohol", "Malic", "Ash", "Alcalinity", "Magnesium", "Phenols",
"Flavanoids", "Nonflavanoids", "Proanthocyanins", "Color",
options$predictors <- list("Alcohol", "Malic", "Ash", "Alcalinity", "Magnesium", "Phenols",
"Flavanoids", "Nonflavanoids", "Proanthocyanins", "Color",
"Hue", "Dilution", "Proline")
options$predictors.types <- rep("scale", length(options$predictors))
options$setSeed <- TRUE
options$tableClusterInformationBetweenSumOfSquares <- TRUE
options$tableClusterInformationCentroids <- TRUE
Expand Down
10 changes: 8 additions & 2 deletions tests/testthat/test-mlclusteringhierarchical.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ context("Machine Learning Hierarchical Clustering")
# Test fixed model #############################################################
options <- initMlOptions("mlClusteringHierarchical")
options$predictors <- list("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width")
options$predictors.types <- rep("scale", length(options$predictors))
options$modelOptimization <- "manual"
options$predictionsColumn <- ""
options$setSeed <- TRUE
Expand All @@ -22,9 +23,10 @@ options$validationMeasures <- TRUE
options$dendrogram <- TRUE
options$modelOptimization <- "optimized"
options$tsneClusterPlot <- TRUE
options$predictors <- list("Alcohol", "Malic", "Ash", "Alcalinity", "Magnesium", "Phenols",
"Flavanoids", "Nonflavanoids", "Proanthocyanins", "Color",
options$predictors <- list("Alcohol", "Malic", "Ash", "Alcalinity", "Magnesium", "Phenols",
"Flavanoids", "Nonflavanoids", "Proanthocyanins", "Color",
"Hue", "Dilution", "Proline")
options$predictors.types <- rep("scale", length(options$predictors))
options$setSeed <- TRUE
options$tableClusterInformationBetweenSumOfSquares <- TRUE
options$tableClusterInformationSilhouetteScore <- TRUE
Expand Down Expand Up @@ -98,6 +100,7 @@ options$validationMeasures <- TRUE
options$linkage <- "wardD"
options$modelOptimization <- "optimized"
options$predictors <- c("Alcohol", "Malic", "Ash", "Alcalinity", "Magnesium", "Phenols", "Flavanoids", "Nonflavanoids", "Proanthocyanins", "Color", "Hue", "Dilution", "Proline")
options$predictors.types <- rep("scale", length(options$predictors))
options$setSeed <- TRUE
options$tableClusterMeans <- TRUE
set.seed(1)
Expand Down Expand Up @@ -163,6 +166,7 @@ options$validationMeasures <- TRUE
options$linkage <- "wardD"
options$modelOptimization <- "optimized"
options$predictors <- c("Alcohol", "Malic", "Ash", "Alcalinity", "Magnesium", "Phenols", "Flavanoids", "Nonflavanoids", "Proanthocyanins", "Color", "Hue", "Dilution", "Proline")
options$predictors.types <- rep("scale", length(options$predictors))
options$setSeed <- TRUE
options$tableClusterMeans <- TRUE
set.seed(1)
Expand Down Expand Up @@ -228,6 +232,7 @@ options$validationMeasures <- TRUE
options$linkage <- "median"
options$modelOptimization <- "optimized"
options$predictors <- c("Alcohol", "Malic", "Ash", "Alcalinity", "Magnesium", "Phenols", "Flavanoids", "Nonflavanoids", "Proanthocyanins", "Color", "Hue", "Dilution", "Proline")
options$predictors.types <- rep("scale", length(options$predictors))
options$setSeed <- TRUE
options$tableClusterMeans <- TRUE
set.seed(1)
Expand Down Expand Up @@ -277,6 +282,7 @@ options$validationMeasures <- TRUE
options$linkage <- "mcquitty"
options$modelOptimization <- "optimized"
options$predictors <- c("Alcohol", "Malic", "Ash", "Alcalinity", "Magnesium", "Phenols", "Flavanoids", "Nonflavanoids", "Proanthocyanins", "Color", "Hue", "Dilution", "Proline")
options$predictors.types <- rep("scale", length(options$predictors))
options$setSeed <- TRUE
options$tableClusterMeans <- TRUE
set.seed(1)
Expand Down
16 changes: 10 additions & 6 deletions tests/testthat/test-mlclusteringkmeans.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ context("Machine Learning K-Means Clustering")
# Test fixed model #############################################################
options <- initMlOptions("mlClusteringKMeans")
options$predictors <- list("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width")
options$predictors.types <- rep("scale", length(options$predictors))
options$modelOptimization <- "manual"
options$predictionsColumn <- ""
options$setSeed <- TRUE
Expand All @@ -17,9 +18,10 @@ jaspTools::expect_equal_tables(table,

# Test optimized model #########################################################
options <- initMlOptions("mlClusteringKMeans")
options$predictors <- list("Alcohol", "Malic", "Ash", "Alcalinity", "Magnesium", "Phenols",
"Flavanoids", "Nonflavanoids", "Proanthocyanins", "Color",
options$predictors <- list("Alcohol", "Malic", "Ash", "Alcalinity", "Magnesium", "Phenols",
"Flavanoids", "Nonflavanoids", "Proanthocyanins", "Color",
"Hue", "Dilution", "Proline")
options$predictors.types <- rep("scale", length(options$predictors))
options$tableClusterInformationSilhouetteScore <- TRUE
options$tableClusterInformationCentroids <- TRUE
options$tableClusterInformationBetweenSumOfSquares <- TRUE
Expand Down Expand Up @@ -102,9 +104,10 @@ test_that("t-SNE Cluster Plot matches", {
context("Machine Learning K-Medians Clustering")

options <- initMlOptions("mlClusteringKMeans")
options$predictors <- list("Alcohol", "Malic", "Ash", "Alcalinity", "Magnesium", "Phenols",
"Flavanoids", "Nonflavanoids", "Proanthocyanins", "Color",
options$predictors <- list("Alcohol", "Malic", "Ash", "Alcalinity", "Magnesium", "Phenols",
"Flavanoids", "Nonflavanoids", "Proanthocyanins", "Color",
"Hue", "Dilution", "Proline")
options$predictors.types <- rep("scale", length(options$predictors))
options$tableClusterInformationSilhouetteScore <- TRUE
options$tableClusterInformationCentroids <- TRUE
options$tableClusterInformationBetweenSumOfSquares <- TRUE
Expand Down Expand Up @@ -183,9 +186,10 @@ test_that("Elbow Method Plot matches", {
context("Machine Learning K-Medoids Clustering")

options <- initMlOptions("mlClusteringKMeans")
options$predictors <- list("Alcohol", "Malic", "Ash", "Alcalinity", "Magnesium", "Phenols",
"Flavanoids", "Nonflavanoids", "Proanthocyanins", "Color",
options$predictors <- list("Alcohol", "Malic", "Ash", "Alcalinity", "Magnesium", "Phenols",
"Flavanoids", "Nonflavanoids", "Proanthocyanins", "Color",
"Hue", "Dilution", "Proline")
options$predictors.types <- rep("scale", length(options$predictors))
options$tableClusterInformationSilhouetteScore <- TRUE
options$tableClusterInformationCentroids <- TRUE
options$tableClusterInformationBetweenSumOfSquares <- TRUE
Expand Down
Loading

0 comments on commit 1d2842e

Please sign in to comment.