From 1d2842e144eff75e6792d51d803548f199ce319b Mon Sep 17 00:00:00 2001 From: Don van den Bergh Date: Thu, 11 Jul 2024 11:44:02 +0200 Subject: [PATCH] Changes for new variable types (#358) * changes for new variable types when reading options from a jasp file * start on fixing unit tests * fix some more tests --- R/commonMachineLearningClustering.R | 6 ++-- R/commonMachineLearningRegression.R | 28 +++++++++---------- R/mlRegressionRegularized.R | 8 +++--- .../testthat/test-mlclassificationboosting.R | 8 ++++-- .../test-mlclassificationdecisiontree.R | 4 +++ tests/testthat/test-mlclassificationknn.R | 8 ++++-- tests/testthat/test-mlclassificationlda.R | 6 ++-- .../test-mlclassificationnaivebayes.R | 2 ++ .../test-mlclassificationneuralnetwork.R | 2 ++ .../test-mlclassificationrandomforest.R | 8 ++++-- tests/testthat/test-mlclassificationsvm.R | 4 +++ .../testthat/test-mlclusteringdensitybased.R | 5 ++-- tests/testthat/test-mlclusteringfuzzycmeans.R | 6 ++-- .../testthat/test-mlclusteringhierarchical.R | 10 +++++-- tests/testthat/test-mlclusteringkmeans.R | 16 +++++++---- tests/testthat/test-mlclusteringmodelbased.R | 6 ++-- .../testthat/test-mlclusteringrandomforest.R | 6 ++-- tests/testthat/test-mlregressionboosting.R | 11 ++++++-- .../testthat/test-mlregressiondecisiontree.R | 4 +++ tests/testthat/test-mlregressionknn.R | 8 ++++-- tests/testthat/test-mlregressionlinear.R | 2 ++ .../testthat/test-mlregressionneuralnetwork.R | 2 ++ .../testthat/test-mlregressionrandomforest.R | 8 ++++-- tests/testthat/test-mlregressionregularized.R | 8 ++++-- tests/testthat/test-mlregressionsvm.R | 4 +++ 25 files changed, 127 insertions(+), 53 deletions(-) diff --git a/R/commonMachineLearningClustering.R b/R/commonMachineLearningClustering.R index b5e166b8..16b6519f 100644 --- a/R/commonMachineLearningClustering.R +++ b/R/commonMachineLearningClustering.R @@ -34,7 +34,7 @@ predictors <- unlist(options[["predictors"]]) predictors <- predictors[predictors != ""] if (is.null(dataset)) { 
- dataset <- .readAndAddCompleteRowIndices(dataset, predictors) + dataset <- .readAndAddCompleteRowIndices(options, "predictors") } if (options[["scaleVariables"]] && length(unlist(options[["predictors"]])) > 0) { dataset <- .scaleNumericData(dataset) @@ -374,8 +374,8 @@ ggplot2::scale_fill_manual(name = gettext("Cluster"), values = .mlColorScheme(ncolors)) + jaspGraphs::geom_rangeframe() + jaspGraphs::themeJaspRaw(legend.position = if (options[["tsneClusterPlotLegend"]]) "right" else "none") + - ggplot2::theme(axis.ticks = ggplot2::element_blank(), - axis.text.x = ggplot2::element_blank(), + ggplot2::theme(axis.ticks = ggplot2::element_blank(), + axis.text.x = ggplot2::element_blank(), axis.text.y = ggplot2::element_blank()) if (options[["tsneClusterPlotLabels"]]) { p <- p + ggrepel::geom_text_repel(ggplot2::aes(label = rownames(dataset), x = x, y = y), hjust = -1, vjust = 1, data = plotData, seed = 1) diff --git a/R/commonMachineLearningRegression.R b/R/commonMachineLearningRegression.R index 3a8bb3e9..7180bc26 100644 --- a/R/commonMachineLearningRegression.R +++ b/R/commonMachineLearningRegression.R @@ -55,23 +55,23 @@ } .readDataClassificationRegressionAnalyses <- function(dataset, options) { - target <- NULL - if (options[["target"]] != "") { - target <- options[["target"]] - } - predictors <- NULL - if (length(options[["predictors"]]) > 0) { - predictors <- unlist(options[["predictors"]]) - } + testSetIndicator <- NULL - if (options[["testSetIndicatorVariable"]] != "" && options[["holdoutData"]] == "testSetIndicator") { - testSetIndicator <- options[["testSetIndicatorVariable"]] - } - return(.readAndAddCompleteRowIndices(dataset, columns = c(target, predictors), columnsAsNumeric = testSetIndicator)) + if (options[["testSetIndicatorVariable"]] != "" && options[["holdoutData"]] == "testSetIndicator") + testSetIndicator <- "testSetIndicatorVariable" + + return(.readAndAddCompleteRowIndices(options, c("target", "predictors"), testSetIndicator)) } 
-.readAndAddCompleteRowIndices <- function(dataset, columns = NULL, columnsAsNumeric = NULL) { - dataset <- .readDataSetToEnd(columns = columns, columns.as.numeric = columnsAsNumeric) +.readAndAddCompleteRowIndices <- function(options, optionNames = NULL, optionNamesAsNumeric = NULL) { + + if (!is.null(optionNamesAsNumeric)) + for (name in optionNamesAsNumeric) { + name2 <- paste0(name, ".types") + if (is.null(options[[name2]])) + options[[name2]] <- rep("scale", length(options[[name]])) + } + dataset <- jaspBase::readDataSetByVariableTypes(options, c(optionNames, optionNamesAsNumeric)) complete.index <- which(complete.cases(dataset)) dataset <- na.omit(dataset) rownames(dataset) <- as.character(complete.index) diff --git a/R/mlRegressionRegularized.R b/R/mlRegressionRegularized.R index 92fe1bb2..b2a11b6d 100644 --- a/R/mlRegressionRegularized.R +++ b/R/mlRegressionRegularized.R @@ -69,13 +69,13 @@ mlRegressionRegularized <- function(jaspResults, dataset, options, ...) { if (options[["weights"]] != "") { weights <- options[["weights"]] } - if (options[["testSetIndicatorVariable"]] != "" && options[["holdoutData"]] == "testSetIndicator") { - testSetIndicator <- options[["testSetIndicatorVariable"]] - } + if (options[["testSetIndicatorVariable"]] != "" && options[["holdoutData"]] == "testSetIndicator") + testSetIndicator <- "testSetIndicatorVariable" + predictors <- unlist(options["predictors"]) predictors <- predictors[predictors != ""] if (is.null(dataset)) { - dataset <- .readAndAddCompleteRowIndices(dataset, columns = predictors, columnsAsNumeric = c(target, weights, testSetIndicator)) + dataset <- .readAndAddCompleteRowIndices(options, c("target", "predictors", "weights"), testSetIndicator) } if (length(unlist(options[["predictors"]])) > 0 && options[["scaleVariables"]]) { dataset[, options[["predictors"]]] <- .scaleNumericData(dataset[, options[["predictors"]], drop = FALSE]) diff --git a/tests/testthat/test-mlclassificationboosting.R 
b/tests/testthat/test-mlclassificationboosting.R index dbe7aa21..2e85154c 100644 --- a/tests/testthat/test-mlclassificationboosting.R +++ b/tests/testthat/test-mlclassificationboosting.R @@ -7,9 +7,11 @@ options$modelOptimization <- "manual" options$modelValid <- "validationManual" options$predictionsColumn <- "" options$predictors <- c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width") +options$predictors.types <- rep("scale", 4) options$savePath <- "" options$setSeed <- TRUE options$target <- "Species" +options$target.types <- "nominal" options$testIndicatorColumn <- "" options$testSetIndicatorVariable <- "" options$dataSplitPlot <- FALSE @@ -38,12 +40,14 @@ options$noOfFolds <- 5 options$deviancePlot <- TRUE options$outOfBagImprovementPlot <- TRUE options$relativeInfluencePlot <- TRUE -options$predictors <- list("Alcohol", "Malic", "Ash", "Alcalinity", "Magnesium", "Phenols", - "Flavanoids", "Nonflavanoids", "Proanthocyanins", "Color", +options$predictors <- list("Alcohol", "Malic", "Ash", "Alcalinity", "Magnesium", "Phenols", + "Flavanoids", "Nonflavanoids", "Proanthocyanins", "Color", "Hue", "Dilution", "Proline") +options$predictors.types <- rep("scale", length(options$predictors)) options$rocCurve <- TRUE options$setSeed <- TRUE options$target <- "Type" +options$target.types <- "nominal" options$testDataManual <- 0.2 options$testIndicatorColumn <- "" options$testSetIndicatorVariable <- "" diff --git a/tests/testthat/test-mlclassificationdecisiontree.R b/tests/testthat/test-mlclassificationdecisiontree.R index 2031c29b..2358282c 100644 --- a/tests/testthat/test-mlclassificationdecisiontree.R +++ b/tests/testthat/test-mlclassificationdecisiontree.R @@ -7,9 +7,11 @@ options$modelOptimization <- "manual" options$modelValid <- "validationManual" options$predictionsColumn <- "" options$predictors <- c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width") +options$predictors.types <- rep("scale", 4) options$savePath <- "" options$setSeed <- TRUE 
options$target <- "Species" +options$target.types <- "nominal" options$testIndicatorColumn <- "" options$testSetIndicatorVariable <- "" options$dataSplitPlot <- FALSE @@ -33,11 +35,13 @@ options$noOfFolds <- 5 options$decisionTreePlot <- TRUE options$predictionsColumn <- "" options$predictors <- c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width") +options$predictors.types <- rep("scale", 4) options$saveModel <- FALSE options$savePath <- "" options$setSeed <- TRUE options$featureImportanceTable <- TRUE options$target <- "Species" +options$target.types <- "nominal" options$testDataManual <- 0.2 options$testIndicatorColumn <- "" options$testSetIndicatorVariable <- "" diff --git a/tests/testthat/test-mlclassificationknn.R b/tests/testthat/test-mlclassificationknn.R index 42eedf8f..f684e420 100644 --- a/tests/testthat/test-mlclassificationknn.R +++ b/tests/testthat/test-mlclassificationknn.R @@ -7,9 +7,11 @@ options$modelOptimization <- "manual" options$modelValid <- "validationManual" options$predictionsColumn <- "" options$predictors <- c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width") +options$predictors.types <- rep("scale", 4) options$savePath <- "" options$setSeed <- TRUE options$target <- "Species" +options$target.types <- "nominal" options$testIndicatorColumn <- "" options$testSetIndicatorVariable <- "" options$dataSplitPlot <- FALSE @@ -36,12 +38,14 @@ options$modelValid <- "validationManual" options$noOfFolds <- 5 options$errorVsKPlot <- TRUE options$weightsPlot <- TRUE -options$predictors <- list("Alcohol", "Malic", "Ash", "Alcalinity", "Magnesium", "Phenols", - "Flavanoids", "Nonflavanoids", "Proanthocyanins", "Color", +options$predictors <- list("Alcohol", "Malic", "Ash", "Alcalinity", "Magnesium", "Phenols", + "Flavanoids", "Nonflavanoids", "Proanthocyanins", "Color", "Hue", "Dilution", "Proline") +options$predictors.types <- rep("scale", length(options$predictors)) options$rocCurve <- TRUE options$setSeed <- TRUE options$target 
<- "Type" +options$target.types <- "nominal" options$testDataManual <- 0.2 options$testIndicatorColumn <- "" options$testSetIndicatorVariable <- "" diff --git a/tests/testthat/test-mlclassificationlda.R b/tests/testthat/test-mlclassificationlda.R index da256e77..176bafc8 100644 --- a/tests/testthat/test-mlclassificationlda.R +++ b/tests/testthat/test-mlclassificationlda.R @@ -19,13 +19,15 @@ options$modelOptimization <- "manual" options$modelValid <- "validationManual" options$multicolTable <- TRUE options$noOfFolds <- 5 -options$predictors <- list("Alcohol", "Malic", "Ash", "Alcalinity", "Magnesium", "Phenols", - "Flavanoids", "Nonflavanoids", "Proanthocyanins", "Color", +options$predictors <- list("Alcohol", "Malic", "Ash", "Alcalinity", "Magnesium", "Phenols", + "Flavanoids", "Nonflavanoids", "Proanthocyanins", "Color", "Hue", "Dilution", "Proline") +options$predictors.types <- rep("scale", length(options$predictors)) options$priorTable <- TRUE options$rocCurve <- TRUE options$setSeed <- TRUE options$target <- "Type" +options$target.types <- "nominal" options$testDataManual <- 0.2 options$testIndicatorColumn <- "" options$testSetIndicatorVariable <- "" diff --git a/tests/testthat/test-mlclassificationnaivebayes.R b/tests/testthat/test-mlclassificationnaivebayes.R index 36e610da..1dbfd97e 100644 --- a/tests/testthat/test-mlclassificationnaivebayes.R +++ b/tests/testthat/test-mlclassificationnaivebayes.R @@ -11,11 +11,13 @@ options$modelValid <- "validationManual" options$noOfFolds <- 5 options$predictionsColumn <- "" options$predictors <- c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width") +options$predictors.types <- rep("scale", 4) options$saveModel <- FALSE options$savePath <- "" options$setSeed <- TRUE options$supportVectorsTable <- TRUE options$target <- "Species" +options$target.types <- "nominal" options$testDataManual <- 0.2 options$testIndicatorColumn <- "" options$testSetIndicatorVariable <- "" diff --git 
a/tests/testthat/test-mlclassificationneuralnetwork.R b/tests/testthat/test-mlclassificationneuralnetwork.R index f6ab54a0..2b4c97e0 100644 --- a/tests/testthat/test-mlclassificationneuralnetwork.R +++ b/tests/testthat/test-mlclassificationneuralnetwork.R @@ -14,10 +14,12 @@ options$modelOptimization <- "manual" options$modelValid <- "validationManual" options$predictionsColumn <- "" options$predictors <- c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width") +options$predictors.types <- rep("scale", 4) options$saveModel <- FALSE options$savePath <- "" options$setSeed <- TRUE options$target <- "Species" +options$target.types <- "nominal" options$testDataManual <- 0.2 options$testIndicatorColumn <- "" options$testSetIndicatorVariable <- "" diff --git a/tests/testthat/test-mlclassificationrandomforest.R b/tests/testthat/test-mlclassificationrandomforest.R index df066c3c..2f182217 100644 --- a/tests/testthat/test-mlclassificationrandomforest.R +++ b/tests/testthat/test-mlclassificationrandomforest.R @@ -7,9 +7,11 @@ options$modelOptimization <- "manual" options$modelValid <- "validationManual" options$predictionsColumn <- "" options$predictors <- c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width") +options$predictors.types <- rep("scale", 4) options$savePath <- "" options$setSeed <- TRUE options$target <- "Species" +options$target.types <- "nominal" options$testIndicatorColumn <- "" options$testSetIndicatorVariable <- "" options$dataSplitPlot <- FALSE @@ -37,13 +39,15 @@ options$noOfFolds <- 5 options$accuracyDecreasePlot <- TRUE options$purityIncreasePlot <- TRUE options$treesVsModelErrorPlot <- TRUE -options$predictors <- list("Alcohol", "Malic", "Ash", "Alcalinity", "Magnesium", "Phenols", - "Flavanoids", "Nonflavanoids", "Proanthocyanins", "Color", +options$predictors <- list("Alcohol", "Malic", "Ash", "Alcalinity", "Magnesium", "Phenols", + "Flavanoids", "Nonflavanoids", "Proanthocyanins", "Color", "Hue", "Dilution", "Proline") 
+options$predictors.types <- rep("scale", length(options$predictors)) options$rocCurve <- TRUE options$setSeed <- TRUE options$featureImportanceTable <- TRUE options$target <- "Type" +options$target.types <- "nominal" options$testDataManual <- 0.2 options$testIndicatorColumn <- "" options$testSetIndicatorVariable <- "" diff --git a/tests/testthat/test-mlclassificationsvm.R b/tests/testthat/test-mlclassificationsvm.R index e4e1f4c7..91d696a4 100644 --- a/tests/testthat/test-mlclassificationsvm.R +++ b/tests/testthat/test-mlclassificationsvm.R @@ -7,9 +7,11 @@ options$modelOptimization <- "manual" options$modelValid <- "validationManual" options$predictionsColumn <- "" options$predictors <- c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width") +options$predictors.types <- rep("scale", 4) options$savePath <- "" options$setSeed <- TRUE options$target <- "Species" +options$target.types <- "nominal" options$testIndicatorColumn <- "" options$testSetIndicatorVariable <- "" options$dataSplitPlot <- FALSE @@ -32,11 +34,13 @@ options$modelValid <- "validationManual" options$noOfFolds <- 5 options$predictionsColumn <- "" options$predictors <- c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width") +options$predictors.types <- rep("scale", 4) options$saveModel <- FALSE options$savePath <- "" options$setSeed <- TRUE options$supportVectorsTable <- TRUE options$target <- "Species" +options$target.types <- "nominal" options$testDataManual <- 0.2 options$testIndicatorColumn <- "" options$testSetIndicatorVariable <- "" diff --git a/tests/testthat/test-mlclusteringdensitybased.R b/tests/testthat/test-mlclusteringdensitybased.R index aef95c02..c26459a0 100644 --- a/tests/testthat/test-mlclusteringdensitybased.R +++ b/tests/testthat/test-mlclusteringdensitybased.R @@ -9,9 +9,10 @@ options$distance <- "normalDensities" options[["kDistancePlot"]] <- TRUE options$modelOptimization <- "manual" options$tsneClusterPlot <- TRUE -options$predictors <- list("Alcohol", "Malic", 
"Ash", "Alcalinity", "Magnesium", "Phenols", - "Flavanoids", "Nonflavanoids", "Proanthocyanins", "Color", +options$predictors <- list("Alcohol", "Malic", "Ash", "Alcalinity", "Magnesium", "Phenols", + "Flavanoids", "Nonflavanoids", "Proanthocyanins", "Color", "Hue", "Dilution", "Proline") +options$predictors.types <- rep("scale", length(options$predictors)) options$setSeed <- TRUE options$tableClusterInformationBetweenSumOfSquares <- TRUE options$tableClusterInformationSilhouetteScore <- TRUE diff --git a/tests/testthat/test-mlclusteringfuzzycmeans.R b/tests/testthat/test-mlclusteringfuzzycmeans.R index d2cd4c1c..802ea21f 100644 --- a/tests/testthat/test-mlclusteringfuzzycmeans.R +++ b/tests/testthat/test-mlclusteringfuzzycmeans.R @@ -3,6 +3,7 @@ context("Machine Learning Fuzzy C-Means Clustering") # Test fixed model ############################################################# options <- initMlOptions("mlClusteringFuzzyCMeans") options$predictors <- list("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width") +options$predictors.types <- rep("scale", length(options$predictors)) options$modelOptimization <- "manual" options$predictionsColumn <- "" options$setSeed <- TRUE @@ -21,9 +22,10 @@ options$predictionsColumn <- "" options$validationMeasures <- TRUE options$modelOptimization <- "optimized" options$tsneClusterPlot <- TRUE -options$predictors <- list("Alcohol", "Malic", "Ash", "Alcalinity", "Magnesium", "Phenols", - "Flavanoids", "Nonflavanoids", "Proanthocyanins", "Color", +options$predictors <- list("Alcohol", "Malic", "Ash", "Alcalinity", "Magnesium", "Phenols", + "Flavanoids", "Nonflavanoids", "Proanthocyanins", "Color", "Hue", "Dilution", "Proline") +options$predictors.types <- rep("scale", length(options$predictors)) options$setSeed <- TRUE options$tableClusterInformationBetweenSumOfSquares <- TRUE options$tableClusterInformationCentroids <- TRUE diff --git a/tests/testthat/test-mlclusteringhierarchical.R 
b/tests/testthat/test-mlclusteringhierarchical.R index 3d3f3ca9..87c86d88 100644 --- a/tests/testthat/test-mlclusteringhierarchical.R +++ b/tests/testthat/test-mlclusteringhierarchical.R @@ -3,6 +3,7 @@ context("Machine Learning Hierarchical Clustering") # Test fixed model ############################################################# options <- initMlOptions("mlClusteringHierarchical") options$predictors <- list("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width") +options$predictors.types <- rep("scale", length(options$predictors)) options$modelOptimization <- "manual" options$predictionsColumn <- "" options$setSeed <- TRUE @@ -22,9 +23,10 @@ options$validationMeasures <- TRUE options$dendrogram <- TRUE options$modelOptimization <- "optimized" options$tsneClusterPlot <- TRUE -options$predictors <- list("Alcohol", "Malic", "Ash", "Alcalinity", "Magnesium", "Phenols", - "Flavanoids", "Nonflavanoids", "Proanthocyanins", "Color", +options$predictors <- list("Alcohol", "Malic", "Ash", "Alcalinity", "Magnesium", "Phenols", + "Flavanoids", "Nonflavanoids", "Proanthocyanins", "Color", "Hue", "Dilution", "Proline") +options$predictors.types <- rep("scale", length(options$predictors)) options$setSeed <- TRUE options$tableClusterInformationBetweenSumOfSquares <- TRUE options$tableClusterInformationSilhouetteScore <- TRUE @@ -98,6 +100,7 @@ options$validationMeasures <- TRUE options$linkage <- "wardD" options$modelOptimization <- "optimized" options$predictors <- c("Alcohol", "Malic", "Ash", "Alcalinity", "Magnesium", "Phenols", "Flavanoids", "Nonflavanoids", "Proanthocyanins", "Color", "Hue", "Dilution", "Proline") +options$predictors.types <- rep("scale", length(options$predictors)) options$setSeed <- TRUE options$tableClusterMeans <- TRUE set.seed(1) @@ -163,6 +166,7 @@ options$validationMeasures <- TRUE options$linkage <- "wardD" options$modelOptimization <- "optimized" options$predictors <- c("Alcohol", "Malic", "Ash", "Alcalinity", "Magnesium", "Phenols", 
"Flavanoids", "Nonflavanoids", "Proanthocyanins", "Color", "Hue", "Dilution", "Proline") +options$predictors.types <- rep("scale", length(options$predictors)) options$setSeed <- TRUE options$tableClusterMeans <- TRUE set.seed(1) @@ -228,6 +232,7 @@ options$validationMeasures <- TRUE options$linkage <- "median" options$modelOptimization <- "optimized" options$predictors <- c("Alcohol", "Malic", "Ash", "Alcalinity", "Magnesium", "Phenols", "Flavanoids", "Nonflavanoids", "Proanthocyanins", "Color", "Hue", "Dilution", "Proline") +options$predictors.types <- rep("scale", length(options$predictors)) options$setSeed <- TRUE options$tableClusterMeans <- TRUE set.seed(1) @@ -277,6 +282,7 @@ options$validationMeasures <- TRUE options$linkage <- "mcquitty" options$modelOptimization <- "optimized" options$predictors <- c("Alcohol", "Malic", "Ash", "Alcalinity", "Magnesium", "Phenols", "Flavanoids", "Nonflavanoids", "Proanthocyanins", "Color", "Hue", "Dilution", "Proline") +options$predictors.types <- rep("scale", length(options$predictors)) options$setSeed <- TRUE options$tableClusterMeans <- TRUE set.seed(1) diff --git a/tests/testthat/test-mlclusteringkmeans.R b/tests/testthat/test-mlclusteringkmeans.R index ce3e5ad9..1176b19a 100644 --- a/tests/testthat/test-mlclusteringkmeans.R +++ b/tests/testthat/test-mlclusteringkmeans.R @@ -3,6 +3,7 @@ context("Machine Learning K-Means Clustering") # Test fixed model ############################################################# options <- initMlOptions("mlClusteringKMeans") options$predictors <- list("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width") +options$predictors.types <- rep("scale", length(options$predictors)) options$modelOptimization <- "manual" options$predictionsColumn <- "" options$setSeed <- TRUE @@ -17,9 +18,10 @@ jaspTools::expect_equal_tables(table, # Test optimized model ######################################################### options <- initMlOptions("mlClusteringKMeans") -options$predictors <- 
list("Alcohol", "Malic", "Ash", "Alcalinity", "Magnesium", "Phenols", - "Flavanoids", "Nonflavanoids", "Proanthocyanins", "Color", +options$predictors <- list("Alcohol", "Malic", "Ash", "Alcalinity", "Magnesium", "Phenols", + "Flavanoids", "Nonflavanoids", "Proanthocyanins", "Color", "Hue", "Dilution", "Proline") +options$predictors.types <- rep("scale", length(options$predictors)) options$tableClusterInformationSilhouetteScore <- TRUE options$tableClusterInformationCentroids <- TRUE options$tableClusterInformationBetweenSumOfSquares <- TRUE @@ -102,9 +104,10 @@ test_that("t-SNE Cluster Plot matches", { context("Machine Learning K-Medians Clustering") options <- initMlOptions("mlClusteringKMeans") -options$predictors <- list("Alcohol", "Malic", "Ash", "Alcalinity", "Magnesium", "Phenols", - "Flavanoids", "Nonflavanoids", "Proanthocyanins", "Color", +options$predictors <- list("Alcohol", "Malic", "Ash", "Alcalinity", "Magnesium", "Phenols", + "Flavanoids", "Nonflavanoids", "Proanthocyanins", "Color", "Hue", "Dilution", "Proline") +options$predictors.types <- rep("scale", length(options$predictors)) options$tableClusterInformationSilhouetteScore <- TRUE options$tableClusterInformationCentroids <- TRUE options$tableClusterInformationBetweenSumOfSquares <- TRUE @@ -183,9 +186,10 @@ test_that("Elbow Method Plot matches", { context("Machine Learning K-Medoids Clustering") options <- initMlOptions("mlClusteringKMeans") -options$predictors <- list("Alcohol", "Malic", "Ash", "Alcalinity", "Magnesium", "Phenols", - "Flavanoids", "Nonflavanoids", "Proanthocyanins", "Color", +options$predictors <- list("Alcohol", "Malic", "Ash", "Alcalinity", "Magnesium", "Phenols", + "Flavanoids", "Nonflavanoids", "Proanthocyanins", "Color", "Hue", "Dilution", "Proline") +options$predictors.types <- rep("scale", length(options$predictors)) options$tableClusterInformationSilhouetteScore <- TRUE options$tableClusterInformationCentroids <- TRUE options$tableClusterInformationBetweenSumOfSquares 
<- TRUE diff --git a/tests/testthat/test-mlclusteringmodelbased.R b/tests/testthat/test-mlclusteringmodelbased.R index d4c6b209..6358b63c 100644 --- a/tests/testthat/test-mlclusteringmodelbased.R +++ b/tests/testthat/test-mlclusteringmodelbased.R @@ -3,6 +3,7 @@ context("Machine Learning K-Means Clustering") # Test fixed model ############################################################# options <- initMlOptions("mlClusteringModelBased") options$predictors <- list("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width") +options$predictors.types <- rep("scale", length(options$predictors)) options$modelOptimization <- "manual" options$predictionsColumn <- "" options$setSeed <- TRUE @@ -16,9 +17,10 @@ jaspTools::expect_equal_tables(table, # Test optimized model ######################################################### options <- initMlOptions("mlClusteringModelBased") -options$predictors <- list("Alcohol", "Malic", "Ash", "Alcalinity", "Magnesium", "Phenols", - "Flavanoids", "Nonflavanoids", "Proanthocyanins", "Color", +options$predictors <- list("Alcohol", "Malic", "Ash", "Alcalinity", "Magnesium", "Phenols", + "Flavanoids", "Nonflavanoids", "Proanthocyanins", "Color", "Hue", "Dilution", "Proline") +options$predictors.types <- rep("scale", length(options$predictors)) options$tableClusterInformationSilhouetteScore <- TRUE options$tableClusterInformationCentroids <- TRUE options$tableClusterInformationBetweenSumOfSquares <- TRUE diff --git a/tests/testthat/test-mlclusteringrandomforest.R b/tests/testthat/test-mlclusteringrandomforest.R index 22e25fc1..571775ec 100644 --- a/tests/testthat/test-mlclusteringrandomforest.R +++ b/tests/testthat/test-mlclusteringrandomforest.R @@ -3,6 +3,7 @@ context("Machine Learning Random Forest Clustering") # Test fixed model ############################################################# options <- initMlOptions("mlClusteringRandomForest") options$predictors <- list("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width") 
+options$predictors.types <- rep("scale", length(options$predictors)) options$modelOptimization <- "manual" options$predictionsColumn <- "" options$setSeed <- TRUE @@ -34,9 +35,10 @@ options$validationMeasures <- TRUE options$featureImportanceTable <- TRUE options$modelOptimization <- "optimized" options$tsneClusterPlot <- TRUE -options$predictors <- list("Alcohol", "Malic", "Ash", "Alcalinity", "Magnesium", "Phenols", - "Flavanoids", "Nonflavanoids", "Proanthocyanins", "Color", +options$predictors <- list("Alcohol", "Malic", "Ash", "Alcalinity", "Magnesium", "Phenols", + "Flavanoids", "Nonflavanoids", "Proanthocyanins", "Color", "Hue", "Dilution", "Proline") +options$predictors.types <- rep("scale", length(options$predictors)) options$setSeed <- TRUE options$tableClusterInformationBetweenSumOfSquares <- TRUE options$tableClusterInformationSilhouetteScore <- TRUE diff --git a/tests/testthat/test-mlregressionboosting.R b/tests/testthat/test-mlregressionboosting.R index c0777eee..3f49fb8c 100644 --- a/tests/testthat/test-mlregressionboosting.R +++ b/tests/testthat/test-mlregressionboosting.R @@ -3,7 +3,9 @@ context("Machine Learning Boosting Regression") # Test fixed model ############################################################# options <- initMlOptions("mlRegressionBoosting") options$target <- "Sepal.Length" +options$target.types <- "scale" options$predictors <- c("Sepal.Width", "Petal.Length", "Petal.Width") +options$predictors.types <- rep("scale", 3) options$modelOptimization <- "manual" options$holdoutData <- "holdoutManual" options$modelValid <- "validationManual" @@ -47,11 +49,16 @@ options$deviancePlot <- TRUE options$outOfBagImprovementPlot <- TRUE options$relativeInfluencePlot <- TRUE options$predictedPerformancePlot <- TRUE -options$predictors <- list("Malic", "Ash", "Alcalinity", "Magnesium", "Phenols", "Flavanoids", - "Nonflavanoids", "Proanthocyanins", "Color", "Hue", "Dilution", +options$predictors <- list("Malic", "Ash", "Alcalinity", 
"Magnesium", "Phenols", "Flavanoids", + "Nonflavanoids", "Proanthocyanins", "Color", "Hue", "Dilution", "Proline") +options$predictors.types <- rep("scale", length(options$predictors)) +options$predictors.types <- rep("scale", length(options$predictors)) +options$predictors.types <- rep("scale", length(options$predictors)) +options$predictors.types <- rep("scale", length(options$predictors)) options$setSeed <- TRUE options$target <- "Alcohol" +options$target.types <- "scale" options$testDataManual <- 0.2 options$testIndicatorColumn <- "" options$testSetIndicatorVariable <- "" diff --git a/tests/testthat/test-mlregressiondecisiontree.R b/tests/testthat/test-mlregressiondecisiontree.R index e8cf7ebc..4b5d18d3 100644 --- a/tests/testthat/test-mlregressiondecisiontree.R +++ b/tests/testthat/test-mlregressiondecisiontree.R @@ -3,7 +3,9 @@ context("Machine Learning Decision Tree Regression") # Test fixed model ############################################################# options <- initMlOptions("mlRegressionDecisionTree") options$target <- "Sepal.Length" +options$target.types <- "scale" options$predictors <- c("Sepal.Width", "Petal.Length", "Petal.Width") +options$predictors.types <- rep("scale", 3) options$modelOptimization <- "manual" options$holdoutData <- "holdoutManual" options$modelValid <- "validationManual" @@ -32,11 +34,13 @@ options$decisionTreePlot <- TRUE options$predictedPerformancePlot <- TRUE options$predictionsColumn <- "" options$predictors <- c("Sepal.Width", "Petal.Length", "Petal.Width", "Species") +options$predictors.types <- c("scale", "scale", "scale", "nominal") options$saveModel <- FALSE options$savePath <- "" options$setSeed <- TRUE options$featureImportanceTable <- TRUE options$target <- "Sepal.Length" +options$target.types <- "scale" options$testDataManual <- 0.2 options$testIndicatorColumn <- "" options$testSetIndicatorVariable <- "" diff --git a/tests/testthat/test-mlregressionknn.R b/tests/testthat/test-mlregressionknn.R index 
b82b8f7d..9abfd551 100644 --- a/tests/testthat/test-mlregressionknn.R +++ b/tests/testthat/test-mlregressionknn.R @@ -3,7 +3,9 @@ context("Machine Learning KNN Regression") # Test fixed model ############################################################# options <- initMlOptions("mlRegressionKnn") options$target <- "Sepal.Length" +options$target.types <- "scale" options$predictors <- c("Sepal.Width", "Petal.Length", "Petal.Width") +options$predictors.types <- rep("scale", 3) options$modelOptimization <- "manual" options$holdoutData <- "holdoutManual" options$modelValid <- "validationManual" @@ -44,11 +46,13 @@ options$modelValid <- "validationManual" options$noOfFolds <- 5 options$errorVsKPlot <- TRUE options$predictedPerformancePlot <- TRUE -options$predictors <- list("Malic", "Ash", "Alcalinity", "Magnesium", "Phenols", "Flavanoids", - "Nonflavanoids", "Proanthocyanins", "Color", "Hue", "Dilution", +options$predictors <- list("Malic", "Ash", "Alcalinity", "Magnesium", "Phenols", "Flavanoids", + "Nonflavanoids", "Proanthocyanins", "Color", "Hue", "Dilution", "Proline") +options$predictors.types <- rep("scale", length(options$predictors)) options$setSeed <- TRUE options$target <- "Alcohol" +options$target.types <- "scale" options$testDataManual <- 0.2 options$testIndicatorColumn <- "" options$testSetIndicatorVariable <- "" diff --git a/tests/testthat/test-mlregressionlinear.R b/tests/testthat/test-mlregressionlinear.R index 035a0bea..c3b7131f 100644 --- a/tests/testthat/test-mlregressionlinear.R +++ b/tests/testthat/test-mlregressionlinear.R @@ -10,8 +10,10 @@ options$holdoutData <- "holdoutManual" options$modelOptimization <- "manual" options$predictedPerformancePlot <- TRUE options$predictors <- list("Petal.Width", "Sepal.Length", "Sepal.Width", "Species") +options$predictors.types <- c("scale", "scale", "scale", "nominal") options$setSeed <- TRUE options$target <- "Petal.Length" +options$target.types <- "scale" options$testDataManual <- 0.2 
options$testIndicatorColumn <- "" options$testSetIndicatorVariable <- "" diff --git a/tests/testthat/test-mlregressionneuralnetwork.R b/tests/testthat/test-mlregressionneuralnetwork.R index a802b4a7..16981496 100644 --- a/tests/testthat/test-mlregressionneuralnetwork.R +++ b/tests/testthat/test-mlregressionneuralnetwork.R @@ -13,10 +13,12 @@ options$modelOptimization <- "manual" options$modelValid <- "validationManual" options$predictionsColumn <- "" options$predictors <- c("Sepal.Width", "Petal.Length", "Petal.Width") +options$predictors.types <- rep("scale", 3) options$saveModel <- FALSE options$savePath <- "" options$setSeed <- TRUE options$target <- "Sepal.Length" +options$target.types <- "scale" options$testDataManual <- 0.2 options$testIndicatorColumn <- "" options$testSetIndicatorVariable <- "" diff --git a/tests/testthat/test-mlregressionrandomforest.R b/tests/testthat/test-mlregressionrandomforest.R index 6dd5fcd5..75ad09e3 100644 --- a/tests/testthat/test-mlregressionrandomforest.R +++ b/tests/testthat/test-mlregressionrandomforest.R @@ -3,7 +3,9 @@ context("Machine Learning Random Forest Regression") # Test fixed model ############################################################# options <- initMlOptions("mlRegressionRandomForest") options$target <- "Sepal.Length" +options$target.types <- "scale" options$predictors <- c("Sepal.Width", "Petal.Length", "Petal.Width") +options$predictors.types <- rep("scale", 3) options$modelOptimization <- "manual" options$holdoutData <- "holdoutManual" options$modelValid <- "validationManual" @@ -46,12 +48,14 @@ options$accuracyDecreasePlot <- TRUE options$purityIncreasePlot <- TRUE options$treesVsModelErrorPlot <- TRUE options$predictedPerformancePlot <- TRUE -options$predictors <- list("Malic", "Ash", "Alcalinity", "Magnesium", "Phenols", "Flavanoids", - "Nonflavanoids", "Proanthocyanins", "Color", "Hue", "Dilution", +options$predictors <- list("Malic", "Ash", "Alcalinity", "Magnesium", "Phenols", "Flavanoids", + 
"Nonflavanoids", "Proanthocyanins", "Color", "Hue", "Dilution", "Proline") +options$predictors.types <- rep("scale", length(options$predictors)) options$setSeed <- TRUE options$featureImportanceTable <- TRUE options$target <- "Alcohol" +options$target.types <- "scale" options$testDataManual <- 0.2 options$testIndicatorColumn <- "" options$testSetIndicatorVariable <- "" diff --git a/tests/testthat/test-mlregressionregularized.R b/tests/testthat/test-mlregressionregularized.R index 33a12823..16971dc1 100644 --- a/tests/testthat/test-mlregressionregularized.R +++ b/tests/testthat/test-mlregressionregularized.R @@ -3,7 +3,9 @@ context("Machine Learning Regularized Linear Regression") # Test fixed model ############################################################# options <- initMlOptions("mlRegressionRegularized") options$target <- "Sepal.Length" +options$target.types <- "scale" options$predictors <- c("Sepal.Width", "Petal.Length", "Petal.Width") +options$predictors.types <- rep("scale", 3) options$modelOptimization <- "manual" options$holdoutData <- "holdoutManual" options$modelValid <- "validationManual" @@ -33,11 +35,13 @@ options$modelOptimization <- "optMin" options$modelValid <- "validationManual" options$noOfFolds <- 5 options$predictedPerformancePlot <- TRUE -options$predictors <- list("Malic", "Ash", "Alcalinity", "Magnesium", "Phenols", "Flavanoids", - "Nonflavanoids", "Proanthocyanins", "Color", "Hue", "Dilution", +options$predictors <- list("Malic", "Ash", "Alcalinity", "Magnesium", "Phenols", "Flavanoids", + "Nonflavanoids", "Proanthocyanins", "Color", "Hue", "Dilution", "Proline") +options$predictors.types <- rep("scale", length(options$predictors)) options$setSeed <- TRUE options$target <- "Alcohol" +options$target.types <- "scale" options$testDataManual <- 0.2 options$testIndicatorColumn <- "" options$testSetIndicatorVariable <- "" diff --git a/tests/testthat/test-mlregressionsvm.R b/tests/testthat/test-mlregressionsvm.R index 102ca70e..13929cb5 100644 
--- a/tests/testthat/test-mlregressionsvm.R +++ b/tests/testthat/test-mlregressionsvm.R @@ -3,7 +3,9 @@ context("Machine Learning SVM Regression") # Test fixed model ############################################################# options <- initMlOptions("mlRegressionSvm") options$target <- "Sepal.Length" +options$target.types <- "scale" options$predictors <- c("Sepal.Width", "Petal.Length", "Petal.Width") +options$predictors.types <- rep("scale", 3) options$modelOptimization <- "manual" options$holdoutData <- "holdoutManual" options$modelValid <- "validationManual" @@ -23,7 +25,9 @@ jaspTools::expect_equal_tables(table, # Test optimized model ######################################################### options <- initMlOptions("mlRegressionSvm") options$target <- "Sepal.Length" +options$target.types <- "scale" options$predictors <- c("Sepal.Width", "Petal.Length", "Petal.Width") +options$predictors.types <- rep("scale", 3) options$validationMeasures <- TRUE options$supportVectorsTable <- TRUE options$predictedPerformancePlot <- TRUE